In [None]:
import numpy
import matplotlib.pyplot as plt
import pandas
import seaborn as sns

In [None]:
import matplotlib.colors as mcolors

# Values of matplotlib's TABLEAU_COLORS mapping as a plain list, so that a
# colour can be picked by integer position.
tableau_colors = [*mcolors.TABLEAU_COLORS.values()]

##### Data exploration

In [None]:
# Load the iris dataset bundled with scikit-learn and list the keys of the
# returned container (later cells read data, target, feature_names,
# target_names and DESCR from it).
from sklearn import datasets
iris = datasets.load_iris()
print(iris.keys())

In [None]:
# Feature names first, then the target class names, one per line.
print(iris.feature_names, iris.target_names, sep="\n")

In [None]:
# Print the description text shipped with the dataset.
print(iris["DESCR"])

In [None]:
# Assemble one frame: a column per feature, with the class label appended
# as the final 'species' column.
df = pandas.DataFrame(iris.data, columns=iris.feature_names).assign(species=iris.target)
df

In [None]:
# Keep only the first two feature columns (the sepal measurements) plus the
# label. Selecting the columns to keep, instead of dropping the petal
# columns, yields the same frame in the same column order but lets this cell
# be re-run without a KeyError once the petal columns are already gone.
df = df[["sepal length (cm)", "sepal width (cm)", "species"]]
df

In [None]:
# Restrict the rows to the two target classes labelled 0 and 2, turning the
# task into a binary problem. The original row index is kept.
df = df.loc[lambda frame: frame["species"].isin([0, 2])]
df

In [None]:
# Scatter sepal width against sepal length, one colour and legend entry per
# species. groupby(sort=False) yields the species in order of first
# appearance (as unique() did) while filtering each group only once.
for s, group in df.groupby('species', sort=False):
    plt.scatter(group["sepal length (cm)"], group["sepal width (cm)"], label=f'species {s}')
# Label the axes so the figure can be read on its own.
plt.xlabel("sepal length (cm)")
plt.ylabel("sepal width (cm)")
plt.legend()
plt.show()

In [None]:
# The same scatter drawn with seaborn: lmplot with the regression fit
# switched off, coloured by species. The trailing semicolon hides the repr.
sns.lmplot(
    data=df,
    x="sepal length (cm)",
    y="sepal width (cm)",
    hue='species',
    fit_reg=False,
    legend=True,
);

##### Data preprocessing

In [None]:
# Feature matrix X: the first two columns of the frame.
# Label vector y: the third column.
X, y = numpy.array(df.iloc[:, :2]), numpy.array(df.iloc[:, 2])

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 25% of the samples as a test set; the fixed random_state keeps
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Feature scaling: the scaler is fitted on the training split only and then
# applied, unchanged, to both the training and the test split.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

##### Logistic regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the training split. fit() returns
# the estimator itself, so the bare name on the last line displays the same
# object the original fit() call displayed.
logistic_clf = LogisticRegression(random_state=0).fit(X_train, y_train)
logistic_clf

In [None]:
# Predict class labels for the test split with the logistic-regression model.
y_pred = logistic_clf.predict(X_test)
# Debug aid (disabled): prints predicted and true labels side by side.
# print(numpy.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

In [None]:
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    confusion_matrix,
)

# Compare the current predictions with the true test labels: build the
# confusion matrix and report the accuracy to two decimals.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:.2f}".format(accuracy_score(y_true=y_test, y_pred=y_pred)))


In [None]:
# Plot the confusion matrix with the real class values on the axes. Without
# display_labels the ticks default to 0..n_classes-1, which would show
# species 2 as "1". union1d gives the sorted labels present in y_test or
# y_pred, i.e. the same label set confusion_matrix inferred when building cm.
# The trailing semicolon hides the display object's repr.
ConfusionMatrixDisplay(cm, display_labels=numpy.union1d(y_test, y_pred)).plot();

##### Support Vector Machine

In [None]:
from sklearn.svm import SVC

# Fit a support vector classifier with a linear kernel on the training
# split. fit() returns the estimator itself, so the bare name on the last
# line displays the same object the original fit() call displayed.
svc = SVC(kernel='linear', random_state=0).fit(X_train, y_train)
svc

In [None]:
# Predict class labels for the test split with the SVC model. This
# overwrites y_pred from the logistic-regression section.
y_pred = svc.predict(X_test)
# Debug aid (disabled): prints predicted and true labels side by side.
# print(numpy.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Compare the current predictions with the true test labels: build the
# confusion matrix and report the accuracy to two decimals.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:.2f}".format(accuracy_score(y_true=y_test, y_pred=y_pred)))


In [None]:
# Plot the confusion matrix with the real class values on the axes. Without
# display_labels the ticks default to 0..n_classes-1, which would show
# species 2 as "1". union1d gives the sorted labels present in y_test or
# y_pred, i.e. the same label set confusion_matrix inferred when building cm.
# The trailing semicolon hides the display object's repr.
ConfusionMatrixDisplay(cm, display_labels=numpy.union1d(y_test, y_pred)).plot();

##### Compare classifiers

In [None]:
# Each linear model's decision boundary is written as the line
# x2 = m*x1 + c, with gradient m = -w1/w2 and intercept c = -b/w2, where
# (w1, w2) are the fitted coefficients and b the fitted intercept.

# Logistic regression -> gradient m1, intercept c1.
b = logistic_clf.intercept_[0]
w1, w2 = numpy.transpose(logistic_clf.coef_)
m1, c1 = -w1 / w2, -b / w2

# Linear SVC -> gradient m2, intercept c2 (b, w1 and w2 are reused).
b = svc.intercept_[0]
w1, w2 = numpy.transpose(svc.coef_)
m2, c2 = -w1 / w2, -b / w2

In [None]:
# Training points in the scaled feature space, coloured by class: the label
# value is used directly as an index into the Tableau colour list.
colors = [tableau_colors[i] for i in y_train]
plt.scatter(X_train[:,0], X_train[:,1], c = colors)

# Draw both decision boundaries across the observed range of the first feature.
x = numpy.linspace(X_train[:,0].min(), X_train[:,0].max(), 100)
y1 = m1*x+c1
y2 = m2*x+c2
plt.plot(x, y1, label = 'Logistic')
plt.plot(x, y2, label = 'SVC')

# Label the figure so it can be read on its own; the axes show the features
# after StandardScaler, not the raw centimetre values.
plt.xlabel("sepal length (standardized)")
plt.ylabel("sepal width (standardized)")
plt.title("Decision boundaries on the training set")
plt.legend()
# Render explicitly, as the earlier pyplot cell does, instead of echoing the
# Legend object's repr as the cell output.
plt.show()