In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import RandomForestClassifier

from sklearn import svm
from sklearn.svm import LinearSVC

In [None]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
# Each entry pairs the display label with its (unfitted) estimator, so a label
# can never drift out of step with the model it describes. The two parallel
# lists used by the rest of the notebook are derived from these pairs.
_labelled_models = [
    ("Logistic Regression", LogisticRegression(C=10000)),
    ("Naive Bayes", GaussianNB()),

    # k-nearest neighbours with k = 3, 5, 7
    ("Nearest Neighbors 3", KNeighborsClassifier(3)),
    ("Nearest Neighbors 5", KNeighborsClassifier(5)),
    ("Nearest Neighbors 7", KNeighborsClassifier(7)),

    ("Linear Discriminant Analysis", LinearDiscriminantAnalysis(solver="svd")),
    ("Quadratic Discriminant Analysis", QuadraticDiscriminantAnalysis()),

    # Tree-based models; random_state is pinned for reproducible fits
    ("Decision Tree 2", DecisionTreeClassifier(max_depth=2, random_state=123)),
    ("Decision Tree Max", DecisionTreeClassifier(random_state=123)),
    ("Random Forest", RandomForestClassifier(n_estimators=25, random_state=123)),

    # Support vector machines with different kernels
    ("SVM linear", svm.SVC(kernel='linear')),
    ("SVM RBF kernel", svm.SVC(kernel='rbf', gamma=0.7)),
    ("SVM Poly", svm.SVC(kernel='poly', C=10, gamma=1)),
    ("SVM Sigmoid", svm.SVC(kernel='sigmoid', C=100, gamma=0.01)),
    ('LinearSVC ', LinearSVC(C=100, max_iter=30000)),
]

names = [label for label, _ in _labelled_models]
classifiers = [model for _, model in _labelled_models]

In [None]:
import pandas as pd
import numpy as np 

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap, BoundaryNorm
from sklearn import preprocessing

# Load the Iris dataset as a Bunch; with as_frame=True the later cells can read
# the features and the 'target' column from the pandas DataFrame `iris.frame`.
iris = load_iris(as_frame=True)

In [None]:
# The two features used for every 2-D plot and model in the multiclass part.
x_label = 'sepal length (cm)'
y_label = 'petal length (cm)'
feature_columns = [x_label, y_label]

y = iris.frame['target']
target_names = iris.target_names

# Rescale both features with MinMaxScaler and keep the result as a DataFrame
# with the original row index and column labels.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed in a later cell, so test rows influence the
# scaling parameters - confirm this is acceptable for this demo.
scaler = preprocessing.MinMaxScaler()
scaled_features = scaler.fit_transform(iris.frame[feature_columns])
X = pd.DataFrame(scaled_features, index=iris.frame.index, columns=feature_columns)

In [None]:
# Stratify on y so that both splits keep the same class proportions;
# 30% of the rows are held out for testing and the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
    stratify=y,
)

# Per-class row counts of each split (index = class id).
print(f"Количество строк в y_train по классам: {np.bincount(y_train)}")
print(f"Количество строк в y_test по классам: {np.bincount(y_test)}")

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Fit every classifier on the training split and print its per-class
# precision / recall / F1 report for the held-out test split.
for name, clf in zip(names, classifiers):
    # .values passes plain ndarrays, so the estimators are fitted without
    # feature names and can later predict on the raw mesh-grid array.
    clf.fit(X_train.values, y_train)
    clf_pred = clf.predict(X_test.values)

    # The report already includes accuracy, so no separate score()/
    # accuracy_score() pass over the test set is needed.
    print(name)
    print(classification_report(y_test, clf_pred, target_names=target_names))

In [None]:
#%matplotlib notebook
%matplotlib inline
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

# Fit each classifier and draw its test-set confusion matrix in its own figure.
for name, clf in zip(names, classifiers):
    clf.fit(X_train.values, y_train)

    # from_estimator predicts on X_test itself, so no separate predict() /
    # confusion_matrix() call is required. Class ids are mapped to their
    # species names so the tick labels match the classification reports.
    disp = ConfusionMatrixDisplay.from_estimator(
        clf,
        X_test.values,
        y_test,
        display_labels=target_names[clf.classes_],
        cmap=plt.cm.Blues,
    )
    disp.ax_.set_title(name + " Confusion Matrix")

In [None]:
# Spacing between neighbouring points of the decision-surface mesh.
h = .01

# Plot window: the observed range of each feature, padded by 0.1 on both sides.
x_min, x_max = X[x_label].min() - 0.1, X[x_label].max() + 0.1
y_min, y_max = X[y_label].min() - 0.1, X[y_label].max() + 0.1

# Dense coordinate grids covering the plot window.
x_ticks = np.arange(x_min, x_max, h)
y_ticks = np.arange(y_min, y_max, h)
xx, yy = np.meshgrid(x_ticks, y_ticks)

# Colour maps: the light palette fills decision regions and marks test points,
# the bold palette marks training points (one colour per class).
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
cmap_light = ListedColormap(['blue', 'orange', 'green'])
cmap_bold = ListedColormap(['darkblue', 'darkorange', 'darkgreen'])

In [None]:
#%matplotlib notebook
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import time

# One panel for the raw data plus one panel per classifier, two per row.
# The row count is derived from the model list so adding a classifier does not
# overflow the subplot grid.
n_rows = (len(classifiers) + 2) // 2
fig = plt.figure(figsize=(10, 25))

# Panel 1: the dataset itself. Training points use the bold palette, test
# points the light palette with partial transparency.
ax = plt.subplot(n_rows, 2, 1)
ax.scatter(x=X_train[x_label], y=X_train[y_label], c=y_train, cmap=cmap_bold, alpha=1.0, edgecolor="black")
ax.scatter(x=X_test[x_label], y=X_test[y_label], c=y_test, cmap=cmap_light, alpha=0.6, edgecolor="black")
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_title("Iris dataset")

# Mesh coordinates as an (n_points, 2) array; loop-invariant, so built once.
grid_points = np.c_[xx.ravel(), yy.ravel()]

# One record per classifier; the DataFrame is built once after the loop
# instead of being grown row by row.
results = []

# Remaining panels: decision regions of each classifier.
for i, (name, clf) in enumerate(zip(names, classifiers), start=2):
    ax = plt.subplot(n_rows, 2, i)

    # perf_counter is the clock intended for measuring short intervals.
    # The timed section covers fitting, scoring on the test split and
    # predicting the class of every mesh point.
    t0 = time.perf_counter()
    clf.fit(X_train.values, y_train)
    score = clf.score(X_test.values, y_test)
    Z = clf.predict(grid_points)
    t1 = time.perf_counter()

    # Put the predicted classes into a colour plot of the decision regions.
    Z = Z.reshape(xx.shape)
    ax.contourf(xx, yy, Z, cmap=cmap_light, alpha=.8)

    # Overlay the training and test points.
    ax.scatter(x=X_train[x_label], y=X_train[y_label], c=y_train, cmap=cmap_bold, alpha=1.0, edgecolor="black")
    ax.scatter(x=X_test[x_label], y=X_test[y_label], c=y_test, cmap=cmap_light, alpha=0.6, edgecolor="black")

    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_title(name)

    dt = (t1 - t0) * 1000  # elapsed time in milliseconds
    results.append({'Classificator': name, 'Score': score, 'Time': dt})

    # Annotate the lower-right corner with the test accuracy and the timing.
    ax.text(xx.max() - .1, yy.min() + .1, 'Score= %.2f %.3f ms' % (score, dt),
            size=15, horizontalalignment='right', color='w')

res_df = pd.DataFrame(results, columns=['Classificator', 'Score', 'Time'])

plt.subplots_adjust(wspace=0.2, hspace=0.3)
plt.show()

In [None]:
# Show the per-classifier summary table (name, test score, elapsed time in ms)
# collected while drawing the decision-boundary panels.
res_df

In [None]:
# Start again from an unscaled copy of the dataset for the ROC experiment.
iris = load_iris(as_frame=True)

# This part pairs sepal length with petal *width* (not petal length).
x_label = 'sepal length (cm)'
y_label = 'petal width (cm)'

X = iris.frame.loc[:, [x_label, y_label]]
multiclass_target = iris.frame['target']

# Prepare for binary classification: class 1 is merged into class 0, class 2
# is left unchanged, so the remaining labels are {0, 2}.
y = np.where(multiclass_target == 1, 0, multiclass_target)

# Only 10% of the rows are used for training here (test_size=0.9);
# the split is stratified on the binary labels and seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.9,
    random_state=111,
    stratify=y,
)

In [None]:
from sklearn.metrics import RocCurveDisplay 
# Draw the ROC curve of every selected classifier on one shared axis.
fig, ax_roc = plt.subplots(figsize=(7, 7))

# Half-open index range [r1, r2) of the models to plot. The default covers
# the whole list; narrow it (e.g. r1, r2 = 0, 5) to declutter the figure.
r1, r2 = 0, len(classifiers)

for name, clf in zip(names[r1:r2], classifiers[r1:r2]):
    # Refit on the current X_train / y_train before evaluating on the test split.
    clf.fit(X_train, y_train)
    RocCurveDisplay.from_estimator(clf, X_test, y_test, ax=ax_roc, name=name)

ax_roc.set_title("Receiver Operating Characteristic (ROC) curves")
ax_roc.grid(linestyle="--")
