In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.dummy import  DummyClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
X, y = load_iris(return_X_y = True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0)
print(f"X_train.shape = {X_train.shape}")
print(f"y_train.shape = {y_train.shape}")
print(f"X_test.shape = {X_test.shape}")
print(f"y_test.shape = {y_test.shape}")

X_train.shape = (112, 4)
y_train.shape = (112,)
X_test.shape = (38, 4)
y_test.shape = (38,)


In [3]:
def Evaluate(clf, X_train, X_test, y_train, y_test, X, y):
    print("train accuracy= {:.3%}".format(clf.score (X_train, y_train)))
    print("test accuracy= {:.3%}".format(clf.score (X_test, y_test)))
    print('Cross-validation (accuracy)', cross_val_score(clf, X, y, cv=5))
    y_predicted = clf.predict(X_test)
    print ('accuracy = {:.2}'.format(accuracy_score(y_test, y_predicted)))
    print ('recall = {:.2}'.format(recall_score(y_test, y_predicted, average='weighted')))
    print ('precision = {:.2}'.format(precision_score(y_test, y_predicted, average='weighted')))
    print ('f1_score  = {:.2}'.format(f1_score(y_test, y_predicted, average='weighted')))

In [4]:
#Dummy classifier
clf_dummy= DummyClassifier(strategy='stratified').fit(X_train, y_train)
Evaluate(clf_dummy, X_train, X_test, y_train, y_test, X, y)

train accuracy= 28.571%
test accuracy= 31.579%
Cross-validation (accuracy) [0.2        0.4        0.26666667 0.23333333 0.13333333]
accuracy = 0.39
recall = 0.39
precision = 0.45
f1_score  = 0.39


In [5]:
#KNN
clf_knn = KNeighborsClassifier(n_neighbors = 3).fit(X_train, y_train)
Evaluate(clf_knn, X_train, X_test, y_train, y_test, X, y)

train accuracy= 96.429%
test accuracy= 97.368%
Cross-validation (accuracy) [0.96666667 0.96666667 0.93333333 0.96666667 1.        ]
accuracy = 0.97
recall = 0.97
precision = 0.98
f1_score  = 0.97


In [6]:
#SVC
clf_svc = SVC(kernel="poly").fit(X_train, y_train)
Evaluate(clf_svc, X_train, X_test, y_train, y_test, X, y)

train accuracy= 99.107%
test accuracy= 97.368%
Cross-validation (accuracy) [0.96666667 1.         0.96666667 0.96666667 1.        ]
accuracy = 0.97
recall = 0.97
precision = 0.98
f1_score  = 0.97


In [7]:
#Logistic regression
clf_lr = LogisticRegression(max_iter = 300).fit(X_train, y_train)
Evaluate(clf_lr, X_train, X_test, y_train, y_test, X, y)

train accuracy= 98.214%
test accuracy= 97.368%
Cross-validation (accuracy) [0.96666667 1.         0.93333333 0.96666667 1.        ]
accuracy = 0.97
recall = 0.97
precision = 0.98
f1_score  = 0.97


In [8]:
#XGBoost
clf_xgb = XGBClassifier().fit(X_train, y_train)
Evaluate(clf_xgb, X_train, X_test, y_train, y_test, X, y)

train accuracy= 100.000%
test accuracy= 97.368%
Cross-validation (accuracy) [0.96666667 0.96666667 0.93333333 0.93333333 1.        ]
accuracy = 0.97
recall = 0.97
precision = 0.98
f1_score  = 0.97
