In [76]:
import math
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [77]:
X, y = load_wine(return_X_y=True)
X.shape, y.shape

((178, 13), (178,))

In [78]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, shuffle=True, test_size=0.3)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((124, 13), (124,), (54, 13), (54,))

In [79]:
def compute_score(clf, params, X_train, y_train, X_test, y_test, cv):
    clf = GridSearchCV(clf, params, cv=cv)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f"{accuracy_score(y_test, y_pred):.2f}, {clf.best_estimator_}")

In [80]:
svm_params = {"kernel": ('linear', 'rbf'), 'C': [1e-1, 1e-2, 1, 10]}
knn_params = {"n_neighbors": range(1, 10)}
tree_params = {"max_depth": range(1, 10), "criterion": ("gini", "entropy", "log_loss")}

svm = SVC()
knn = KNeighborsClassifier()
tree = DecisionTreeClassifier()
kf = KFold(n_splits=5)

In [81]:
compute_score(svm, svm_params, X_train, y_train, X_test, y_test, cv=kf)
compute_score(knn, knn_params, X_train, y_train, X_test, y_test, cv=kf)
compute_score(tree, tree_params, X_train, y_train, X_test, y_test, cv=kf)

1.00, SVC(C=0.1, kernel='linear')
0.80, KNeighborsClassifier(n_neighbors=1)
0.96, DecisionTreeClassifier(max_depth=6)


In [82]:
# interval 95%
def estimator(score: float, nb_example: int) -> float:
    return 1.96 *math.sqrt(score * (1 - score) * nb_example)

In [83]:
print(f"SVM: {estimator(score=0.98, nb_example=X_test.shape[0])}, \n"
      f"KNN: {estimator(score=0.8, nb_example=X_test.shape[0])}, \n"
      f"Tree: {estimator(score=0.96, nb_example=X_test.shape[0])} \n")

SVM: 2.016419956259113, 
KNN: 5.761199875026034, 
Tree: 2.8224000000000014 

