# wine

In [1]:
from sklearn.datasets import load_wine
wine = load_wine()
X, y = wine.data, wine.target

In [2]:
print(X[1])

[1.32e+01 1.78e+00 2.14e+00 1.12e+01 1.00e+02 2.65e+00 2.76e+00 2.60e-01
 1.28e+00 4.38e+00 1.05e+00 3.40e+00 1.05e+03]


In [3]:
print(y)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


In [4]:
results = {}

In [5]:
wine.data.shape

(178, 13)

In [6]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import numpy as np

# DecisionTreeClassifier

In [7]:
from sklearn.tree import DecisionTreeClassifier

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

param_grid = {'criterion':['gini','entropy'], 'max_depth':[3, 7, 9]}
clf_tree = DecisionTreeClassifier(random_state=0)
clf_tree.fit(X, y)
clf_tree = GridSearchCV(clf_tree, param_grid, scoring=('f1_weighted'), cv=3)
clf_tree.fit(X_train, y_train)


scores = cross_val_score(clf_tree, X, y, cv=3, scoring='f1_weighted')
scores

array([0.7777727 , 0.83042814, 0.98299476])

In [8]:
results["tree"] = np.mean(scores)

In [12]:
clf_tree.predict([[2., 2., 3., 3.,2.,2., 2., 3., 3.,2.,2., 2., 3.]])

array([1])

# k-NN

In [13]:
from sklearn.neighbors import KNeighborsClassifier


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

param_grid = {'n_neighbors':[1, 3, 5, 7], 'p':[1, 2]}
clf_knn = KNeighborsClassifier()
clf_knn.fit(X, y)
clf_knn = GridSearchCV(clf_knn, param_grid, scoring=('f1_weighted'), cv=3)
clf_knn.fit(X_train, y_train)

scores = cross_val_score(clf_knn, X, y, cv=3, scoring='f1_weighted')
scores

array([0.71111111, 0.61384352, 0.77457627])

In [14]:
results["knn"] = np.mean(scores)

In [16]:
clf_knn.predict([[2., 2., 3., 3.,2.,2., 2., 3., 3.,2.,2., 2., 3.]])

array([1])

# GaussianNB

In [38]:
from sklearn.naive_bayes import GaussianNB


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

clf_nb = GaussianNB()
clf_nb.fit(X, y)
clf_nb = GridSearchCV(clf_nb, param_grid, scoring=('f1_weighted'), cv=3)
clf_nb.fit(X_train, y_train)

scores = cross_val_score(clf_nb, X, y, cv=3, scoring='f1_weighted')
scores

TypeError: 'int' object is not iterable

In [19]:
results["nb"] = np.mean(scores)

In [20]:
clf_nb.predict([[2., 2., 3., 3.,2.,2., 2., 3., 3.,2.,2., 2., 3.]])

array([1])

# SVC

In [21]:
from sklearn.svm import SVC


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

param_grid = {'kernel':['linear', 'poly', 'rbf', 'sigmoid'], 'C':[1, 3, 7]
}
clf_svc = SVC()
clf_svc.fit(X, y)
clf_svc = GridSearchCV(clf_svc, param_grid, scoring=('f1_weighted'), cv=3)
clf_svc.fit(X_train, y_train)


scores = cross_val_score(clf_svc, X, y, cv=3, scoring='f1_weighted')
scores

array([0.82981366, 0.94905822, 1.        ])

In [22]:
results["svm"] = np.mean(scores)

In [23]:
clf_svc.predict([[2., 2., 3., 3.,2.,2., 2., 3., 3.,2.,2., 2., 3.]])

array([1])

# RNA

In [24]:
from sklearn.neural_network import MLPClassifier

clf_mlp = MLPClassifier(hidden_layer_sizes=(50, 30, 10), max_iter=5000, random_state=0)
clf_mlp.fit(X, y)

scores = cross_val_score(clf_mlp, X, y, cv=3, scoring='f1_weighted')
scores

array([0.31062271, 0.36916868, 0.40886571])

In [25]:
results["mlp"] = np.mean(scores)

In [27]:
clf_mlp.predict([[2., 2., 3., 3.,2.,2., 2., 3., 3.,2.,2., 2., 3.]])

array([0])

In [28]:
for key, value in results.items():
    print(key, value)

tree 0.863731867759593
knn 0.6998436347104627
nb 0.9604749386595953
svm 0.9262906273502898
mlp 0.36288570186875274
