In [1]:
import numpy as np
import pandas as pd

In [2]:
from IPython.display import display, HTML
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_validate,cross_val_score, train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

## Data Set

<p>
<i><b>Sursă dataset:</b></i>: A fost folosit data setul numit Seeds care se poate descarca de la adresa <a>http://archive.ics.uci.edu/ml/datasets/seeds</a>
</p>
<p>
<i><b>Articol relevant:</b></i>: M. Charytanowicz, J. Niewczas, P. Kulczycki, P.A. Kowalski, S. Lukasik, S. Zak, 'A Complete Gradient Clustering Algorithm for Features Analysis of X-ray Images', in: Information Technologies in Biomedicine, Ewa Pietka, Jacek Kawa (eds.), Springer-Verlag, Berlin-Heidelberg, 2010, pp. 15-24.
</p>
<p>
<i><b>Scurta decriere:</b></i> Baza de date curenta a fost alcatuita prin scanarea imaginilor unor boabe de grau din trei specii diferite. Cuprinde 209 intrari si 7 atribute obtinute prin cuantificarea parametrilor imaginilor boabelor de grau
</p>
<p>
 <i><b>Parametrii:</b></i>
 <ol>       
    <li>Area</li>
    <li>Perimeter </li>
    <li>Compactness </li>
    <li>Length</li>
    <li>Width </li>
    <li>Asymmetry </li>
    <li>Groove</li> 
    <li>Class</li>
 </ol>
</p>

<img src="./Images/Seeds_Atributes.png" alt="Atributes for image" style="width:600px;height:500px;">
<img src="./Images/Seeds_X_Ray.png" alt="X_Ray image" style="width:606px;height:337px;">
<p>Imaginile au fost preluate din articolul sursa citat mai sus </p>

## Split data

In [5]:
header = ['Area', 'Perimeter', 'Compactness', 'Length', 'Width', 'Asymmetry', 'Groove', 'Class']
data_seeds = pd.read_csv("./Dataset/seeds_dataset.txt", names=header,sep='\t')
display(HTML("<h3><b>Seeds Dataset"))
X = data_seeds.values[:, :7]
y = data_seeds.values[:, -1]
#display(HTML(data_seeds.to_html()))
#print(X)
#print(y)
display(data_seeds)

if np.any(np.isnan(data_seeds))==False:print("Setul de date NU contine valori lipsa")
else: print("Setul de date contine valori lipsa")
    
if np.any(np.isfinite(data_seeds)==True):print("Setul de date NU contine valori infinite")
else: print("Setul de date contine valori infinite")  
    


Unnamed: 0,Area,Perimeter,Compactness,Length,Width,Asymmetry,Groove,Class
0,15.26,14.84,0.8710,5.763,3.312,2.221,5.220,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.9050,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1
...,...,...,...,...,...,...,...,...
205,12.19,13.20,0.8783,5.137,2.981,3.631,4.870,3
206,11.23,12.88,0.8511,5.140,2.795,4.325,5.003,3
207,13.20,13.66,0.8883,5.236,3.232,8.315,5.056,3
208,11.84,13.21,0.8521,5.175,2.836,3.598,5.044,3


Setul de date NU contine valori lipsa
Setul de date NU contine valori infinite


## Print Function

In [10]:
def print_function(data_set:dict):
    df_print = pd.DataFrame({"Test accuracy for each fold":data_set['test_accuracy'], 
                    "Train accuracy for each fold": data_set['train_accuracy'], 
                    "Average test accuracy %": round(data_set['test_accuracy'].mean() * 100, 4),
                    "Average train accuracy %": round(data_set['train_accuracy'].mean() * 100, 4),
                    "Test F1 score for each fold": data_set['test_f1_macro'],
                    "Train F1 score for each fold": data_set['train_f1_macro'],
                    "Average test F1 score %": round(data_set['test_f1_macro'].mean() * 100, 4),
                    "Average train F1 score %":round(data_set['train_f1_macro'].mean() * 100, 4)
                   })
    display(HTML(df_print.to_html())) 
     



## K-Nearest Neighbors Classifier 

In [17]:


# hiperparametri
knn_neighbors = 5
knn_minkowski_p = 3

# scalare date
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# implementare KNN
model = KNeighborsClassifier(n_neighbors=knn_neighbors, p=knn_minkowski_p)
model_cv_stats = cross_validate(model, X_scaled, y, cv=5, scoring=('accuracy', 'f1_macro'), return_train_score=True) 

# statistici
display(HTML(f"<h4>5-fold cross validation for {knn_neighbors}-nearest neighbors classification:</h4>"))
print_function(model_cv_stats)  


Unnamed: 0,Test accuracy for each fold,Train accuracy for each fold,Average test accuracy %,Average train accuracy %,Test F1 score for each fold,Train F1 score for each fold,Average test F1 score %,Average train F1 score %
0,0.97619,0.940476,92.8571,95.5952,0.97616,0.940248,92.89,95.5886
1,0.952381,0.946429,92.8571,95.5952,0.952137,0.94642,92.89,95.5886
2,0.952381,0.946429,92.8571,95.5952,0.95137,0.946319,92.89,95.5886
3,0.952381,0.958333,92.8571,95.5952,0.952381,0.958349,92.89,95.5886
4,0.809524,0.988095,92.8571,95.5952,0.812454,0.988095,92.89,95.5886


## Decision Tree Classifier

In [8]:


# hiperparametri
dt_criterion = 'gini'#default value
dt_splitter = 'best' #intrebarea care reduce cel mai mult incertitudinea
#gini imputity-cantitatea de incertitudine pe un singur nod, cat de amestecate sunt clasificarile din frunze dupa intrebarea din nod
 
# implementare Decision Tree
model = DecisionTreeClassifier(criterion=dt_criterion, splitter=dt_splitter)
model_dc_stats = cross_validate(model, X, y, cv=5, scoring=('accuracy', 'f1_macro'), return_train_score=True)

# statistici
# afisez cateva medii, sa vad daca sunt diferente in functie de clasa

print(data_seeds.groupby('Class')['Area'].mean()),
print(data_seeds.groupby('Class')['Perimeter'].mean()),
print(data_seeds.groupby('Class')['Asymmetry'].mean())


display(HTML(f"<h4>5-fold cross validation for Decision Trees classification:</h4>"))
print_function(model_dc_stats)




Class
1    14.334429
2    18.334286
3    11.873857
Name: Area, dtype: float64
Class
1    14.294286
2    16.135714
3    13.247857
Name: Perimeter, dtype: float64
Class
1    2.667403
2    3.644800
3    4.788400
Name: Asymmetry, dtype: float64


Unnamed: 0,Test accuracy for each fold,Train accuracy for each fold,Average test accuracy %,Average train accuracy %,Test F1 score for each fold,Train F1 score for each fold,Average test F1 score %,Average train F1 score %
0,0.952381,1.0,89.0476,100.0,0.95137,1.0,88.8588,100.0
1,0.880952,1.0,89.0476,100.0,0.873056,1.0,88.8588,100.0
2,0.833333,1.0,89.0476,100.0,0.831478,1.0,88.8588,100.0
3,0.904762,1.0,89.0476,100.0,0.905553,1.0,88.8588,100.0
4,0.880952,1.0,89.0476,100.0,0.881481,1.0,88.8588,100.0


## Random Forest Classifier

In [9]:
# hiperparametri
rfc_n_estimators = 150
rfc_criterion = 'gini'

# implementare Random Forest
model = RandomForestClassifier(n_estimators=rfc_n_estimators, criterion=rfc_criterion)
model_rfc_stats = cross_validate(model, X, y, cv=5, scoring=('accuracy', 'f1_macro'), return_train_score=True)

display(HTML(f"<h4>5-fold cross validation for Random Forest classification</h4>"))
print_function(model_rfc_stats) 


Unnamed: 0,Test accuracy for each fold,Train accuracy for each fold,Average test accuracy %,Average train accuracy %,Test F1 score for each fold,Train F1 score for each fold,Average test F1 score %,Average train F1 score %
0,0.904762,1.0,89.0476,100.0,0.904061,1.0,89.2476,100.0
1,0.928571,1.0,89.0476,100.0,0.927742,1.0,89.2476,100.0
2,0.97619,1.0,89.0476,100.0,0.97616,1.0,89.2476,100.0
3,0.97619,1.0,89.0476,100.0,0.97616,1.0,89.2476,100.0
4,0.666667,1.0,89.0476,100.0,0.678255,1.0,89.2476,100.0


##  Multilayer Perceptron Classifier



In [26]:
# hiperparametri
mlp_solver = 'adam'
mlp_activation = 'logistic'
mlp_alpha=1e-3
mlp_hidden_layer_sizes = (50,50)
max_iter=10000

# implementare MLP
model = MLPClassifier(solver=mlp_solver, activation=mlp_activation, alpha=mlp_alpha, hidden_layer_sizes=mlp_hidden_layer_sizes, max_iter=max_iter, random_state=0)
model_mpc_stats = cross_validate(model, X, y, cv=5, scoring=('accuracy', 'f1_macro'), return_train_score=True)
print_function(model_mpc_stats)


Unnamed: 0,Test accuracy for each fold,Train accuracy for each fold,Average test accuracy %,Average train accuracy %,Test F1 score for each fold,Train F1 score for each fold,Average test F1 score %,Average train F1 score %
0,0.97619,0.964286,91.4286,97.1429,0.97616,0.964286,91.5605,97.1427
1,0.952381,0.97619,91.4286,97.1429,0.952137,0.976292,91.5605,97.1427
2,0.952381,0.97619,91.4286,97.1429,0.952351,0.97619,91.5605,97.1427
3,0.952381,0.964286,91.4286,97.1429,0.952381,0.964286,91.5605,97.1427
4,0.738095,0.97619,91.4286,97.1429,0.744997,0.976079,91.5605,97.1427


## Gaussian Naive Bayes Classifier

In [11]:

# implementare GNB
model = GaussianNB()
model_gnb_stats = cross_validate(model, X, y, cv=5, scoring=('accuracy', 'f1_macro'), return_train_score=True)

# statistici
display(HTML(f"<h4>5-fold cross validation for Gaussian NB classification</h4>"))
print_function(model_gnb_stats)


Unnamed: 0,Test accuracy for each fold,Train accuracy for each fold,Average test accuracy %,Average train accuracy %,Test F1 score for each fold,Train F1 score for each fold,Average test F1 score %,Average train F1 score %
0,0.880952,0.904762,88.5714,91.1905,0.880307,0.904598,88.7466,91.1489
1,0.928571,0.89881,88.5714,91.1905,0.927742,0.898591,88.7466,91.1489
2,0.952381,0.910714,88.5714,91.1905,0.952137,0.910378,88.7466,91.1489
3,0.97619,0.904762,88.5714,91.1905,0.97616,0.903406,88.7466,91.1489
4,0.690476,0.940476,88.5714,91.1905,0.700985,0.940474,88.7466,91.1489


# Optimizarea Hiperparametrilor

## Split Data

In [3]:
header = ['Area', 'Perimeter', 'Compactness', 'Length', 'Width', 'Asymmetry', 'Groove', 'Class']
data_seeds = pd.read_csv("./Dataset/seeds_dataset.txt", names=header,sep='\t')
X = data_seeds.values[:, :7]
y = data_seeds.values[:, -1]


## K-Nearest Neighbors Classifier 

In [19]:

pipe = Pipeline([('scaler', MinMaxScaler()), ('knn', KNeighborsClassifier())])
parameter_grid = {'knn__n_neighbors': list(range(1, 10)), 'knn__p': list(range(1, 5))}
strat_k_fold = StratifiedKFold(n_splits=5, shuffle=True)
grid_search =GridSearchCV(pipe, param_grid=parameter_grid, scoring='accuracy', cv=4, return_train_score=True)

scores = cross_val_score(grid_search, X, y, cv=strat_k_fold )
print("Scorurile rezultate in urma 5-fold cross validation",scores)
print("Media scorurilor",scores.mean())


grid_search.fit(X, y)
print("Cel mai bun set de parametrii...",grid_search.best_params_)

grid_search = pd.DataFrame(grid_search.cv_results_)
display(HTML(grid_search.to_html()))

Scorurile rezultate in urma 5-fold cross validation [0.92857143 0.88095238 0.97619048 0.95238095 0.92857143]
Media scorurilor 0.9333333333333333
Cel mai bun set de parametrii... {'knn__n_neighbors': 9, 'knn__p': 3}


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_knn__n_neighbors,param_knn__p,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,mean_train_score,std_train_score
0,0.0,0.0,0.0,0.0,1,1,"{'knn__n_neighbors': 1, 'knn__p': 1}",0.962264,0.981132,0.903846,0.711538,0.889695,0.106732,34,1.0,1.0,1.0,1.0,1.0,0.0
1,0.003906,0.006766,0.011719,0.006766,1,2,"{'knn__n_neighbors': 1, 'knn__p': 2}",0.962264,0.981132,0.923077,0.75,0.904118,0.091411,27,1.0,1.0,1.0,1.0,1.0,0.0
2,0.001869,0.003237,0.005653,0.006423,1,3,"{'knn__n_neighbors': 1, 'knn__p': 3}",0.962264,0.962264,0.923077,0.769231,0.904209,0.079555,25,1.0,1.0,1.0,1.0,1.0,0.0
3,0.001749,0.001089,0.010403,0.004443,1,4,"{'knn__n_neighbors': 1, 'knn__p': 4}",0.962264,0.943396,0.923077,0.769231,0.899492,0.076472,29,1.0,1.0,1.0,1.0,1.0,0.0
4,0.003907,0.006767,0.0,0.0,2,1,"{'knn__n_neighbors': 2, 'knn__p': 1}",0.924528,0.962264,0.961538,0.673077,0.880352,0.120639,36,0.968153,0.974522,0.968354,0.974684,0.971428,0.003176
5,0.0,0.0,0.007813,0.007813,2,2,"{'knn__n_neighbors': 2, 'knn__p': 2}",0.981132,0.962264,0.903846,0.692308,0.884888,0.114779,35,0.968153,0.974522,0.981013,0.974684,0.974593,0.004547
6,0.0,0.0,0.007812,0.007812,2,3,"{'knn__n_neighbors': 2, 'knn__p': 3}",0.981132,0.943396,0.903846,0.730769,0.889786,0.095789,33,0.949045,0.974522,0.981013,0.974684,0.969816,0.012275
7,0.0,0.0,0.007812,0.007812,2,4,"{'knn__n_neighbors': 2, 'knn__p': 4}",0.962264,0.943396,0.923077,0.75,0.894684,0.084675,31,0.955414,0.980892,0.987342,0.974684,0.974583,0.011938
8,0.0,0.0,0.003906,0.006766,3,1,"{'knn__n_neighbors': 3, 'knn__p': 1}",0.924528,0.943396,0.961538,0.769231,0.899673,0.07644,28,0.961783,0.955414,0.974684,0.968354,0.965059,0.007198
9,0.003906,0.006765,0.003907,0.006767,3,2,"{'knn__n_neighbors': 3, 'knn__p': 2}",0.943396,0.962264,0.884615,0.769231,0.889877,0.075312,32,0.961783,0.961783,0.981013,1.0,0.976145,0.015853


## Decision Tree Classifier

In [4]:

pipe = Pipeline([('dtc', DecisionTreeClassifier())])

parameter_grid = { 'dtc__criterion': ['gini', 'entropy'],'dtc__splitter': ['best', 'random'] }
strat_k_fold = StratifiedKFold(n_splits=5, shuffle=True)
grid_search =GridSearchCV(pipe, param_grid=parameter_grid, scoring='accuracy', cv=4)
strat_k_fold = StratifiedKFold(n_splits=5, shuffle=True)
scores = cross_val_score(grid_search, X, y, cv=strat_k_fold )

print("Scorurile rezultate in urma 5-fold cross validation",scores)
print("Media scorurilor",scores.mean())
grid_search.fit(X, y)
print("Cel mai bun set de parametrii...",grid_search.best_params_)
grid_search = pd.DataFrame(grid_search.cv_results_)
display(HTML(grid_search.to_html()))


Scorurile rezultate in urma 5-fold cross validation [0.88095238 0.9047619  0.92857143 1.         0.92857143]
Media scorurilor 0.9285714285714286
Cel mai bun set de parametrii... {'dtc__criterion': 'gini', 'dtc__splitter': 'random'}


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_dtc__criterion,param_dtc__splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
0,0.00175,0.0004314328,0.00025,0.000433,gini,best,"{'dtc__criterion': 'gini', 'dtc__splitter': 'best'}",0.981132,0.90566,0.884615,0.711538,0.870737,0.098671,2
1,0.000999,5.161914e-07,0.0005,0.0005,gini,random,"{'dtc__criterion': 'gini', 'dtc__splitter': 'random'}",0.90566,0.924528,0.980769,0.75,0.890239,0.085552,1
2,0.001999,1.603797e-06,0.0005,0.0005,entropy,best,"{'dtc__criterion': 'entropy', 'dtc__splitter': 'best'}",0.849057,0.90566,0.903846,0.711538,0.842525,0.078972,3
3,0.000999,8.980345e-07,0.000501,0.000501,entropy,random,"{'dtc__criterion': 'entropy', 'dtc__splitter': 'random'}",0.849057,0.943396,0.846154,0.615385,0.813498,0.120886,4


## Random Forest Classifier

In [20]:
pipe = Pipeline([('rfc', RandomForestClassifier())])

parameter_grid = { 'rfc__criterion': ['gini', 'entropy'],'rfc__n_estimators': np.linspace(start=1, stop=150, num=25, dtype=int)}
strat_k_fold = StratifiedKFold(n_splits=5, shuffle=True)
grid_search =GridSearchCV(pipe, param_grid=parameter_grid, scoring='accuracy', cv=4)
strat_k_fold = StratifiedKFold(n_splits=5, shuffle=True)
scores = cross_val_score(grid_search, X, y, cv=strat_k_fold )

print("Scorurile rezultate in urma 5-fold cross validation",scores)
print("Media scorurilor",scores.mean())
grid_search.fit(X, y)
print("Cel mai bun set de parametrii...",grid_search.best_params_)
grid_search = pd.DataFrame(grid_search.cv_results_)
display(HTML(grid_search.to_html()))

#Do not run this it can take a while


Scorurile rezultate in urma 5-fold cross validation [0.92857143 0.9047619  0.9047619  0.95238095 0.95238095]
Media scorurilor 0.9285714285714286
Cel mai bun set de parametrii... {'rfc__criterion': 'gini', 'rfc__n_estimators': 7}


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_rfc__criterion,param_rfc__n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
0,0.00325,0.0004337738,0.000999,1.071227e-06,gini,1,"{'rfc__criterion': 'gini', 'rfc__n_estimators': 1}",0.886792,0.943396,0.865385,0.75,0.861393,0.070346,44
1,0.016989,9.443152e-07,0.001999,1.097438e-06,gini,7,"{'rfc__criterion': 'gini', 'rfc__n_estimators': 7}",0.90566,0.943396,0.980769,0.769231,0.899764,0.079905,1
2,0.032011,0.0009696074,0.003248,0.0004333943,gini,13,"{'rfc__criterion': 'gini', 'rfc__n_estimators': 13}",0.90566,0.924528,0.961538,0.692308,0.871009,0.105113,21
3,0.044972,0.001224121,0.003748,0.0004332223,gini,19,"{'rfc__criterion': 'gini', 'rfc__n_estimators': 19}",0.924528,0.90566,0.942308,0.730769,0.875816,0.08474,9
4,0.058214,0.0004343239,0.005247,0.0004330176,gini,25,"{'rfc__criterion': 'gini', 'rfc__n_estimators': 25}",0.924528,0.924528,0.903846,0.730769,0.870918,0.081354,23
5,0.074703,0.0008276766,0.006247,0.0004316748,gini,32,"{'rfc__criterion': 'gini', 'rfc__n_estimators': 32}",0.924528,0.924528,0.980769,0.673077,0.875726,0.119231,10
6,0.091694,0.003489445,0.006996,1.267212e-06,gini,38,"{'rfc__criterion': 'gini', 'rfc__n_estimators': 38}",0.924528,0.943396,0.980769,0.634615,0.870827,0.137871,27
7,0.105936,0.002547151,0.007744,0.0004317098,gini,44,"{'rfc__criterion': 'gini', 'rfc__n_estimators': 44}",0.924528,0.924528,0.980769,0.673077,0.875726,0.119231,10
8,0.119676,0.002583597,0.008995,0.000707477,gini,50,"{'rfc__criterion': 'gini', 'rfc__n_estimators': 50}",0.924528,0.924528,0.961538,0.653846,0.86611,0.123479,35
9,0.13167,0.001296799,0.009493,0.0004988359,gini,56,"{'rfc__criterion': 'gini', 'rfc__n_estimators': 56}",0.924528,0.924528,0.961538,0.653846,0.86611,0.123479,35


## Gaussian Naive Bayes Classifier

In [7]:
pipe = Pipeline([('gnb', GaussianNB())])

parameter_grid = { 'gnb__var_smoothing': np.linspace(start=1e-9, stop=1e-2, num=100)}
strat_k_fold = StratifiedKFold(n_splits=5, shuffle=True)
grid_search =GridSearchCV(pipe, param_grid=parameter_grid, scoring='accuracy', cv=4)
strat_k_fold = StratifiedKFold(n_splits=5, shuffle=True)
scores = cross_val_score(grid_search, X, y, cv=strat_k_fold )

print("Scorurile rezultate in urma 5-fold cross validation",scores)
print("Media scorurilor",scores.mean())
grid_search.fit(X, y)
print("Cel mai bun set de parametrii...",grid_search.best_params_)
grid_search = pd.DataFrame(grid_search.cv_results_)
display(HTML(grid_search.to_html()))

Scorurile rezultate in urma 5-fold cross validation [0.92857143 0.97619048 0.97619048 0.80952381 0.9047619 ]
Media scorurilor 0.9190476190476191
Cel mai bun set de parametrii... {'gnb__var_smoothing': 0.0028282835454545457}


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_gnb__var_smoothing,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
0,0.0,0.0,0.0,0.0,1e-09,{'gnb__var_smoothing': 1e-09},0.90566,0.943396,0.942308,0.673077,0.86611,0.112478,100
1,0.0,0.0,0.003907,0.006767,0.000101011,{'gnb__var_smoothing': 0.00010101109090909092},0.90566,0.943396,0.942308,0.711538,0.875726,0.096003,95
2,0.00396,0.006294736,0.000501,0.000501,0.000202021,{'gnb__var_smoothing': 0.00020202118181818183},0.90566,0.943396,0.942308,0.711538,0.875726,0.096003,95
3,0.00125,0.0004326723,0.00075,0.000433,0.000303031,{'gnb__var_smoothing': 0.0003030312727272728},0.90566,0.943396,0.942308,0.711538,0.875726,0.096003,95
4,0.003248,0.002770993,0.001499,0.0005,0.000404041,{'gnb__var_smoothing': 0.0004040413636363637},0.90566,0.943396,0.942308,0.711538,0.875726,0.096003,95
5,0.001499,0.0005010404,0.000749,0.000432,0.000505051,{'gnb__var_smoothing': 0.0005050514545454546},0.90566,0.943396,0.942308,0.711538,0.875726,0.096003,95
6,0.001248,0.0004330169,0.000749,0.000433,0.000606062,{'gnb__var_smoothing': 0.0006060615454545455},0.90566,0.943396,0.942308,0.730769,0.880533,0.08779,94
7,0.001061,0.0001059572,0.001,2e-06,0.000707072,{'gnb__var_smoothing': 0.0007070716363636365},0.90566,0.943396,0.942308,0.75,0.885341,0.079602,72
8,0.001261,0.0008396345,0.00025,0.000433,0.000808082,{'gnb__var_smoothing': 0.0008080817272727274},0.90566,0.943396,0.942308,0.75,0.885341,0.079602,72
9,0.0,0.0,0.0,0.0,0.000909092,{'gnb__var_smoothing': 0.0009090918181818183},0.90566,0.943396,0.942308,0.75,0.885341,0.079602,72


## Multilayer Perceptron Classifier

In [None]:
pipe = Pipeline([('mlp', MLPClassifier(max_iter= 10000))])

parameter_grid = { 'mlp__alpha': np.linspace(start=0, stop=1e-1, num=50),
                  'mlp__activation': ['identity', 'logistic', 'tanh', 'relu']
                 }

strat_k_fold = StratifiedKFold(n_splits=5, shuffle=True)
grid_search =GridSearchCV(pipe, param_grid=parameter_grid, scoring='accuracy', cv=4)
strat_k_fold = StratifiedKFold(n_splits=5, shuffle=True)
scores = cross_val_score(grid_search, X, y, cv=strat_k_fold )

print("Scorurile rezultate in urma 5-fold cross validation",scores)
print("Media scorurilor",scores.mean())
grid_search.fit(X, y)
print("Cel mai bun set de parametrii...",grid_search.best_params_)
grid_search = pd.DataFrame(grid_search.cv_results_)
display(HTML(grid_search.to_html()))

#Do not run this it can take a while

