In [2]:
import pandas as pd
import numpy as np
import openml
from sklearn.preprocessing import MinMaxScaler,LabelEncoder 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.model_selection import KFold,GridSearchCV
from sklearn.metrics import f1_score, make_scorer
import time

# Classification for Iris Dataset

In [2]:
# Download the Iris dataset from OpenML and unpack it into a feature frame
# (iris_df) and the target series (iris_label) named by the dataset's default
# target attribute.
iris = openml.datasets.get_dataset("iris")
iris_df, iris_label, categorical_indicator, attribute_names = iris.get_data(
    target=iris.default_target_attribute, dataset_format="dataframe"
)
# Re-attach the target as a "class" column so the displayed frame shows
# features and label side by side.
iris_df["class"]=iris_label
# Keep only the first four columns (the float64 measurements) as model input;
# the "class" column added above is the fifth and is therefore excluded.
iris_x=iris_df.iloc[:,:4]
iris_df

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [3]:
# Inspect column dtypes and non-null counts to check for missing values.
iris_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   sepallength  150 non-null    float64 
 1   sepalwidth   150 non-null    float64 
 2   petallength  150 non-null    float64 
 3   petalwidth   150 non-null    float64 
 4   class        150 non-null    category
dtypes: category(1), float64(4)
memory usage: 5.1 KB


In [4]:
# Check how many samples each class has (class balance).
iris_df["class"].value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: class, dtype: int64

In [5]:
# Encode the categorical class labels as integer codes; `le` is kept so the
# codes can be mapped back to the original label names later.
le=LabelEncoder()

iris_y=le.fit_transform(iris_label)

In [6]:
# Min-max scale every feature column of iris_x.
# NOTE(review): the scaler is fitted on the full dataset here, i.e. before the
# cross-validation splits are made, so each validation fold has influenced the
# scaling of its training folds. Wrapping scaler + estimator in a Pipeline that
# is passed to GridSearchCV would avoid this — confirm whether it matters here.
scaler = MinMaxScaler()

iris_x_scaled=scaler.fit_transform(iris_x)

In [7]:
def clasification(model,parameters,x,y):
    """Grid-search ``model`` under ten differently shuffled 5-fold splits.

    For every seed in 1..10 a ``KFold(n_splits=5, shuffle=True)`` splitter is
    built and an exhaustive ``GridSearchCV`` is run with accuracy and macro-F1
    as metrics (accuracy decides the best parameters). One summary row per
    seed is collected and a progress line is printed after each search.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn style estimator to tune.
    parameters : dict
        Parameter grid handed to ``GridSearchCV``.
    x : array-like
        Feature matrix.
    y : array-like
        Target labels.

    Returns
    -------
    pandas.DataFrame
        Ten rows with the columns ``Model``, ``Random State``,
        ``Best Parameters``, ``Best Score``, ``Execution Time``,
        ``Parameters`` (every grid point) and ``f1 score`` (mean macro-F1 of
        every grid point, aligned with ``Parameters``).

    Side effects
    ------------
    If ``model`` is one of the eight estimator types listed below, the result
    is also written to the matching ``Iris_*.csv`` file in the working
    directory. Other estimators are not persisted.
    """
    # The output file is chosen from the estimator's class name rather than by
    # comparing against notebook globals (knc, lda, ...), so the function no
    # longer raises NameError when some of those setup cells have not been run.
    csv_by_estimator = {
        "KNeighborsClassifier": "Iris_KNC.csv",
        "LinearDiscriminantAnalysis": "Iris_LDA.csv",
        "GaussianNB": "Iris_GNB.csv",
        "SVC": "Iris_SVC.csv",
        "LogisticRegression": "Iris_LR.csv",
        "RandomForestClassifier": "Iris_RF.csv",
        "AdaBoostClassifier": "Iris_ABC.csv",
        "GradientBoostingClassifier": "Iris_GBC.csv",
    }
    columns = ["Model", "Random State", "Best Parameters", "Best Score",
               "Execution Time", "Parameters", "f1 score"]
    scoring = ['accuracy', 'f1_macro']

    # Rows are gathered in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the frame on
    # every call.
    records = []
    for seed in range(1, 11):
        start_time = time.perf_counter()
        cv_inner = KFold(n_splits=5, shuffle=True, random_state=seed)
        grid_search = GridSearchCV(model, parameters, cv=cv_inner, scoring=scoring,
                                   refit='accuracy', n_jobs=-1)
        grid_result = grid_search.fit(x, y)
        execution_time = time.perf_counter() - start_time
        print("Best: %f using %s and Execution time is : %f"% (grid_result.best_score_, grid_result.best_params_,execution_time))
        records.append({"Model": model,
                        "Random State": seed,
                        "Best Parameters": grid_result.best_params_,
                        "Best Score": grid_result.best_score_,
                        "Execution Time": execution_time,
                        "Parameters": grid_result.cv_results_['params'],
                        # Local name deliberately differs from the imported
                        # sklearn.metrics.f1_score to avoid shadowing it.
                        "f1 score": grid_result.cv_results_['mean_test_f1_macro']})

    classification_df = pd.DataFrame(records, columns=columns)

    csv_name = csv_by_estimator.get(type(model).__name__)
    if csv_name is not None:
        classification_df.to_csv(csv_name)

    return classification_df
    

## K-nearest neighbour classification

In [8]:
# Estimator and hyper-parameter grid for k-nearest neighbours:
# 20 neighbour counts x 2 weighting schemes x 3 distance metrics.
knc = KNeighborsClassifier()

# NOTE(review): with the estimator's default p=2, 'minkowski' should be the
# same distance as 'euclidean', which would make a third of this grid
# redundant — confirm against the KNeighborsClassifier docs.
knc_grid = {"n_neighbors":range(1, 21),
           "weights":['uniform', 'distance'],
           "metric":['manhattan','euclidean', 'minkowski']}

In [9]:
# Run the repeated grid search for KNN; the result is displayed and also
# written to Iris_KNC.csv by clasification().
clasification(knc,knc_grid,iris_x_scaled,iris_y)

Best: 0.973333 using {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'uniform'} and Execution time is : 3.627303
Best: 0.966667 using {'metric': 'euclidean', 'n_neighbors': 6, 'weights': 'uniform'} and Execution time is : 0.832774
Best: 0.966667 using {'metric': 'euclidean', 'n_neighbors': 6, 'weights': 'uniform'} and Execution time is : 0.847737
Best: 0.973333 using {'metric': 'euclidean', 'n_neighbors': 14, 'weights': 'uniform'} and Execution time is : 0.959959
Best: 0.973333 using {'metric': 'euclidean', 'n_neighbors': 14, 'weights': 'uniform'} and Execution time is : 0.863714
Best: 0.966667 using {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'uniform'} and Execution time is : 0.894205
Best: 0.966667 using {'metric': 'euclidean', 'n_neighbors': 2, 'weights': 'uniform'} and Execution time is : 0.874659
Best: 0.966667 using {'metric': 'euclidean', 'n_neighbors': 6, 'weights': 'uniform'} and Execution time is : 0.853718
Best: 0.966667 using {'metric': 'manhattan', 'n_neighb

Unnamed: 0,Model,Random State,Best Parameters,Best Score,Execution Time,Parameters,f1 score
0,KNeighborsClassifier(),1.0,"{'metric': 'euclidean', 'n_neighbors': 9, 'wei...",0.973333,3.627303,"[{'metric': 'manhattan', 'n_neighbors': 1, 'we...","[0.9221037336254728, 0.9221037336254728, 0.928..."
1,KNeighborsClassifier(),2.0,"{'metric': 'euclidean', 'n_neighbors': 6, 'wei...",0.966667,0.832774,"[{'metric': 'manhattan', 'n_neighbors': 1, 'we...","[0.9401705761969593, 0.9401705761969593, 0.931..."
2,KNeighborsClassifier(),3.0,"{'metric': 'euclidean', 'n_neighbors': 6, 'wei...",0.966667,0.847737,"[{'metric': 'manhattan', 'n_neighbors': 1, 'we...","[0.9384541910331384, 0.9384541910331384, 0.938..."
3,KNeighborsClassifier(),4.0,"{'metric': 'euclidean', 'n_neighbors': 14, 'we...",0.973333,0.959959,"[{'metric': 'manhattan', 'n_neighbors': 1, 'we...","[0.9308426681677456, 0.9308426681677456, 0.942..."
4,KNeighborsClassifier(),5.0,"{'metric': 'euclidean', 'n_neighbors': 14, 'we...",0.973333,0.863714,"[{'metric': 'manhattan', 'n_neighbors': 1, 'we...","[0.9471420500151151, 0.9471420500151151, 0.960..."
5,KNeighborsClassifier(),6.0,"{'metric': 'euclidean', 'n_neighbors': 5, 'wei...",0.966667,0.894205,"[{'metric': 'manhattan', 'n_neighbors': 1, 'we...","[0.9403688419685189, 0.9403688419685189, 0.945..."
6,KNeighborsClassifier(),7.0,"{'metric': 'euclidean', 'n_neighbors': 2, 'wei...",0.966667,0.874659,"[{'metric': 'manhattan', 'n_neighbors': 1, 'we...","[0.9416315383514199, 0.9416315383514199, 0.955..."
7,KNeighborsClassifier(),8.0,"{'metric': 'euclidean', 'n_neighbors': 6, 'wei...",0.966667,0.853718,"[{'metric': 'manhattan', 'n_neighbors': 1, 'we...","[0.9409770067505967, 0.9409770067505967, 0.953..."
8,KNeighborsClassifier(),9.0,"{'metric': 'manhattan', 'n_neighbors': 18, 'we...",0.966667,0.820805,"[{'metric': 'manhattan', 'n_neighbors': 1, 'we...","[0.9388938790677919, 0.9388938790677919, 0.952..."
9,KNeighborsClassifier(),10.0,"{'metric': 'euclidean', 'n_neighbors': 6, 'wei...",0.96,0.798866,"[{'metric': 'manhattan', 'n_neighbors': 1, 'we...","[0.9305542939583861, 0.9305542939583861, 0.941..."


## Linear discriminant analysis 

In [10]:
# Estimator and hyper-parameter grid for linear discriminant analysis:
# n_components in {1, 2} x 3 solvers.
lda = LinearDiscriminantAnalysis()

# NOTE(review): n_components is documented as affecting only transform(), not
# predict(), so it presumably cannot change the CV scores — the saved results
# show identical F1 values for the first grid points. Confirm and consider
# dropping it from the grid.
lda_grid = {"n_components": list(range(1,3,1)),
           "solver":['lsqr','eigen','svd']}

In [11]:
# Run the repeated grid search for LDA; the result is displayed and also
# written to Iris_LDA.csv by clasification().
clasification(lda,lda_grid,iris_x_scaled,iris_y)

Best: 0.973333 using {'n_components': 1, 'solver': 'lsqr'} and Execution time is : 0.144614
Best: 0.980000 using {'n_components': 1, 'solver': 'lsqr'} and Execution time is : 0.061837
Best: 0.973333 using {'n_components': 1, 'solver': 'lsqr'} and Execution time is : 0.060832
Best: 0.980000 using {'n_components': 1, 'solver': 'lsqr'} and Execution time is : 0.061836
Best: 0.980000 using {'n_components': 1, 'solver': 'lsqr'} and Execution time is : 0.078791
Best: 0.980000 using {'n_components': 1, 'solver': 'lsqr'} and Execution time is : 0.056849
Best: 0.980000 using {'n_components': 1, 'solver': 'lsqr'} and Execution time is : 0.054856
Best: 0.980000 using {'n_components': 1, 'solver': 'lsqr'} and Execution time is : 0.047873
Best: 0.980000 using {'n_components': 1, 'solver': 'lsqr'} and Execution time is : 0.048870
Best: 0.980000 using {'n_components': 1, 'solver': 'lsqr'} and Execution time is : 0.054853


Unnamed: 0,Model,Random State,Best Parameters,Best Score,Execution Time,Parameters,f1 score
0,LinearDiscriminantAnalysis(),1.0,"{'n_components': 1, 'solver': 'lsqr'}",0.973333,0.144614,"[{'n_components': 1, 'solver': 'lsqr'}, {'n_co...","[0.9695119929800237, 0.9695119929800237, 0.969..."
1,LinearDiscriminantAnalysis(),2.0,"{'n_components': 1, 'solver': 'lsqr'}",0.98,0.061837,"[{'n_components': 1, 'solver': 'lsqr'}, {'n_co...","[0.9817150170091347, 0.9817150170091347, 0.981..."
2,LinearDiscriminantAnalysis(),3.0,"{'n_components': 1, 'solver': 'lsqr'}",0.973333,0.060832,"[{'n_components': 1, 'solver': 'lsqr'}, {'n_co...","[0.9719973954494078, 0.9719973954494078, 0.971..."
3,LinearDiscriminantAnalysis(),4.0,"{'n_components': 1, 'solver': 'lsqr'}",0.98,0.061836,"[{'n_components': 1, 'solver': 'lsqr'}, {'n_co...","[0.9726710239651417, 0.9726710239651417, 0.972..."
4,LinearDiscriminantAnalysis(),5.0,"{'n_components': 1, 'solver': 'lsqr'}",0.98,0.078791,"[{'n_components': 1, 'solver': 'lsqr'}, {'n_co...","[0.9808612440191388, 0.9808612440191388, 0.980..."
5,LinearDiscriminantAnalysis(),6.0,"{'n_components': 1, 'solver': 'lsqr'}",0.98,0.056849,"[{'n_components': 1, 'solver': 'lsqr'}, {'n_co...","[0.9786384289795247, 0.9786384289795247, 0.978..."
6,LinearDiscriminantAnalysis(),7.0,"{'n_components': 1, 'solver': 'lsqr'}",0.98,0.054856,"[{'n_components': 1, 'solver': 'lsqr'}, {'n_co...","[0.9805184914545528, 0.9805184914545528, 0.980..."
7,LinearDiscriminantAnalysis(),8.0,"{'n_components': 1, 'solver': 'lsqr'}",0.98,0.047873,"[{'n_components': 1, 'solver': 'lsqr'}, {'n_co...","[0.9820687695368002, 0.9820687695368002, 0.982..."
8,LinearDiscriminantAnalysis(),9.0,"{'n_components': 1, 'solver': 'lsqr'}",0.98,0.04887,"[{'n_components': 1, 'solver': 'lsqr'}, {'n_co...","[0.9804526079206386, 0.9804526079206386, 0.980..."
9,LinearDiscriminantAnalysis(),10.0,"{'n_components': 1, 'solver': 'lsqr'}",0.98,0.054853,"[{'n_components': 1, 'solver': 'lsqr'}, {'n_co...","[0.9800379572118703, 0.9800379572118703, 0.980..."


## Naïve Bayes classifier

In [12]:
# Estimator and hyper-parameter grid for Gaussian naive Bayes.
gnb = GaussianNB()

# 100 log-spaced var_smoothing candidates from 1e0 down to 1e-9.
gnb_grid={"var_smoothing":np.logspace(0,-9, num=100)}

In [13]:
# Run the repeated grid search for Gaussian NB; the result is displayed and
# also written to Iris_GNB.csv by clasification().
clasification(gnb,gnb_grid,iris_x_scaled,iris_y)

Best: 0.953333 using {'var_smoothing': 0.04328761281083057} and Execution time is : 0.539559
Best: 0.953333 using {'var_smoothing': 0.0657933224657568} and Execution time is : 0.525587
Best: 0.946667 using {'var_smoothing': 0.02848035868435802} and Execution time is : 0.520609
Best: 0.953333 using {'var_smoothing': 0.2848035868435802} and Execution time is : 0.544552
Best: 0.953333 using {'var_smoothing': 0.2848035868435802} and Execution time is : 0.518613
Best: 0.953333 using {'var_smoothing': 0.23101297000831597} and Execution time is : 0.696138
Best: 0.953333 using {'var_smoothing': 0.15199110829529336} and Execution time is : 0.611368
Best: 0.953333 using {'var_smoothing': 0.1873817422860384} and Execution time is : 0.583441
Best: 0.953333 using {'var_smoothing': 0.1873817422860384} and Execution time is : 0.546540
Best: 0.946667 using {'var_smoothing': 0.1873817422860384} and Execution time is : 0.543549


Unnamed: 0,Model,Random State,Best Parameters,Best Score,Execution Time,Parameters,f1 score
0,GaussianNB(),1.0,{'var_smoothing': 0.04328761281083057},0.953333,0.539559,"[{'var_smoothing': 1.0}, {'var_smoothing': 0.8...","[0.8878386136280871, 0.8878386136280871, 0.901..."
1,GaussianNB(),2.0,{'var_smoothing': 0.0657933224657568},0.953333,0.525587,"[{'var_smoothing': 1.0}, {'var_smoothing': 0.8...","[0.9164969017910194, 0.9308645747861434, 0.930..."
2,GaussianNB(),3.0,{'var_smoothing': 0.02848035868435802},0.946667,0.520609,"[{'var_smoothing': 1.0}, {'var_smoothing': 0.8...","[0.9246210170081486, 0.9246210170081486, 0.917..."
3,GaussianNB(),4.0,{'var_smoothing': 0.2848035868435802},0.953333,0.544552,"[{'var_smoothing': 1.0}, {'var_smoothing': 0.8...","[0.8940059853001031, 0.8871326821326821, 0.912..."
4,GaussianNB(),5.0,{'var_smoothing': 0.2848035868435802},0.953333,0.518613,"[{'var_smoothing': 1.0}, {'var_smoothing': 0.8...","[0.9414967357166846, 0.9414967357166846, 0.941..."
5,GaussianNB(),6.0,{'var_smoothing': 0.23101297000831597},0.953333,0.696138,"[{'var_smoothing': 1.0}, {'var_smoothing': 0.8...","[0.9002068302068302, 0.9115728297467427, 0.917..."
6,GaussianNB(),7.0,{'var_smoothing': 0.15199110829529336},0.953333,0.611368,"[{'var_smoothing': 1.0}, {'var_smoothing': 0.8...","[0.9429218428281558, 0.9429218428281558, 0.942..."
7,GaussianNB(),8.0,{'var_smoothing': 0.1873817422860384},0.953333,0.583441,"[{'var_smoothing': 1.0}, {'var_smoothing': 0.8...","[0.9099783131522262, 0.9175479725949506, 0.931..."
8,GaussianNB(),9.0,{'var_smoothing': 0.1873817422860384},0.953333,0.54654,"[{'var_smoothing': 1.0}, {'var_smoothing': 0.8...","[0.9086023773666794, 0.9086023773666794, 0.915..."
9,GaussianNB(),10.0,{'var_smoothing': 0.1873817422860384},0.946667,0.543549,"[{'var_smoothing': 1.0}, {'var_smoothing': 0.8...","[0.904059434929, 0.904059434929, 0.91029201242..."


## Support vector machine

In [14]:
# Estimator and hyper-parameter grid for the support vector classifier:
# 6 values of C x 4 kernels x 5 values of gamma.
svc=SVC()

# NOTE(review): gamma is documented as a coefficient for the 'rbf', 'poly' and
# 'sigmoid' kernels only, so the 'linear' rows presumably repeat the same fit
# for every gamma — confirm; a list of per-kernel grids would avoid that.
svc_grid={"C":[0.01, 0.1, 1, 10, 100, 1000],
          "kernel":["linear","poly","rbf","sigmoid"],
          "gamma":[1, 0.1, 0.01, 0.001, 0.0001]}


In [15]:
# Run the repeated grid search for the SVC; the result is displayed and also
# written to Iris_SVC.csv by clasification().
clasification(svc,svc_grid,iris_x_scaled,iris_y)

Best: 0.966667 using {'C': 10, 'gamma': 1, 'kernel': 'rbf'} and Execution time is : 0.756978
Best: 0.973333 using {'C': 1000, 'gamma': 1, 'kernel': 'linear'} and Execution time is : 0.740022
Best: 0.966667 using {'C': 1000, 'gamma': 0.01, 'kernel': 'rbf'} and Execution time is : 0.813825
Best: 0.980000 using {'C': 100, 'gamma': 0.1, 'kernel': 'sigmoid'} and Execution time is : 0.848731
Best: 0.980000 using {'C': 100, 'gamma': 1, 'kernel': 'poly'} and Execution time is : 0.751991
Best: 0.973333 using {'C': 10, 'gamma': 1, 'kernel': 'linear'} and Execution time is : 0.741019
Best: 0.966667 using {'C': 10, 'gamma': 1, 'kernel': 'linear'} and Execution time is : 0.708108
Best: 0.973333 using {'C': 100, 'gamma': 1, 'kernel': 'linear'} and Execution time is : 0.745008
Best: 0.980000 using {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'} and Execution time is : 0.752987
Best: 0.973333 using {'C': 1000, 'gamma': 1, 'kernel': 'linear'} and Execution time is : 0.726059


Unnamed: 0,Model,Random State,Best Parameters,Best Score,Execution Time,Parameters,f1 score
0,SVC(),1.0,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}",0.966667,0.756978,"[{'C': 0.01, 'gamma': 1, 'kernel': 'linear'}, ...","[0.20566412566412567, 0.46701356864669563, 0.2..."
1,SVC(),2.0,"{'C': 1000, 'gamma': 1, 'kernel': 'linear'}",0.973333,0.740022,"[{'C': 0.01, 'gamma': 1, 'kernel': 'linear'}, ...","[0.19587719298245615, 0.47281893787978974, 0.2..."
2,SVC(),3.0,"{'C': 1000, 'gamma': 0.01, 'kernel': 'rbf'}",0.966667,0.813825,"[{'C': 0.01, 'gamma': 1, 'kernel': 'linear'}, ...","[0.3015117436170068, 0.45374375615754925, 0.30..."
3,SVC(),4.0,"{'C': 100, 'gamma': 0.1, 'kernel': 'sigmoid'}",0.98,0.848731,"[{'C': 0.01, 'gamma': 1, 'kernel': 'linear'}, ...","[0.280596987965409, 0.44594248979822737, 0.283..."
4,SVC(),5.0,"{'C': 100, 'gamma': 1, 'kernel': 'poly'}",0.98,0.751991,"[{'C': 0.01, 'gamma': 1, 'kernel': 'linear'}, ...","[0.21230871991512723, 0.4132793610212965, 0.21..."
5,SVC(),6.0,"{'C': 10, 'gamma': 1, 'kernel': 'linear'}",0.973333,0.741019,"[{'C': 0.01, 'gamma': 1, 'kernel': 'linear'}, ...","[0.13705608442450548, 0.4016559297433643, 0.13..."
6,SVC(),7.0,"{'C': 10, 'gamma': 1, 'kernel': 'linear'}",0.966667,0.708108,"[{'C': 0.01, 'gamma': 1, 'kernel': 'linear'}, ...","[0.2909052111431974, 0.4075091236334121, 0.297..."
7,SVC(),8.0,"{'C': 100, 'gamma': 1, 'kernel': 'linear'}",0.973333,0.745008,"[{'C': 0.01, 'gamma': 1, 'kernel': 'linear'}, ...","[0.21565257565257565, 0.40752525309800847, 0.2..."
8,SVC(),9.0,"{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}",0.98,0.752987,"[{'C': 0.01, 'gamma': 1, 'kernel': 'linear'}, ...","[0.14304993252361672, 0.42181859583762105, 0.1..."
9,SVC(),10.0,"{'C': 1000, 'gamma': 1, 'kernel': 'linear'}",0.973333,0.726059,"[{'C': 0.01, 'gamma': 1, 'kernel': 'linear'}, ...","[0.21610241820768133, 0.44673289113386544, 0.2..."


## Logistic regression

In [16]:
# Estimator and hyper-parameter grid for logistic regression:
# 3 solvers x L2 penalty x 5 regularisation strengths C.
lr=LogisticRegression()

lr_grid = {"solver": ['newton-cg', 'lbfgs', 'liblinear'],
           "penalty": ['l2'],
           "C":[100, 10, 1.0, 0.1, 0.01]}

In [17]:
# Run the repeated grid search for logistic regression; the result is
# displayed and also written to Iris_LR.csv by clasification().
clasification(lr,lr_grid,iris_x_scaled,iris_y)

Best: 0.966667 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'} and Execution time is : 0.321140
Best: 0.953333 using {'C': 100, 'penalty': 'l2', 'solver': 'liblinear'} and Execution time is : 0.259307
Best: 0.966667 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'} and Execution time is : 0.296206
Best: 0.966667 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'} and Execution time is : 0.318150
Best: 0.973333 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'} and Execution time is : 0.279253
Best: 0.966667 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'} and Execution time is : 0.275264
Best: 0.966667 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'} and Execution time is : 0.291222
Best: 0.966667 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'} and Execution time is : 0.289228
Best: 0.980000 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'} and Execution time is : 0.265289
Best: 0.973333 using {'C': 100, 'penalty': 'l2', 'solve

Unnamed: 0,Model,Random State,Best Parameters,Best Score,Execution Time,Parameters,f1 score
0,LogisticRegression(),1.0,"{'C': 100, 'penalty': 'l2', 'solver': 'newton-...",0.966667,0.32114,"[{'C': 100, 'penalty': 'l2', 'solver': 'newton...","[0.9617171211851518, 0.9617171211851518, 0.934..."
1,LogisticRegression(),2.0,"{'C': 100, 'penalty': 'l2', 'solver': 'libline...",0.953333,0.259307,"[{'C': 100, 'penalty': 'l2', 'solver': 'newton...","[0.9471491337018326, 0.9471491337018326, 0.954..."
2,LogisticRegression(),3.0,"{'C': 100, 'penalty': 'l2', 'solver': 'newton-...",0.966667,0.296206,"[{'C': 100, 'penalty': 'l2', 'solver': 'newton...","[0.9646129211918686, 0.9646129211918686, 0.939..."
3,LogisticRegression(),4.0,"{'C': 100, 'penalty': 'l2', 'solver': 'newton-...",0.966667,0.31815,"[{'C': 100, 'penalty': 'l2', 'solver': 'newton...","[0.9610548623489802, 0.9610548623489802, 0.948..."
4,LogisticRegression(),5.0,"{'C': 100, 'penalty': 'l2', 'solver': 'newton-...",0.973333,0.279253,"[{'C': 100, 'penalty': 'l2', 'solver': 'newton...","[0.9744539213646763, 0.9744539213646763, 0.954..."
5,LogisticRegression(),6.0,"{'C': 100, 'penalty': 'l2', 'solver': 'newton-...",0.966667,0.275264,"[{'C': 100, 'penalty': 'l2', 'solver': 'newton...","[0.965911156252252, 0.965911156252252, 0.94539..."
6,LogisticRegression(),7.0,"{'C': 100, 'penalty': 'l2', 'solver': 'newton-...",0.966667,0.291222,"[{'C': 100, 'penalty': 'l2', 'solver': 'newton...","[0.9689242885560022, 0.9689242885560022, 0.955..."
7,LogisticRegression(),8.0,"{'C': 100, 'penalty': 'l2', 'solver': 'newton-...",0.966667,0.289228,"[{'C': 100, 'penalty': 'l2', 'solver': 'newton...","[0.9678018278018279, 0.9678018278018279, 0.946..."
8,LogisticRegression(),9.0,"{'C': 100, 'penalty': 'l2', 'solver': 'newton-...",0.98,0.265289,"[{'C': 100, 'penalty': 'l2', 'solver': 'newton...","[0.9804526079206386, 0.9804526079206386, 0.948..."
9,LogisticRegression(),10.0,"{'C': 100, 'penalty': 'l2', 'solver': 'newton-...",0.973333,0.288226,"[{'C': 100, 'penalty': 'l2', 'solver': 'newton...","[0.9734497219177525, 0.9734497219177525, 0.958..."


## Random forests

In [18]:
# Estimator and hyper-parameter grid for the random forest:
# 2 split criteria x 3 depths x 5 forest sizes.
# NOTE(review): no random_state is set on the estimator, so repeated runs are
# not expected to reproduce the saved scores exactly.
rfc=RandomForestClassifier()

# NOTE(review): "n_jobs": [-1] is not a tuned hyper-parameter; it asks every
# forest to use all cores while clasification() already runs GridSearchCV
# with n_jobs=-1. This nesting may oversubscribe the CPU — confirm and
# consider parallelising at only one level.
rfc_grid={'criterion' :['gini', 'entropy'],
         'max_depth' : [4,6,8],
         'n_estimators': [100,200,300,400,500],
         "n_jobs":[-1]}

In [19]:
# Run the repeated grid search for the random forest; the result is displayed
# and also written to Iris_RF.csv by clasification().
clasification(rfc,rfc_grid,iris_x_scaled,iris_y)

Best: 0.946667 using {'criterion': 'gini', 'max_depth': 6, 'n_estimators': 100, 'n_jobs': -1} and Execution time is : 34.547620
Best: 0.960000 using {'criterion': 'gini', 'max_depth': 4, 'n_estimators': 100, 'n_jobs': -1} and Execution time is : 35.861114
Best: 0.960000 using {'criterion': 'gini', 'max_depth': 4, 'n_estimators': 100, 'n_jobs': -1} and Execution time is : 35.995754
Best: 0.966667 using {'criterion': 'gini', 'max_depth': 4, 'n_estimators': 200, 'n_jobs': -1} and Execution time is : 36.353796
Best: 0.966667 using {'criterion': 'gini', 'max_depth': 6, 'n_estimators': 100, 'n_jobs': -1} and Execution time is : 37.620411
Best: 0.946667 using {'criterion': 'entropy', 'max_depth': 4, 'n_estimators': 100, 'n_jobs': -1} and Execution time is : 37.846804
Best: 0.960000 using {'criterion': 'gini', 'max_depth': 4, 'n_estimators': 100, 'n_jobs': -1} and Execution time is : 37.994412
Best: 0.960000 using {'criterion': 'gini', 'max_depth': 4, 'n_estimators': 100, 'n_jobs': -1} and Exe

Unnamed: 0,Model,Random State,Best Parameters,Best Score,Execution Time,Parameters,f1 score
0,RandomForestClassifier(),1.0,"{'criterion': 'gini', 'max_depth': 6, 'n_estim...",0.946667,34.54762,"[{'criterion': 'gini', 'max_depth': 4, 'n_esti...","[0.9321886753218733, 0.9321886753218733, 0.932..."
1,RandomForestClassifier(),2.0,"{'criterion': 'gini', 'max_depth': 4, 'n_estim...",0.96,35.861114,"[{'criterion': 'gini', 'max_depth': 4, 'n_esti...","[0.9596721081855915, 0.9596721081855915, 0.959..."
2,RandomForestClassifier(),3.0,"{'criterion': 'gini', 'max_depth': 4, 'n_estim...",0.96,35.995754,"[{'criterion': 'gini', 'max_depth': 4, 'n_esti...","[0.9579629629629629, 0.9579629629629629, 0.957..."
3,RandomForestClassifier(),4.0,"{'criterion': 'gini', 'max_depth': 4, 'n_estim...",0.966667,36.353796,"[{'criterion': 'gini', 'max_depth': 4, 'n_esti...","[0.9585160457267067, 0.9642911884583711, 0.964..."
4,RandomForestClassifier(),5.0,"{'criterion': 'gini', 'max_depth': 6, 'n_estim...",0.966667,37.620411,"[{'criterion': 'gini', 'max_depth': 4, 'n_esti...","[0.95418803046263, 0.95418803046263, 0.9541880..."
5,RandomForestClassifier(),6.0,"{'criterion': 'entropy', 'max_depth': 4, 'n_es...",0.946667,37.846804,"[{'criterion': 'gini', 'max_depth': 4, 'n_esti...","[0.9404566107977065, 0.9404566107977065, 0.940..."
6,RandomForestClassifier(),7.0,"{'criterion': 'gini', 'max_depth': 4, 'n_estim...",0.96,37.994412,"[{'criterion': 'gini', 'max_depth': 4, 'n_esti...","[0.963083269671505, 0.963083269671505, 0.96308..."
7,RandomForestClassifier(),8.0,"{'criterion': 'gini', 'max_depth': 4, 'n_estim...",0.96,36.133392,"[{'criterion': 'gini', 'max_depth': 4, 'n_esti...","[0.9600822345719372, 0.9600822345719372, 0.953..."
8,RandomForestClassifier(),9.0,"{'criterion': 'gini', 'max_depth': 4, 'n_estim...",0.96,36.544287,"[{'criterion': 'gini', 'max_depth': 4, 'n_esti...","[0.9611616161616162, 0.9611616161616162, 0.961..."
9,RandomForestClassifier(),10.0,"{'criterion': 'gini', 'max_depth': 4, 'n_estim...",0.96,37.48877,"[{'criterion': 'gini', 'max_depth': 4, 'n_esti...","[0.9435766654487882, 0.9506877765598993, 0.957..."


In [20]:
# 'max_features': ['auto', 'sqrt', 'log2'],

## AdaBoost

In [21]:
# Estimator and hyper-parameter grid for AdaBoost:
# 5 ensemble sizes x 5 learning rates (0.1, 0.5, 0.9, 1.3, 1.7).
# NOTE(review): no random_state is set on the estimator, so repeated runs are
# not expected to reproduce the saved scores exactly.
abc=AdaBoostClassifier()

# np.arange with a float step accumulates rounding error, which is why the
# saved output reports values such as 1.3000000000000003.
# NOTE(review): the saved output also reports n_estimators=5000 for one seed,
# a value that is not in this grid — the output appears to come from an
# earlier version of this cell and should be regenerated.
abc_grid = {"n_estimators":[10, 50, 100, 500, 1000],
           "learning_rate":np.arange(0.1, 2.1, 0.4)}

In [22]:
# Run the repeated grid search for AdaBoost; the result is displayed and also
# written to Iris_ABC.csv by clasification().
clasification(abc,abc_grid,iris_x_scaled,iris_y)

Best: 0.953333 using {'learning_rate': 1.7000000000000002, 'n_estimators': 500} and Execution time is : 112.224212
Best: 0.946667 using {'learning_rate': 1.3000000000000003, 'n_estimators': 50} and Execution time is : 115.437111
Best: 0.960000 using {'learning_rate': 1.7000000000000002, 'n_estimators': 100} and Execution time is : 104.575388
Best: 0.953333 using {'learning_rate': 1.3000000000000003, 'n_estimators': 10} and Execution time is : 108.153819
Best: 0.946667 using {'learning_rate': 1.3000000000000003, 'n_estimators': 50} and Execution time is : 111.700338
Best: 0.946667 using {'learning_rate': 1.3000000000000003, 'n_estimators': 100} and Execution time is : 108.509868
Best: 0.966667 using {'learning_rate': 1.3000000000000003, 'n_estimators': 100} and Execution time is : 108.052093
Best: 0.946667 using {'learning_rate': 1.7000000000000002, 'n_estimators': 5000} and Execution time is : 116.844581
Best: 0.960000 using {'learning_rate': 1.3000000000000003, 'n_estimators': 10} and

Unnamed: 0,Model,Random State,Best Parameters,Best Score,Execution Time,Parameters,f1 score
0,AdaBoostClassifier(),1.0,"{'learning_rate': 1.7000000000000002, 'n_estim...",0.953333,112.224212,"[{'learning_rate': 0.1, 'n_estimators': 10}, {...","[0.9198346168893835, 0.9198346168893835, 0.919..."
1,AdaBoostClassifier(),2.0,"{'learning_rate': 1.3000000000000003, 'n_estim...",0.946667,115.437111,"[{'learning_rate': 0.1, 'n_estimators': 10}, {...","[0.9185444802426901, 0.9185444802426901, 0.918..."
2,AdaBoostClassifier(),3.0,"{'learning_rate': 1.7000000000000002, 'n_estim...",0.96,104.575388,"[{'learning_rate': 0.1, 'n_estimators': 10}, {...","[0.9098607747809077, 0.9098607747809077, 0.909..."
3,AdaBoostClassifier(),4.0,"{'learning_rate': 1.3000000000000003, 'n_estim...",0.953333,108.153819,"[{'learning_rate': 0.1, 'n_estimators': 10}, {...","[0.9314194890077243, 0.9314194890077243, 0.931..."
4,AdaBoostClassifier(),5.0,"{'learning_rate': 1.3000000000000003, 'n_estim...",0.946667,111.700338,"[{'learning_rate': 0.1, 'n_estimators': 10}, {...","[0.9194496542578385, 0.9194496542578385, 0.919..."
5,AdaBoostClassifier(),6.0,"{'learning_rate': 1.3000000000000003, 'n_estim...",0.946667,108.509868,"[{'learning_rate': 0.1, 'n_estimators': 10}, {...","[0.9193603541132145, 0.9193603541132145, 0.919..."
6,AdaBoostClassifier(),7.0,"{'learning_rate': 1.3000000000000003, 'n_estim...",0.966667,108.052093,"[{'learning_rate': 0.1, 'n_estimators': 10}, {...","[0.9344828798079572, 0.9344828798079572, 0.934..."
7,AdaBoostClassifier(),8.0,"{'learning_rate': 1.7000000000000002, 'n_estim...",0.946667,116.844581,"[{'learning_rate': 0.1, 'n_estimators': 10}, {...","[0.9247032344870547, 0.9247032344870547, 0.924..."
8,AdaBoostClassifier(),9.0,"{'learning_rate': 1.3000000000000003, 'n_estim...",0.96,110.746887,"[{'learning_rate': 0.1, 'n_estimators': 10}, {...","[0.9283281630650052, 0.9283281630650052, 0.928..."
9,AdaBoostClassifier(),10.0,"{'learning_rate': 1.3000000000000003, 'n_estim...",0.953333,108.488925,"[{'learning_rate': 0.1, 'n_estimators': 10}, {...","[0.9226830406082884, 0.9226830406082884, 0.922..."


## Gradient boost

In [None]:
# Estimator and hyper-parameter grid for gradient boosting:
# 5 ensemble sizes x 5 depths x 5 learning rates = 125 grid points.
# NOTE(review): this cell has no execution count, i.e. it was not run in the
# saved notebook; no random_state is set on the estimator either.
gbc = GradientBoostingClassifier()

gbc_grid = {"n_estimators":[10, 50, 100, 500, 1000],
            "max_depth":[1,3,5,7,9],
            "learning_rate":np.arange(0.1, 2.1, 0.4)}

In [None]:
# Run the repeated grid search for gradient boosting; the result is displayed
# and also written to Iris_GBC.csv by clasification().
clasification(gbc,gbc_grid,iris_x_scaled,iris_y)

## XGBoost

In [None]:
# def clasification(model,parameters,x,y):
#     for i in range(1,10):
#         cv_inner = KFold(n_splits=5, shuffle=True, random_state=i)
#         score = ['accuracy' ,'f1_macro']

#         grid_search = GridSearchCV(model, parameters, scoring=score, refit="accuracy", error_score=0)
#         grid_result = grid_search.fit(x,y)
#         print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

#         params = grid_result.cv_results_['params']
#         f1_score = grid_result.cv_results_['mean_test_f1_macro']

#     knc_df=pd.DataFrame({"Parameters":[],
#                          "f1_score":[]})

#     for x,y in zip(params,f1_score):
#         knc_df=knc_df.append({"Parameters":x,
#                               "f1_score":y},ignore_index = True)
        
#     return knc_df
    

In [None]:
# Second KNN estimator/grid pair, built with dict(); the search space matches
# knc_grid apart from the order of the metric list, and the estimator itself
# is created with n_jobs=-1.
# NOTE(review): no visible cell in this notebook uses KNC or KNC_grid.
KNC = KNeighborsClassifier(n_jobs=-1)

n_neighbors = range(1, 21)
weights = ['uniform', 'distance']
metric = ['euclidean', 'manhattan', 'minkowski']
KNC_grid = dict(n_neighbors=n_neighbors,weights=weights,metric=metric)



In [None]:
def clasification(model,parameters,x,y):
    """Run a single grid search and return the macro-F1 of every grid point.

    NOTE: this definition reuses the name of the ten-seed ``clasification``
    defined earlier in the notebook and replaces it for every cell executed
    afterwards; unlike that version it performs one search (seed 1), writes
    no CSV file and returns one row per parameter combination.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn style estimator to tune.
    parameters : dict
        Parameter grid handed to ``GridSearchCV``.
    x : array-like
        Feature matrix.
    y : array-like
        Target labels.

    Returns
    -------
    pandas.DataFrame
        Columns ``Parameters`` (the parameter dict of each grid point) and
        ``f1_score`` (its mean macro-F1 over the folds).
    """
    cv_inner = KFold(n_splits=5, shuffle=True, random_state=1)
    score = ['accuracy' ,'f1_macro']

    # cv_inner is now actually passed in. Previously it was built but never
    # used, so GridSearchCV silently fell back to its default unshuffled split.
    grid_search = GridSearchCV(model, parameters, cv=cv_inner, scoring=score,
                               refit="accuracy", error_score=0)
    grid_result = grid_search.fit(x,y)
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

    params = grid_result.cv_results_['params']
    # Named f1_scores so the imported sklearn.metrics.f1_score is not shadowed.
    f1_scores = grid_result.cv_results_['mean_test_f1_macro']

    # Build the frame in one step: DataFrame.append was removed in pandas 2.0,
    # and the old row-by-row loop also rebound the x / y arguments.
    return pd.DataFrame({"Parameters": list(params),
                         "f1_score": f1_scores})

In [None]:
# Uses the lda / lda_grid objects defined in the LDA section; the upper-case
# names LDA and LDA_grid are never defined in this notebook and raised
# NameError on a fresh kernel.
clasification(lda,lda_grid,iris_x_scaled,iris_y)

In [None]:
# Stand-alone single grid search for AdaBoost on the scaled Iris data
# (6 ensemble sizes x 5 learning rates), tabulating the mean macro-F1 of every
# parameter combination.
model=AdaBoostClassifier()

parameters= {"n_estimators":[10, 50, 100, 500, 1000, 5000],
             "learning_rate":np.arange(0.1, 2.1, 0.4)}

cv_inner = KFold(n_splits=5, shuffle=True, random_state=1)
score = ['accuracy' ,'f1_macro']

# cv_inner is now actually passed in. Previously it was built but never used,
# so GridSearchCV silently fell back to its default unshuffled split.
grid_search = GridSearchCV(model, parameters, cv=cv_inner, scoring=score,
                           refit="accuracy", error_score=0)
grid_result = grid_search.fit(iris_x_scaled,iris_y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

params = grid_result.cv_results_['params']
# NOTE(review): this rebinds the module-level name f1_score, which the import
# cell binds to sklearn.metrics.f1_score; the name is kept here so any later
# cell relying on it keeps working — consider renaming.
f1_score = grid_result.cv_results_['mean_test_f1_macro']

# Build the frame in one step: DataFrame.append was removed in pandas 2.0, and
# the old row-by-row loop also leaked its x / y loop variables into the
# notebook's globals.
# NOTE(review): despite its name, knc_df holds AdaBoost results here.
knc_df = pd.DataFrame({"Parameters": list(params),
                       "f1_score": f1_score})

knc_df


In [None]:
# Download the Wine dataset from OpenML and unpack it the same way as Iris.
# Note that categorical_indicator and attribute_names are rebound here and no
# longer describe the Iris dataset after this cell runs.
wine = openml.datasets.get_dataset("wine")
wine_df, wine_label, categorical_indicator, attribute_names = wine.get_data(
    target= wine.default_target_attribute, dataset_format="dataframe"
)
# Re-attach the target as a "class" column for display.
wine_df["class"]=wine_label
# Take the first 13 columns as model input.
# NOTE(review): assumes the dataset has exactly 13 feature columns ahead of
# the appended "class" column — TODO confirm against wine_df.
wine_x=wine_df.iloc[:,:13]
wine_df