In [2]:
import pandas as pd
import numpy as np
import openml
from sklearn.preprocessing import MinMaxScaler,LabelEncoder 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.model_selection import KFold,GridSearchCV
from sklearn.metrics import f1_score, make_scorer
import time
from xgboost import XGBClassifier

# Classification for Iris Dataset

In [8]:
glass = openml.datasets.get_dataset("glass")
glass_df, glass_label, categorical_indicator, attribute_names = glass.get_data(
    target=glass.default_target_attribute, dataset_format="dataframe"
)
glass_df["class"]=glass_label
glass_x=glass_df.iloc[:,:9]
glass_df

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,class
0,1.51793,12.79,3.50,1.12,73.03,0.64,8.77,0.0,0.00,build wind float
1,1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.0,0.00,vehic wind float
2,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.0,0.00,build wind float
3,1.51299,14.40,1.74,1.54,74.55,0.00,7.59,0.0,0.00,tableware
4,1.53393,12.30,0.00,1.00,70.16,0.12,16.19,0.0,0.24,build wind non-float
...,...,...,...,...,...,...,...,...,...,...
209,1.51610,13.42,3.40,1.22,72.69,0.59,8.32,0.0,0.00,vehic wind float
210,1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0.0,0.00,build wind non-float
211,1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.0,0.14,build wind non-float
212,1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.0,0.00,build wind non-float


In [10]:
glass_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 214 entries, 0 to 213
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   RI      214 non-null    float64 
 1   Na      214 non-null    float64 
 2   Mg      214 non-null    float64 
 3   Al      214 non-null    float64 
 4   Si      214 non-null    float64 
 5   K       214 non-null    float64 
 6   Ca      214 non-null    float64 
 7   Ba      214 non-null    float64 
 8   Fe      214 non-null    float64 
 9   class   214 non-null    category
dtypes: category(1), float64(9)
memory usage: 15.7 KB


In [11]:
glass_df["class"].value_counts()

build wind non-float    76
build wind float        70
headlamps               29
vehic wind float        17
containers              13
tableware                9
vehic wind non-float     0
Name: class, dtype: int64

In [12]:
le=LabelEncoder()

glass_y=le.fit_transform(glass_label)

In [13]:
scaler = MinMaxScaler()

glass_x_scaled=scaler.fit_transform(glass_x)

In [33]:
def clasification(model,parameters,x,y):
    
    classification_df=pd.DataFrame({"Model":[],
                                    "Random State":[],
                                    "Best Parameters":[],
                                    "f1_Score":[],
                                    "Execution Time":[]})
    
    for i in range(1,11):
        start_time = time.time()
        cv_inner = KFold(n_splits=5, shuffle=True, random_state=i)
#         score = ['accuracy' ,'f1_macro']
        grid_search = GridSearchCV(model, parameters, cv=cv_inner, scoring='accuracy', n_jobs = -1)
        grid_result = grid_search.fit(x,y)
        y_predict = grid_result.predict(x)
        f1score = f1_score(y,y_predict, average="weighted")
        execution_time = time.time() - start_time
        classification_df=classification_df.append({"Model":model,
                                                    "Random State":i,
                                                    "Best Parameters":grid_result.best_params_,
                                                    "f1_Score":f1score,
                                                    "Execution Time":execution_time},ignore_index=True)
         
       
    if model == knc:
        classification_df.to_csv("Glass_KNC.csv")
    elif model == lda:
        classification_df.to_csv("Glass_LDA.csv")
    elif model == gnb:
        classification_df.to_csv("Glass_GNB.csv")
    elif model == svc:
        classification_df.to_csv("Glass_SVC.csv")
    elif model == lr:
        classification_df.to_csv("Glass_LR.csv")
    elif model == rfc:
        classification_df.to_csv("Glass_RFC.csv")
    elif model== abc:
        classification_df.to_csv("Glass_ABC.csv")
    elif model== gbc:
        classification_df.to_csv("Glass_GBC.csv")
    elif model == xgbc:
        classification_df.to_csv("Glass_XGBC.csv")


    return classification_df
    

## K-nearest neighbour classification

In [34]:
knc = KNeighborsClassifier()

knc_grid = {"n_neighbors":range(1, 21),
           "weights":['uniform', 'distance'],
           "metric":['manhattan','euclidean', 'minkowski']}

In [35]:
clasification(knc,knc_grid,glass_x_scaled,glass_y)

Unnamed: 0,Model,Random State,Best Parameters,f1_Score,Execution Time
0,KNeighborsClassifier(),1.0,"{'metric': 'manhattan', 'n_neighbors': 5, 'wei...",1.0,0.775585
1,KNeighborsClassifier(),2.0,"{'metric': 'manhattan', 'n_neighbors': 5, 'wei...",1.0,0.839038
2,KNeighborsClassifier(),3.0,"{'metric': 'manhattan', 'n_neighbors': 12, 'we...",1.0,0.745284
3,KNeighborsClassifier(),4.0,"{'metric': 'manhattan', 'n_neighbors': 1, 'wei...",1.0,0.837892
4,KNeighborsClassifier(),5.0,"{'metric': 'manhattan', 'n_neighbors': 7, 'wei...",1.0,0.733089
5,KNeighborsClassifier(),6.0,"{'metric': 'manhattan', 'n_neighbors': 4, 'wei...",1.0,0.806971
6,KNeighborsClassifier(),7.0,"{'metric': 'manhattan', 'n_neighbors': 13, 'we...",1.0,0.749825
7,KNeighborsClassifier(),8.0,"{'metric': 'manhattan', 'n_neighbors': 8, 'wei...",1.0,0.739252
8,KNeighborsClassifier(),9.0,"{'metric': 'manhattan', 'n_neighbors': 12, 'we...",1.0,0.73057
9,KNeighborsClassifier(),10.0,"{'metric': 'manhattan', 'n_neighbors': 3, 'wei...",1.0,0.768966


## Linear discriminant analysis 

In [36]:
lda = LinearDiscriminantAnalysis()

lda_grid = {"n_components": range(1,6,1),
           "solver":['lsqr','eigen','svd']}

In [37]:
clasification(lda,lda_grid,glass_x_scaled,glass_y)

Unnamed: 0,Model,Random State,Best Parameters,f1_Score,Execution Time
0,LinearDiscriminantAnalysis(),1.0,"{'n_components': 1, 'solver': 'lsqr'}",0.651844,0.104621
1,LinearDiscriminantAnalysis(),2.0,"{'n_components': 1, 'solver': 'lsqr'}",0.651844,0.105991
2,LinearDiscriminantAnalysis(),3.0,"{'n_components': 1, 'solver': 'lsqr'}",0.651844,0.086839
3,LinearDiscriminantAnalysis(),4.0,"{'n_components': 1, 'solver': 'lsqr'}",0.651844,0.093729
4,LinearDiscriminantAnalysis(),5.0,"{'n_components': 1, 'solver': 'lsqr'}",0.651844,0.078115
5,LinearDiscriminantAnalysis(),6.0,"{'n_components': 1, 'solver': 'lsqr'}",0.651844,0.093727
6,LinearDiscriminantAnalysis(),7.0,"{'n_components': 1, 'solver': 'lsqr'}",0.651844,0.109348
7,LinearDiscriminantAnalysis(),8.0,"{'n_components': 1, 'solver': 'svd'}",0.651844,0.093728
8,LinearDiscriminantAnalysis(),9.0,"{'n_components': 1, 'solver': 'svd'}",0.651844,0.10935
9,LinearDiscriminantAnalysis(),10.0,"{'n_components': 1, 'solver': 'lsqr'}",0.651844,0.093729


## Naïve Bayes classifier

In [38]:
gnb = GaussianNB()

gnb_grid={"var_smoothing":np.logspace(0,-9, num=100)}

In [39]:
clasification(gnb,gnb_grid,glass_x_scaled,glass_y)

Unnamed: 0,Model,Random State,Best Parameters,f1_Score,Execution Time
0,GaussianNB(),1.0,{'var_smoothing': 0.04328761281083057},0.471278,0.478367
1,GaussianNB(),2.0,{'var_smoothing': 0.02310129700083159},0.481624,0.421779
2,GaussianNB(),3.0,{'var_smoothing': 0.01519911082952933},0.506794,0.454656
3,GaussianNB(),4.0,{'var_smoothing': 0.01873817422860384},0.479876,0.399241
4,GaussianNB(),5.0,{'var_smoothing': 0.0657933224657568},0.440909,0.498601
5,GaussianNB(),6.0,{'var_smoothing': 0.01873817422860384},0.479876,0.419019
6,GaussianNB(),7.0,{'var_smoothing': 0.03511191734215131},0.472146,0.406153
7,GaussianNB(),8.0,{'var_smoothing': 0.1873817422860384},0.425602,0.453018
8,GaussianNB(),9.0,{'var_smoothing': 0.0657933224657568},0.440909,0.406155
9,GaussianNB(),10.0,{'var_smoothing': 0.02310129700083159},0.481624,0.467813


## Support vector machine

In [40]:
svc=SVC()

svc_grid={"C":[0.01, 0.1, 1, 10, 100, 1000],
          "kernel":["linear","poly","rbf","sigmoid"],
          "gamma":[1, 0.1, 0.01, 0.001, 0.0001]}


In [41]:
clasification(svc,svc_grid,glass_x_scaled,glass_y)

Unnamed: 0,Model,Random State,Best Parameters,f1_Score,Execution Time
0,SVC(),1.0,"{'C': 1000, 'gamma': 1, 'kernel': 'poly'}",0.934797,1.070147
1,SVC(),2.0,"{'C': 1000, 'gamma': 1, 'kernel': 'poly'}",0.934797,1.235699
2,SVC(),3.0,"{'C': 1000, 'gamma': 1, 'kernel': 'poly'}",0.934797,1.475054
3,SVC(),4.0,"{'C': 1000, 'gamma': 1, 'kernel': 'rbf'}",0.930129,1.301525
4,SVC(),5.0,"{'C': 1000, 'gamma': 1, 'kernel': 'rbf'}",0.930129,1.107042
5,SVC(),6.0,"{'C': 100, 'gamma': 1, 'kernel': 'rbf'}",0.857794,1.122
6,SVC(),7.0,"{'C': 100, 'gamma': 1, 'kernel': 'poly'}",0.873395,1.53091
7,SVC(),8.0,"{'C': 1000, 'gamma': 1, 'kernel': 'rbf'}",0.930129,1.446138
8,SVC(),9.0,"{'C': 1000, 'gamma': 1, 'kernel': 'rbf'}",0.930129,1.079115
9,SVC(),10.0,"{'C': 100, 'gamma': 1, 'kernel': 'poly'}",0.873395,1.075126


## Logistic regression

In [64]:
lr=LogisticRegression()

lr_grid = {"solver": ['newton-cg', 'liblinear','lbfgs','sag','saga'],
           "C":[1250,1000,750,500,250,100]}

In [65]:
clasification(lr,lr_grid,glass_x_scaled,glass_y)



Unnamed: 0,Model,Random State,Best Parameters,f1_Score,Execution Time
0,LogisticRegression(),1.0,"{'C': 500, 'solver': 'newton-cg'}",0.677329,1.495004
1,LogisticRegression(),2.0,"{'C': 1000, 'solver': 'sag'}",0.668833,1.325456
2,LogisticRegression(),3.0,"{'C': 750, 'solver': 'liblinear'}",0.671015,1.362359
3,LogisticRegression(),4.0,"{'C': 1250, 'solver': 'newton-cg'}",0.695762,1.341416
4,LogisticRegression(),5.0,"{'C': 250, 'solver': 'sag'}",0.678368,1.308501
5,LogisticRegression(),6.0,"{'C': 1000, 'solver': 'liblinear'}",0.674963,1.307505
6,LogisticRegression(),7.0,"{'C': 1000, 'solver': 'liblinear'}",0.674963,1.321467
7,LogisticRegression(),8.0,"{'C': 1250, 'solver': 'sag'}",0.668833,1.326456
8,LogisticRegression(),9.0,"{'C': 1250, 'solver': 'liblinear'}",0.670652,1.311497
9,LogisticRegression(),10.0,"{'C': 500, 'solver': 'newton-cg'}",0.677329,1.35438


## Random forests

In [68]:
rfc=RandomForestClassifier()

rfc_grid={'criterion' :['gini', 'entropy'],
         'max_depth' : [4,6,8],
         'n_estimators': [100,200,300,400,500],
         "n_jobs":[-1]}

In [69]:
clasification(rfc,rfc_grid,glass_x_scaled,glass_y)

Unnamed: 0,Model,Random State,Best Parameters,f1_Score,Execution Time
0,RandomForestClassifier(),1.0,"{'criterion': 'entropy', 'max_depth': 8, 'n_es...",0.995273,51.839448
1,RandomForestClassifier(),2.0,"{'criterion': 'gini', 'max_depth': 8, 'n_estim...",0.990608,42.764313
2,RandomForestClassifier(),3.0,"{'criterion': 'gini', 'max_depth': 8, 'n_estim...",0.981219,43.045947
3,RandomForestClassifier(),4.0,"{'criterion': 'gini', 'max_depth': 8, 'n_estim...",0.986001,41.7719
4,RandomForestClassifier(),5.0,"{'criterion': 'gini', 'max_depth': 8, 'n_estim...",0.981219,40.740113
5,RandomForestClassifier(),6.0,"{'criterion': 'entropy', 'max_depth': 8, 'n_es...",0.995273,41.408324
6,RandomForestClassifier(),7.0,"{'criterion': 'entropy', 'max_depth': 8, 'n_es...",0.995273,39.806015
7,RandomForestClassifier(),8.0,"{'criterion': 'gini', 'max_depth': 8, 'n_estim...",0.985795,39.412646
8,RandomForestClassifier(),9.0,"{'criterion': 'entropy', 'max_depth': 8, 'n_es...",0.995273,40.129544
9,RandomForestClassifier(),10.0,"{'criterion': 'entropy', 'max_depth': 8, 'n_es...",0.995273,40.628411


## Ada boost

In [70]:
abc=AdaBoostClassifier()

abc_grid = {"n_estimators":[10, 50, 100, 500, 1000],
           "learning_rate":np.arange(0.1, 2.1, 0.4)}

In [71]:
clasification(abc,abc_grid,glass_x_scaled,glass_y)

Unnamed: 0,Model,Random State,Best Parameters,f1_Score,Execution Time
0,AdaBoostClassifier(),1.0,"{'learning_rate': 0.5, 'n_estimators': 500}",0.687738,30.990974
1,AdaBoostClassifier(),2.0,"{'learning_rate': 0.5, 'n_estimators': 500}",0.687738,28.464216
2,AdaBoostClassifier(),3.0,"{'learning_rate': 0.5, 'n_estimators': 500}",0.687738,29.91178
3,AdaBoostClassifier(),4.0,"{'learning_rate': 0.1, 'n_estimators': 1000}",0.561609,32.222877
4,AdaBoostClassifier(),5.0,"{'learning_rate': 0.5, 'n_estimators': 1000}",0.699438,31.256459
5,AdaBoostClassifier(),6.0,"{'learning_rate': 0.5, 'n_estimators': 500}",0.687738,30.2681
6,AdaBoostClassifier(),7.0,"{'learning_rate': 0.5, 'n_estimators': 500}",0.676307,31.582589
7,AdaBoostClassifier(),8.0,"{'learning_rate': 0.5, 'n_estimators': 500}",0.687738,31.664514
8,AdaBoostClassifier(),9.0,"{'learning_rate': 0.5, 'n_estimators': 1000}",0.689312,31.783053
9,AdaBoostClassifier(),10.0,"{'learning_rate': 0.5, 'n_estimators': 100}",0.679428,28.825053


## Gradiant boost

In [72]:
gbc = GradientBoostingClassifier()

gbc_grid = {"n_estimators":[10, 50, 100, 500, 1000],
            "max_depth":[1,3,5,7,9],
            "learning_rate":np.arange(0.1, 2.1, 0.4)}

In [73]:
clasification(gbc,gbc_grid,glass_x_scaled,glass_y)

Unnamed: 0,Model,Random State,Best Parameters,f1_Score,Execution Time
0,GradientBoostingClassifier(),1.0,"{'learning_rate': 0.5, 'max_depth': 5, 'n_esti...",1.0,825.895982
1,GradientBoostingClassifier(),2.0,"{'learning_rate': 0.5, 'max_depth': 5, 'n_esti...",1.0,817.118283
2,GradientBoostingClassifier(),3.0,"{'learning_rate': 0.9, 'max_depth': 5, 'n_esti...",1.0,814.103609
3,GradientBoostingClassifier(),4.0,"{'learning_rate': 0.1, 'max_depth': 5, 'n_esti...",1.0,819.987479
4,GradientBoostingClassifier(),5.0,"{'learning_rate': 0.5, 'max_depth': 3, 'n_esti...",1.0,810.76594
5,GradientBoostingClassifier(),6.0,"{'learning_rate': 0.5, 'max_depth': 5, 'n_esti...",1.0,801.706451
6,GradientBoostingClassifier(),7.0,"{'learning_rate': 1.7000000000000002, 'max_dep...",1.0,801.23225
7,GradientBoostingClassifier(),8.0,"{'learning_rate': 0.5, 'max_depth': 5, 'n_esti...",1.0,803.975569
8,GradientBoostingClassifier(),9.0,"{'learning_rate': 0.5, 'max_depth': 3, 'n_esti...",1.0,815.647122
9,GradientBoostingClassifier(),10.0,"{'learning_rate': 0.1, 'max_depth': 3, 'n_esti...",1.0,801.289512


## XGBoost

In [74]:
xgbc = XGBClassifier()

xgbc_grid = {"max_depth":range(1,10,2),
            "min_child_weight":range(1,6,2),
            "learning_rate":np.arange(0.1, 2.1, 0.4),
            "n_estimators":[10, 50, 100, 500, 1000]}

In [75]:
clasification(xgbc,xgbc_grid,glass_x_scaled,glass_y)







































Unnamed: 0,Model,Random State,Best Parameters,f1_Score,Execution Time
0,"XGBClassifier(base_score=None, booster=None, c...",1.0,"{'learning_rate': 0.5, 'max_depth': 9, 'min_ch...",0.971694,593.037285
1,"XGBClassifier(base_score=None, booster=None, c...",2.0,"{'learning_rate': 0.1, 'max_depth': 7, 'min_ch...",0.995273,594.476117
2,"XGBClassifier(base_score=None, booster=None, c...",3.0,"{'learning_rate': 0.1, 'max_depth': 7, 'min_ch...",0.966749,588.615597
3,"XGBClassifier(base_score=None, booster=None, c...",4.0,"{'learning_rate': 0.5, 'max_depth': 7, 'min_ch...",1.0,588.406801
4,"XGBClassifier(base_score=None, booster=None, c...",5.0,"{'learning_rate': 0.5, 'max_depth': 3, 'min_ch...",1.0,589.068572
5,"XGBClassifier(base_score=None, booster=None, c...",6.0,"{'learning_rate': 0.5, 'max_depth': 7, 'min_ch...",1.0,588.569904
6,"XGBClassifier(base_score=None, booster=None, c...",7.0,"{'learning_rate': 0.1, 'max_depth': 7, 'min_ch...",0.966749,591.05104
7,"XGBClassifier(base_score=None, booster=None, c...",8.0,"{'learning_rate': 0.5, 'max_depth': 9, 'min_ch...",0.971694,589.398861
8,"XGBClassifier(base_score=None, booster=None, c...",9.0,"{'learning_rate': 0.9, 'max_depth': 9, 'min_ch...",1.0,587.507743
9,"XGBClassifier(base_score=None, booster=None, c...",10.0,"{'learning_rate': 0.9, 'max_depth': 3, 'min_ch...",0.995326,590.931424


In [None]:
# def clasification(model,parameters,x,y):
#     for i in range(1,10):
#         cv_inner = KFold(n_splits=5, shuffle=True, random_state=i)
#         score = ['accuracy' ,'f1_macro']

#         grid_search = GridSearchCV(model, parameters, scoring=score, refit="accuracy", error_score=0)
#         grid_result = grid_search.fit(x,y)
#         print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

#         params = grid_result.cv_results_['params']
#         f1_score = grid_result.cv_results_['mean_test_f1_macro']

#     knc_df=pd.DataFrame({"Parameters":[],
#                          "f1_score":[]})

#     for x,y in zip(params,f1_score):
#         knc_df=knc_df.append({"Parameters":x,
#                               "f1_score":y},ignore_index = True)
        
#     return knc_df
    

In [None]:
wine = openml.datasets.get_dataset("wine")
wine_df, wine_label, categorical_indicator, attribute_names = wine.get_data(
    target= wine.default_target_attribute, dataset_format="dataframe"
)
wine_df["class"]=wine_label
wine_x=wine_df.iloc[:,:13]
wine_df