In [37]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, StandardScaler

# Load dataset
data = pd.read_csv('EEG_Eye_State.csv')
# df1 = data[data.Class==1].sample(100)
# df0 = data[data.Class==0].sample(100)

# # Preprocess dataset
# data.dropna(inplace=True)

X = data.iloc[:,:-1].values
y = data.iloc[:,-1].values


# Define Up or Down Sampling
# from imblearn.over_sampling import SMOTE
# su = SMOTE(random_state=42)
# X, y= su.fit_resample(X, y)

scaler = RobustScaler()
# X= scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [38]:
from sklearn.metrics import classification_report
# from sklearn.neighbors import KNeighborsClassifier
# model = KNeighborsClassifier()
# y_pred= model.fit(X_train, y_train).predict(X_test)
# print(classification_report(y_test, y_pred, digits=4))

In [39]:
from statistics import LinearRegression
from logitboost import LogitBoost
from sklearn.tree import DecisionTreeRegressor
model1 = LogitBoost()
y_pred1= model1.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, y_pred1, digits=4))



              precision    recall  f1-score   support

           0     0.7220    0.6686    0.6943       676
           1     0.7431    0.7883    0.7651       822

    accuracy                         0.7343      1498
   macro avg     0.7326    0.7285    0.7297      1498
weighted avg     0.7336    0.7343    0.7331      1498



In [46]:
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB  
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from logitboost import LogitBoost

In [47]:
from scipy.stats import uniform as sp_randFloat
from scipy.stats import randint as sp_randInt

# Define parameters for optimization using dictionaries {parameter name: parameter list}
LR_params =  {'C':[0.001, 0.1, 1, 5, 10]}
KNN_params = {'n_neighbors':[1, 5, 10, 20, 50]}
SVM_params = {'C':[0.001, 0.1, 1, 10, 100], 'kernel':['rbf' ,'linear', 'poly']} # 'sigmoid'
NB_params = {'var_smoothing': np.logspace(0,-9, num=100)}
DTC_params = {'criterion':['entropy', 'gini'], 'max_depth':[10, 50, 100]}
GBT_params = {  'learning_rate': sp_randFloat(), # sp_randFloat(),
				'subsample'    : sp_randFloat(), # sp_randFloat(),
				'n_estimators' : sp_randInt(100, 1000), # sp_randInt(100, 1000),
				'max_depth'    : sp_randInt(4, 10)} # sp_randInt(4, 10)}
LB_params=  {'n_estimators':[10, 50, 100], 
             'base_estimator': [DecisionTreeRegressor(max_depth=1), LinearRegression()]}
RF_params =  {'n_estimators':[10,50,100], 'criterion':['entropy', 'gini']} 
LDA_params = {'n_components':[None, 1], 'solver': ['svd', 'lsqr', 'eigen']}

# Append list of models with parameter dictionaries
models_opt = []
models_opt.append(('LR',  LogisticRegression(), LR_params))
models_opt.append(('KNN', KNeighborsClassifier(),KNN_params))
# models_opt.append(('SVM', SVC(), SVM_params))
models_opt.append(('NB', GaussianNB(), NB_params))
models_opt.append(('DTC', DecisionTreeClassifier(), DTC_params))
models_opt.append(('GBT', GradientBoostingClassifier(), GBT_params))
models_opt.append(('LB', LogitBoost(), LB_params))
models_opt.append(('RFC', RandomForestClassifier(), RF_params))
models_opt.append(('LDA', LinearDiscriminantAnalysis(), LDA_params))

In [48]:
# Make predictions on the train and test data
from sklearn.metrics import accuracy_score

for name, model, _ in models_opt:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate the accuracy of the model
    accuracy = accuracy_score(y_test, y_pred)

    print(f"{name}_train_Accuracy:", model.score(X_train, y_train))
    print(f"{name}_test_Accuracy:", accuracy)
    print()

LR_train_Accuracy: 0.640231419670672
LR_test_Accuracy: 0.6381842456608812

KNN_train_Accuracy: 0.9705384957721407
KNN_test_Accuracy: 0.9465954606141522

NB_train_Accuracy: 0.5546061415220294
NB_test_Accuracy: 0.5409879839786382

DTC_train_Accuracy: 1.0
DTC_test_Accuracy: 0.8347129506008011

LB_train_Accuracy: 0.7610146862483311
LB_test_Accuracy: 0.7471295060080106

RFC_train_Accuracy: 1.0
RFC_test_Accuracy: 0.92630173564753

LDA_train_Accuracy: 0.6410324877614597
LDA_test_Accuracy: 0.636849132176235



In [49]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_val_score #, KFold, LeaveOneOut, StratifiedKFold, cross_val_score

def estimator_function(X_train, y_train, parameter_dictionary, scoring = 'accuracy'):
	for name, model, params in parameter_dictionary:
		# CV = KFold(n_splits=5, random_state=2, shuffle=True)
		# CV = StratifiedKFold(n_splits=5,  shuffle=True)
		# CV = LeaveOneOut()
		
		# model_grid = GridSearchCV(model, params)
		model_grid = RandomizedSearchCV(model, params, n_iter=5)
		
		cv_results = cross_val_score(model, X_train, y_train, cv = 4, scoring=scoring)

		model_grid.fit(X_train, y_train)
		best_params = model_grid.best_params_

		mean_acc= cv_results.mean()
		sd_acc = cv_results.std()

		model_cv_res_df = pd.DataFrame(model_grid.cv_results_)
		model_cv_res_df.to_csv(f"{name}_cv_results.csv")
		
		print(f"\nCross Validation {name}->\n Mean Accuracy:{mean_acc}\n Std. Accuracy:{sd_acc}\n Best Parameters:{best_params}")

In [50]:
# Printing Accuracy
estimator_function(X_train, y_train, models_opt, scoring = 'accuracy')


Cross Validation LR->
 Mean Accuracy:0.6395188276084502
 Std. Accuracy:0.0061175681990744135
 Best Parameters:{'C': 0.1}

Cross Validation KNN->
 Mean Accuracy:0.9330666259956043
 Std. Accuracy:0.0032174827887807476
 Best Parameters:{'n_neighbors': 1}

Cross Validation NB->
 Mean Accuracy:0.533691214594116
 Std. Accuracy:0.037656232594995294
 Best Parameters:{'var_smoothing': 1.2328467394420658e-05}

Cross Validation DTC->
 Mean Accuracy:0.8283941383465236
 Std. Accuracy:0.0068164682877037905
 Best Parameters:{'max_depth': 50, 'criterion': 'entropy'}

Cross Validation LB->
 Mean Accuracy:0.7429470951631864
 Std. Accuracy:0.008542592197517795
 Best Parameters:{'n_estimators': 100, 'base_estimator': DecisionTreeRegressor(max_depth=1)}

Cross Validation RFC->
 Mean Accuracy:0.9182022274759903
 Std. Accuracy:0.004802146967654383
 Best Parameters:{'n_estimators': 100, 'criterion': 'gini'}

Cross Validation LDA->
 Mean Accuracy:0.6391627339473548
 Std. Accuracy:0.0051549146810212715
 Best P