# Team Members Names:
    1. Achyuth Reddy Kanumanthu (11664532)
    2. Yuvaraj Sriramoju(11710243)

In [1]:
import numpy as np
from sklearn import datasets
import math
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score
from warnings import filterwarnings
filterwarnings("ignore")

# Load data
data = datasets.load_digits()
n_samples = len(data.images)
data_final = data.images.reshape((n_samples, -1))

# Split data into 80% train and 20% test subsets
X_train, X_test, y_train, y_test = train_test_split(data_final, data.target, test_size=0.2, shuffle=False)

results_data_frame=pd.DataFrame(columns=['Model','Default_validation_accuracy','Tuned_validation_accuracy',
                                         'Selected hyperparameters','Final test set accuracy'])

# Support Vector Machine

In [2]:
# SVM Default Parameters
model='SVM'
from sklearn.svm import SVC
svm_model=SVC()
svm_model.fit(X_train,y_train)
default_acc=np.round(accuracy_score(y_train,svm_model.predict(X_train))*100,2)

# Creating SVM With GridSearchCV

svm_model2 = SVC()

hyper_param = {
    'gamma': [10**x for x in range(-5, 6)],
    'C': [10**x for x in range(-5, 6)]
}
grid_search = GridSearchCV(svm_model2, hyper_param, cv=5, scoring='accuracy')
grid_search.fit(X_train,y_train)

dict_para=grid_search.best_params_

# Model with best parameters
model3=SVC(gamma=dict_para.get('gamma'),C=dict_para.get('C'))
model3.fit(X_train,y_train)
tuning_acc=np.round(accuracy_score(y_train,model3.predict(X_train))*100,2)
test_acc=np.round(accuracy_score(y_test,model3.predict(X_test))*100,2)
results_data_frame=results_data_frame._append({'Model':model,'Default_validation_accuracy':default_acc,
                                               'Tuned_validation_accuracy':tuning_acc,
                                               'Selected hyperparameters':grid_search.best_params_,
                                               'Final test set accuracy':test_acc},ignore_index=True)

# K-NN Model

In [3]:
from sklearn.neighbors import KNeighborsClassifier
Current_model='K-NN'
default_model=KNeighborsClassifier()
default_model.fit(X_train,y_train)
default_model_acc=np.round(accuracy_score(y_train,default_model.predict(X_train))*100,2)


# Creating KNN With GridSearchCV
knn_model2 = KNeighborsClassifier()
hyper_param = {
    'n_neighbors': [1,3,5,7,9]
}

grid_search = GridSearchCV(knn_model2, hyper_param, cv=5, scoring='accuracy')
grid_search.fit(X_train,y_train)

dict_para=grid_search.best_params_
knn_model3=KNeighborsClassifier(n_neighbors=dict_para.get('n_neighbors'))
knn_model3.fit(X_train,y_train)
tuning_acc=np.round(accuracy_score(y_train,knn_model3.predict(X_train))*100,2)
test_acc=np.round(accuracy_score(y_test,knn_model3.predict(X_test))*100,2)
results_data_frame=results_data_frame._append({'Model':Current_model,'Default_validation_accuracy':default_model_acc,
                                               'Tuned_validation_accuracy':tuning_acc,
                                               'Selected hyperparameters':grid_search.best_params_,
                                               'Final test set accuracy':test_acc},ignore_index=True)

# Decision Trees

In [4]:

from sklearn.tree import DecisionTreeClassifier
Current_model='Decision Trees'
default_model=DecisionTreeClassifier()
default_model.fit(X_train,y_train)
default_model_acc=np.round(accuracy_score(y_train,default_model.predict(X_train))*100,2)


# Creating DTC With GridSearchCV

knn_model2 = DecisionTreeClassifier()

hyper_param = {
    'min_samples_split' : [3,5,7,9]
}
grid_search = GridSearchCV(knn_model2, hyper_param, cv=5, scoring='accuracy')
grid_search.fit(X_train,y_train)

dict_para=grid_search.best_params_

dtc_model3=DecisionTreeClassifier(min_samples_split=dict_para.get('min_samples_split'))
dtc_model3.fit(X_train,y_train)

tuning_acc=np.round(accuracy_score(y_train,dtc_model3.predict(X_train))*100,2)
test_acc=np.round(accuracy_score(y_test,dtc_model3.predict(X_test))*100,2)
results_data_frame=results_data_frame._append({'Model':Current_model,'Default_validation_accuracy':default_model_acc,
                                               'Tuned_validation_accuracy':tuning_acc,
                                               'Selected hyperparameters':grid_search.best_params_,
                                               'Final test set accuracy':test_acc},ignore_index=True)

# Logistic Regression

In [5]:

from sklearn.linear_model import LogisticRegression
Current_model='Logistic Regression'
default_model=LogisticRegression()
default_model.fit(X_train,y_train)
default_model_acc=np.round(accuracy_score(y_train,default_model.predict(X_train))*100,2)
print(default_model_acc)

# Creating LR With GridSearchCV

knn_model2 = LogisticRegression()

hyper_param = {
    'C' : [10**x for x in (-5,5)]
}
grid_search = GridSearchCV(knn_model2, hyper_param, cv=5, scoring='accuracy')
grid_search.fit(X_train,y_train)

dict_para=grid_search.best_params_

lr_model3=LogisticRegression(C=dict_para.get('C'))
lr_model3.fit(X_train,y_train)
tuning_acc=np.round(accuracy_score(y_train,lr_model3.predict(X_train))*100,2)
test_acc=np.round(accuracy_score(y_test,lr_model3.predict(X_test))*100,2)
results_data_frame=results_data_frame._append({'Model':Current_model,'Default_validation_accuracy':default_model_acc,
                                               'Tuned_validation_accuracy':tuning_acc,
                                               'Selected hyperparameters':grid_search.best_params_,
                                               'Final test set accuracy':test_acc},ignore_index=True)

100.0


In [6]:
print(results_data_frame)

                 Model  Default_validation_accuracy  \
0                  SVM                        99.79   
1                 K-NN                        99.10   
2       Decision Trees                       100.00   
3  Logistic Regression                       100.00   

   Tuned_validation_accuracy   Selected hyperparameters  \
0                     100.00  {'C': 10, 'gamma': 0.001}   
1                     100.00         {'n_neighbors': 1}   
2                      99.03   {'min_samples_split': 3}   
3                     100.00              {'C': 100000}   

   Final test set accuracy  
0                    96.39  
1                    95.56  
2                    79.17  
3                    90.56  
