In [None]:
import keras
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
%matplotlib inline

# Import Data

In [None]:
# Read the labelled training table (cast to int32) and the unlabelled test table.
# Every non-label column is divided by 255.0; 'label' is kept aside as the target.
data = pd.read_csv('train.csv').astype('int32')
test_data = pd.read_csv('test.csv').div(255.0)
y = data['label']
X = data.drop(columns=['label']).div(255.0)
# Display the shape of the feature table as the cell output.
X.shape

In [None]:
# Hold out 20% of the labelled rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)
# Report the shapes of both partitions.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

In [None]:
# function to plot a digit given index
def plot_digit(index):
    """Print the class of one sample and draw it as a 28x28 image.

    Reads the notebook-level feature table ``X`` and target series ``y``.

    Parameters
    ----------
    index
        Index label of the row to show (matched against ``X.index``).

    Raises
    ------
    IndexError
        If no row of ``X`` carries the given index label.
    """
    matches = X[X.index == index]
    # Take the first matching row and fold its values into a 28x28 grid.
    pixels = matches.to_numpy()[0].reshape(28, 28)
    # The original printed `label[index]`, but no `label` variable exists in
    # this notebook; the target column lives in `y`.
    print(y.loc[index])
    plt.imshow(pixels)
    
# Plot confusion matrix
def plot_confusion_matrix(predicted, labels=None):
    """Draw an annotated heatmap of the confusion matrix for ``predicted``.

    Parameters
    ----------
    predicted
        Predicted class for each sample.
    labels
        True class for each sample. When omitted, the notebook-level
        ``y_test`` is looked up at call time.
    """
    if labels is None:
        # Resolve the default lazily so the function can be defined before
        # the split exists and always sees the current `y_test`.
        labels = y_test
    # Honour the `labels` argument (previously ignored in favour of `y_test`).
    matrix = tf.math.confusion_matrix(labels=labels, predictions=predicted)
    plt.figure(figsize=(11, 10))
    sns.heatmap(matrix, annot=True, fmt='d')
    plt.xlabel('Predicted')
    plt.ylabel('Truth')
    
# Finds the best K for KNN classifier
def find_best_k(k=10):
    """Try n_neighbors = 1..k and return the value with the highest accuracy.

    Each candidate is fitted on the notebook-level ``X_train``/``y_train`` and
    scored on ``X_test``/``y_test``; the accuracies are plotted against the
    candidate values. Note that the selection is made on the same held-out
    split that later cells use to report accuracy.

    Parameters
    ----------
    k : int
        Largest neighbour count to try (inclusive). Must be at least 1.

    Returns
    -------
    The neighbour count with the highest accuracy (smallest one on exact ties).

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, i.e. there is nothing to evaluate.
    """
    if k < 1:
        raise ValueError(f'k must be at least 1, got {k}')

    candidates = range(1, k + 1)
    acc_list = []
    for n in candidates:
        KNN = KNeighborsClassifier(n_neighbors=n)
        KNN.fit(X_train, y_train)
        # Keep the unrounded score: rounding before argmax can turn a real
        # difference into a tie and select the wrong k.
        acc_list.append(accuracy_score(y_test, KNN.predict(X_test)))

    # Candidates start at 1, so shift the zero-based argmax position by one.
    best = np.argmax(acc_list) + 1
    plt.figure(figsize=(10, 9))
    plt.plot(candidates, acc_list, '-og')
    plt.xlabel('n_neighbors')
    plt.ylabel('Accuracy')
    print(f'best K for KNN is: {best}')
    return best


# Prints the accuracy
def accuracy(model_name, score=None):
    """Print a one-line accuracy report for ``model_name``.

    Parameters
    ----------
    model_name
        Name to show in the message.
    score
        Accuracy as a fraction (it is multiplied by 100 for display). When
        omitted, the notebook-level variable ``acc`` is used, which keeps
        existing ``accuracy('name')`` calls working.

    Raises
    ------
    NameError
        If ``score`` is omitted and no global ``acc`` has been assigned yet.
    """
    if score is None:
        # Backward-compatible path: fall back to the global set by the caller.
        score = acc
    print(f'Accuracy of {model_name} is: {round(score*100, 3)}%')

# Simple Neural Network Solution

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Reshape

In [None]:
# Fully connected classifier built layer by layer:
# 784 inputs -> 784 -> 512 -> 256 -> 64 -> 32 (all ReLU) -> 10-way softmax.
NN = Sequential()
NN.add(Dense(784, input_shape=(784,), activation='relu'))
NN.add(Dense(512, activation='relu'))
NN.add(Dense(256, activation='relu'))
NN.add(Dense(64, activation='relu'))
NN.add(Dense(32, activation='relu'))
NN.add(Dense(10, activation='softmax'))

In [None]:
# Train the dense network on the training split, then score it on the held-out split.
NN.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
NN.fit(x=X_train, y=y_train, batch_size=60, epochs=20)
# evaluate() yields the loss followed by the compiled metrics; position 1 is the accuracy.
acc = NN.evaluate(x=X_test, y=y_test)[1]
accuracy('NN')

In [None]:
# Per-class scores for the held-out split (author's note: 98.242% accuracy on test_data).
y_pred = NN.predict(X_test)
# Pick the highest-scoring class in every row.
NN_predict = list(np.argmax(y_pred, axis=1))
plot_confusion_matrix(NN_predict)

# Convolutional Neural Network Solution

In [None]:
# Build the convolutional model with tensorflow.keras only.
# The original imported Conv2D from `keras.layers.convolutional`, a private
# module path that current Keras releases no longer provide, and mixed
# standalone `keras` objects with `tensorflow.keras` layers.
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D  # kept so the `Conv2D` name stays available

CNN = tf.keras.Sequential()

# Turn each flat 784-value row into a 28x28 single-channel image.
CNN.add(layers.Reshape((28, 28, 1), input_shape=(784,)))
# No input_shape here: only the first layer of a Sequential model needs one.
CNN.add(layers.Conv2D(32, (3, 3), activation='relu'))
CNN.add(layers.MaxPooling2D((2, 2)))
CNN.add(layers.Conv2D(64, (3, 3), activation='relu'))
CNN.add(layers.MaxPooling2D((2, 2)))
CNN.add(layers.Conv2D(64, (3, 3), activation='relu'))
# Classification head: flatten the feature maps, then dense -> 10-way softmax.
CNN.add(layers.Flatten())
CNN.add(layers.Dense(64, activation='relu'))
CNN.add(layers.Dense(10, activation='softmax'))

In [None]:
# Train the CNN on the whole labelled dataset (no rows are held out here).
CNN.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
CNN.fit(x=X, y=y, batch_size=40, epochs=15)

In [None]:
# Predict the unlabelled test table and keep the highest-scoring class per row
# (author's note: 98.964% accuracy on test_data).
CNN_predict = list(np.argmax(CNN.predict(test_data), axis=1))

# SVC Solution

In [None]:
from sklearn.svm import SVC

In [None]:
# Fit a support vector classifier, constructed with no arguments, on the training split.
# The fit call is left as the last expression so the cell displays the fitted estimator.
svm = SVC()
svm.fit(X_train, y_train)

In [None]:
# Score the SVC on the held-out split and print the result.
svm_predict = svm.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=svm_predict)
accuracy('SVC')

In [None]:
# plot_confusion_matrix(svm_predict)

# Decision Tree Classifier Solution

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Fit a decision tree on the training split.
# random_state pins the estimator's internal randomness so reruns give the same
# tree; the value matches the seed already used for train_test_split.
DTC = DecisionTreeClassifier(random_state=1)
DTC.fit(X_train, y_train)

In [None]:
# Score the decision tree on the held-out split and print the result.
DTC_predict = DTC.predict(X_test)
# accuracy_score expects (y_true, y_pred); the value is unchanged for plain
# accuracy, but the order now matches the API and the SVC cell.
acc = accuracy_score(y_test, DTC_predict)
accuracy('DTC')

In [None]:
# plot_confusion_matrix(DTC_predict)

# Random Forest Classifier Solution

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Fit a 300-tree random forest on the training split.
# random_state pins the bootstrap/feature sampling so reruns give the same
# forest; the value matches the seed already used for train_test_split.
RFC = RandomForestClassifier(n_estimators=300, random_state=1)
RFC.fit(X_train, y_train)

In [None]:
# Score the random forest on the held-out split and print the result.
RFC_predict = RFC.predict(X_test)
# accuracy_score expects (y_true, y_pred); the value is unchanged for plain
# accuracy, but the order now matches the API and the SVC cell.
acc = accuracy_score(y_test, RFC_predict)
accuracy('RFC')

In [None]:
# plot_confusion_matrix(RFC_predict)

# KNN Solution

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Search neighbour counts 1 through 5 and keep the best-scoring one.
best_k = find_best_k(k=5)
# Show the chosen value as the cell output.
best_k

In [None]:
# Fit a k-nearest-neighbours classifier on the training split, using the
# neighbour count stored in `best_k`.
# The fit call is left as the last expression so the cell displays the fitted estimator.
KNN = KNeighborsClassifier(n_neighbors=best_k)
KNN.fit(X_train, y_train)

In [None]:
# Score the KNN classifier on the held-out split and print the result.
KNN_predict = KNN.predict(X_test)
# accuracy_score expects (y_true, y_pred); the value is unchanged for plain
# accuracy, but the order now matches the API and the SVC cell.
acc = accuracy_score(y_test, KNN_predict)
accuracy('KNN')

# Bagging Classifier Solution 

In [None]:
from sklearn.ensemble import BaggingClassifier

In [None]:
# Fit a bagging ensemble of 30 base estimators on the training split.
# random_state pins the resampling so reruns give the same ensemble; the value
# matches the seed already used for train_test_split.
BG = BaggingClassifier(n_estimators=30, random_state=1)
BG.fit(X_train, y_train)

In [None]:
# Score the bagging ensemble on the held-out split and print the result.
BG_predict = BG.predict(X_test)
# accuracy_score expects (y_true, y_pred); the value is unchanged for plain
# accuracy, but the order now matches the API and the SVC cell.
acc = accuracy_score(y_test, BG_predict)
accuracy('BG')