# test
## MNIST Data Mining Algorithm Comparison

In [ ]:
# Imports for CNN Algorithm
from keras import Input
from keras.models import Model
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D, BatchNormalization
from tensorflow.keras.utils import to_categorical

In [ ]:
# Imports for KNN Algorithm
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [ ]:
# Imports for Random Forest Algorithm
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [ ]:
# Imports for SVM Algorithm
from sklearn.model_selection import cross_val_score
from sklearn.svm import LinearSVC

In [ ]:
# Import for data selection
from sklearn.model_selection import train_test_split

In [ ]:
# Import for data acquisition
import pandas as pd

In [ ]:
# Import for data preprocessing
import numpy as np

In [ ]:
# Import for data visualization
import matplotlib.pyplot as plt
import time

### Data processing

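Load the MNIST CSV file, split it into training and test sets, and prepare reshaped, one-hot-encoded copies of the data for the CNN.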

In [ ]:
# Data acquisition: one row per image, a 'label' column plus the pixel columns.
dataset = pd.read_csv('/data/workspace_files/mnist_test.csv')

# Data selection: separate the target from the features, then hold out the
# last 20% of the rows (shuffle=False keeps the original row order).
Y = dataset['label']
X = dataset.drop(columns=['label'])
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, shuffle=False)

# Data preprocessing for CNN: reshape each row to a 28x28 single-channel image
# and scale the pixel values to [0, 1].
# NOTE(review): assumes 8-bit pixel intensities (0-255); the previous divisor
# of 6 did not normalize the data -- confirm against the CSV.
X_train_CNN = X_train.values.reshape(-1, 28, 28, 1) / 255.0
X_test_CNN = X_test.values.reshape(-1, 28, 28, 1) / 255.0

# One-hot encode the labels with a fixed width of 10 so that the train and
# test targets always match the 10-unit softmax output, even when a digit is
# absent from one of the splits (separate get_dummies calls would then
# produce arrays with different numbers of columns).
one_hot_rows = np.eye(10, dtype='float32')
Y_train_CNN = one_hot_rows[Y_train.to_numpy()]
Y_test_CNN = one_hot_rows[Y_test.to_numpy()]
print(X_train)


### Random Forest

In [ ]:
# Cross-validate a random forest (10 trees, depth <= 10) with 6 folds and
# record the wall-clock duration of the whole evaluation.
# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments.
time1 = time.perf_counter()
rand = RandomForestClassifier(n_estimators=10, max_depth=10)
randomForest = cross_val_score(rand, X_train, Y_train, cv=6)
time2 = time.perf_counter()

# Elapsed time in milliseconds. The parentheses matter: without them the
# multiplication binds tighter and the result is time2 - (time1 * 1000).
randomForest_time = (time2 - time1) * 1000.0
# Mean of the per-fold scores.
randomForest_acc = randomForest.mean()

### SVM

In [ ]:
# Cross-validate a linear SVM (C=0.0001) with 8 folds and record the
# wall-clock duration of the whole evaluation.
# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments.
time1 = time.perf_counter()
sv = LinearSVC(C=0.0001)
SVM = cross_val_score(sv, X_train, Y_train, cv=8)
time2 = time.perf_counter()

# Elapsed time in milliseconds. The parentheses matter: without them the
# multiplication binds tighter and the result is time2 - (time1 * 1000).
SVM_time = (time2 - time1) * 1000.0
# Mean of the per-fold scores.
SVM_acc = SVM.mean()

### KNN

In [ ]:
# Cross-validate a 5-nearest-neighbours classifier with 8 folds and record
# the wall-clock duration of the whole evaluation.
# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments.
time1 = time.perf_counter()
knn = KNeighborsClassifier(n_neighbors=5)
KNN = cross_val_score(knn, X_train, Y_train, cv=8)
time2 = time.perf_counter()

# Elapsed time in milliseconds. The parentheses matter: without them the
# multiplication binds tighter and the result is time2 - (time1 * 1000).
KNN_time = (time2 - time1) * 1000.0
# Mean of the per-fold scores.
KNN_acc = KNN.mean()

### CNN

In [ ]:
# Build the ConvNet with the Keras functional API.
# Input: 28x28 single-channel images.
x0 = Input(shape=(28, 28, 1))

# Two 5x5 convolutions (32 filters each, 'same' padding), each followed by
# batch normalization, then 2x2 max pooling, dropout and a 10-way softmax.
x1 = Conv2D(32, kernel_size=(5, 5), activation='relu', padding='same')(x0)
x2 = BatchNormalization()(x1)
x3 = Conv2D(32, kernel_size=(5, 5), activation='relu', padding='same')(x2)
x4 = BatchNormalization()(x3)
x5 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x4)
x6 = Dropout(0.25)(x5)
x7 = Flatten()(x6)
x8 = Dense(10, activation='softmax')(x7)

model = Model(inputs=x0, outputs=x8)

# Compile and fit the model, timing both steps together.
# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments.
time1 = time.perf_counter()
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(
    X_train_CNN,
    Y_train_CNN,
    epochs=12,
    batch_size=15,
    validation_data=(X_test_CNN, Y_test_CNN),
)
time2 = time.perf_counter()

# Average the training accuracy reported after each epoch.
# NOTE(review): this is training accuracy averaged over all epochs, whereas
# the other models report cross-validated scores -- consider using
# history.history['val_accuracy'] if a held-out figure is wanted.
epoch_accuracies = history.history['accuracy']
CNN_acc = sum(epoch_accuracies) / len(epoch_accuracies)

# Elapsed time in milliseconds. The parentheses matter: without them the
# multiplication binds tighter and the result is time2 - (time1 * 1000).
CNN_time = (time2 - time1) * 1000.0

### Data visualization

Compare the accuracy of the algorithms in a horizontal bar chart.

In [ ]:
# Horizontal bar chart comparing the accuracy of every model evaluated in
# this notebook (previously only KNN and SVM were shown, although the random
# forest and CNN accuracies are computed as well).
names = ['KNN', 'SVM', 'Random Forest', 'CNN']
acc = [KNN_acc, SVM_acc, randomForest_acc, CNN_acc]
plt.figure(figsize=(10, 8))
graph = plt.barh(names, acc)
plt.xlabel('Accuracy')
plt.ylabel('Models')

Visualize the results of the algorithms

In [ ]:
# Horizontal bar chart with one bar per model; bar length is the model's
# accuracy.
# NOTE(review): this cell draws the same chart as the previous one, while the
# *_time measurements are never plotted -- confirm whether a timing chart was
# intended here.
names = ['KNN', 'SVM']
acc = [KNN_acc, SVM_acc]
fig, ax = plt.subplots(figsize=(10, 8))
graph = ax.barh(names, acc)
ax.set_xlabel('Accuracy')
ax.set_ylabel('Models')