In [None]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

from keras.utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

# Seed NumPy and TensorFlow exactly once, after all imports.
# An earlier np.random.seed(2) call used to sit between the import groups; it
# was immediately overridden by the seed below, so it has been removed.
SEED = 36
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Read the digit-recognizer train and test CSV files into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/digit-recognizer/train.csv",
        "/kaggle/input/digit-recognizer/test.csv",
    )
)

In [None]:
# Separate the target column from the remaining feature columns.
Y_train = train["label"]
X_train = train.drop(columns=["label"])

# The combined frame is no longer needed; drop the reference to it.
del train

In [None]:
# Sanity check: summarise, per column, whether any training value is missing.
X_train.isna().any().describe()

In [None]:
# Same missing-value summary for the test frame.
test.isna().any().describe()


In [None]:
# Normalize the data: divide every value in both frames by 255.0.
X_train, test = X_train / 255.0, test / 255.0

In [None]:
# Turn each flat row into a 28 x 28 x 1 array (height, width, single channel).
image_shape = (-1, 28, 28, 1)
X_train = X_train.values.reshape(image_shape)
test = test.values.reshape(image_shape)

In [None]:
# Shape of the reshaped training array.
X_train.shape


In [None]:
# One-hot encode the labels over 10 classes (ex : 2 -> [0,0,1,0,0,0,0,0,0,0])
Y_train = to_categorical(Y_train, 10)

In [None]:
# Hold out 10% of the training data for validation; the fixed random_state
# keeps the split repeatable.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train,
    Y_train,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Show the first training image (its only channel) as a sanity check.
g = plt.imshow(X_train[0, :, :, 0], cmap='binary')

In [None]:
# Convolutional classifier for 28x28x1 images.
# Two conv -> max-pool -> dropout stages feed a 128-unit dense head.
# Sequential, Conv2D, MaxPool2D, Dropout, Flatten and Dense all come from the
# import cell at the top of the notebook (MaxPool2D is the Keras alias of
# MaxPooling2D), so no imports are repeated here.
model = Sequential([
    Conv2D(64, kernel_size=3, activation='relu', input_shape=(28,28,1)),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(32, kernel_size=3, activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),  # 10 for the number of classes
])

# Compile the model.
# Y_train / Y_val are one-hot encoded with to_categorical, so the loss must be
# 'categorical_crossentropy'. 'sparse_categorical_crossentropy' expects integer
# class ids rather than one-hot rows and does not match these targets.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Print the shape of each train/validation array, one per line.
for split in (X_train, Y_train, X_val, Y_val):
    print(np.shape(split))

In [None]:
# Train for 30 epochs in mini-batches of 28, scoring on the validation split
# after every epoch.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=30,
    batch_size=28,
)

In [None]:
# NOTE(review): X_train_full, y_train_full, X_test and y_test were never
# defined anywhere in the visible notebook, so every cell from here on raised
# NameError on a fresh kernel. The code below uses 784 (= 28*28) features per
# sample, a 6000-sample hold-out and a labelled test set, which suggests the
# Keras MNIST digits loader - TODO confirm this is the intended dataset.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.mnist.load_data()

# Peek at the first training label.
y_train_full[0]


In [None]:
# Divide both image arrays by 255.
X_train_full, X_test = X_train_full / 255, X_test / 255

In [None]:
# Flatten every sample into a 784-long feature vector.
X_train_full, X_test = (
    split.reshape(-1, 784) for split in (X_train_full, X_test)
)

In [None]:
# Hold out 6000 samples for validation. A fixed random_state keeps the split
# identical no matter which cells ran before this one (train_test_split is
# already imported in the first cell).
X_train_tr, X_train_v, y_train_tr, y_train_v = train_test_split(
    X_train_full, y_train_full, test_size=6000, random_state=42
)

In [None]:
# Fully connected classifier: two 500-unit ReLU hidden layers followed by a
# 10-way softmax output, fed with 784-feature input vectors.
# Note: this rebinds the name `model`.
model = keras.models.Sequential([
    keras.layers.Dense(500, activation="relu", input_shape=(784,),
                       name="First_Hidden_Layer"),
    keras.layers.Dense(500, activation="relu", name="Second_Hidden_Layer"),
    keras.layers.Dense(10, activation="softmax", name="Output_Layer"),
])

In [None]:
# Integer class labels -> sparse categorical cross-entropy; plain SGD optimizer.
model.compile(
    optimizer="sgd",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 30 epochs (batch size 32), validating on the held-out split.
history = model.fit(
    X_train_tr,
    y_train_tr,
    validation_data=(X_train_v, y_train_v),
    epochs=30,
    batch_size=32,
)

In [None]:
# Learning curves: one line per metric recorded in history.history, one point
# per epoch. pandas is already imported in the first cell, so the import is not
# repeated here.
ax = pd.DataFrame(history.history).plot(figsize=(10,7))
ax.set(title="Training history - model (2 hidden layers)",
       xlabel="Epoch", ylabel="Metric value");

In [None]:
# The training and validation accuracies start to diverge around epochs 5-6, but the gap does not exceed 2% even after 30 epochs, so the model does not perform significantly better on the training data than on the validation data. Thus, it is not yet overfitting.

In [None]:
# Score the trained model on the test set.
model.evaluate(x=X_test, y=y_test)


In [None]:
# The test accuracy is almost the same as the validation accuracy.

In [None]:
# Display misclassified images
predictions = np.argmax(model.predict(X_test), axis=-1)

# Positions in the test set where the predicted class differs from the label.
misclassifiedIndexes = np.flatnonzero(predictions != np.asarray(y_test)).tolist()

# Show at most five examples. Slicing (instead of indexing 0..4) keeps the cell
# from raising IndexError when the model makes fewer than five mistakes.
plt.figure(figsize=(20,4))
for position, sample_idx in enumerate(misclassifiedIndexes[:5], start=1):
  plt.subplot(1, 5, position)
  plt.imshow(X_test[sample_idx].reshape(28,28), cmap='binary')
  plt.axis('off')
  plt.title(f'Target: {y_test[sample_idx]}    Predicted: {predictions[sample_idx]}', fontsize=14)

plt.show()

In [None]:
# Improving the model

In [None]:
# Variant with a third hidden layer (300 units) added after the two 500-unit
# layers; the output layer uses a sigmoid activation instead of softmax.
# NOTE(review): sigmoid outputs are combined with a sparse categorical
# cross-entropy loss in the compile cell; softmax is the usual pairing for
# single-label classification - confirm the sigmoid is intentional.
model2 = keras.models.Sequential([
    keras.layers.Dense(500, activation="relu", input_shape=(784,),
                       name="First_Hidden_Layer"),
    keras.layers.Dense(500, activation="relu", name="Second_Hidden_Layer"),
    keras.layers.Dense(300, activation="relu", name="Third_Hidden_Layer"),
    keras.layers.Dense(10, activation="sigmoid", name="Output_Layer"),
])

In [None]:
# Same training configuration as the first dense model: SGD optimizer with
# sparse categorical cross-entropy loss, tracking accuracy.
model2.compile(
    optimizer="sgd",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train model2 for 30 epochs (batch size 32), validating on the held-out split.
history2 = model2.fit(
    X_train_tr,
    y_train_tr,
    validation_data=(X_train_v, y_train_v),
    epochs=30,
    batch_size=32,
)

In [None]:
# Learning curves for model2: one line per metric recorded in history2.history,
# one point per epoch. pandas is already imported in the first cell.
ax = pd.DataFrame(history2.history).plot(figsize=(10,7))
ax.set(title="Training history - model2 (3 hidden layers)",
       xlabel="Epoch", ylabel="Metric value");

In [None]:
# Score model2 on the test set.
model2.evaluate(x=X_test, y=y_test)

In [None]:
# Display misclassified images
predictions = np.argmax(model2.predict(X_test), axis=-1)

# Positions in the test set where the predicted class differs from the label.
misclassifiedIndexes = np.flatnonzero(predictions != np.asarray(y_test)).tolist()

# Show at most five examples. Slicing (instead of indexing 0..4) keeps the cell
# from raising IndexError when the model makes fewer than five mistakes.
plt.figure(figsize=(20,4))
for position, sample_idx in enumerate(misclassifiedIndexes[:5], start=1):
  plt.subplot(1, 5, position)
  plt.imshow(X_test[sample_idx].reshape(28,28), cmap='binary')
  plt.axis('off')
  plt.title(f'Target: {y_test[sample_idx]}    Predicted: {predictions[sample_idx]}', fontsize=14)

plt.show()

In [None]:
# Variant with four hidden layers of 400 ReLU units each and a 10-unit
# sigmoid output layer.
# NOTE(review): sigmoid outputs are combined with a sparse categorical
# cross-entropy loss in the compile cell; softmax is the usual pairing for
# single-label classification - confirm the sigmoid is intentional.
model3 = keras.models.Sequential([
    keras.layers.Dense(400, activation="relu", input_shape=(784,),
                       name="First_Hidden_Layer"),
    keras.layers.Dense(400, activation="relu", name="Second_Hidden_Layer"),
    keras.layers.Dense(400, activation="relu", name="Third_Hidden_Layer"),
    keras.layers.Dense(400, activation="relu", name="Fourth_Hidden_Layer"),
    keras.layers.Dense(10, activation="sigmoid", name="Output_Layer"),
])

In [None]:
# Same training configuration as the other dense models: SGD optimizer with
# sparse categorical cross-entropy loss, tracking accuracy.
model3.compile(
    optimizer="sgd",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train model3 for 30 epochs (batch size 32), validating on the held-out split.
history3 = model3.fit(
    X_train_tr,
    y_train_tr,
    validation_data=(X_train_v, y_train_v),
    epochs=30,
    batch_size=32,
)

In [None]:
# Learning curves for model3: one line per metric recorded in history3.history,
# one point per epoch. pandas is already imported in the first cell.
ax = pd.DataFrame(history3.history).plot(figsize=(10,7))
ax.set(title="Training history - model3 (4 hidden layers)",
       xlabel="Epoch", ylabel="Metric value");

In [None]:
# Score model3 on the test set.
model3.evaluate(x=X_test, y=y_test)

In [None]:
# Display misclassified images
predictions = np.argmax(model3.predict(X_test), axis=-1)

# Positions in the test set where the predicted class differs from the label.
misclassifiedIndexes = np.flatnonzero(predictions != np.asarray(y_test)).tolist()

# Show at most five examples. Slicing (instead of indexing 0..4) keeps the cell
# from raising IndexError when the model makes fewer than five mistakes.
plt.figure(figsize=(20,4))
for position, sample_idx in enumerate(misclassifiedIndexes[:5], start=1):
  plt.subplot(1, 5, position)
  plt.imshow(X_test[sample_idx].reshape(28,28), cmap='binary')
  plt.axis('off')
  plt.title(f'Target: {y_test[sample_idx]}    Predicted: {predictions[sample_idx]}', fontsize=14)

plt.show()