In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.models import Sequential

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# 1. Load Data

In [None]:
# Read the competition's train and test CSV files into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/digit-recognizer/train.csv",
        "/kaggle/input/digit-recognizer/test.csv",
    )
)

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

In [None]:
# Separate the target column from the remaining feature columns.
y_train = train['label']
x_train = train.drop(columns=['label'])

In [None]:
# Report the dimensions of the features, the labels and the test frame, in that order.
for frame in (x_train, y_train, test):
    print(frame.shape)

# 2. Data Preprocessing

In [None]:
# Divide every value in both frames by 255 (presumably mapping 0-255 pixel
# intensities into the [0, 1] range -- confirm against the raw data).
# Note: each frame is reassigned from itself, so re-running this cell rescales again.
x_train, test = x_train.div(255.0), test.div(255.0)

# 3. Modeling with ANN

In [None]:
# Fully connected classifier: two ReLU hidden layers (512 and 256 units)
# followed by a 10-way softmax output. The input width is taken from x_train.
model = keras.models.Sequential()
model.add(keras.layers.Dense(units=512, activation='relu', input_shape=x_train.shape[1:]))
model.add(keras.layers.Dense(units=256, activation='relu'))
model.add(keras.layers.Dense(units=10, activation='softmax'))

In [None]:
# Optimizer for the dense model: SGD with momentum.
sgd = tf.keras.optimizers.SGD(
    learning_rate=0.01,
    momentum=0.90,
)

# Stop once 10 epochs pass without improvement and roll back to the best weights seen.
early_stopping_cb = keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
)

# The sparse loss takes integer class labels rather than one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=sgd,
    metrics=['acc'],
)

In [None]:
# Train the dense model for 5 epochs, holding out 20% of the data for validation.
model.fit(
    x_train,
    y_train,
    epochs=5,
    validation_split=0.20,
    callbacks=[early_stopping_cb],
)

**Challenge 1:** **Create the same model as above, but with Dropout and Batch Normalization. Use the same optimizer and number of epochs, then compare the performance. Check whether the variance is higher or lower this time.**

**Challenge 2:** **Use the first model, but now try at least 2 different optimizers and train each for the same 5 epochs. See whether the other optimizers learn faster or not.**

In [None]:
# Batch-normalization layer with every constructor argument spelled out.
# The layer is not assigned to a name; as the cell's last expression it is only displayed.
tf.keras.layers.BatchNormalization(
    axis=-1,
    momentum=0.99,
    epsilon=0.001,
    center=True,
    scale=True,
    beta_initializer='zeros',
    gamma_initializer='ones',
    moving_mean_initializer='zeros',
    moving_variance_initializer='ones',
    beta_regularizer=None,
    gamma_regularizer=None,
    beta_constraint=None,
    gamma_constraint=None,
)

In [None]:
# Convolutional classifier declared for 28x28 single-channel inputs.
# Every convolution / hidden dense layer is followed by batch normalization,
# and each stage ends with dropout at rate 0.25.
model2 = Sequential([
    # Stage 1: two 5x5 convolutions, then 2x2 max-pooling.
    Conv2D(filters=64, kernel_size=(5, 5), padding='Same', activation='relu', input_shape=(28, 28, 1)),
    BatchNormalization(),
    Conv2D(filters=64, kernel_size=(5, 5), padding='Same', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: two 3x3 convolutions, then 2x2 max-pooling with stride 2.
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu'),
    BatchNormalization(),
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.25),

    # Stage 3: a single 3x3 convolution without pooling.
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu'),
    BatchNormalization(),
    Dropout(0.25),

    # Classification head: flatten, one hidden dense layer, 10-way softmax.
    Flatten(),
    Dense(256, activation="relu"),
    BatchNormalization(),
    Dropout(0.25),
    Dense(10, activation="softmax"),
])

In [None]:
# Configure model2 for training: Adam optimizer (selected by name),
# sparse categorical cross-entropy loss, accuracy as the reported metric.
model2.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

In [None]:
# model2's first layer declares input_shape=(28, 28, 1), while x_train holds one flat
# row of pixel values per image, so the rows are reshaped into image tensors first.
x_train_img = np.asarray(x_train, dtype="float32").reshape(-1, 28, 28, 1)

# x_test / y_test and tensorboard_callback are not defined at this point in the
# notebook. Validate on a 20% hold-out of the training data instead, and stop early
# once validation stops improving (same settings as the dense model's training runs).
Model_hits = model2.fit(
    x_train_img,
    y_train,
    epochs=50,
    validation_split=0.20,
    callbacks=[keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)],
)

In [None]:
# Compiling a model again does not reset its weights, so without this step the run
# below would continue from the previous training instead of starting from scratch.
# clone_model rebuilds the same architecture with freshly initialised weights, which
# makes the optimizer comparison fair.
model = keras.models.clone_model(model)

RMSprop = tf.keras.optimizers.RMSprop(
    learning_rate=0.01,
    momentum=0.0,
    rho=0.9,
    epsilon=1e-07,
    centered=False,
    name="RMSprop",
)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

model.compile(optimizer=RMSprop, loss='sparse_categorical_crossentropy', metrics=['acc'])

In [None]:
# Train with the RMSprop-compiled model for 5 epochs, validating on a 20% hold-out.
model.fit(
    x_train,
    y_train,
    epochs=5,
    validation_split=0.20,
    callbacks=[early_stopping_cb],
)

In [None]:
# Compiling a model again does not reset its weights, so without this step the run
# below would continue from the previous training instead of starting from scratch.
# clone_model rebuilds the same architecture with freshly initialised weights, which
# makes the optimizer comparison fair.
model = keras.models.clone_model(model)

Adam = tf.keras.optimizers.Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=False,
    name="Adam",
)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

model.compile(optimizer=Adam, loss='sparse_categorical_crossentropy', metrics=['acc'])

In [None]:
# Train with the Adam-compiled model for 5 epochs, validating on a 20% hold-out.
model.fit(
    x_train,
    y_train,
    epochs=5,
    validation_split=0.20,
    callbacks=[early_stopping_cb],
)

# 5. Evaluation and Error Analysis

**Challenge 3:** **Randomly select 1000 images from your training data. Can you do stratified sampling? See the `stratify` parameter of the `train_test_split` function.**

**Challenge 4:** **Now select the best model from above and evaluate its predictions on these 1000 samples with a confusion matrix.**

**Challenge 5:** **Can you identify the images that your model got wrong? Plot those images, and write the actual class and the predicted class in the title.**

In [None]:
from sklearn.model_selection import train_test_split

# Stratified 80/20 split: `stratify` keeps the label proportions the same in both parts.
# The `sklearn.cross_validation` module was removed from scikit-learn;
# `train_test_split` lives in `sklearn.model_selection`.
# A fixed seed makes the split reproducible.
# Note: x_train / y_train are overwritten, so re-running this cell splits the remainder again.
x_train, x_test, y_train, y_test = train_test_split(
    x_train,
    y_train,
    train_size=.8,
    stratify=y_train,
    random_state=42,
)

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Challenge 3: draw a stratified sample of 1000 images from the training data.
# An integer test_size is an absolute number of samples.
_x_rest, x_sample, _y_rest, y_sample = train_test_split(
    x_train,
    y_train,
    test_size=1000,
    stratify=y_train,
    random_state=42,
)

# Challenge 4: `model` is the dense network, which takes the flat pixel rows directly
# (swap in another trained model here if it scored better). Its softmax output holds
# one probability per class, so the predicted class is the index of the largest one.
y_sample_pred = np.argmax(model.predict(np.asarray(x_sample, dtype="float32")), axis=1)

# Rows are the actual classes, columns the predicted ones.
confusion_matrix(y_sample, y_sample_pred)

In [None]:
import matplotlib.pyplot as plt

# Challenge 5: find the images the model gets wrong and show each one with its
# actual and predicted class in the title.
pixels = np.asarray(x_train, dtype="float32")
actual = np.asarray(y_train)
predicted = np.argmax(model.predict(pixels), axis=1)

# Cap the number of plotted images so the figure stays readable.
wrong_idx = np.flatnonzero(predicted != actual)[:10]

if wrong_idx.size == 0:
    print("No misclassified images found.")
else:
    fig, axes = plt.subplots(
        1, wrong_idx.size, figsize=(2 * wrong_idx.size, 2.5), squeeze=False
    )
    for ax, idx in zip(axes[0], wrong_idx):
        # Rows are assumed to be flattened 28x28 images (the shape model2 is declared with).
        ax.imshow(pixels[idx].reshape(28, 28), cmap='gray')
        ax.set_title(f"Actual: {actual[idx]}\nPredicted: {predicted[idx]}")
        ax.axis('off')
    fig.suptitle('Actual and Prediction Difference')
    plt.show()