# Import Libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


import matplotlib.pyplot as plt
%matplotlib inline


from sklearn.model_selection import train_test_split

from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.optimizers import RMSprop



# Load Data Sets

In [None]:
# Read the competition CSV files into DataFrames.
train = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")
test = pd.read_csv("/kaggle/input/digit-recognizer/test.csv")

# Training set: overall shape followed by the first rows.
print(f"Shape Train data set : {train.shape}\n")
print("Train Data: \n", train.head(), "\n")

# Test set: overall shape followed by the first rows.
print(f"Shape Test data set : {test.shape}")
print("Test Data: \n", test.head())

In [None]:
# Targets come from the "label" column; every column after the first one
# is treated as a feature.
labels = train["label"].to_numpy()
features = train.iloc[:, 1:].to_numpy()

# The test frame is converted to a NumPy array as-is.
test_feat = test.to_numpy()

In [None]:
# Shape and container type of the training arrays.
print("Shape of features: ", features.shape)
print("features Type: ", type(features))
print("Shape of labels: ", labels.shape)
print("labels Type: ", type(labels), "\n")

# Same summary for the test array.
print(f"test features :  {test_feat.shape} \nType : {type(test_feat)} \n")

# First five feature rows and their labels.
print("features values : \n", features[:5], "\n")

print("labels values : ", labels[:5])

In [None]:
# Draw the first training images on a 2 x 2 grid of axes.
fig, ax = plt.subplots(2, 2, figsize=(5, 3))

# zip stops at the shorter input, so only as many rows as there are axes
# are drawn.
for pixels, axis in zip(features, ax.flatten()):
    # Each row is a flattened image; restore the 28 x 28 layout and show it
    # in grayscale without axis ticks.
    axis.imshow(pixels.reshape(28, 28), cmap='gray')
    axis.axis("off")

## Preprocessing the data
Each row of the feature data holds the pixel values of one 28 x 28 image. According to the MNIST data description, a pixel with index x is located by decomposing x as x = i * 28 + j, where i and j are integers between 0 and 27, inclusive; the pixel then sits on row i and column j of the image.

In [None]:
# Reshape every flat pixel row into a 28 x 28 image with a trailing channel
# axis of size 1. The data has not been standardized at this point.
X = features.reshape((-1, 28, 28, 1))

# NOTE: this rebinds `test` from the DataFrame loaded earlier to the reshaped
# NumPy array.
test = test_feat.reshape((-1, 28, 28, 1))

print(f"Reshape features data : {X.shape} \nReshape test data : {test.shape}")

In [None]:
# Standardize the data: centre it on zero mean and scale it to unit variance.

def standardize(x, mean=None, std=None):
    """Return ``x`` shifted by a mean and divided by a standard deviation.

    Args:
        x: NumPy array to standardize.
        mean: Value to subtract. Defaults to the global mean of ``x``.
        std: Value to divide by. Defaults to the global standard deviation
            of ``x``.

    Returns:
        An array with the same shape as ``x``.
    """
    x_mean = np.float32(x.mean() if mean is None else mean)
    x_std = np.float32(x.std() if std is None else std)

    return (x - x_mean) / x_std


# Compute the statistics on the training images only and reuse them for the
# test images, so both sets go through exactly the same transformation that
# the model is trained on.
train_mean, train_std = X.mean(), X.std()

X_stand = standardize(X, train_mean, train_std)
test_stand = standardize(test, train_mean, train_std)

In [None]:
# Print the shapes of the standardized training and test arrays.
print(f"Standardize X shape : {X_stand.shape}")
print(f"Standardize test shape : {test_stand.shape}")

#### One-hot encoding the label data

In [None]:
# One-hot encode the integer labels: each label becomes a row of a binary
# class matrix.
y_cat = to_categorical(labels)

In [None]:
# Size of the model's input and output:
#   input_dim  - number of values in one image once it is flattened
#                (every element of a single sample, not just its height)
#   nb_classes - number of columns in the one-hot label matrix
input_dim = X_stand[0].size
nb_classes = y_cat.shape[1]

print(f"input dimension  : {input_dim}")
print(f"nb classes : {nb_classes}")

In [None]:
# Hold out 33% of the labelled images for evaluation; the fixed random_state
# makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_stand,
    y_cat,
    test_size=0.33,
    random_state=20,
)

## Build a Model 
A simple feed-forward neural network, built as a linear stack of layers with the Keras `Sequential` API.

In [None]:
# Fully connected classifier built by stacking layers:
# Flatten -> 2 x (Dense 128 -> ReLU -> Dropout 0.15) -> Dense(nb_classes) -> softmax
model = Sequential()

# The declared input shape matches the (28, 28, 1) images produced by the
# reshape step; Flatten turns each image into a single vector.
model.add(Flatten(input_shape=(28, 28, 1)))

# First hidden layer. No input size is passed here: the layer takes its input
# width from the flattened tensor that precedes it.
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.15))  # randomly zero 15% of the inputs during training

# Second hidden layer with the same activation and dropout rate.
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.15))

# Output layer: one unit per class, normalized to probabilities by softmax.
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

In [None]:
# Configure the model for training: RMSprop optimizer, categorical
# cross-entropy loss, and accuracy as the reported metric.
model.compile(
    optimizer="rmsprop",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## Training Model

In [None]:
# Train for 10 epochs in batches of 16 samples, setting aside 10% of the
# training split as validation data.
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=16,
    validation_split=0.1,
    verbose=1,
)

## Evaluate the Prediction
Evaluating the model's performance on the held-out split.

In [None]:
# Score the trained model on the held-out split (loss plus the metrics
# configured in compile).
model.evaluate(X_test,  y_test, verbose=2)

## Model Prediction

In [None]:
# Predict on the held-out split; progress output is silenced with verbose=0.
preds = model.predict(X_test, verbose=0)


# Last expression of the cell, so the notebook displays the shape.
preds.shape

In [None]:
# Inspect the held-out predictions: shape, container type, and the output
# vector for the first sample.
print(preds.shape)
# A bare expression in the middle of a cell is never displayed, so the type
# has to be printed explicitly.
print(type(preds))
preds[0]

In [None]:
# Predict on the standardized test images; progress output is silenced.
predictions = model.predict(test_stand, verbose=0)

In [None]:
# Inspect the test-set predictions: shape, container type, and the output
# vector for the first sample.
print(predictions.shape)
# A bare expression in the middle of a cell is never displayed, so the type
# has to be printed explicitly.
print(type(predictions))
predictions[0]

In [None]:
# The predicted class for each sample is the index of the largest value in its
# output row. argmax over axis 1 handles every row in one vectorized call;
# tolist() keeps predicted_labels a plain Python list.
predicted_labels = np.argmax(predictions, axis=1).tolist()
predicted_labels[0:5]

In [None]:
# Build the submission table: ImageId runs from 1 to the number of
# predictions, paired with the predicted label for each row.
submission = pd.DataFrame(
    {
        "ImageId": list(range(1, len(predicted_labels) + 1)),
        "Label": predicted_labels,
    }
)

submission.head()

In [None]:
# Write the submission table to disk with a header row and without the
# DataFrame index column.
submission.to_csv("submission.csv", index=False, header=True)