<a href="https://colab.research.google.com/github/LimonHalder/Numerical-Digit-Classifcations-Using-CNN/blob/master/firstKaggleSubmission.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q kaggle

In [None]:
from google.colab import files

# Opens Colab's upload widget; pick your `kaggle.json` API token here.
# The call returns the uploaded files' contents, so the result is bound to a
# name instead of being left as the cell's last expression -- otherwise the
# API key would be rendered into the notebook's saved output.
uploaded = files.upload()
sorted(uploaded)  # display only the uploaded file names

In [None]:
! mkdir ~/.kaggle

In [None]:
# Copy the uploaded kaggle.json from the working directory into ~/.kaggle/
# (presumably the location the kaggle CLI reads its credentials from -- confirm).
! cp kaggle.json ~/.kaggle/

In [None]:
# Restrict the token file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# List datasets via the kaggle CLI -- presumably a smoke test that the client
# and the credentials installed above work; nothing below uses this listing.
! kaggle datasets list

ref                                                          title                                                size  lastUpdated          downloadCount  voteCount  usabilityRating  
-----------------------------------------------------------  --------------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
victorsoeiro/netflix-tv-shows-and-movies                     Netflix TV Shows and Movies                           2MB  2022-05-15 00:01:23           8951        277  1.0              
devansodariya/student-performance-data                       Student Performance Dataset                           7KB  2022-05-26 13:55:09           4613        152  0.9705882        
iamsouravbanerjee/software-professional-salaries-2022        Salary Dataset - 2022                               526KB  2022-06-15 17:13:05           1732         46  1.0              
paradisejoy/top-hits-spotify-from-20002019                   Top Hits Spoti

In [None]:
# Download the data archive of the `digit-recognizer` competition
# (the recorded output shows it being saved as /content/digit-recognizer.zip).
!kaggle competitions download -c digit-recognizer

Downloading digit-recognizer.zip to /content
  0% 0.00/15.3M [00:00<?, ?B/s]
100% 15.3M/15.3M [00:00<00:00, 212MB/s]


In [None]:
!unzip /content/digit-recognizer.zip

Archive:  /content/digit-recognizer.zip
  inflating: sample_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as ts
import tensorflow.keras as ks
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical

# Let pandas display up to 784 columns (784 = 28 * 28, presumably one per pixel column).
pd.options.display.max_columns = 784

In [None]:

# Target shape of a single image: 28 x 28 with one channel.
im_shape = (28, 28, 1)

# Kaggle's sample submission, indexed by ImageId; used later as the template
# the predicted labels are written into.
out_df = pd.read_csv('/content/sample_submission.csv').set_index('ImageId')

def prepare_data(df: pd.DataFrame, train = False, image_shape = None):
    """Turn a frame of flattened pixel rows into model-ready arrays.

    Args:
        df: One image per row, pixel intensities in the columns. When
            ``train`` is true it must additionally contain a ``'label'`` column.
        train: If true, the ``'label'`` column is split off and one-hot
            encoded into 10 classes; otherwise every column is treated as pixels.
        image_shape: Shape of a single image after reshaping. Defaults to the
            notebook-level ``im_shape``.

    Returns:
        A tuple ``(x, y)``: ``x`` is a float32 array of shape
        ``(rows, *image_shape)`` holding the pixel values divided by 255;
        ``y`` is the one-hot label matrix, or ``None`` when ``train`` is false.
    """
    if image_shape is None:
        image_shape = im_shape

    labels = None
    pixels = df
    if train:
        labels = to_categorical(df['label'], num_classes = 10)
        pixels = df.drop('label', axis = 1)

    # Scale intensities by 1/255 (assumes 8-bit pixel values, i.e. 0..255, so the
    # result lies in [0, 1]). Converting to float32 first avoids materialising a
    # float64 copy of the whole pixel table.
    scaled = pixels.to_numpy(dtype = 'float32') / 255.0

    # (rows, height, width, channels)
    return scaled.reshape(-1, *image_shape), labels

# Kaggle's unlabeled test images (only used for the final prediction) and the
# labeled training set.
x_test, _        = prepare_data(pd.read_csv('/content/test.csv'))
x_train, y_train = prepare_data(pd.read_csv('/content/train.csv'),  train=True)

# Hold out 20% of the labeled data; the `*_test_splitted` arrays are passed as
# validation data during training. The fixed `random_state` makes the split
# identical on every run.
x_train_splitted, x_test_splitted, y_train_splitted, y_test_splitted = train_test_split(
    x_train, y_train, test_size = 0.2, random_state = 42
)

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# augmentation setup
#
# ZCA whitening is switched off on purpose: the generator is never `fit()` on the
# training data, so requesting it only produced warnings while leaving the images
# untouched. Fitting it instead would whiten the training batches only -- the
# validation and test arrays are fed to the model without passing through this
# generator -- and would create a train/inference mismatch.
mnist_datagen = ImageDataGenerator(
    samplewise_center             = False,
    featurewise_std_normalization = False,
    samplewise_std_normalization  = False,
    zca_whitening                 = False,
    horizontal_flip               = False,  # presumably off because mirrored digits are not valid digits
    vertical_flip                 = False,

    rotation_range                = 10,     # random rotations, in degrees
    width_shift_range             = 0.1,    # random horizontal shift, as a fraction of the width
    height_shift_range            = 0.1,    # random vertical shift, as a fraction of the height
)



In [None]:
# learning options
# NOTE(review): the visible training call passes `epochs = 100` literally and does
# not read `epoch_count` -- confirm which value is intended.
epoch_count = 150
prefered_batch_size  = 256

# Keyword arguments forwarded to `Model.compile`.
model_options = {
    'optimizer': 'adam',
    'metrics':   ['accuracy'],                # Keras documents `metrics` as a list
    'loss':      'categorical_crossentropy'   # matches the one-hot encoded labels
}


def lrelu(x):
    """Apply a LeakyReLU activation with slope 0.01 to the tensor ``x``."""
    return ks.layers.LeakyReLU(alpha = 0.01)(x)

# Settings shared by every convolution layer of the model:
# a 3x3 kernel moved one pixel at a time, followed by a ReLU activation.
conv_setup = dict(
    kernel_size = 3,
    strides     = 1,
    activation  = 'relu',
)


# The network is declared as one ordered list of layers:
#   stage 1: two 64-filter convolutions  -> 2x2 max-pool -> dropout
#   stage 2: two 128-filter convolutions -> 2x2 max-pool -> dropout
#   head:    flatten -> dense(1024, swish) -> dropout -> batch-norm -> softmax over 10 classes
# Every convolution is followed by batch normalization.
lr_model = ks.Sequential([
    # ---- stage 1 (the printed summary shows 28x28 -> 26x26 -> 24x24 -> 12x12) ----
    ks.layers.Conv2D(filters = 64, input_shape = im_shape, **conv_setup),
    ks.layers.BatchNormalization(),
    ks.layers.Conv2D(filters = 64, **conv_setup),
    ks.layers.BatchNormalization(),
    ks.layers.MaxPool2D(),              # pool size is 2x2 by default
    ks.layers.Dropout(0.4),

    # ---- stage 2 ----
    ks.layers.Conv2D(filters = 128, **conv_setup),
    ks.layers.BatchNormalization(),
    ks.layers.Conv2D(filters = 128, **conv_setup),
    ks.layers.BatchNormalization(),
    ks.layers.MaxPool2D(),
    ks.layers.Dropout(0.4),

    # ---- classifier head ----
    # Flatten the feature maps into one vector per image (expected 4*4*128 = 2048
    # values, extrapolating the shape arithmetic of the printed summary).
    ks.layers.Flatten(),
    ks.layers.Dense(1024, activation = 'swish'),
    ks.layers.Dropout(0.4),
    ks.layers.BatchNormalization(),

    # ---- output layer: one probability per digit class ----
    ks.layers.Dense(10, activation = 'softmax'),
])

# Attach optimizer, loss and metrics, then print the layer table.
lr_model.compile(**model_options)
lr_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 64)        640       
                                                                 
 batch_normalization (BatchN  (None, 26, 26, 64)       256       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 64)        36928     
                                                                 
 batch_normalization_1 (Batc  (None, 24, 24, 64)       256       
 hNormalization)                                                 
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 12, 64)       0         
 )                                                               
                                                        

In [None]:
# train our model
# `Model.fit` accepts the augmenting generator directly; `fit_generator` is
# deprecated and no longer available in newer Keras releases.
# Each epoch draws `steps_per_epoch` augmented batches from the training split;
# the held-out split is passed unaugmented as validation data.
lr_model.fit(
    mnist_datagen.flow(x_train_splitted, y_train_splitted, batch_size = prefered_batch_size),
    verbose = 1,
    epochs = 100,
    steps_per_epoch = x_train_splitted.shape[0] // prefered_batch_size,
    validation_data = (x_test_splitted, y_test_splitted),
)

Epoch 1/100


  import sys


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

<keras.callbacks.History at 0x7f464b73bcd0>

In [None]:
# Run the model on the Kaggle test images; each output row holds the softmax
# scores of the 10 classes, and the index of the largest one is the predicted digit.
y_preds = lr_model.predict(x_test)
y_preds_classified = y_preds.argmax(axis = 1)

# Peek at the first ten predictions.
y_preds_classified[:10]

array([2, 0, 9, 0, 3, 7, 0, 3, 0, 3])

In [None]:
# Put the predicted digits into the submission template and save it as a CSV
# file (this only writes `outer.csv` locally; nothing is uploaded here).
out_df = out_df.assign(Label = y_preds_classified)
out_df.to_csv('outer.csv')

In [None]:
# Read the written file back to check what the submission looks like on disk.
p = pd.read_csv('outer.csv')
p

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9
