# ***DeepNumtaDCNNV1.0***

# Getting the environment ready

**Downloading and unzipping the data**

In [0]:
!wget  !wget https://bengali.ai/wp-content/uploads/datasets/NumtaDB_with_aug.zip -P drive/numta
!ls -la drive/numta
!unzip drive/numta/NumtaDB_with_aug.zip

In [0]:
# List the current working directory (presumably to check that the archive was extracted here).
!ls -la

**Installing Image Augmentation Library  '*imgaug*'**

In [0]:
# Install imgaug directly from its GitHub repository. No tag or commit is given,
# so the installed code is whatever the default branch holds at install time.
!pip install git+https://github.com/aleju/imgaug

**Importing Dependencies**

In [0]:
import glob, os
import cv2
import pickle, six
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
from imgaug import augmenters as iaa
from google.colab import files
from keras import backend as k
from keras.utils import to_categorical, layer_utils, plot_model
from keras.layers import Input, Add, Dropout, Dense, Activation, Conv2D, ZeroPadding2D, BatchNormalization, Flatten, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Model, Sequential, load_model
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import get_file
from keras.initializers import glorot_uniform
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

**Defining Data Paths**

In [0]:
RESIZE_DIM = 48 # Every image is resized to RESIZE_DIM x RESIZE_DIM (48x48) pixels
CHANNELS = 1 # Images are fed to the network with a single (grayscale) channel
data_dir=os.path.join('') # Dataset root; '' resolves to the current working directory

# glob.glob returns matches in arbitrary (filesystem-dependent) order. Sorting each
# listing pins the sample order, so the seeded train/validation shuffle further down
# selects the same images on every machine and every run.
paths_train_a=sorted(glob.glob(os.path.join(data_dir,'training-a','*.png')))
paths_train_b=sorted(glob.glob(os.path.join(data_dir,'training-b','*.png')))
paths_train_c=sorted(glob.glob(os.path.join(data_dir,'training-c','*.png')))
paths_train_d=sorted(glob.glob(os.path.join(data_dir,'training-d','*.png')))
paths_train_e=sorted(glob.glob(os.path.join(data_dir,'training-e','*.png')))
paths_train_all=paths_train_a+paths_train_b+paths_train_c+paths_train_d+paths_train_e

paths_test_a=sorted(glob.glob(os.path.join(data_dir,'testing-a','*.png')))
paths_test_b=sorted(glob.glob(os.path.join(data_dir,'testing-b','*.png')))
paths_test_c=sorted(glob.glob(os.path.join(data_dir,'testing-c','*.png')))
paths_test_d=sorted(glob.glob(os.path.join(data_dir,'testing-d','*.png')))
paths_test_e=sorted(glob.glob(os.path.join(data_dir,'testing-e','*.png')))
# testing-f mixes two extensions: all *.png files first, then all *.JPG files.
paths_test_f=sorted(glob.glob(os.path.join(data_dir,'testing-f','*.png')))+sorted(glob.glob(os.path.join(data_dir,'testing-f','*.JPG')))
paths_test_auga=sorted(glob.glob(os.path.join(data_dir,'testing-auga','*.png')))
paths_test_augc=sorted(glob.glob(os.path.join(data_dir,'testing-augc','*.png')))
# This order must match the order in which the test arrays are concatenated later,
# because the submission pairs predictions with keys by position.
paths_test_all=paths_test_a+paths_test_b+paths_test_c+paths_test_d+paths_test_e+paths_test_f+paths_test_auga+paths_test_augc

# CSV label files, one per training subset.
path_label_train_a=os.path.join(data_dir,'training-a.csv')
path_label_train_b=os.path.join(data_dir,'training-b.csv')
path_label_train_c=os.path.join(data_dir,'training-c.csv')
path_label_train_d=os.path.join(data_dir,'training-d.csv')
path_label_train_e=os.path.join(data_dir,'training-e.csv')


**Necessary functions**

In [0]:
def get_key(path):
    """Return the image key: the last ``os.sep``-separated component of ``path``."""
    # rpartition gives (head, separator, tail); the tail is the text after the
    # last separator, or the whole string when no separator occurs.
    _, _, key = path.rpartition(os.sep)
    return key


def create_submission(predictions,keys,path):
    """Write a two-column CSV (``key``, ``label``) pairing each key with its prediction.

    Args:
        predictions: predicted labels, one per key.
        keys: row identifiers, written as the ``key`` index column.
        path: destination of the CSV file.
    """
    table = pd.DataFrame(predictions, index=keys, columns=['label'])
    # Name the index so the CSV header reads "key,label".
    table.index.name = 'key'
    table.to_csv(path, index=True)
    

def get_data(paths_img,path_label=None,resize_dim=None):
    """Load images as grayscale arrays and, optionally, their one-hot labels.

    Args:
        paths_img: sequence of image file paths.
        path_label: optional path to a CSV with 'filename' and 'digit' columns;
            when given, labels are looked up by each image's file name.
        resize_dim: optional side length; when given, every image is resized
            to ``resize_dim x resize_dim``.

    Returns:
        ``X`` (array of stacked grayscale images) when ``path_label`` is None,
        otherwise ``(X, y)`` where ``y`` is the 10-class one-hot label matrix.

    Raises:
        IOError: if an image file cannot be read.
    """
    n_total=len(paths_img)
    X=[]
    for i,path in enumerate(paths_img):
        img=cv2.imread(path,cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('could not read image: {}'.format(path))
        # cv2.imread returns channels in BGR order, so BGR2GRAY is the correct
        # conversion (RGB2GRAY would swap the red and blue weights).
        img=cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
        if resize_dim is not None:
            img=cv2.resize(img,(resize_dim,resize_dim),interpolation=cv2.INTER_AREA)

        X.append(img) # 2-D grayscale image; the channel axis is added later by the caller
        # display progress on a single line; only the final update ends with a newline
        end='\n' if i==n_total-1 else '\r'
        print('processed {}/{}'.format(i+1,n_total),end=end)

    if X or resize_dim is None:
        X=np.array(X) # transform list to numpy array
    else:
        # No images matched: keep the rank consistent so the result can still be
        # concatenated with the non-empty subsets.
        X=np.empty((0,resize_dim,resize_dim),dtype=np.uint8)

    if path_label is None:
        return X

    digits=pd.read_csv(path_label).set_index('filename')['digit'] # labels indexed by file name
    y_label=[digits.loc[get_key(path)] for path in paths_img] # labels in the same order as the images
    y=to_categorical(y_label,10) # transform integer value to categorical variable
    return X, y

# Getting the data ready

**Loading *Train* Data**

In [0]:
# Load every training subset (images plus one-hot labels), resized to RESIZE_DIM.
X_train_a, y_train_a = get_data(paths_train_a, path_label=path_label_train_a, resize_dim=RESIZE_DIM)
X_train_b, y_train_b = get_data(paths_train_b, path_label=path_label_train_b, resize_dim=RESIZE_DIM)
X_train_c, y_train_c = get_data(paths_train_c, path_label=path_label_train_c, resize_dim=RESIZE_DIM)
X_train_d, y_train_d = get_data(paths_train_d, path_label=path_label_train_d, resize_dim=RESIZE_DIM)
X_train_e, y_train_e = get_data(paths_train_e, path_label=path_label_train_e, resize_dim=RESIZE_DIM)

# Stack the subsets along the sample axis, in a, b, c, d, e order for both arrays.
train_image_sets = (X_train_a, X_train_b, X_train_c, X_train_d, X_train_e)
train_label_sets = (y_train_a, y_train_b, y_train_c, y_train_d, y_train_e)
X_train_all = np.concatenate(train_image_sets, axis=0)
y_train_all = np.concatenate(train_label_sets, axis=0)

# Display: shape of one image, of the full image array and of the label matrix.
X_train_a[0].shape, X_train_all.shape, y_train_all.shape


**Loading *Test* Data**

In [0]:
# Load every test subset (images only, no labels), resized to RESIZE_DIM.
X_test_a = get_data(paths_test_a, resize_dim=RESIZE_DIM)
X_test_b = get_data(paths_test_b, resize_dim=RESIZE_DIM)
X_test_c = get_data(paths_test_c, resize_dim=RESIZE_DIM)
X_test_d = get_data(paths_test_d, resize_dim=RESIZE_DIM)
X_test_e = get_data(paths_test_e, resize_dim=RESIZE_DIM)
X_test_f = get_data(paths_test_f, resize_dim=RESIZE_DIM)
X_test_auga = get_data(paths_test_auga, resize_dim=RESIZE_DIM)
X_test_augc = get_data(paths_test_augc, resize_dim=RESIZE_DIM)

# Same subset order as paths_test_all (a, b, c, d, e, f, auga, augc).
test_image_sets = (
    X_test_a, X_test_b, X_test_c, X_test_d,
    X_test_e, X_test_f, X_test_auga, X_test_augc,
)
X_test_all = np.concatenate(test_image_sets)
X_test_all.shape

**Function for plotting images**

In [0]:
FIG_WIDTH=28 # Width of figure
HEIGHT_PER_ROW=3 # Height of each row when showing a figure which consists of multiple rows

def imshow_group(X,y=None,y_pred=None,n_per_row=10,phase='processed'):
    """Show a batch of images in a grid, annotated with labels and/or predictions.

    Args:
        X: array of images; ``X.shape[1]`` is used as the image size when
            positioning text annotations.
        y: optional one-hot labels; the argmax is shown as the title
            ('processed' phase) or as the 'true label' text ('prediction' phase).
        y_pred: per-class scores, required when ``phase == 'prediction'``.
        n_per_row: number of images per grid row.
        phase: 'processed' to title each image with its label, 'prediction'
            to print the three highest-scoring classes under each image.

    Raises:
        ValueError: if ``phase == 'prediction'`` but ``y_pred`` is None.
    """
    if phase=='prediction' and y_pred is None:
        raise ValueError("y_pred is required when phase='prediction'")

    n_sample=len(X)
    img_dim=X.shape[1]
    # Number of grid rows. np.ceil returns a float, and plt.subplot only accepts
    # integer grid dimensions, so convert explicitly.
    n_rows=int(np.ceil(n_sample/n_per_row))
    plt.figure(figsize=(FIG_WIDTH,HEIGHT_PER_ROW*n_rows))
    for i,img in enumerate(X):
        plt.subplot(n_rows,n_per_row,i+1)
        plt.imshow(img, cmap='gray')
        if phase=='processed' and y is not None:
            plt.title(np.argmax(y[i]))
        if phase=='prediction':
            top_n=3 # top 3 predictions with highest probabilities
            ind_sorted=np.argsort(y_pred[i])[::-1]
            h=img_dim+4 # y position of the first text line, just below the image
            for rank in range(top_n):
                string='pred: {} ({:.0f}%)\n'.format(ind_sorted[rank],y_pred[i,ind_sorted[rank]]*100)
                plt.text(img_dim/2, h, string, horizontalalignment='center',verticalalignment='center')
                h+=4
            if y is not None:
                plt.text(img_dim/2, -4, 'true label: {}'.format(np.argmax(y[i])),
                         horizontalalignment='center',verticalalignment='center')
        plt.axis('off')
    plt.show()


**Visualizing 50 samples from the dataset**

In [0]:
# Plot the first 50 training images, each titled with its label.
imshow_group(X=X_train_all[:50], y=y_train_all[:50])

# Data Preprocessing

In [0]:
# NOTE(review): this cell reassigns X_train_all / y_train_all by appending to them, so it is
# not idempotent -- running it a second time appends the augmented copies again.
# NOTE(review): no random seed is set in this cell, so the augmented images differ between runs.
# X_train_all = X_train_all.reshape(X_train_all.shape[0],RESIZE_DIM, RESIZE_DIM,CHANNELS)

# Source data for augmentation: only the 'a' and 'c' training subsets (images and labels).
xx = np.concatenate((X_train_a, X_train_c), axis=0)
yy = np.concatenate((y_train_a, y_train_c), axis=0)
# xx = xx.reshape(xx.shape[0], RESIZE_DIM, RESIZE_DIM, CHANNELS)  

# First pipeline: SomeOf((1, 5), ...) with random_order=True, i.e. a selection of
# between 1 and 5 of the augmenters below, applied in random order.
aug = iaa.SomeOf((1, 5), [
    iaa.GaussianBlur(sigma=(0.3, 1)),
    iaa.AddElementwise((0, 75)),
    iaa.AdditiveGaussianNoise(scale=(0, 0.07*255)),
    iaa.Multiply((0.5, 1.5)),
    iaa.Dropout(p=(0, 0.1)),
    iaa.Invert(0.5),
    iaa.ContrastNormalization((0.5, 1.5)),
    iaa.Affine(shear=(-30, 30), mode=['constant', 'edge']),
    iaa.Affine(scale=(0.7, 1.3), mode=['constant', 'edge']),
    iaa.Affine(scale={"x": (0.6, 1.3), "y": (0.6, 1.3)}, mode=['constant', 'edge']),
    iaa.Affine(translate_percent={"x": (-0.25, 0.25), "y": (-0.25, 0.25)}, mode='edge'),
    iaa.Affine(rotate=(-45, 45)),
    iaa.PiecewiseAffine(scale=(0.01, 0.05)),
    iaa.CropAndPad(
        percent=(0, 0.2),
        pad_mode=["edge"],
    ),
    # Fixed-order sub-pipeline: zoom in, coarse dropout, then zoom back out.
    iaa.Sequential([
       iaa.Affine(scale=(1.5)),
       iaa.CoarseDropout((0.08), size_percent=(0.24)),
       iaa.Affine(scale=(0.85), mode=["edge"])
    ], random_order=False),
    
    # Fixed-order sub-pipeline: shrink slightly, then rotate.
    iaa.Sequential([
       iaa.Affine(scale=(0.83)),
       iaa.Affine(rotate=(-45, 45))
    ], random_order=False)


], random_order=True)

# Second pipeline: between 1 and 2 augmenters per selection. Compared with `aug` it has
# no Invert, PiecewiseAffine or standalone rotation, and adds AverageBlur.
augg = iaa.SomeOf((1, 2), [
    iaa.GaussianBlur(sigma=(0.0, 1.35)),
    iaa.AverageBlur(3),
    iaa.AddElementwise((0, 20)),
    iaa.AdditiveGaussianNoise(scale=(0, 0.07*255)),
    iaa.Multiply((0.5, 1.5)),
    iaa.Dropout(p=(0, 0.12)),
    iaa.ContrastNormalization((0.5, 1.5)),
    iaa.Affine(shear=(-25, 25), mode=['constant', 'edge']),
    iaa.Affine(scale=(0.7, 1.3), mode=['constant', 'edge']),
    iaa.Affine(scale={"x": (0.5, 1.3), "y": (0.5, 1.3)}, mode=['constant', 'edge']),
    iaa.Affine(translate_percent={"x": (-0.20, 0.20), "y": (-0.20, 0.20)}, mode='edge'),
    iaa.CropAndPad(
        percent=((0,0.3),(0,0.3),(0,0.3),(0,0.3)),
        pad_mode=["edge"],
    )
    ,
    iaa.Sequential([
       iaa.Affine(scale=(1.5)),
       iaa.CoarseDropout((0.08), size_percent=(0.24)),
       iaa.Affine(scale=(0.85), mode=["edge"])
    ], random_order=False),
    
    iaa.Sequential([
       iaa.Affine(scale=(0.83)),
       iaa.Affine(rotate=(-60, 60))
    ], random_order=False)


], random_order=True)

# Produce one augmented copy of xx per pipeline.
new_images1 = aug.augment_images(xx)
new_images2 = augg.augment_images(xx)


# Append both augmented copies to the training set; their labels are yy, repeated once per copy.
X_train_all = np.concatenate((X_train_all,new_images1, new_images2), axis=0)
y_train_all = np.concatenate((y_train_all,yy, yy), axis=0)
print("Processing Done. matrix size: ", X_train_all.shape, y_train_all.shape)

X_train_all.shape

**Visualizing 80 augmented samples**

In [0]:
# Plot the first 80 images produced by the first augmentation pipeline, with their labels.
imshow_group(X=new_images1[:80], y=yy[:80])

**Getting everything together**

In [0]:
# Append the channel axis and convert to float32, then divide by 255.
sample_shape = (RESIZE_DIM, RESIZE_DIM, CHANNELS)
X_train_all = X_train_all.reshape((X_train_all.shape[0],) + sample_shape).astype('float32')
X_test_all = X_test_all.reshape((X_test_all.shape[0],) + sample_shape).astype('float32')
X_train_all = X_train_all / 255
X_test_all = X_test_all / 255

# Shuffle the sample indices with a fixed seed.
indices = list(range(len(X_train_all)))
np.random.seed(42)
np.random.shuffle(indices)

# The first 85% of the shuffled indices go to training, the remainder to validation.
ind = int(len(indices) * 0.85)
train_idx = indices[:ind]
val_idx = indices[ind:]

# train data
X_train = X_train_all[train_idx]
y_train = y_train_all[train_idx]
# validation data
X_val = X_train_all[val_idx]
y_val = y_train_all[val_idx]

X_train_all[0].shape, X_train_all.shape, y_train_all.shape

# CNN model

In [0]:
def customm(img_size=RESIZE_DIM, channels=1):
    """Build and compile the CNN classifier.

    The network has four convolutional stages (two 'same'-padded ReLU
    convolutions, batch normalisation, 2x2 max pooling, and dropout on all
    stages except the third), followed by dense layers of 1024 and 512 units
    and a 10-way softmax output. It is compiled with categorical
    cross-entropy loss, the 'adam' optimizer and the accuracy metric.

    Args:
        img_size: side length of the square input images.
        channels: number of input channels.

    Returns:
        The compiled Sequential model.
    """
    # One entry per convolutional stage: (filters, kernel size, dropout after pooling?)
    stages = (
        (32, (5,5), True),
        (64, (5,5), True),
        (64, (5,5), False),
        (128, (3,3), True),
    )

    net = Sequential()
    for stage_no, (filters, kernel, with_dropout) in enumerate(stages):
        # Only the first layer of the network declares the input shape.
        first_layer_kwargs = {'input_shape': (img_size, img_size, channels)} if stage_no == 0 else {}
        net.add(Conv2D(filters, kernel, activation='relu', padding='same',
                       kernel_initializer=glorot_uniform(seed=0), **first_layer_kwargs))
        net.add(Conv2D(filters, kernel, activation='relu', padding='same',
                       kernel_initializer=glorot_uniform(seed=0)))
        net.add(BatchNormalization(axis=3))
        net.add(MaxPooling2D(pool_size=(2,2)))
        if with_dropout:
            net.add(Dropout(0.2))

    # Classifier head
    net.add(Flatten())
    net.add(Dense(1024, activation='relu'))
    net.add(Dropout(0.2))
    net.add(Dense(512, activation='relu'))
    net.add(Dense(10, activation='softmax'))

    net.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    return net
    

# Training

In [0]:
# Reset Keras' global state so that re-running this cell starts from a fresh graph.
# clear_session is part of the public backend API; going through the
# backend-specific `tensorflow_backend` submodule is not needed.
k.clear_session()

model = customm()
#model.summary()
#model = load_model('demo_model.hdf5')
k.set_value(model.optimizer.lr,0.001) # set the learning rate
batch_size=64
# Keep only the best weights seen so far on disk (ModelCheckpoint's default monitor is used).
model_check_point = ModelCheckpoint(filepath='demo_model.hdf5', save_best_only=True)

# Halve the learning rate after 3 epochs without improvement of 'val_acc', down to min_lr.
# NOTE(review): the metric key ('val_acc' vs 'val_accuracy') depends on the installed
# Keras version -- confirm it matches the keys in h.history.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)
# fit the model
h=model.fit(x=X_train,y=y_train, batch_size=batch_size,
            epochs=60,
            #steps_per_epoch=X_train.shape[0] // batch_size,
            verbose=1,
            validation_data=(X_val,y_val),
            shuffle=True,
            callbacks=[learning_rate_reduction,model_check_point]
            )


# Reload the checkpointed (best) model rather than using the weights of the last epoch.
model = load_model('demo_model.hdf5')
predictions_prob=model.predict(X_test_all)
labels=np.argmax(predictions_prob,axis=1) # predicted class per test image
keys=[get_key(path) for path in paths_test_all ]
create_submission(predictions=labels,keys=keys,path='pred.csv')


In [0]:
# plot model history: training vs. validation curves, one figure per metric.
# The second curve comes from `validation_data`, not from the test set, so it is
# labelled 'validation'.
for metric, label in (('acc', 'accuracy'), ('loss', 'loss')):
    plt.plot(h.history[metric])
    plt.plot(h.history['val_' + metric])
    plt.title('model ' + label)
    plt.ylabel(label)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

In [0]:
ls -l

In [0]:
# Send the submission file written by create_submission to the browser as a download.
files.download('pred.csv') # from colab to browser download

In [0]:
# imshow_group(X_test_all[50], y_pred=predictions_prob[::50], phase='prediction')