In [1]:
# Import numpy and pandas
# Import categorical encoding, necessary layers and Model from keras

In [1]:
import numpy as np
import pandas as pd
from keras.utils import to_categorical
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, BatchNormalization, Activation, MaxPooling2D
from keras.models import Model

In [None]:
# Load datasets into pandas

In [2]:
# Read the training and test sets from CSV files in the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [3]:
# Peek at the first five rows (head() defaults to n=5).
train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# I set some parameters:
# m  = number of training examples,
# dt = total number of pixels,
# d  = number of pixels along each side of the square image (square root of dt),
# n  = number of classes

In [4]:
# m: training examples, dt: pixel columns (every column except the label),
# d: side length of the square image, n: number of distinct labels.
m = len(train)
dt = len(train.columns) - 1
d = int(np.sqrt(dt))
n = train.label.nunique()
print([m, dt, d, n])

[42000, 784, 28, 10]


In [12]:
# I check that neither the training nor the test sets have missing values

In [12]:
# Stack the pixel columns of both sets and list the columns holding at least one null.
test.columns[
    pd.concat([train.drop(columns='label'), test]).isnull().sum() > 0
]

Index([], dtype='object')

In [14]:
# I check that there is an even distribution of training examples across the different classes

In [51]:
# Number of training examples per label, in label order rather than by frequency.
train.label.value_counts(sort=False)

0    4132
1    4684
2    4177
3    4351
4    4072
5    3795
6    4137
7    4401
8    4063
9    4188
Name: label, dtype: int64

In [16]:
# Some statistics on the pixel values. 
# I compute Pixel_stats for each pixel, using all training examples.
# Then I get statistics for each Pixel_stat.
# For example, the minimum value for each pixel is always 0, but the maximum value is not always 255.

In [17]:
(
    train.drop(columns='label')
    .describe()               # summary statistics of every pixel column
    .T                        # one row per pixel, one column per statistic
    .drop(columns='count')
    .describe()               # summary of each per-pixel statistic across all pixels
    .drop(index='count')
    .add_prefix('Pixel_')
)

Unnamed: 0,Pixel_mean,Pixel_std,Pixel_min,Pixel_25%,Pixel_50%,Pixel_75%,Pixel_max
mean,33.408911,49.307334,0.0,0.0,12.241709,62.629145,217.67602
std,42.573157,44.174709,0.0,0.0,33.240031,99.211592,83.830621
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.12947,4.62649,0.0,0.0,0.0,0.0,255.0
50%,7.2315,36.771597,0.0,0.0,0.0,0.0,255.0
75%,69.034262,100.692231,0.0,0.0,0.0,153.1875,255.0
max,139.826143,113.850143,0.0,0.0,173.0,253.0,255.0


In [22]:
# I get the dataset ready for usage with keras.
# Max: maximum pixel value in the training dataset
# x: array of pixel values from training dataset, normalized by Max
# y: labels transformed into n-dimensional array
# x_test: array of pixel values from test dataset, normalized by Max

In [52]:
# Build the arrays fed to the dense models.
# Max is computed from the pixel columns only, so the label column can never
# influence the normalisation constant.
train_pixels=train.drop(columns='label')
Max=train_pixels.values.max()
# x / x_test: pixel values divided by Max; y: one-hot encoding of the labels over n classes.
x=train_pixels.values/Max
y=to_categorical(train.label,n)
x_test=test.values/Max
print([x.shape,x.max(),y.shape])
print([x_test.shape,x_test.max()])

[(42000, 784), 1.0, (42000, 10)]
[(28000, 784), 1.0]


In [53]:
# I check that the categorical encoding of y with '1' at position 'i' of the n-dim array corresponds to label 'i'

In [54]:
# Decode the one-hot rows once, then confirm that the rows decoded as class i
# carry label i and nothing else. Iterate over n classes instead of a hard-coded 10.
decoded=np.argmax(y,axis=1)
[set(train.label[decoded==i])=={i} for i in range(n)]

[True, True, True, True, True, True, True, True, True, True]

In [56]:
# I define a fully connected network.
# Each hidden layer uses ReLU as activation function, and I use dropout after each of them for regularization
# The number of neurons in each layer is given by the input array dims
# The output layer has n neurons and uses softmax activation

In [55]:
def DenseModel(dims,dropout_rate):
    """Build a fully connected softmax classifier over flattened images.

    Args:
        dims: iterable with the number of units of each hidden Dense layer, in order.
        dropout_rate: rate given to the Dropout layer that follows every hidden layer.

    Returns:
        An uncompiled keras Model with `dt` input features and `n` softmax outputs
        (`dt` and `n` are notebook-level globals).
    """
    # shape must be a tuple: `(dt)` is just the integer dt, `(dt,)` is a 1-tuple.
    X_input=Input(shape=(dt,))
    X=X_input
    for dim in dims:
        # Hidden block: ReLU Dense layer followed by dropout for regularization.
        X=Dense(dim,activation='relu')(X)
        X=Dropout(dropout_rate)(X)
    X_output=Dense(n,activation='softmax')(X)
    model=Model(inputs=X_input,outputs=X_output)
    return model

In [None]:
# The first model I create has 5 hidden layers with dimensions (392, 196, 196, 98, 98), and 35% dropout rate.
# I fit the model with 20 epochs of training, batch size of 128 examples, and 20% of the dataset left for validation

In [87]:
# Hidden widths are integer fractions of the input size dt.
dims = [dt // divisor for divisor in (2, 4, 4, 8, 8)]
model1 = DenseModel(dims, dropout_rate=0.35)
model1.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model1.summary()

Model: "functional_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_20 (Dense)             (None, 392)               307720    
_________________________________________________________________
dropout_17 (Dropout)         (None, 392)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 196)               77028     
_________________________________________________________________
dropout_18 (Dropout)         (None, 196)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 196)               38612     
_________________________________________________________________
dropout_19 (Dropout)         (None, 196)              

In [88]:
model1.fit(
    x,
    y,
    epochs=20,
    batch_size=128,
    validation_split=0.2,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x13ec65370>

In [None]:
# I set the prediction to the class with the highest probability in the output layer (softmax)

In [89]:
# One row per test image: 1-based ImageId as index, argmax of the softmax output as Label.
pred1 = pd.DataFrame(
    {
        'ImageId': range(1, len(x_test) + 1),
        'Label': model1.predict(x_test).argmax(axis=1),
    }
).set_index('ImageId')
pred1.to_csv('pred1.csv')

In [None]:
# The second model has 7 hidden layers with dimensions (784, 784, 392, 392, 196, 196, 98), and 35% dropout rate.
# I fit the model with 25 epochs of training, batch size of 128 examples, and 20% of the dataset left for validation

In [85]:
# Hidden widths are integer fractions of the input size dt (divisor 1 keeps the full width).
dims = [dt // divisor for divisor in (1, 1, 2, 2, 4, 4, 8)]
model2 = DenseModel(dims, dropout_rate=0.35)
model2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model2.summary()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_12 (Dense)             (None, 784)               615440    
_________________________________________________________________
dropout_10 (Dropout)         (None, 784)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 784)               615440    
_________________________________________________________________
dropout_11 (Dropout)         (None, 784)               0         
_________________________________________________________________
dense_14 (Dense)             (None, 392)               307720    
_________________________________________________________________
dropout_12 (Dropout)         (None, 392)              

In [86]:
model2.fit(
    x,
    y,
    epochs=25,
    batch_size=128,
    validation_split=0.2,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x147458d90>

In [93]:
# One row per test image: 1-based ImageId as index, argmax of the softmax output as Label.
pred2 = pd.DataFrame(
    {
        'ImageId': range(1, len(x_test) + 1),
        'Label': model2.predict(x_test).argmax(axis=1),
    }
).set_index('ImageId')
pred2.to_csv('pred2.csv')

In [None]:
# I check that almost 2% of the predictions differ compared with the previous model

In [100]:
# Share of test images on which model 2 and model 1 agree (True) or disagree (False).
(
    pred2.join(pred1, lsuffix='1', rsuffix='2')
    .pipe(lambda joined: joined.Label1 == joined.Label2)  # column-wise comparison
    .value_counts()
    / len(x_test)
)

True     0.980321
False    0.019679
dtype: float64

In [None]:
# Third model has 9 hidden layers, dimensions are (784, 784, 784, 392, 392, 392, 196, 196, 196), and 35% dropout rate.
# I fit the model with 30 epochs of training, batch size of 128 examples, and 20% of the dataset left for validation

In [102]:
# Hidden widths are integer fractions of the input size dt (divisor 1 keeps the full width).
dims = [dt // divisor for divisor in (1, 1, 1, 2, 2, 2, 4, 4, 4)]
model3 = DenseModel(dims, dropout_rate=0.35)
model3.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model3.summary()

Model: "functional_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_26 (Dense)             (None, 784)               615440    
_________________________________________________________________
dropout_22 (Dropout)         (None, 784)               0         
_________________________________________________________________
dense_27 (Dense)             (None, 784)               615440    
_________________________________________________________________
dropout_23 (Dropout)         (None, 784)               0         
_________________________________________________________________
dense_28 (Dense)             (None, 784)               615440    
_________________________________________________________________
dropout_24 (Dropout)         (None, 784)              

In [103]:
model3.fit(
    x,
    y,
    epochs=30,
    batch_size=128,
    validation_split=0.2,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x13f608ac0>

In [105]:
# One row per test image: 1-based ImageId as index, argmax of the softmax output as Label.
pred3 = pd.DataFrame(
    {
        'ImageId': range(1, len(x_test) + 1),
        'Label': model3.predict(x_test).argmax(axis=1),
    }
).set_index('ImageId')
pred3.to_csv('pred3.csv')

In [None]:
# I check that almost 2.5% of the predictions differ compared with both model 1 and 2.

In [106]:
# Share of test images on which model 3 and model 1 agree (True) or disagree (False).
(
    pred3.join(pred1, lsuffix='1', rsuffix='2')
    .pipe(lambda joined: joined.Label1 == joined.Label2)  # column-wise comparison
    .value_counts()
    / len(x_test)
)

True     0.975929
False    0.024071
dtype: float64

In [107]:
# Share of test images on which model 3 and model 2 agree (True) or disagree (False).
(
    pred3.join(pred2, lsuffix='1', rsuffix='2')
    .pipe(lambda joined: joined.Label1 == joined.Label2)  # column-wise comparison
    .value_counts()
    / len(x_test)
)

True     0.975036
False    0.024964
dtype: float64

In [None]:
# Now I create the datasets for convolutional neural networks
# x_conv: 28 by 28 array of pixel values from training dataset, normalized by Max
# x_conv_test: 28 by 28 array of pixel values from test dataset, normalized by Max

In [112]:
# Reshape the flat pixel rows into (samples, d, d, 1) image tensors, divided by Max.
x_conv=train.values[:,1:].reshape((m,d,d,1))/Max
x_conv_test=test.values.reshape((-1,d,d,1))/Max
# The convolutional models are fitted against y_conv. The targets do not depend on
# the input layout, so this is the same one-hot label matrix used for the dense models.
y_conv=y
print([x_conv.shape,x_conv.max()])
print([x_conv_test.shape,x_conv_test.max()])

[(42000, 28, 28, 1), 1.0]
[(28000, 28, 28, 1), 1.0]


In [113]:
# I define a convolutional neural network.
# Each hidden layer uses ReLU as activation function, and I use dropout after each of them for regularization
# The number of filters in each convolutional layer, the kernel sizes and strides are given as input arrays
# I use square filters, with the same stride on both dimensions. I always use 'same' padding
# After convolutional layers I flatten and apply a fully connected layer, with dimension given as input
# The output layer has n neurons and uses softmax activation

In [114]:
def ConvModel(dims,kernels,strides,dim_dense,dropout_rate):
    """Build a convolutional softmax classifier for d x d single-channel images.

    Position i of dims/kernels/strides describes the i-th Conv2D layer (square
    kernel, equal stride on both axes, 'same' padding, ReLU), which is followed
    by Dropout. The feature maps are then flattened and passed through one ReLU
    Dense layer and an n-way softmax output layer.

    Args:
        dims: number of filters of each convolutional layer.
        kernels: kernel side length of each convolutional layer.
        strides: stride of each convolutional layer.
        dim_dense: number of units of the hidden Dense layer.
        dropout_rate: rate of the Dropout applied after every convolution.

    Returns:
        An uncompiled keras Model (`d` and `n` are notebook-level globals).

    Raises:
        ValueError: if dims, kernels and strides do not have the same length.
    """
    # Validate up front: a ragged np.array would either fail with an obscure
    # message or, on older numpy, silently unpack the wrong values.
    if not len(dims)==len(kernels)==len(strides):
        raise ValueError('dims, kernels and strides must have the same length')
    X_input=Input(shape=(d,d,1))
    X=X_input
    # zip pairs the per-layer settings without coercing them to a common numpy dtype.
    for dim,kernel,stride in zip(dims,kernels,strides):
        X=Conv2D(dim,kernel_size=(kernel,kernel),strides=(stride,stride),padding='same',activation='relu')(X)
        X=Dropout(dropout_rate)(X)
    X=Flatten()(X)
    X=Dense(dim_dense,activation='relu')(X)
    X_output=Dense(n,activation='softmax')(X)
    model=Model(inputs=X_input,outputs=X_output)
    return model

In [None]:
# Generically I increase the number of filters as I reduce the size of the matrices. All kernels are 3x3.
# I start with 28x28x1 and go to 14x14x32 with a stride of 2.
# I then use 'same' padding and unit stride to stay with 14x14 window, but increase to 64 filters
# Finally I use stride of 2 again to go to 7x7x128.
# I use 128 neurons in the hidden dense layer.
# I fit the model with 15 epochs of training, 35% dropout, and 20% of the dataset left for validation

In [121]:
convmodel1 = ConvModel(
    dims=[32, 64, 128],
    kernels=[3, 3, 3],
    strides=[2, 1, 2],
    dim_dense=128,
    dropout_rate=0.35,
)
convmodel1.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
convmodel1.summary()

Model: "functional_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 14, 14, 32)        320       
_________________________________________________________________
dropout_43 (Dropout)         (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
dropout_44 (Dropout)         (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 7, 7, 128)         73856     
_________________________________________________________________
dropout_45 (Dropout)         (None, 7, 7, 128)       

In [122]:
convmodel1.fit(
    x_conv,
    y_conv,
    epochs=15,
    batch_size=128,
    validation_split=0.2,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x141b37fd0>

In [123]:
# One row per test image: 1-based ImageId as index, argmax of the softmax output as Label.
convpred1 = pd.DataFrame(
    {
        'ImageId': range(1, len(x_conv_test) + 1),
        'Label': convmodel1.predict(x_conv_test).argmax(axis=1),
    }
).set_index('ImageId')
convpred1.to_csv('convpred1.csv')

In [None]:
# I use more hidden layers, but still all kernels are 3x3.
# Number of filters is 32,64 and 128, twice each. 
# I halve the size of the window every two layers (to 14x14, 7x7 and 4x4)
# I use 256 neurons in the hidden dense layer.
# I fit the model with 20 epochs of training, 35% dropout, and 20% of the dataset left for validation

In [125]:
convmodel2 = ConvModel(
    dims=[32, 32, 64, 64, 128, 128],
    kernels=[3, 3, 3, 3, 3, 3],
    strides=[2, 1, 2, 1, 2, 1],
    dim_dense=256,
    dropout_rate=0.35,
)
convmodel2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
convmodel2.summary()

Model: "functional_23"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 14, 14, 32)        320       
_________________________________________________________________
dropout_52 (Dropout)         (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 14, 14, 32)        9248      
_________________________________________________________________
dropout_53 (Dropout)         (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 7, 7, 64)          18496     
_________________________________________________________________
dropout_54 (Dropout)         (None, 7, 7, 64)        

In [126]:
convmodel2.fit(
    x_conv,
    y_conv,
    epochs=20,
    batch_size=128,
    validation_split=0.2,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x141f60c40>

In [127]:
# One row per test image: 1-based ImageId as index, argmax of the softmax output as Label.
convpred2 = pd.DataFrame(
    {
        'ImageId': range(1, len(x_conv_test) + 1),
        'Label': convmodel2.predict(x_conv_test).argmax(axis=1),
    }
).set_index('ImageId')
convpred2.to_csv('convpred2.csv')

In [None]:
# I try a simpler network now, with just two convolutional layers, dimensions are 32 and 64, windows 14x14 and 7x7.
# Dense layer with 128 neurons. I use 13 epochs of training, 35% dropout rate, and 20% train/dev partitioning

In [128]:
convmodel3 = ConvModel(
    dims=[32, 64],
    kernels=[3, 3],
    strides=[2, 2],
    dim_dense=128,
    dropout_rate=0.35,
)
convmodel3.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
convmodel3.summary()

Model: "functional_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_13 (InputLayer)        [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 14, 14, 32)        320       
_________________________________________________________________
dropout_58 (Dropout)         (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 7, 7, 64)          18496     
_________________________________________________________________
dropout_59 (Dropout)         (None, 7, 7, 64)          0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dense_50 (Dense)             (None, 128)             

In [129]:
convmodel3.fit(
    x_conv,
    y_conv,
    epochs=13,
    batch_size=128,
    validation_split=0.2,
)

Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13


<tensorflow.python.keras.callbacks.History at 0x14309f4f0>

In [130]:
# One row per test image: 1-based ImageId as index, argmax of the softmax output as Label.
convpred3 = pd.DataFrame(
    {
        'ImageId': range(1, len(x_conv_test) + 1),
        'Label': convmodel3.predict(x_conv_test).argmax(axis=1),
    }
).set_index('ImageId')
convpred3.to_csv('convpred3.csv')

In [None]:
# I define a new convolutional neural network, with batch normalization layers between convolution and activation

In [131]:
def ConvModel2(dims,kernels,strides,dim_dense,dropout_rate):
    """Build a convolutional softmax classifier with batch normalization.

    Position i of dims/kernels/strides describes the i-th convolutional block:
    bias-free Conv2D (square kernel, equal stride on both axes, 'same' padding)
    -> BatchNormalization -> ReLU -> Dropout. The feature maps are then
    flattened and passed through a bias-free Dense layer with
    BatchNormalization and ReLU, followed by an n-way softmax output layer.

    Args:
        dims: number of filters of each convolutional layer.
        kernels: kernel side length of each convolutional layer.
        strides: stride of each convolutional layer.
        dim_dense: number of units of the hidden Dense layer.
        dropout_rate: rate of the Dropout that closes every convolutional block.

    Returns:
        An uncompiled keras Model (`d` and `n` are notebook-level globals).

    Raises:
        ValueError: if dims, kernels and strides do not have the same length.
    """
    # Validate up front: a ragged np.array would either fail with an obscure
    # message or, on older numpy, silently unpack the wrong values.
    if not len(dims)==len(kernels)==len(strides):
        raise ValueError('dims, kernels and strides must have the same length')
    X_input=Input(shape=(d,d,1))
    X=X_input
    # zip pairs the per-layer settings without coercing them to a common numpy dtype.
    for dim,kernel,stride in zip(dims,kernels,strides):
        # The layers are created with use_bias=False and are immediately followed by BatchNormalization.
        X=Conv2D(dim,kernel_size=(kernel,kernel),strides=(stride,stride),padding='same',use_bias=False)(X)
        X=BatchNormalization()(X)
        X=Activation('relu')(X)
        X=Dropout(dropout_rate)(X)
    X=Flatten()(X)
    X=Dense(dim_dense,use_bias=False)(X)
    X=BatchNormalization()(X)
    X=Activation('relu')(X)
    X_output=Dense(n,activation='softmax')(X)
    model=Model(inputs=X_input,outputs=X_output)
    return model

In [None]:
# Like Convolutional model 1 (4 hidden layers), but with batch normalization

In [133]:
convmodel4 = ConvModel2(
    dims=[32, 64, 128],
    kernels=[3, 3, 3],
    strides=[2, 1, 2],
    dim_dense=128,
    dropout_rate=0.35,
)
convmodel4.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
convmodel4.summary()

Model: "functional_29"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_15 (InputLayer)        [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 14, 14, 32)        288       
_________________________________________________________________
batch_normalization_4 (Batch (None, 14, 14, 32)        128       
_________________________________________________________________
activation_4 (Activation)    (None, 14, 14, 32)        0         
_________________________________________________________________
dropout_63 (Dropout)         (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_33 (Conv2D)           (None, 14, 14, 64)        18432     
_________________________________________________________________
batch_normalization_5 (Batch (None, 14, 14, 64)      

In [134]:
convmodel4.fit(
    x_conv,
    y_conv,
    epochs=15,
    batch_size=128,
    validation_split=0.2,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x108ac3640>

In [135]:
# One row per test image: 1-based ImageId as index, argmax of the softmax output as Label.
convpred4 = pd.DataFrame(
    {
        'ImageId': range(1, len(x_conv_test) + 1),
        'Label': convmodel4.predict(x_conv_test).argmax(axis=1),
    }
).set_index('ImageId')
convpred4.to_csv('convpred4.csv')

In [None]:
# Like Convolutional model 2 (7 hidden layers), but with batch normalization

In [136]:
convmodel5 = ConvModel2(
    dims=[32, 32, 64, 64, 128, 128],
    kernels=[3, 3, 3, 3, 3, 3],
    strides=[2, 1, 2, 1, 2, 1],
    dim_dense=256,
    dropout_rate=0.35,
)
convmodel5.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
convmodel5.summary()

Model: "functional_31"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_16 (InputLayer)        [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_35 (Conv2D)           (None, 14, 14, 32)        288       
_________________________________________________________________
batch_normalization_8 (Batch (None, 14, 14, 32)        128       
_________________________________________________________________
activation_8 (Activation)    (None, 14, 14, 32)        0         
_________________________________________________________________
dropout_66 (Dropout)         (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_36 (Conv2D)           (None, 14, 14, 32)        9216      
_________________________________________________________________
batch_normalization_9 (Batch (None, 14, 14, 32)      

In [137]:
convmodel5.fit(
    x_conv,
    y_conv,
    epochs=20,
    batch_size=128,
    validation_split=0.2,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1475dd1c0>

In [140]:
# One row per test image: 1-based ImageId as index, argmax of the softmax output as Label.
convpred5 = pd.DataFrame(
    {
        'ImageId': range(1, len(x_conv_test) + 1),
        'Label': convmodel5.predict(x_conv_test).argmax(axis=1),
    }
).set_index('ImageId')
convpred5.to_csv('convpred5.csv')

In [None]:
# Finally, I create yet a third type of model, where there is a pooling layer between activation and dropout.
# The idea is that I will now be able to decrease the window size with pooling instead of using wider strides.

In [138]:
def ConvModel3(dims,kernels,strides,pools,dim_dense,dropout_rate):
    """Build a convolutional softmax classifier with batch normalization and max pooling.

    Position i of dims/kernels/strides/pools describes the i-th convolutional
    block: bias-free Conv2D (square kernel, equal stride on both axes, 'same'
    padding) -> BatchNormalization -> ReLU -> MaxPooling2D (square window)
    -> Dropout. The feature maps are then flattened and passed through a
    bias-free Dense layer with BatchNormalization and ReLU, followed by an
    n-way softmax output layer.

    Args:
        dims: number of filters of each convolutional layer.
        kernels: kernel side length of each convolutional layer.
        strides: stride of each convolutional layer.
        pools: side length of the max-pooling window of each block.
        dim_dense: number of units of the hidden Dense layer.
        dropout_rate: rate of the Dropout that closes every convolutional block.

    Returns:
        An uncompiled keras Model (`d` and `n` are notebook-level globals).

    Raises:
        ValueError: if dims, kernels, strides and pools do not have the same length.
    """
    # Validate up front: a ragged np.array would either fail with an obscure
    # message or, on older numpy, silently unpack the wrong values.
    if not len(dims)==len(kernels)==len(strides)==len(pools):
        raise ValueError('dims, kernels, strides and pools must have the same length')
    X_input=Input(shape=(d,d,1))
    X=X_input
    # zip pairs the per-layer settings without coercing them to a common numpy dtype.
    for dim,kernel,stride,pool in zip(dims,kernels,strides,pools):
        # The layers are created with use_bias=False and are immediately followed by BatchNormalization.
        X=Conv2D(dim,kernel_size=(kernel,kernel),strides=(stride,stride),padding='same',use_bias=False)(X)
        X=BatchNormalization()(X)
        X=Activation('relu')(X)
        X=MaxPooling2D(pool_size=(pool,pool))(X)
        X=Dropout(dropout_rate)(X)
    X=Flatten()(X)
    X=Dense(dim_dense,use_bias=False)(X)
    X=BatchNormalization()(X)
    X=Activation('relu')(X)
    X_output=Dense(n,activation='softmax')(X)
    model=Model(inputs=X_input,outputs=X_output)
    return model

In [None]:
# Like Convolutional model 1 (4 hidden layers), but with batch normalization and pooling.
# All strides are set to 1, and I pool with 2x2 filters after each convolutional layer.

In [139]:
convmodel6 = ConvModel3(
    dims=[32, 64, 128],
    kernels=[3, 3, 3],
    strides=[1, 1, 1],
    pools=[2, 2, 2],
    dim_dense=128,
    dropout_rate=0.35,
)
convmodel6.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
convmodel6.summary()

Model: "functional_33"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_17 (InputLayer)        [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_41 (Conv2D)           (None, 28, 28, 32)        288       
_________________________________________________________________
batch_normalization_15 (Batc (None, 28, 28, 32)        128       
_________________________________________________________________
activation_15 (Activation)   (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
dropout_72 (Dropout)         (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_42 (Conv2D)           (None, 14, 14, 64)      

In [141]:
convmodel6.fit(
    x_conv,
    y_conv,
    epochs=15,
    batch_size=128,
    validation_split=0.2,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x14a2f6a60>

In [142]:
# One row per test image: 1-based ImageId as index, argmax of the softmax output as Label.
convpred6 = pd.DataFrame(
    {
        'ImageId': range(1, len(x_conv_test) + 1),
        'Label': convmodel6.predict(x_conv_test).argmax(axis=1),
    }
).set_index('ImageId')
convpred6.to_csv('convpred6.csv')