In [49]:
import numpy as np # linear algebra
import pandas as pd # data processing, csv file I/O
import math

from keras import *
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Lambda, Flatten, Reshape
from keras.layers.convolutional import Conv2D, ZeroPadding2D
from keras.layers.pooling import MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import matplotlib
import seaborn as sns
%matplotlib inline

In [2]:
Input_file = 'G:/fashion-mnist_train.csv'
df_train = pd.read_csv(Input_file)

In [3]:
print('Training data:', df_train.info())
print(df_train)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Columns: 785 entries, label to pixel784
dtypes: int64(785)
memory usage: 359.3 MB
Training data: None
       label  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \
0          2       0       0       0       0       0       0       0       0   
1          9       0       0       0       0       0       0       0       0   
2          6       0       0       0       0       0       0       0       5   
3          0       0       0       0       1       2       0       0       0   
4          3       0       0       0       0       0       0       0       0   
5          4       0       0       0       5       4       5       5       3   
6          4       0       0       0       0       0       0       0       0   
7          5       0       0       0       0       0       0       0       0   
8          4       0       0       0       0       0       0       3       2   
9          8       0  

In [7]:
target = df_train['label'] # Separate the target/output values of 'lable/classes' 
y = to_categorical(target) # Storing in y as output vector with index of image and what class it belong to
y.shape # 60000 records with its categorical values
print(y)

[[ 0.  0.  1. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  1.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  1.  0.]
 [ 0.  0.  0. ...,  0.  1.  0.]
 [ 0.  0.  0. ...,  1.  0.  0.]]


In [10]:
features = df_train.iloc[:,1:] # seperating the Input values of image with 784 features and leaving lables, now features contain exact total input data
print(features)
features.shape

       pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  pixel9  \
0           0       0       0       0       0       0       0       0       0   
1           0       0       0       0       0       0       0       0       0   
2           0       0       0       0       0       0       0       5       0   
3           0       0       0       1       2       0       0       0       0   
4           0       0       0       0       0       0       0       0       0   
5           0       0       0       5       4       5       5       3       5   
6           0       0       0       0       0       0       0       0       0   
7           0       0       0       0       0       0       0       0       0   
8           0       0       0       0       0       0       3       2       0   
9           0       0       0       0       0       0       0       0       0   
10          0       0       0       0       1       0       0       0       0   
11          0       0       

(60000, 784)

In [11]:
X_train, X_test, y_train,y_test = train_test_split(features,y, test_size=0.2, random_state=1)#splitting t data in 2 sets, features has all 60000 records splitting in 20% test and 80 % training
#X_train contain 48000 rows and 784 features

In [12]:
X_test, X_val, y_test, y_val = train_test_split(X_train,y_train, test_size=0.2, random_state=1)#splitting the data in 2 sets

In [15]:
X_val.shape

(9600, 784)

In [16]:
y_train.shape
print(y_train)

[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  1. ...,  0.  0.  0.]
 [ 1.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  1.  0.  0.]
 [ 0.  0.  0. ...,  0.  1.  0.]]


In [17]:
X_val.shape

(9600, 784)

In [18]:
n_features = len(X_train.values[0])
n_pixels =int (math.sqrt(n_features))
n_classes = y_train.shape[1]
print('We\'ve got {} feature rows and {} labels'.format(len(X_train), len(y_train)))
print('Each row has {} features'.format(len(X_train.values[0])))
print('and we have {} classes'.format(n_classes))
assert(len(y_train) == len(X_train))
assert(len(X_train == len(X_test)))
#assert(n_features == n_pixels**2)
print('Input images have {0} x {0} px shape'.format(n_pixels))
print('So far, so good')

We've got 48000 feature rows and 48000 labels
Each row has 784 features
and we have 10 classes
Input images have 28 x 28 px shape
So far, so good


In [19]:
X_train = X_train.values.reshape(X_train.shape[0], n_pixels, n_pixels, 1)
X_test = X_test.values.reshape(X_test.shape[0], n_pixels, n_pixels, 1)
X_train, X_valid, y_train, y_valid = train_test_split(*shuffle(X_train, y_train), test_size=0.1)

print('X_train.shape:', X_train.shape)
print('y_train.shape:', y_train.shape)
print('X_valid.shape:', X_val.shape)
print('y_valid.shape:', y_val.shape)
print('X_test.shape:', X_test.shape)

X_train.shape: (43200, 28, 28, 1)
y_train.shape: (43200, 10)
X_valid.shape: (9600, 784)
y_valid.shape: (9600, 10)
X_test.shape: (38400, 28, 28, 1)


In [20]:
#sns.distplot(df_train['label'].values, kde=False, vertical=False, bins=10)

In [51]:
def create_my_model(shape=(28, 28, 1)):
    model = Sequential()
    #model.add(Lambda(lambda x: x / 127.5 - 1.0, input_shape=shape))
    
    model.add(Conv2D(32, (2, 2), activation='relu',input_shape=shape))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(BatchNormalization())
  
    
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))
    model.add(BatchNormalization())

    model.add(Dense(n_classes, activation='softmax'))
    
    
    model.compile(optimizer=optimizers.adam(lr=1e-4)  ,loss='categorical_crossentropy',metrics=['accuracy'] )
    return model
    
model = create_my_model(shape=(n_pixels, n_pixels, 1))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_11 (Conv2D)           (None, 27, 27, 32)        160       
_________________________________________________________________
batch_normalization_16 (Batc (None, 27, 27, 32)        128       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 11, 11, 32)        9248      
_________________________________________________________________
batch_normalization_17 (Batc (None, 11, 11, 32)        128       
_________________________________________________________________
flatten_6 (Flatten)          (None, 3872)              0         
_________________________________________________________________
dense_11 (Dense)             (None, 32)                123936    
__________

In [52]:
X_train, y_train = shuffle(X_train, y_train)
X_valid, y_valid = shuffle(X_valid, y_valid)

#imgen_train = ImageDataGenerator( rotation_range=20,width_shift_range=0.2, height_shift_range=0.2,shear_range=0.1, zoom_range=0.15,horizontal_flip=False,vertical_flip=False)
imgen_train = ImageDataGenerator()
imgen_valid = ImageDataGenerator()

imgen_train.fit(X_train)
imgen_valid.fit(X_valid)

In [53]:
model.get_config()

[{'class_name': 'Conv2D',
  'config': {'activation': 'relu',
   'activity_regularizer': None,
   'batch_input_shape': (None, 28, 28, 1),
   'bias_constraint': None,
   'bias_initializer': {'class_name': 'Zeros', 'config': {}},
   'bias_regularizer': None,
   'data_format': 'channels_last',
   'dilation_rate': (1, 1),
   'dtype': 'float32',
   'filters': 32,
   'kernel_constraint': None,
   'kernel_initializer': {'class_name': 'VarianceScaling',
    'config': {'distribution': 'uniform',
     'mode': 'fan_avg',
     'scale': 1.0,
     'seed': None}},
   'kernel_regularizer': None,
   'kernel_size': (2, 2),
   'name': 'conv2d_11',
   'padding': 'valid',
   'strides': (1, 1),
   'trainable': True,
   'use_bias': True}},
 {'class_name': 'BatchNormalization',
  'config': {'axis': -1,
   'beta_constraint': None,
   'beta_initializer': {'class_name': 'Zeros', 'config': {}},
   'beta_regularizer': None,
   'center': True,
   'epsilon': 0.001,
   'gamma_constraint': None,
   'gamma_initializer':

In [54]:
model.layers[0].get_config() # to get configuration of each layer

{'activation': 'relu',
 'activity_regularizer': None,
 'batch_input_shape': (None, 28, 28, 1),
 'bias_constraint': None,
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'bias_regularizer': None,
 'data_format': 'channels_last',
 'dilation_rate': (1, 1),
 'dtype': 'float32',
 'filters': 32,
 'kernel_constraint': None,
 'kernel_initializer': {'class_name': 'VarianceScaling',
  'config': {'distribution': 'uniform',
   'mode': 'fan_avg',
   'scale': 1.0,
   'seed': None}},
 'kernel_regularizer': None,
 'kernel_size': (2, 2),
 'name': 'conv2d_11',
 'padding': 'valid',
 'strides': (1, 1),
 'trainable': True,
 'use_bias': True}

In [55]:
model.layers[6].get_weights()

[array([[-0.02490543,  0.03461613,  0.02679034, ...,  0.02998577,
          0.03859191,  0.02247213],
        [-0.00388185, -0.02065638,  0.03461412, ..., -0.00560592,
          0.00414197, -0.0090251 ],
        [ 0.0268664 , -0.02417399, -0.03091028, ..., -0.00209845,
         -0.01644573,  0.0246154 ],
        ..., 
        [-0.02063824,  0.02556641,  0.01604617, ..., -0.01128889,
          0.03053714, -0.02685054],
        [ 0.0055113 ,  0.02140534, -0.03717655, ...,  0.03597895,
          0.02381878,  0.00967168],
        [-0.02379907,  0.02749325,  0.00878864, ..., -0.00520944,
         -0.00453059,  0.0189425 ]], dtype=float32),
 array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)]

In [67]:
EPOCHS =6 # actually needs to run for much longer to achieve the >99.4% validation accuracy I got
BATCH = 80

history = model.fit_generator(
    imgen_train.flow(*shuffle(X_train, y_train), batch_size=BATCH),
    steps_per_epoch=X_train.shape[0]//(4*BATCH),
    epochs=EPOCHS,
    validation_data=imgen_valid.flow(*shuffle(X_valid, y_valid)),
    validation_steps=BATCH
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [68]:
prediction = model.predict(X_test)      #This will give you predictions for each test image

In [69]:
print(prediction)
print("Shape: {}".format(prediction.shape))    # 10 probabilities for each 12000 images in x_test

[[  9.03817650e-04   9.80773330e-01   4.85527940e-04 ...,   3.63449985e-03
    1.92531361e-03   1.84335629e-03]
 [  5.02053052e-02   5.11569858e-01   1.16988132e-02 ...,   1.99568067e-02
    7.67169613e-03   2.33402606e-02]
 [  3.55746830e-03   9.31636393e-01   2.88148620e-03 ...,   4.14767023e-03
    9.95046925e-03   5.71444491e-03]
 ..., 
 [  2.76925531e-03   6.13605604e-03   8.04937503e-04 ...,   7.59327948e-01
    2.84945336e-03   2.08074570e-01]
 [  3.95110901e-03   9.16734047e-04   9.24964964e-01 ...,   1.66071067e-03
    4.63849981e-04   1.53091084e-03]
 [  1.41189937e-02   6.48342120e-03   6.36871636e-01 ...,   1.04688909e-02
    1.12603204e-02   9.59001109e-03]]
Shape: (38400, 10)


In [70]:
score = model.evaluate(X_test, y_test, verbose=0)      # This will take the highest probability and tell whether it matches with y_test

In [71]:
print("Test Loss: {} and Test accuracy: {}".format(score[0],score[1]))

Test Loss: 0.31224554888904094 and Test accuracy: 0.8975


In [73]:
model.predict_classes(X_test)

array([1, 1, 1, ..., 7, 2, 2], dtype=int64)