In [3]:
import os
import struct
import numpy as np
from numpy import vstack
from scipy.sparse import coo_matrix
from sklearn.utils import shuffle
from keras.utils import np_utils
from sklearn.metrics import accuracy_score,r2_score

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [20]:
print(np.__version__)

1.14.1


In [21]:
num_classes = 10

# 1 . Create training set and test set

In [22]:
path = './datasets/'
def load_data(path, kind=''):
    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte'
                               % kind)
    with open(labels_path, 'rb') as lbpath:
        magic, n = struct.unpack('>II',
                                 lbpath.read(8))
        labels = np.fromfile(lbpath,
                             dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack('>IIII',
                                               imgpath.read(16))
        images = np.fromfile(imgpath,
                             dtype=np.uint8).reshape(len(labels), 784)

    return images, labels

In [23]:
# Handwritten numbers training data

X_train,y_train = load_data(path, kind='train')
X_train = X_train / 255

X_test, y_test = load_data(path, kind='t10k')
X_test = X_test / 255

In [24]:
#  Testing set (mnist testing set)

X_test  = X_test.reshape(len(X_test),1,28,28).astype('float')
y_test  = np_utils.to_categorical(y_test,  num_classes)

In [25]:
noise_random = np.random.rand(6000,784)
noise_labels = np.full((6000), 10, dtype=int)

In [26]:

def flatten(x):
    ''' Creates a generator object that loops through a nested list '''
    # First see if the list is iterable
    try:
        it_is = iter(x)
    # If it's not iterable return the list as is
    except TypeError:
        yield x
    # If it is iterable, loop through the list recursively
    else:
        for i in it_is:
            for j in flatten(i):
                yield j



In [27]:
def prepare_ds(X_train, y_train):
    
    X_train_split = []
    y_train_split = []
    indics = []

    for i in range(15):    

        if( i < 5):

            index_i = np.where( y_train == i)
            indics.append(index_i)


        elif(i > 4 and i< 10):

            index_i = np.where( y_train == i)
            indics.append(index_i)        
            indics.pop(0)

        elif(i>9):

            index_i = np.where( y_train == i-10)
            indics.append(index_i)

    #     flattened_list = np.array(indics)
        indices_i = list(flatten(indics))

        X_train_i, y_train_i = X_train[indices_i], y_train[indices_i]

    #     noise_random = np.random.rand(60000,784)
    #     noise_labels = np.full((60000), 10, dtype=int)

    #     X_train_i = np.concatenate((X_train_i, noise_random),axis=0)
    #     y_train_i = np.concatenate((y_train_i, noise_labels),axis=0)



        X_sparse_train = coo_matrix(X_train_i)
        X_train_i, X_sparse_train, y_train_i = shuffle(X_train_i, X_sparse_train, y_train_i, random_state=42)

        X_train_i = X_train_i.reshape(len(X_train_i),1,28,28).astype('float')
        y_train_i = np_utils.to_categorical(y_train_i, num_classes)
    
        X_train_split.append(X_train_i)
        y_train_split.append(y_train_i)
        
    
    return X_train_split, y_train_split
    

In [28]:
X_train_split, y_train_split = prepare_ds(X_train, y_train)

In [29]:
X_train_split[14].shape

(60000, 1, 28, 28)

In [30]:
len(y_train_split)

15

# 2 . Build Model -- keras model

In [4]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
from keras.losses import categorical_crossentropy
from keras.optimizers import Adadelta

K.clear_session()

In [5]:
def models_factory():
    model = Sequential()

    model.add(Conv2D(32, (3, 3), activation='relu', 
                     data_format='channels_first',
                     input_shape=(1, 28, 28)))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(11, activation='softmax'))

    model.compile(loss=categorical_crossentropy,
                  optimizer=Adadelta(),
                  metrics=['accuracy'])
    return model

In [6]:
model = models_factory()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 26, 26)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 24, 32)        7520      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 15, 12, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 15, 12, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 13, 10, 64)        18496     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 11, 8, 64)         36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 4, 64)          0         
__________

# 3. Training

1. Your model needs be improved (change parameters)
2. You may need to try a different machine learning algorithm (not all algorithms created equal)
3. You need more data (subtle relationship difficult to find)
4. You may need to try transforming your data (dependent upon algorithm used)
5. There may be no relationship between your dependent and independent variables

In [37]:
def train_test_iter():
    hs_history=[]

    for i in range(15):

        print(i)

        X_train=X_train_split[i]
        y_train = y_train_split[i]


        history = model.fit(X_train, y_train, epochs=12, batch_size=32, validation_split = 0.3,verbose = 0)
        hs_history.append(history)


    return model

In [36]:
# model.fit(X_train, y_train, epochs=12, batch_size=32)
hs_train_scores = []
hs_test_scores = []


for i in range(1):
    
    model_trained = train_test_iter()
    
    test_score = model_trained.evaluate(X_test, y_test, verbose=1)
    hs_test_scores.append(test_score)
    print("%d : Mnist pure test sets -> Loss: %.2f%%" % (i+1, test_score[0]*100))
    print("%d : Mnist pure test sets -> Accuracy: %.2f%%" % (i+1, test_score[1]*100))
    print()

0
Train on 4738 samples, validate on 1185 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
1
Train on 10132 samples, validate on 2533 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
2
Train on 14898 samples, validate on 3725 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
3
Train on 19803 samples, validate on 4951 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
4
Train on 24476 samples, validate on 6120 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12


Epoch 10/12
Epoch 11/12
Epoch 12/12
5
Train on 24075 samples, validate on 6019 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
6
Train on 23416 samples, validate on 5854 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
7
Train on 23661 samples, validate on 5916 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
8
Train on 23437 samples, validate on 5860 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
9
Train on 23523 samples, validate on 5881 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12


Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
10
Train on 28261 samples, validate on 7066 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
11
Train on 33655 samples, validate on 8414 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
12
Train on 38421 samples, validate on 9606 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
13
Train on 43326 samples, validate on 10832 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
14
Train on 48000 samples, validate on 12000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 

In [None]:
# model.fit(X_train, y_train, epochs=12, batch_size=32)
hs_train_scores = []
hs_test_scores = []
hs_history=[]
for i in range(30):
        
    history = model.fit(X_train, y_train, epochs=12, batch_size=32,validation_split=0.3,verbose=0)
    hs_history.append(history)
    
    
    print("------------------------%d : training --------------------------------------" % (i+1))
    train_score = model.evaluate(X_train, y_train, verbose=1)
    hs_train_scores.append(train_score)
    print("%d : Mnist Training -> Loss: %.2f%%" % (i+1, train_score[0]*100))
    print("%d : Mnist Training -> Accuracy: %.2f%%" % (i+1, train_score[1]*100))
    print();
    print("------------------------%d : test ------------------------------------------" % (i+1))
    test_score = model.evaluate(X_test, y_test, verbose=1)
    hs_test_scores.append(test_score)
    print("%d : Mnist pure test sets -> Loss: %.2f%%" % (i+1, test_score[0]*100))
    print("%d : Mnist pure test sets -> Accuracy: %.2f%%" % (i+1, test_score[1]*100))
    print()

In [None]:
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# 5. Save model

In [None]:
model.save("./models/MNIST_with_mixed.h5")

Check the following things when training any type of deep neural network:

1. the data used to calculate training accuracy is not identical to the data used to train your NN. This sounds weird, but possible in practice, especially in case of images, if you don't keep track of what is happening. For example, you train on random patches of images and calculate training accuracy on random patches of same images. It is easy to forget that though they are same images, the patches are randomly selected.
2. More than the values of train and val accuracy, I would be concerned about what you said, "i'm copy pasting a random epoch but all are roughly the same". No, they can't be same. Accuracy at different epochs is mostly different, because network is learning so it is constantly changing its weights. If accuracy goes up then that means it is approaching the minima of the loss function.
I think you should be more concerned about getting a low training accuracy instead of getting a lower training accuracy than the validation accuracy.
3. Do all the sanity checks given here. Read the entire article if possible, it's very good.
4. Make sure you are doing pre-processing in the right manner. For example, make sure that mean over entire training data is zero. For testing data, subtract the mean vector of the training data from each instance of testing data. Don't subtract the mean of testing data from itself. Since, you wouldn't know the mean of testing data at runtime.
4. Check if your loss at the very first epoch makes sense. For example, in a 10-class classification problem, starting loss should be -ln(0.1) = 2.302 (given here).
5. Again, from here, overfit a tiny subset of data and make sure you can achieve zero cost. Full details in the link.
6. If nothing works, just train and test on the same data and see if you can get 90% + accuracy. Otherwise, examine your network more closely by looking at individual layer outputs (given in Keras FAQ) etc.


<a href ="https://github.com/keras-team/keras/issues/1761">maybe solution</a>