In [1]:
# !pip install keras
# !pip install matplotlib
# !pip install scikit-learn
# !pip install imblearn
# !pip install python-resize-image
# !pip install opencv-python

In [20]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Input
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.layers import Concatenate, Dense, LSTM, Input, concatenate
from keras.optimizers import Adagrad

# Data prep

In [221]:
import numpy as np
# Load the raw arrays for the labelled training data and the unlabelled
# assignment data.
# NOTE(review): the spoken samples are resized one by one further down, which
# suggests they have varying lengths and are stored as an object array --
# confirm. NumPy >= 1.16.3 refuses to load object arrays unless
# allow_pickle=True is passed; the flag has no effect on plain numeric arrays.
# Unpickling can execute arbitrary code, so only load files you trust.
train_spoken_raw = np.load('spoken_train.npy', allow_pickle=True)
train_written_raw = np.load('written_train.npy')
train_y_raw = np.load('match_train.npy')
assignment_spoken_raw = np.load('spoken_test.npy', allow_pickle=True)
assignment_written_raw = np.load('written_test.npy')

In [222]:
# Number of rows every spoken sample is resized to: used as the target height
# in the cv2.resize calls and in the later reshape to (fragment_length, 13, 1).
fragment_length = 28

## Put data in correct datatype and format
### Written

In [223]:
def _rows_as_float32(rows):
    """Cast every row to float32 and collect the rows in a single array."""
    return np.array([row.astype('float32') for row in rows])


train_written = _rows_as_float32(train_written_raw)
assignment_written = _rows_as_float32(assignment_written_raw)

### Spoken

In [224]:
# Bring every spoken training sample to a fixed fragment_length x 13 grid and
# store it flattened, one sample per row.
train_spoken = np.zeros((len(train_spoken_raw), 13 * fragment_length))
for idx, im in enumerate(train_spoken_raw):
    # cv2.resize takes dsize as (width, height) and accepts a single-channel
    # float32 array directly, so the gray -> BGR conversion (followed by
    # keeping only channel 0) was a redundant round trip.
    new_im = cv2.resize(im.astype('float32'), (13, fragment_length),
                        interpolation=cv2.INTER_CUBIC)
    train_spoken[idx, :] = new_im.reshape(13 * fragment_length)

In [225]:
# Bring every spoken assignment sample to a fixed fragment_length x 13 grid and
# store it flattened, one sample per row.
assignment_spoken = np.zeros((len(assignment_spoken_raw), 13 * fragment_length))
for idx, im in enumerate(assignment_spoken_raw):
    # cv2.resize takes dsize as (width, height) and accepts a single-channel
    # float32 array directly, so the gray -> BGR conversion (followed by
    # keeping only channel 0) was a redundant round trip.
    new_im = cv2.resize(im.astype('float32'), (13, fragment_length),
                        interpolation=cv2.INTER_CUBIC)
    assignment_spoken[idx, :] = new_im.reshape(13 * fragment_length)

### Labels (train_y)

In [226]:
# Turn the label vector into a single column so it can be concatenated with
# the feature matrices along axis 1. Using -1 lets NumPy infer the number of
# samples instead of hard-coding the size of one particular dataset.
train_y = train_y_raw.reshape(-1, 1)

## Concatenate labels and features into one array for the train/test split

In [227]:
# One wide row per sample: label column first, then the written values, then
# the flattened spoken values.
train_data = np.hstack((train_y, train_written, train_spoken))

In [237]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable.
split_options = dict(test_size=0.2, random_state=123)
train_set, test_set = train_test_split(train_data, **split_options)

## oversample / undersample

In [238]:
# Row positions in train_set whose label column equals False / True.
labels_in_train_set = train_set[:, 0]
indices_negative = np.flatnonzero(labels_in_train_set == False)
indices_positive = np.flatnonzero(labels_in_train_set == True)

### Oversample

In [239]:
# Oversample the positive class: draw (with replacement) as many positive rows
# as there are negative rows so both classes are equally represented.
# A dedicated seeded generator makes the draw repeatable, and the result goes
# into a new name so that re-running the cell never resamples an index array
# that was already resampled.
oversample_rng = np.random.RandomState(123)
indices_positive_oversampled = oversample_rng.choice(
    indices_positive, size=len(indices_negative), replace=True)
train_balanced = np.concatenate(
    (train_set[indices_negative], train_set[indices_positive_oversampled]), axis=0)
# The cell used to display `balanced_data.shape`, a name that is never defined;
# the balanced training set is `train_balanced`.
train_balanced.shape

(64932, 1149)

### Undersample

In [240]:
# indices_negative = np.random.choice(indices_negative,size=len(indices_positive),replace=False)
# train_set_balanced = np.concatenate((train_set[indices_negative],train_set[indices_positive]),axis=0)
# train_set_balanced.shape

In [241]:
# train_data_balanced = train_data

## Shuffle data

In [242]:
from sklearn.utils import shuffle
# A single shuffle already produces a uniformly random row order, so the
# second call was redundant; random_state makes the order repeatable.
train_balanced = shuffle(train_balanced, random_state=123)

In [243]:
# Column layout of train_balanced: [label | written values | spoken values].
_written_cols = slice(1, 785)
_spoken_cols = slice(785, None)

train_y = train_balanced[:, 0]
train_written = train_balanced[:, _written_cols]
train_spoken = train_balanced[:, _spoken_cols]

In [244]:
# Same column layout for the held-out rows: label, written values, spoken values.
test_y, test_written, test_spoken = (
    test_set[:, 0],
    test_set[:, 1:785],
    test_set[:, 785:],
)

## Transform to proper 2D format

In [245]:
# Reshape the flat 784-value rows into 28x28 single-channel images in one
# vectorised step instead of a Python loop over every sample.
# .copy() keeps each result independent of the array it was sliced from, so
# the in-place normalisation further down cannot write through into it.
train_written = train_written.reshape(-1, 28, 28, 1).copy()
test_written = test_written.reshape(-1, 28, 28, 1).copy()
assignment_written = assignment_written.reshape(-1, 28, 28, 1).copy()
print(train_written.shape)

(64932, 28, 28, 1)


In [246]:
# Reshape the flat spoken rows into (fragment_length, 13, 1) images in one
# vectorised step instead of a Python loop over every sample.
# .copy() keeps each result independent of the array it was sliced from, so
# the in-place normalisation further down cannot write through into it.
train_spoken = train_spoken.reshape(-1, fragment_length, 13, 1).copy()
test_spoken = test_spoken.reshape(-1, fragment_length, 13, 1).copy()
assignment_spoken = assignment_spoken.reshape(-1, fragment_length, 13, 1).copy()
print(train_spoken.shape)

(64932, 28, 13, 1)


In [247]:
# Training labels as a single column, one row per sample.
train_y = train_y.reshape(-1, 1)

## Normalize 

In [248]:
# Scale the inputs in place: written images are divided by 255, spoken images by 8.
for _written_images in (train_written, test_written, assignment_written):
    np.divide(_written_images, 255, out=_written_images)

for _spoken_images in (train_spoken, test_spoken, assignment_spoken):
    np.divide(_spoken_images, 8, out=_spoken_images)

## Merge written and spoken 

In [260]:
# Display the dimensions of the written training images.
np.shape(train_written)

(64932, 28, 28, 1)

In [277]:
# Display the dimensions of the spoken training images.
np.shape(train_spoken)

(64932, 28, 13, 1)

In [275]:
# Shape of the zero strip that is placed between the written and the spoken
# image: like train_written, but only 3 columns wide. It is derived from
# train_written because zeros_train is only created in a later cell, so
# referencing it here raises NameError on a fresh kernel.
train_written.shape[:2] + (3,) + train_written.shape[3:]

(64932, 28, 3, 1)

In [278]:
def _zero_gap(images, width=3):
    """Return a float64 zero block shaped like `images` but `width` columns wide.

    Only the (samples, rows, width, channels) block that is needed gets
    allocated, rather than a full-size zero array that is sliced afterwards.
    """
    n_samples, n_rows, _, n_channels = images.shape
    return np.zeros((n_samples, n_rows, width, n_channels))


# Put each written image and its spoken counterpart side by side along the
# column axis (axis=2), separated by a 3-column strip of zeros.
zeros_train = _zero_gap(train_written)
train_x = np.concatenate((train_written, zeros_train, train_spoken), axis=2)
zeros_test = _zero_gap(test_written)
test_x = np.concatenate((test_written, zeros_test, test_spoken), axis=2)
zeros_assignment = _zero_gap(assignment_written)
assignment_x = np.concatenate((assignment_written, zeros_assignment, assignment_spoken), axis=2)

# Model

In [279]:
from keras.callbacks import Callback
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
class Metrics(Callback):
    """Keras callback that scores the validation data after every epoch.

    F1, recall and precision are collected per epoch in `val_f1s`,
    `val_recalls` and `val_precisions`; the F1 score is also printed.

    Reads `self.validation_data`, which is expected to be populated by Keras
    during `fit`.
    NOTE(review): newer Keras / tf.keras releases may no longer set this
    attribute -- confirm against the installed version.
    """

    def on_train_begin(self, logs=None):
        """Start every training run with empty score lists."""
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the validation inputs and record F1, recall and precision."""
        # The model outputs are rounded to hard 0/1 predictions; both arrays
        # are flattened so scikit-learn receives plain 1-D label vectors.
        val_predict = np.asarray(self.model.predict(self.validation_data[0])).round().ravel()
        val_targ = np.asarray(self.validation_data[1]).ravel()
        _val_f1 = f1_score(val_targ, val_predict)
        self.val_f1s.append(_val_f1)
        # These two lists were initialised but never filled before.
        self.val_recalls.append(recall_score(val_targ, val_predict))
        self.val_precisions.append(precision_score(val_targ, val_predict))
        print("— val_f1: %2f"%(_val_f1))


metrics = Metrics()

## Model for combined images

In [280]:
# Single-branch CNN over the merged image of 28 rows by 28+13+3 columns.
written_input = Input(shape=(28, 28 + 13 + 3, 1), name='written_input')

# Layers are created in the order they are applied.
feature_stack = [
    Conv2D(50, kernel_size=(3, 3), activation='tanh'),
    Conv2D(100, (3, 3), activation='tanh'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.5),
]
x = written_input
for layer in feature_stack:
    x = layer(x)

# A single sigmoid unit produces the binary prediction.
output = Dense(1, activation='sigmoid', name='output')(x)

final_model = Model(inputs=[written_input], outputs=[output])

In [281]:
# Binary cross-entropy loss, Adadelta optimiser with its default settings,
# and accuracy reported as an additional metric.
compile_options = {
    'loss': keras.losses.binary_crossentropy,
    'optimizer': keras.optimizers.Adadelta(),
    'metrics': ['accuracy'],
}
final_model.compile(**compile_options)

In [None]:
# all_train_data_x = np.concatenate((train_x,test_x),axis=0)
# all_train_data_y = np.concatenate((train_y,test_y.reshape(len(test_y),1)

In [None]:
# Train on the balanced training set and validate on the held-out split.
# The second positional argument of fit() is the target array, so it has to
# be the training labels (train_y) rather than the validation inputs (test_x).
final_model.fit(train_x, train_y,
          batch_size=100,
          epochs=50,
          verbose=1,
          callbacks=[metrics],
          validation_data=(test_x, test_y))

Train on 73932 samples, validate on 9000 samples
Epoch 1/50

In [309]:
# Print the layer-by-layer overview (output shapes and parameter counts) of the model.
final_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
written_input (InputLayer)   (None, 28, 44, 1)         0         
_________________________________________________________________
conv2d_37 (Conv2D)           (None, 26, 42, 50)        500       
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 24, 40, 100)       45100     
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 12, 20, 100)       0         
_________________________________________________________________
dropout_37 (Dropout)         (None, 12, 20, 100)       0         
_________________________________________________________________
flatten_19 (Flatten)         (None, 24000)             0         
_________________________________________________________________
dense_19 (Dense)             (None, 100)               2400100   
__________

In [314]:
# Hard predictions for the held-out split: model outputs rounded to the nearest integer.
pre_test_y = np.round(final_model.predict(test_x))

In [315]:
# scikit-learn metrics take (y_true, y_pred) in that order; ravel() turns the
# (n, 1) prediction column into the 1-D vector the metric expects.
f1_score(test_y, pre_test_y.ravel())

0.9957343083485679

# BELOW NOT USED

## Submodel Spoken digits

In [99]:
# spoken_input = Input(shape=(fragment_length,13,1), name='spoken_input')
# x = Conv2D(32, kernel_size=(3, 3), activation='tanh')(spoken_input)
# x = Conv2D(64, (3, 3), activation='relu')(x)
# x = MaxPooling2D(pool_size=(2,2))(x)
# x = Dropout(0.25)(x)
# x = Flatten()(x)
# x = Dense(125, activation='relu',kernel_initializer='uniform')(x)
# output2 = Dropout(0.5)(x)
# # output = Dense(10, activation='softmax')(x)

# spoken_model = Model(inputs=[spoken_input], outputs=[output2])

## Last layer

In [100]:
# Disabled: spoken_model and written_model are only defined in commented-out
# code, so running this cell raises NameError; it would also rebind
# final_model, replacing the single-input model built earlier.
# Re-enable together with the two sub-model definitions.
# x = keras.layers.concatenate([spoken_model.output, written_model.output])
# output = Dense(1, activation='sigmoid', name='output')(x)
#
# final_model = Model(inputs=[spoken_model.input,written_model.input], outputs=[output])

In [101]:
# Disabled: part of the unused two-input experiment. The model it was meant to
# compile is built from spoken_model / written_model, which are not defined in
# this notebook; left live, it would re-compile the final_model defined earlier.
# final_model.compile(loss=keras.losses.binary_crossentropy,
#               optimizer=keras.optimizers.Adadelta(),
#               metrics=['accuracy'])

In [1]:
# Disabled: part of the unused two-input experiment. The two-input model this
# cell trains cannot be built because spoken_model / written_model are not
# defined in this notebook, and the final_model that does exist takes a single
# merged input.
# final_model.fit([train_spoken,train_written], train_y,
#           batch_size=200,
#           epochs=100,
#           verbose=1,
#           validation_data=([test_spoken,test_written], test_y))