# This was originally located in the main directory outside src/

In [1]:
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Dense, Dropout, Activation, Flatten	
from keras.utils import to_categorical
from collections import defaultdict
import numpy as np
import math
import matplotlib.pyplot as plt
from src import self
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Build the two lookup tables between sign names and integer class ids.
# Every line of data/signs.txt contributes one entry; the zero-based line
# number is used as the id.
# Both tables are defaultdicts created without a default factory, so a missing
# key still raises KeyError exactly like a plain dict.
signtoid = defaultdict()
idtosign = defaultdict()
# The context manager guarantees the handle is closed once the tables are built
# (previously the file was opened and never closed).
# NOTE(review): no encoding is passed, so the platform default is used --
# confirm the file's encoding if this has to run on more than one OS.
with open("data/signs.txt", "r") as signfile:
    for i, line in enumerate(signfile):
        # Keep only the text before the first newline character.
        sign = line.split('\n')[0]
        signtoid[sign] = i
        idtosign[i] = sign

In [3]:
# Load the label records from the TSV file, handing the helper the sign -> id
# table built above.
# NOTE(review): the split and training cells index each ydata entry with
# 'source' and with the field names ('base', 'vowel', ...), so entries are
# presumably dict-like records -- confirm against src.self.tsvtodata.
ydata = self.tsvtodata("data/data.tsv", signtoid)
# Load the image data from the data/ directory, driven by the label records.
# NOTE(review): later cells use xdata[i] and ydata[i] with the same index, so
# the two sequences are assumed to be aligned one-to-one -- verify in
# src.self.imgstodata.
xdata = self.imgstodata("data/", ydata)

In [6]:
# Partition the samples into train / test / validation sets.
X_train = []
y_train = []
X_test = []
y_test = []
X_val = []
y_val = []
test_rate = 0.5
validation_rate = 0.3

# Fixed seed so that a fresh "Restart & Run All" reproduces the same split.
# The split previously drew from the unseeded global NumPy RNG, which made the
# reported scores and the saved models differ from run to run.
split_seed = 42
split_rng = np.random.RandomState(split_seed)

# Cutoffs do not change inside the loop, so compute them once.
test_cutoff = np.floor(len(xdata) * test_rate)
val_cutoff = np.floor(len(xdata) * (1.0 - validation_rate))

# Each sample i is given a random rank j (a permutation of 0..len(xdata)-1):
#   * samples whose source is 'omnglt' always go to training, whatever their rank;
#   * ranks strictly below test_cutoff go to the test set;
#   * ranks strictly above val_cutoff go to the validation set;
#   * everything else goes to training.
# NOTE(review): the cutoffs are computed from len(xdata), which still counts the
# 'omnglt' samples, so the realised test/validation fractions end up below
# test_rate / validation_rate whenever such samples exist.
for i, j in enumerate(split_rng.permutation(len(xdata))):
    if ydata[i]['source'] == 'omnglt':
        X_train.append(xdata[i])
        y_train.append(ydata[i])
    elif j < test_cutoff:
        X_test.append(xdata[i])
        y_test.append(ydata[i])
    elif j > val_cutoff:
        X_val.append(xdata[i])
        y_val.append(ydata[i])
    else:
        X_train.append(xdata[i])
        y_train.append(ydata[i])

# Convert the accumulated lists to arrays for Keras.
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)
X_val = np.array(X_val)
y_val = np.array(y_val)

# Report the resulting sizes as a quick sanity check.
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Train data shape:  (221, 64, 64, 1)
Train labels shape:  (221,)
Validation data shape:  (118, 64, 64, 1)
Validation labels shape:  (118,)
Test data shape:  (196, 64, 64, 1)
Test labels shape:  (196,)


In [7]:
# Fit one independent CNN classifier per annotation field and keep each fitted
# model under its field name.
models = defaultdict()
fields = ['base', 'anusvara', 'visarga', 'vowel', 'adjunct']
for field in fields:
    # 'base', 'vowel' and 'adjunct' get one output class per entry in
    # signtoid; every other field is trained as a two-class problem.
    if field in ['base', 'vowel', 'adjunct']:
        num_classes = len(signtoid)
    else:
        num_classes = 2

    # Two conv + max-pool stages, then a dense hidden layer and a softmax head.
    model = Sequential([
        Convolution2D(32, (5, 5), activation='relu', input_shape = (64, 64, 1)),
        MaxPooling2D(pool_size=(2,2), strides=(2, 2)),
        Convolution2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2,2)),
        Flatten(),
        Dense(1024, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # One-hot encode this field's label for every training record, then fit.
    train_targets = to_categorical(
        [record[field] for record in y_train],
        num_classes = num_classes,
    )
    model.fit(X_train, train_targets, batch_size=50, epochs=20, verbose=1)

    # Evaluate on the held-out test split; prints [loss, accuracy].
    test_targets = to_categorical(
        [record[field] for record in y_test],
        num_classes = num_classes,
    )
    score = model.evaluate(X_test, test_targets, verbose=0)
    print(score)

    models[field] = model

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[3.740842880034933, 0.5204081632653061]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[0.08120135282972894, 0.9846938775510204]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[1.1920928955078125e-07, 1.0]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20


Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[1.6635672630449967, 0.8571428571428571]


In [8]:
# Write every trained model to src/models/<field>.h5.
for field in fields:
    model_path = 'src/models/{}.h5'.format(field)
    models[field].save(model_path)