In [1]:
import sys

# Extend the import search path with ../scripts so the project-local imports
# below can resolve (presumably btsc_dataset.py and utils.py live there —
# confirm). The entry is relative, so it is resolved against the kernel's
# current working directory.
sys.path.append('../scripts')
from btsc_dataset import load_full_dataset, split_images_and_labels, resize_images
from utils import train_validation_test_split

In [2]:
# Side length (in pixels) every image is resized to before training.
# Observed effect of changing it:
#   Increasing 32->64: No accuracy change. Significantly longer train times.
#   Decreasing 32->16: ~3% accuracy decrease. Significantly faster train times.
IMAGE_SIZE = 32

# Load the full dataset, resize every image, then separate images from labels.
resized_data = resize_images(load_full_dataset(), size=IMAGE_SIZE)
(images, labels) = split_images_and_labels(resized_data)

# Distinct class labels; the count is reused later as the number of output classes.
unique_labels = set(labels)

print('Images:', images.shape)
print('Labels:', labels.shape)
print('Total Unique Labels:', len(unique_labels))

Images: (7095, 32, 32, 3)
Labels: (7095,)
Total Unique Labels: 62


In [3]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Keep the fitted encoder around: predicted class indices can then be mapped
# back to the original labels with label_encoder.inverse_transform(...).
label_encoder = LabelEncoder()
integer_labels = label_encoder.fit_transform(labels)

# One column per class. The width is passed explicitly so it is tied to the
# encoder's class list instead of being inferred from the largest index seen.
one_hot_labels = to_categorical(integer_labels,
                                num_classes=len(label_encoder.classes_))

X = images
y = one_hot_labels

# Fixed random_state keeps the train/validation/test partition reproducible
# across kernel restarts.
X_train, X_valid, X_test, \
y_train, y_valid, y_test = train_validation_test_split(X, y, random_state=42)

print('X:', X.shape)
print('y:', y.shape)
print()
print('X_train:', X_train.shape)
print('y_train:', y_train.shape)
print()
print('X_valid:', X_valid.shape)
print('y_valid:', y_valid.shape)
print()
print('X_test:', X_test.shape)
print('y_test:', y_test.shape)

Using TensorFlow backend.


X: (7095, 32, 32, 3)
y: (7095, 62)

X_train: (4257, 32, 32, 3)
y_train: (4257, 62)

X_valid: (1419, 32, 32, 3)
y_valid: (1419, 62)

X_test: (1419, 32, 32, 3)
y_test: (1419, 62)


In [4]:
# Explicit imports instead of `import *` so it is clear where each name comes
# from and the notebook namespace is not flooded with every Keras symbol.
from keras.models import Sequential
# Dropout is only needed by the commented-out experiment below; it is imported
# so that experiment can be re-enabled by uncommenting a single line.
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D

model = Sequential()

# Single convolution + pooling stage. The per-sample input shape is taken from
# the array's shape (everything after the leading sample axis) rather than by
# indexing the first sample.
# Effect of increasing filters from 16 to 32: ~1% accuracy decrease
# Effect of increasing filters from 16 to 64: ~1% accuracy increase
model.add(Conv2D(64, (3, 3), input_shape=X.shape[1:], activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Effect of adding this layer: no accuracy change
# model.add(Dropout(0.2))

# Effect of adding this layer: No accuracy change (~95-96%)
# model.add(Conv2D(64, (3, 3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: flatten the feature maps, one hidden dense layer, then a
# softmax over one unit per distinct label.
model.add(Flatten())
# Effect of increasing nodes from 64->128: No accuracy change (~95-96%).
# Effect of decreasing nodes from 64->32: ~10% accuracy decrease (~85%).
model.add(Dense(64, activation='relu'))
model.add(Dense(len(unique_labels), activation='softmax'))

# categorical_crossentropy matches the one-hot encoded targets in y.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [5]:
# Fit on the training split only. The validation split is scored at the end of
# every epoch purely for monitoring (it does not influence the weights), which
# makes over-/under-fitting visible while training runs.
# The returned History object is kept so the per-epoch metrics in
# history.history can be inspected or plotted later; assigning it also keeps
# its repr out of the cell output.
history = model.fit(
    X_train, y_train,
    batch_size=50,
    epochs=10,
    validation_data=(X_valid, y_valid),
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x10791cfd0>

In [6]:
# Score the trained model on the validation split; evaluate() yields the loss
# followed by the metrics configured at compile time (accuracy here).
validation_metrics = model.evaluate(X_valid, y_valid, verbose=1)
loss, acc = validation_metrics

# Report both numbers under a common heading.
print('Validation Results:')
for caption, value in (('  Loss     =', loss), ('  Accuracy =', acc)):
    print(caption, value)

Validation Results:
  Loss     = 0.19383730056253934
  Accuracy = 0.9527836506680925


In [7]:
# NOTE(review): evaluation on the held-out test split is disabled. Presumably
# this keeps the test data untouched while hyperparameters are still being
# tuned against the validation split — confirm, and uncomment only once the
# model configuration is final.
# loss, acc = model.evaluate(X_test, y_test, verbose=1)

# print('Test Results:')
# print('  Loss     =', loss)
# print('  Accuracy =', acc)