In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 75

import sys
sys.path.append('../scripts')

from btsc_dataset import load_full_dataset, split_images_and_labels, resize_images
from utils import train_validation_test_split

import numpy as np

In [None]:
# Image-size experiments, relative to the size=32 baseline used below:
# Effect of increasing image size from 32->64: No accuracy change. Significantly longer train times.
# Effect of decreasing image size from 32->16: ~3% accuracy decrease. Significantly faster train times.
# from sklearn.preprocessing import normalize

# Load the full dataset, pass it through resize_images with size=32, then
# separate it into images and labels. `images`, `labels` and `unique_labels`
# are reused by later cells.
resized_data = resize_images(load_full_dataset(), size=32)
(images, labels) = split_images_and_labels(resized_data)
unique_labels = set(labels)

# Sanity report: array shapes and the number of distinct labels.
print('Images:', images.shape)
print('Labels:', labels.shape)
print('Total Unique Labels:', len(unique_labels))

In [None]:

# [print(x[0][0].shape) for x in images]

def normalize(img):
    """Return a copy of ``img`` with each channel floor-divided by its own mean.

    Parameters
    ----------
    img : array-like
        Image indexed as ``[row, column, channel]``. Any number of channels is
        accepted (the channel count is read from the third axis). The input is
        never modified.

    Returns
    -------
    numpy.ndarray
        Array with the same shape and dtype as ``img``. A channel whose mean is
        exactly 0 (or an image with no pixels) is returned unchanged, because
        dividing by a zero mean is undefined.
    """
    source = np.asarray(img)
    new_img = source.copy()

    # Read the channel count from the data instead of assuming RGB.
    num_channels = source.shape[2]
    for channel in range(num_channels):
        # Work in float64 so the mean does not overflow or wrap for small
        # integer dtypes such as uint8.
        values = source[:, :, channel].astype(np.float64)
        if values.size == 0:
            continue

        average = values.mean()
        if average == 0:
            # Nothing sensible to divide by; keep the copied values as they are.
            continue

        # NOTE(review): floor division quantises every pixel to a whole number
        # of "means". If a true ratio was intended this should be `/` - confirm.
        # The assignment casts the result back to the image's own dtype.
        new_img[:, :, channel] = values // average

    return new_img

def do_normalize(img, idx):
    """Normalize a single image.

    ``idx`` is the position of the image in the dataset. It is accepted so a
    progress trace can be added here, but it does not affect the result.
    """
    normalized_img = normalize(img)
    return normalized_img
    
# Run do_normalize over every image, handing it the image together with its
# position, and collect the results into a single array.
images_2 = np.array(list(map(do_normalize, images, range(len(images)))))
# Alternative without the index: np.array([normalize(x) for x in images])
print('DONE:', images_2.shape)
print(images_2[0][0])

In [None]:
def plot_images_and_labels(images_np, labels_np):
    """Show the first image of every class in a grid, titled ``label (count)``.

    Parameters
    ----------
    images_np : numpy.ndarray
        Images, indexed by sample along the first axis.
    labels_np : numpy.ndarray
        One label per image, in the same order as ``images_np``.

    The grid is 8 columns wide and at least 8 rows tall; extra rows are added
    when there are more than 64 distinct labels so every class gets a cell.
    """
    # A single pass yields the sorted distinct labels, the index of each
    # label's first occurrence, and how many times each label appears.
    unique_labels, first_indices, counts = np.unique(
        np.asarray(labels_np), return_index=True, return_counts=True
    )

    n_cols = 8
    # Ceiling division without importing math; never fewer than 8 rows so the
    # layout for up to 64 classes is the familiar 8x8 grid.
    n_rows = max(8, -(-len(unique_labels) // n_cols))

    plt.figure(figsize=(15, 15))

    cells = zip(unique_labels.tolist(), first_indices.tolist(), counts.tolist())
    for (position, (label, first_idx, count)) in enumerate(cells, 1):
        plt.subplot(n_rows, n_cols, position)
        plt.axis('off')
        plt.title(f'{label} ({count})')
        plt.imshow(images_np[first_idx])

    plt.show()


plot_images_and_labels(images_2, labels)

In [None]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Turn the raw labels into integer class ids, then into one-hot rows.
label_encoder = LabelEncoder()
integer_labels = label_encoder.fit_transform(labels)
ont_hot_labels = to_categorical(integer_labels)

# NOTE(review): X is built from `images`, not from the normalized `images_2`
# computed earlier - confirm this is intentional.
X, y = images, ont_hot_labels

(X_train, X_valid, X_test,
 y_train, y_valid, y_test) = train_validation_test_split(X, y, random_state=42)

# Shapes of the full set, followed by each split (blank line between groups).
print('X:', X.shape)
print('y:', y.shape)
print()

print('X_train:', X_train.shape)
print('y_train:', y_train.shape)
print()

print('X_valid:', X_valid.shape)
print('y_valid:', y_valid.shape)
print()

print('X_test:', X_test.shape)
print('y_test:', y_test.shape)

In [None]:
from keras.models import *
from keras.layers import *

# The network is declared as one ordered list of layers; each experiment note
# sits directly above the layer it refers to.
model = Sequential([
    # Effect of increasing filters from 16 to 32: ~1% accuracy decrease
    # Effect of increasing filters from 16 to 64: ~1% accuracy increase
    Conv2D(64, (3, 3), input_shape=X[0].shape, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Effect of adding this layer: no accuracy change
    # Dropout(0.2),

    # Effect of adding this layer: No accuracy change (~95-96%)
    # Conv2D(64, (3, 3), activation='relu'),
    # MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    # Effect of increasing nodes from 64->128: No accuracy change (~95-96%).
    # Effect of decreasing nodes from 64->32: ~10% accuracy decrease (~85%).
    Dense(64, activation='relu'),
    # One softmax output per distinct label.
    Dense(len(unique_labels), activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Fit on the training split for 10 epochs with batches of 50 samples; the
# validation split is passed along so its metrics are recorded in `history`.
history = model.fit(
    X_train,
    y_train,
    batch_size=50,
    epochs=10,
    verbose=1,
    validation_data=(X_valid, y_valid),
)

In [None]:
# Plot training & validation accuracies per epoch.
# Depending on the Keras version, a metric compiled as 'accuracy' is recorded
# under 'acc' (older releases) or 'accuracy' (2.3.0 and later), so pick
# whichever key this run actually produced instead of hard-coding one.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['Train', 'Validation'], loc='lower right')
plt.show()

In [None]:
# Loss curves for the training and validation splits, one point per epoch.
# Each line carries its own legend label.
plt.plot(history.history['loss'], label='Train')
plt.plot(history.history['val_loss'], label='Validation')
plt.title('Model Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(loc='upper right')
plt.show()

In [None]:
# loss, acc = model.evaluate(X_test, y_test, verbose=1)

# print('Test Results:')
# print('  Loss     =', loss)
# print('  Accuracy =', acc)