In [1]:
# creating paths to src and data folders in the repo
import sys
import pathlib
src_path = pathlib.Path().absolute().parent.parent / "src"
data_path = pathlib.Path().absolute().parent.parent / "data"
dump_path = data_path / 'initial_dump'

# train test split paths
train_path = data_path / 'fsm_tts/train'
test_path = data_path / 'fsm_tts/test'

# add src path to sys.path so it is searched in import statements
sys.path.append(str(src_path))

# basic imports for data manipulation and visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# silence max image size warning
from PIL import Image
Image.MAX_IMAGE_PIXELS = 1000000000 

# import modeling packages
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# modeling metrics
from sklearn.metrics import classification_report, confusion_matrix

changes to data generators:
    
    test_generator shuffle

In [2]:
train_generator = ImageDataGenerator().flow_from_directory(str(train_path),
                                                           target_size=(150, 150),
                                                           batch_size=100,
                                                           class_mode='categorical')
test_generator = ImageDataGenerator().flow_from_directory(str(test_path),
                                                          target_size=(150, 150),
                                                          batch_size=33,
                                                          class_mode='categorical',
                                                          shuffle=False)

Found 5700 images belonging to 6 classes.
Found 363 images belonging to 6 classes.


changes to model architecture:
    
    num conv2d layers 2 -> 3
    
    1st dense layer dropout 0 -> .25

increasing complexity showed positive results, testing if more complexity continues improving predictions

In [7]:
model = models.Sequential()
model.add(layers.Conv2D(30, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(30, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(40, activation='relu'))
model.add(layers.Dropout(.25))
model.add(layers.Dense(6, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [8]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 148, 148, 30)      840       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 74, 74, 30)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 72, 72, 30)        8130      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 36, 36, 30)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 38880)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 40)                1555240   
_________________________________________________________________
dropout_1 (Dropout)          (None, 40)               

#### changes to fit:
    
    epochs 3 -> 4

In [9]:
model.fit(
        train_generator,
        steps_per_epoch=57,
        epochs=4,
        validation_data=test_generator,
        validation_steps=11)

Epoch 1/4

  "Palette images with Transparency expressed in bytes should be "


Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7ff7999ff7b8>

In [10]:
confusion_matrix(test_generator.classes, np.argmax(model.predict(test_generator), axis=1))

array([[58,  1,  0,  0,  0,  0],
       [59,  0,  0,  0,  0,  0],
       [68,  0,  0,  0,  0,  0],
       [59,  0,  0,  0,  0,  0],
       [59,  0,  0,  0,  0,  0],
       [59,  0,  0,  0,  0,  0]])