In [1]:
# creating paths to src and data folders in the repo
import sys
import pathlib
src_path = pathlib.Path().absolute().parent.parent / "src"
data_path = pathlib.Path().absolute().parent.parent / "data"

# train test split paths
train_path = data_path / 'tts/train'
test_path = data_path / 'tts/test'
val_path = data_path / 'tts/val'

# add src path to sys.path so it is searched in import statements
sys.path.append(str(src_path))

# basic imports for data manipulation and visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# silence max image size warning
from PIL import Image
Image.MAX_IMAGE_PIXELS = 1000000000 

# import modeling packages
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# modeling metrics
from sklearn.metrics import classification_report, confusion_matrix

changes to data generators:
    
    train batch_size 20

In [2]:
train_generator = ImageDataGenerator().flow_from_directory(str(train_path),
                                                           target_size=(150, 150),
                                                           batch_size=20,
                                                           class_mode='categorical')
test_generator = ImageDataGenerator().flow_from_directory(str(test_path),
                                                          target_size=(150, 150),
                                                          batch_size=50,
                                                          class_mode='categorical',
                                                          shuffle=False)
val_generator = ImageDataGenerator().flow_from_directory(str(val_path),
                                                          target_size=(150, 150),
                                                          batch_size=109,
                                                          class_mode='categorical',
                                                          shuffle=False)

Found 4000 images belonging to 5 classes.
Found 500 images belonging to 5 classes.
Found 545 images belonging to 5 classes.


changes to model architecture:
        
    1st dense layer dropout 0 -> .25
    
    dense layers 1 -> 2

matching the new number of classes

In [3]:
model = models.Sequential()
model.add(layers.Conv2D(30, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(30, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(40, activation='relu'))
model.add(layers.Dropout(.25))
model.add(layers.Dense(40, activation='relu'))
model.add(layers.Dense(5, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [4]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 148, 148, 30)      840       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 30)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 30)        8130      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 30)        0         
_________________________________________________________________
flatten (Flatten)            (None, 38880)             0         
_________________________________________________________________
dense (Dense)                (None, 40)                1555240   
_________________________________________________________________
dropout (Dropout)            (None, 40)                0

changes to fit:
    
    epochs 4

In [5]:
model.fit(
        train_generator,
        steps_per_epoch=200,
        epochs=4,
        validation_data=test_generator,
        validation_steps=10)

Epoch 1/4

  "Palette images with Transparency expressed in bytes should be "


Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7ff2720c7358>

In [6]:
confusion_matrix(test_generator.classes, np.argmax(model.predict(test_generator), axis=1))

array([[14, 11, 56,  7, 12],
       [ 8, 10, 64,  5, 13],
       [ 3,  4, 75,  9,  9],
       [12,  7, 57,  9, 15],
       [ 5,  8, 51,  6, 30]])

not great but at least its not guessing the same class everytime now

In [10]:
test_generator.filepaths

['/Users/matthewwilliamson/Documents/Flatiron/capstone_project/FlatironCapstone/data/tts/test/digital/jewyp3.jpg',
 '/Users/matthewwilliamson/Documents/Flatiron/capstone_project/FlatironCapstone/data/tts/test/digital/jf1f47.jpg',
 '/Users/matthewwilliamson/Documents/Flatiron/capstone_project/FlatironCapstone/data/tts/test/digital/jf2awm.jpg',
 '/Users/matthewwilliamson/Documents/Flatiron/capstone_project/FlatironCapstone/data/tts/test/digital/jfe4ad.png',
 '/Users/matthewwilliamson/Documents/Flatiron/capstone_project/FlatironCapstone/data/tts/test/digital/jfwtdz.jpg',
 '/Users/matthewwilliamson/Documents/Flatiron/capstone_project/FlatironCapstone/data/tts/test/digital/jfwvru.jpg',
 '/Users/matthewwilliamson/Documents/Flatiron/capstone_project/FlatironCapstone/data/tts/test/digital/jfx7gj.jpg',
 '/Users/matthewwilliamson/Documents/Flatiron/capstone_project/FlatironCapstone/data/tts/test/digital/jg21qg.jpg',
 '/Users/matthewwilliamson/Documents/Flatiron/capstone_project/FlatironCapstone/