In [1]:
# creating paths to src and data folders in the repo
import sys
import pathlib
src_path = pathlib.Path().absolute().parent.parent / "src"
data_path = pathlib.Path().absolute().parent.parent / "data"

# train test split paths
train_path = data_path / 'binary_tts/train'
test_path = data_path / 'binary_tts/test'
val_path = data_path / 'binary_tts/val'

# add src path to sys.path so it is searched in import statements
sys.path.append(str(src_path))

# basic imports for data manipulation and visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# silence max image size warning
from PIL import Image
Image.MAX_IMAGE_PIXELS = 1000000000 

# import modeling packages
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# modeling metrics
from sklearn.metrics import classification_report, confusion_matrix, plot_confusion_matrix

changes to data generators:
    
    interpolation 'nearest' -> 'bicubic'
    
    target_size 150x150 -> 200x200
    

In [2]:
train_generator = ImageDataGenerator().flow_from_directory(str(train_path),
                                                           target_size=(300, 300),
                                                           batch_size=20,
                                                           class_mode='binary',
                                                           interpolation='bicubic')
test_generator = ImageDataGenerator().flow_from_directory(str(test_path),
                                                          target_size=(300, 300),
                                                          batch_size=20,
                                                          class_mode='binary',
                                                          interpolation='bicubic',
                                                          shuffle=False)
val_generator = ImageDataGenerator().flow_from_directory(str(val_path),
                                                         target_size=(300, 300),
                                                         batch_size=20,
                                                         class_mode='binary',
                                                         interpolation='bicubic',
                                                         shuffle=False)

Found 6000 images belonging to 2 classes.
Found 200 images belonging to 2 classes.
Found 200 images belonging to 2 classes.


changes to model architecture:
    
    input_shape 150x150 -> 200x200
    
    num conv2d layers 2 -> 1
    
    output num_nodes 2 -> 1
    
    output_activation 'softmax -> sigmoid'
    
    MaxPooling pool_size 2x2 -> 5x5

In [9]:
model = models.Sequential()
model.add(layers.Conv2D(35, (3, 3), activation='relu', input_shape=(400, 400, 3)))
model.add(layers.MaxPooling2D((5, 5)))
model.add(layers.Flatten())
model.add(layers.Dense(40, activation='relu'))
model.add(layers.Dropout(.2))
model.add(layers.Dense(40, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [10]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 398, 398, 35)      980       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 79, 79, 35)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 218435)            0         
_________________________________________________________________
dense_6 (Dense)              (None, 40)                8737440   
_________________________________________________________________
dropout_2 (Dropout)          (None, 40)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 40)                1640      
_________________________________________________________________
dense_8 (Dense)              (None, 1)                

changes to fit:
    
    epochs 10 -> 5
    
    steps_per_epoch 200 -> 300

In [11]:
model.fit(
        train_generator,
        steps_per_epoch=300,
        epochs=6,
        validation_data=test_generator,
        validation_steps=10)

Epoch 1/6


InvalidArgumentError:  Input to reshape is a tensor with 2436700 values, but the requested shape requires a multiple of 218435
	 [[node sequential_2/flatten_2/Reshape (defined at <ipython-input-11-41fa16bddcd3>:6) ]] [Op:__inference_train_function_2700]

Function call stack:
train_function


In [None]:
conf_mat = confusion_matrix(test_generator.classes, model.predict_classes(test_generator))

class_names = list(test_generator.class_indices.keys())

fig, ax = plt.subplots(figsize=(8,6))
model_10_path = pathlib.Path().absolute().parent.parent / 'reports/figures/mod_10_heatmap.png'
ax.set_ylim([0,1])
sns.heatmap(conf_mat, xticklabels=class_names, yticklabels=class_names, ax=ax, square=True, annot=True, cmap='Blues')
plt.savefig(model_10_path)

In [None]:
accuracies = []
for i in range(0, 2):
    acc = conf_mat[i][i] / sum(conf_mat[i])
    accuracies.append(acc)
accuracies

In [None]:
test_predictions = model.predict(test_generator)

In [None]:
test_predictions

In [None]:
test_generator.classes

In [None]:
conf_mat

In [None]:
class_predictions = model.predict_classes(test_generator)

In [None]:
sum(class_predictions)

In [None]:
conf_mat = confusion_matrix(test_generator.classes, class_predictions)


sns.heatmap(conf_mat, xticklabels=class_names, yticklabels=class_names, ax=ax, square=True, annot=True, cmap='Blues')

In [None]:
plt.show()

In [None]:
conf_mat = confusion_matrix(test_generator.classes, class_predictions)

class_names = list(test_generator.class_indices.keys())

fig, ax = plt.subplots(figsize=(8,6))
model_10_path = pathlib.Path().absolute().parent.parent / 'reports/figures/mod_10_heatmap.png'
ax.set_ylim([0,2])
sns.heatmap(conf_mat,
            xticklabels=class_names,
            yticklabels=class_names,
            ax=ax,
            annot=True,
            cmap='Blues')
plt.savefig(model_10_path)