In [18]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

%matplotlib inline

import tensorflow as tf
from tensorflow import keras

from keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

from keras.preprocessing import image

from keras.models import Sequential

from keras.metrics import TruePositives, FalsePositives, TrueNegatives, FalseNegatives
from keras.optimizers import SGD,RMSprop,adam

from tensorflow.keras import layers
#from keras.applications.vgg16 import VGG16

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
data = pd.read_csv("C:\\Users\\tfurr\\OneDrive\\Documents\\School\\UChicago\\Spring 2023\\MSCA Capstone 1\\Code Files\\Working Labeling Checklist with Dummy Variables - Working Labeling Checklist with Dummy Variables.csv")

In [None]:
data.head()

In [None]:
data.ALLIGATOR.value_counts()

In [None]:
data.Full.value_counts()

In [3]:
data['CATEGORY 1'] = data['CATEGORY 1'].str.lower()

In [None]:
data['CATEGORY 1'].value_counts()

In [4]:
data['is_pass'] = data['CATEGORY 1'].astype(str).apply(lambda x: 1 if 'pass' in x else 0)

In [None]:
data.head()

In [5]:
data.is_pass.value_counts()

0    2842
1    2158
Name: is_pass, dtype: int64

In [6]:
data.is_pass = data.is_pass.map({0:'False', 1:'True'})

In [7]:
data.is_pass.value_counts()

False    2842
True     2158
Name: is_pass, dtype: int64

In [8]:
training, test_df = train_test_split(data, test_size=0.2, stratify=data['is_pass'])
train_df, val_df = train_test_split(training, test_size=0.15, stratify=training['is_pass'])

# Checking the proportions of True and False values in the 'Full' column for train and test sets
train_counts = train_df['is_pass'].value_counts(normalize=True)
test_counts = test_df['is_pass'].value_counts(normalize=True)
val_counts = val_df['is_pass'].value_counts(normalize=True)
print("Train set proportions:")
print(train_counts)
print("\nTest set proportions:")
print(test_counts)
print("\nVal set proportions:")
print(val_counts)

Train set proportions:
False    0.568529
True     0.431471
Name: is_pass, dtype: float64

Test set proportions:
False    0.568
True     0.432
Name: is_pass, dtype: float64

Val set proportions:
False    0.568333
True     0.431667
Name: is_pass, dtype: float64


In [9]:
image_dir = "C:\\Users\\tfurr\\OneDrive\\Documents\\School\\UChicago\\Spring 2023\\MSCA Capstone 1\\Code Files\\Photos_all\\"

datagen = ImageDataGenerator(
   # rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2)
    #horizontal_flip=True)

In [10]:
batch_size=32

train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=image_dir,
    x_col='ext',
    y_col='is_pass',
    target_size=(224,224),
    batch_size=batch_size,
    class_mode="binary")

val_generator = datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=image_dir,
    x_col='ext',
    y_col='is_pass',
    target_size=(224,224),
    batch_size=batch_size,
    class_mode="binary")

test_generator = datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=image_dir,
    x_col='ext',
    y_col='is_pass',
    target_size=(224,224),
    batch_size=batch_size,
    class_mode="binary")

Found 3372 validated image filenames belonging to 2 classes.
Found 594 validated image filenames belonging to 2 classes.
Found 995 validated image filenames belonging to 2 classes.




In [11]:
input_dimension = (224, 224, 3)

metrics1 = [TruePositives(), FalsePositives(), TrueNegatives(), FalseNegatives()]

first_model = Sequential([
    layers.Resizing(224, 224),
    layers.Rescaling(1./255),
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
    layers.RandomRotation(0.2),
    layers.RandomContrast(factor=0.2),
    layers.RandomBrightness(factor=0.2),
    
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_dimension),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.1),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.15),
    layers.Flatten(),
    
    layers.Dense(250, activation='relu'),
    
    layers.Dense(1, activation='sigmoid')
])


first_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics1)

In [12]:
history = first_model.fit(train_generator, epochs=4, validation_data=val_generator)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
  7/106 [>.............................] - ETA: 10:47 - loss: 0.6878 - true_positives: 0.0000e+00 - false_positives: 0.0000e+00 - true_negatives: 125.0000 - false_negatives: 99.0000

KeyboardInterrupt: 

In [13]:
final_metrics = history.history

print("Final Metrics:")
for metric_name, metric_values in final_metrics.items():
    print(f"{metric_name}: {metric_values[-1]}")

NameError: name 'history' is not defined

In [None]:
test_metrics = first_model.evaluate(test_generator)

In [None]:
names = first_model.metrics_names
values = test_metrics

print("Final Metrics:")
for metric_name, metric_value in zip(names, values):
    print(f"{metric_name}: {metric_value}")

# Model balanced with Class Weights

In [20]:
class_weights = compute_class_weight('balanced', classes=["False","True"], y=train_df['is_pass'])
class_weights = {0: class_weights[0], 1: class_weights[1]}

In [23]:
input_dimension = (224, 224, 3)

metrics1 = [TruePositives(), FalsePositives(), TrueNegatives(), FalseNegatives()]

balanced_model = Sequential([
    layers.Resizing(224, 224),
    layers.Rescaling(1./255),
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
    layers.RandomRotation(0.2),
    layers.RandomContrast(factor=0.2),
    layers.RandomBrightness(factor=0.2),
    
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_dimension),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    #layers.Dropout(0.5),
    layers.Flatten(),
    
    layers.Dense(128, activation='relu'),
    #layers.Dropout(0.5),
    
    layers.Dense(1, activation='sigmoid')
])


balanced_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics1)



In [24]:
balanced_history = balanced_model.fit(train_generator, epochs=5, batch_size=32, validation_data=val_generator, class_weight=class_weights)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [51]:
1456/(1456+(0.5*(1916)))

0.6031483015741508

In [25]:
test_metrics_balanced = balanced_model.evaluate(test_generator)



In [None]:
names = balanced_model.metrics_names
values = test_metrics_balanced

print("Final Metrics:")
for metric_name, metric_value in zip(names, values):
    print(f"{metric_name}: {metric_value}")

In [26]:
preds = balanced_model.predict(test_generator)
train_preds = balanced_model.predict(train_generator)
val_preds = balanced_model.predict(val_generator)



In [34]:
preds

array([[0.50086755],
       [0.50007147],
       [0.49992794],
       [0.5008668 ],
       [0.49946502],
       [0.5012158 ],
       [0.4993006 ],
       [0.49887633],
       [0.49762788],
       [0.5009391 ],
       [0.4994395 ],
       [0.4965839 ],
       [0.4994374 ],
       [0.50081354],
       [0.5001744 ],
       [0.49912655],
       [0.5010349 ],
       [0.500105  ],
       [0.50121737],
       [0.49993333],
       [0.5004535 ],
       [0.5006805 ],
       [0.5003644 ],
       [0.5005371 ],
       [0.5012378 ],
       [0.4997031 ],
       [0.5002513 ],
       [0.5011231 ],
       [0.5008692 ],
       [0.50100744],
       [0.4988451 ],
       [0.50092995],
       [0.49741155],
       [0.501031  ],
       [0.5009427 ],
       [0.50126535],
       [0.4995309 ],
       [0.49962997],
       [0.49787784],
       [0.499427  ],
       [0.5005649 ],
       [0.5013076 ],
       [0.50083685],
       [0.5007735 ],
       [0.49640948],
       [0.49969417],
       [0.50100034],
       [0.500

In [35]:
threshold = preds.mean()
binary_predictions = [1 if prediction > threshold else 0 for prediction in train_preds]
binary_predictions

[0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,


In [36]:
from sklearn.metrics import precision_score, recall_score, f1_score

precision = precision_score(train_generator.classes, binary_predictions)
recall = recall_score(train_generator.classes, binary_predictions)
f1 = f1_score(train_generator.classes, binary_predictions)

# Print the metrics
#print("Loss: {:.4f}".format(test_loss_airbags))
#print("Accuracy: {:.4f}".format(test_acc_airbags))
print("Precision: {:.4f}".format(precision))
print("Recall: {:.4f}".format(recall))
print("F1-Score: {:.4f}".format(f1))

Precision: 0.4321
Recall: 0.5440
F1-Score: 0.4816


# CHanging to probabilities

In [None]:
train_generator.class_indices

In [39]:
input_dimension = (224, 224, 3)

metrics1 = [TruePositives(), FalsePositives(), TrueNegatives(), FalseNegatives()]

soft_model = Sequential([
    layers.Resizing(224, 224),
    layers.Rescaling(1./255),
    #layers.RandomFlip(mode="horizontal_and_vertical"),
    #layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
    #layers.RandomRotation(0.2),
    #layers.RandomContrast(factor=0.2),
    #layers.RandomBrightness(factor=0.2),
    
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_dimension),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    
    #layers.Conv2D(64, (3, 3), activation='relu'),
    #layers.MaxPooling2D(pool_size=(2, 2)),
    #layers.Dropout(0.5),
    layers.Flatten(),
    
    layers.Dense(128, activation='relu'),
    #layers.Dropout(0.5),
    
    layers.Dense(1, activation='sigmoid')
])


soft_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics1)

In [40]:
soft_hist = soft_model.fit(train_generator, epochs=5, batch_size=32, validation_data=val_generator)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [49]:
f1 = 564/(564+(.5*(340+892)))
f1

0.47796610169491527

In [41]:
soft_preds = soft_model.predict(test_generator)
soft_train_preds = soft_model.predict(train_generator)
soft_val_preds = soft_model.predict(val_generator)



In [50]:
soft_train_preds.mean()

0.46337956

In [43]:
threshold = soft_train_preds.mean()
binary_predictions_soft = [1 if prediction > threshold else 0 for prediction in soft_train_preds]
binary_predictions_soft

[0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,


In [44]:
from sklearn.metrics import precision_score, recall_score, f1_score

precision = precision_score(train_generator.classes, binary_predictions_soft)
recall = recall_score(train_generator.classes, binary_predictions_soft)
f1 = f1_score(train_generator.classes, binary_predictions_soft)

# Print the metrics
#print("Loss: {:.4f}".format(test_loss_airbags))
#print("Accuracy: {:.4f}".format(test_acc_airbags))
print("Precision: {:.4f}".format(precision))
print("Recall: {:.4f}".format(recall))
print("F1-Score: {:.4f}".format(f1))

Precision: 0.4324
Recall: 0.4918
F1-Score: 0.4602
