In [1]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

%matplotlib inline

import tensorflow as tf
from tensorflow import keras

from keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

from keras.preprocessing import image

from keras.models import Sequential

from keras.metrics import TruePositives, FalsePositives, TrueNegatives, FalseNegatives, BinaryAccuracy, Precision, Recall, Accuracy, AUC
from keras.optimizers import SGD,RMSprop,adam

from tensorflow.keras import layers
from keras.applications.vgg16 import VGG16

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
def get_metrics(history):
    mets = history.history
    
    for metric_name, metric_values in mets.items():
        print(f"{metric_name}: {metric_values[-1]}") 

In [3]:
data = pd.read_csv("C:\\Users\\tfurr\\Downloads\\Working Labeling Checklist - Sheet1.csv")
data.head()

Unnamed: 0,id,manifestid,documentid,Full,uploaddatetimejsonmetadata,Folder,File,LABEL,SIGN OUT,ALLIGATOR,CATEGORY 1,CATEGORY 2,CATEGORY 3,POOR QUALITY,NOTES,onpremfilepathjsonmetadata,Unnamed: 16,Unnamed: 17
0,EXAMPLE,EXAMPLE,EXAMPLE,EXAMPLE,EXAMPLE,EXAMPLE,EXAMPLE,Vogt,27-Apr,True,dunnage,strap,airbag,True,...,EXAMPLE,PROGRESS,
1,EXAMPLE,EXAMPLE,EXAMPLE,EXAMPLE,EXAMPLE,EXAMPLE,EXAMPLE,Vogt,27-Apr,False,restack,return to level,pass,False,...,EXAMPLE,5000,<-- Count
2,180426,43417541,83328644,TRUE,4/7/2023 7:25,328,644-0.jpg,Vogt,29-Apr,False,pass,,,False,,\\\\offtffs01\\Transflo\\Storage\\LHTRAILER\\C...,100.00%,<-- % Complete
3,180429,43418071,83328656,FALSE,4/7/2023 7:26,328,656-0.jpg,Vogt,29-Apr,False,dunnage,,,False,,\\\\offtffs01\\Transflo\\Storage\\LHTRAILER\\C...,,
4,180421,43416911,83328720,FALSE,4/7/2023 7:24,328,720-0.jpg,Vogt,29-Apr,False,pass,,,False,,\\\\offtffs01\\Transflo\\Storage\\LHTRAILER\\C...,,


In [4]:
# Get rid of first two rows
data = data[2:]
data = data[data['SIGN OUT'].notna()]
data = data[data['POOR QUALITY'].notna()]
data['ext'] = data['Folder'] + '\\' + data['File']

data.head()

Unnamed: 0,id,manifestid,documentid,Full,uploaddatetimejsonmetadata,Folder,File,LABEL,SIGN OUT,ALLIGATOR,CATEGORY 1,CATEGORY 2,CATEGORY 3,POOR QUALITY,NOTES,onpremfilepathjsonmetadata,Unnamed: 16,Unnamed: 17,ext
2,180426,43417541,83328644,True,4/7/2023 7:25,328,644-0.jpg,Vogt,29-Apr,False,pass,,,False,,\\\\offtffs01\\Transflo\\Storage\\LHTRAILER\\C...,100.00%,<-- % Complete,328\644-0.jpg
3,180429,43418071,83328656,False,4/7/2023 7:26,328,656-0.jpg,Vogt,29-Apr,False,dunnage,,,False,,\\\\offtffs01\\Transflo\\Storage\\LHTRAILER\\C...,,,328\656-0.jpg
4,180421,43416911,83328720,False,4/7/2023 7:24,328,720-0.jpg,Vogt,29-Apr,False,pass,,,False,,\\\\offtffs01\\Transflo\\Storage\\LHTRAILER\\C...,,,328\720-0.jpg
5,180420,43416841,83328779,True,4/7/2023 7:24,328,779-0.jpg,Vogt,29-Apr,True,airbag,,,False,,\\\\offtffs01\\Transflo\\Storage\\LHTRAILER\\C...,,,328\779-0.jpg
6,180422,43416961,83328780,True,4/7/2023 7:24,328,780-0.jpg,Vogt,29-Apr,False,pass,,,False,,\\\\offtffs01\\Transflo\\Storage\\LHTRAILER\\C...,,,328\780-0.jpg


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4996 entries, 2 to 5001
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   id                          4996 non-null   object
 1   manifestid                  4996 non-null   object
 2   documentid                  4996 non-null   object
 3   Full                        4996 non-null   object
 4   uploaddatetimejsonmetadata  4996 non-null   object
 5   Folder                      4996 non-null   object
 6   File                        4996 non-null   object
 7   LABEL                       4996 non-null   object
 8   SIGN OUT                    4996 non-null   object
 9   ALLIGATOR                   4970 non-null   object
 10  CATEGORY 1                  4865 non-null   object
 11  CATEGORY 2                  493 non-null    object
 12  CATEGORY 3                  65 non-null     object
 13  POOR QUALITY                4996 non-null   obje

In [None]:
# Check how many everyone has labeled
data[data['SIGN OUT'].notna()].LABEL.value_counts(normalize=True)

In [None]:
data[data['SIGN OUT'].notna()].LABEL.value_counts()

In [None]:
data.groupby(['LABEL', 'SIGN OUT']).size().reset_index(name='Count').sort_values('Count', ascending=False)

In [None]:
data['SIGN OUT'].value_counts()

In [None]:
data.ALLIGATOR.value_counts()

In [None]:
data.Full.value_counts()

In [None]:
# Get rid of case problems
data['CATEGORY 1'] = data['CATEGORY 1'].str.lower()
data['CATEGORY 1'].value_counts()

In [None]:
# Get rid of case problems
data['CATEGORY 2'] = data['CATEGORY 2'].str.lower()
data['CATEGORY 2'].value_counts()

In [None]:
# Get rid of case problems
data['CATEGORY 3'] = data['CATEGORY 3'].str.lower()
data['CATEGORY 3'].value_counts()

In [None]:
# Create a new column that shows if a picture is a duplicate of another
data['DUPLICATE'] = data['CATEGORY 1'].str.contains('duplicate', case=False, regex = False)
data.DUPLICATE.value_counts()

In [None]:
data['POOR QUALITY'].value_counts()

In [None]:
# Creates a df for those that are poor quality, high quality, alligator and not alligator freight and full and not full
poor_df = data[data['POOR QUALITY'] == True]
good_df = data[data['POOR QUALITY'] == False]

alligator_df = data[data['ALLIGATOR'] == True]
non_alligator_df = data[data['ALLIGATOR'] == False]

full_df = data[data['Full'] == 'TRUE']
not_full_df = data[data['Full'] == 'FALSE']

In [None]:
print(poor_df.shape)
print(good_df.shape)

In [None]:
print(alligator_df.shape)
print(non_alligator_df.shape)

In [None]:
print(full_df.shape)
print(not_full_df.shape)

In [None]:
# Pictures that show there needs to be an airbag
airbag_df = data[data['CATEGORY 1'].str.contains('airbag', case=False, regex=False) | 
                 data['CATEGORY 2'].str.contains('airbag', case=False, regex=False) |
                 data['CATEGORY 3'].str.contains('airbag', case=False, regex=False)]

# Pictures that don't need airbag
non_airbag_df = data[~data.index.isin(airbag_df.index)]

In [None]:
airbag_df.shape

In [None]:
non_airbag_df.shape

In [None]:
# Pictures that show there needs to be a return to level
level_df = data[data['CATEGORY 1'].str.contains('level', case=False, regex=False) | 
                 data['CATEGORY 2'].str.contains('level', case=False, regex=False) |
                 data['CATEGORY 3'].str.contains('level', case=False, regex=False)]

# Pictures that don't need return to level
non_level_df = data[~data.index.isin(level_df.index)]

In [None]:
level_df.shape

In [None]:
non_level_df.shape

In [None]:
# Pictures that show there needs to be a restack
restack_df = data[data['CATEGORY 1'].str.contains('restack', case=False, regex=False) | 
                 data['CATEGORY 2'].str.contains('restack', case=False, regex=False) |
                 data['CATEGORY 3'].str.contains('restack', case=False, regex=False)]

# Pictures that don't need a restack
non_restack_df = data[~data.index.isin(restack_df.index)]

In [None]:
restack_df.shape

In [None]:
non_restack_df.shape

In [None]:
# Pictures that pass
pass_df = data[data['CATEGORY 1'].str.contains('pass', case=False, regex=False) | 
                 data['CATEGORY 2'].str.contains('pass', case=False, regex=False) |
                 data['CATEGORY 3'].str.contains('pass', case=False, regex=False)]

# Pictures that don't pass
non_pass_df = data[~data.index.isin(pass_df.index)]

In [None]:
pass_df.shape

In [None]:
non_pass_df.shape

# Model for Poor quality

In [6]:

# Assuming your dataframe is called 'df'
# Splitting the dataframe into train and test sets while stratifying based on 'Full' column
training, test_df = train_test_split(data, test_size=0.2, stratify=data['POOR QUALITY'])
train_df, val_df = train_test_split(training, test_size=0.15, stratify=training['POOR QUALITY'])

# Checking the proportions of True and False values in the 'Full' column for train and test sets
train_counts = train_df['POOR QUALITY'].value_counts(normalize=True)
test_counts = test_df['POOR QUALITY'].value_counts(normalize=True)
val_counts = val_df['POOR QUALITY'].value_counts(normalize=True)
print("Train set proportions:")
print(train_counts)
print("\nTest set proportions:")
print(test_counts)
print("\nVal set proportions:")
print(val_counts)

Train set proportions:
False    0.885453
True     0.114547
Name: POOR QUALITY, dtype: float64

Test set proportions:
False    0.886
True     0.114
Name: POOR QUALITY, dtype: float64

Val set proportions:
False    0.885
True     0.115
Name: POOR QUALITY, dtype: float64


In [7]:
train_df['POOR QUALITY'] = train_df['POOR QUALITY'].map({True:'True', False:'False'})
val_df['POOR QUALITY'] = val_df['POOR QUALITY'].map({True:'True', False:'False'})
test_df['POOR QUALITY'] = test_df['POOR QUALITY'].map({True:'True', False:'False'})

In [8]:
image_dir = "C:\\Users\\tfurr\\OneDrive\\Documents\\School\\UChicago\\Spring 2023\\MSCA Capstone 1\\Code Files\\Photos_all\\"

datagen = ImageDataGenerator(
   # rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2)
    #horizontal_flip=True)

In [9]:
batch_size=32

train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=image_dir,
    x_col='ext',
    y_col='POOR QUALITY',
    target_size=(224,224),
    batch_size=batch_size,
    class_mode="binary")

val_generator = datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=image_dir,
    x_col='ext',
    y_col='POOR QUALITY',
    target_size=(224,224),
    batch_size=batch_size,
    class_mode="binary")

test_generator = datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=image_dir,
    x_col='ext',
    y_col='POOR QUALITY',
    target_size=(224,224),
    batch_size=batch_size,
    class_mode="binary")

Found 3369 validated image filenames belonging to 2 classes.
Found 594 validated image filenames belonging to 2 classes.




Found 994 validated image filenames belonging to 2 classes.




In [10]:
input_dimension = (224, 224, 3)

metrics1 = [TruePositives(), FalsePositives(), TrueNegatives(), FalseNegatives(), BinaryAccuracy(), Precision(), Recall(), Accuracy(), AUC()]

model = Sequential([
    layers.Resizing(224, 224),
    layers.Rescaling(1./255),
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
    layers.RandomRotation(0.2),
    layers.RandomContrast(factor=0.2),
    layers.RandomBrightness(factor=0.2),
    
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_dimension),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.5),
    layers.Flatten(),
    
    layers.Dense(250, activation='relu'),
    layers.Dropout(0.5),
    
    layers.Dense(1, activation='sigmoid')
])


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics1)

In [11]:
history = model.fit(train_generator, epochs=2, validation_data=val_generator)

Epoch 1/2
Epoch 2/2


In [12]:
final_metrics = history.history

print("Final Metrics:")
for metric_name, metric_values in final_metrics.items():
    print(f"{metric_name}: {metric_values[-1]}")

Final Metrics:
loss: 0.412374883890152
true_positives: 0.0
false_positives: 0.0
true_negatives: 2982.0
false_negatives: 387.0
binary_accuracy: 0.8851290941238403
precision: 0.0
recall: 0.0
accuracy: 0.0
auc: 0.4675659239292145
val_loss: 0.46460995078086853
val_true_positives: 0.0
val_false_positives: 0.0
val_true_negatives: 525.0
val_false_negatives: 69.0
val_binary_accuracy: 0.8838383555412292
val_precision: 0.0
val_recall: 0.0
val_accuracy: 0.0
val_auc: 0.5170462131500244


In [13]:
test_metrics = model.evaluate(test_generator)



In [14]:
names = model.metrics_names
values = test_metrics

print("Final Metrics:")
for metric_name, metric_value in zip(names, values):
    print(f"{metric_name}: {metric_value}")

Final Metrics:
loss: 0.44988328218460083
true_positives: 0.0
false_positives: 0.0
true_negatives: 880.0
false_negatives: 114.0
binary_accuracy: 0.8853118419647217
precision: 0.0
recall: 0.0
accuracy: 0.0
auc: 0.5871361494064331


## Model with additional conv2d, batch normalization and regularizer

In [None]:

input_dimension = (224, 224, 3)

model1 = Sequential([
    layers.Resizing(224, 224),
    layers.Rescaling(1./255),
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
    layers.RandomRotation(0.2),
    layers.RandomContrast(factor=0.2),
    layers.RandomBrightness(factor=0.2),
    
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_dimension),
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2,2)),
    layers.Dropout(0.5),
    layers.Flatten(),
    
    layers.Dense(250, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.5),
    
    layers.Dense(1, activation='sigmoid')
])


model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
model1.fit(train_generator, epochs=5, validation_data=val_generator)

In [None]:
model1.evaluate(test_generator)

# Model with different weights on the positives

In [15]:
input_dimension = (224, 224, 3)

metrics1 = [TruePositives(), FalsePositives(), TrueNegatives(), FalseNegatives(), BinaryAccuracy(), Precision(), Recall(), Accuracy(), AUC()]

weighted_model = Sequential([
    layers.Resizing(224, 224),
    layers.Rescaling(1./255),
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
    layers.RandomRotation(0.2),
    layers.RandomContrast(factor=0.2),
    layers.RandomBrightness(factor=0.2),
    
    layers.Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=input_dimension),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.5),
    layers.Flatten(),
    
    layers.Dense(250, activation='relu'),
    layers.Dropout(0.5),
    
    layers.Dense(1, activation='sigmoid')
    
])

weighted_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics1)

In [16]:
neg, pos = np.bincount(data['POOR QUALITY'])
total = neg + pos

In [17]:
weight_for_False = (1 / neg) * (total / 2.0)
weight_for_True = (1 / pos/2) * (total / 2.0)

weight_for_False, weight_for_True

(0.5646473779385172, 2.1835664335664338)

In [18]:
class_weights = {0:weight_for_False, 1: weight_for_True}

In [19]:
weighted_history = weighted_model.fit(train_generator, epochs=2, validation_data=val_generator, class_weight=class_weights)

Epoch 1/2
Epoch 2/2


In [20]:
weighted_final_metrics = weighted_history.history

print("Final Metrics:")
for metric_name, metric_values in weighted_final_metrics.items():
    print(f"{metric_name}: {metric_values[-1]}")

Final Metrics:
loss: 0.4904424250125885
true_positives_1: 6.0
false_positives_1: 47.0
true_negatives_1: 2935.0
false_negatives_1: 381.0
binary_accuracy: 0.872959315776825
precision_1: 0.11320754885673523
recall_1: 0.01550387591123581
accuracy: 0.0
auc_1: 0.49954813718795776
val_loss: 0.584652841091156
val_true_positives_1: 0.0
val_false_positives_1: 0.0
val_true_negatives_1: 525.0
val_false_negatives_1: 69.0
val_binary_accuracy: 0.8838383555412292
val_precision_1: 0.0
val_recall_1: 0.0
val_accuracy: 0.0
val_auc_1: 0.4911663234233856


In [22]:
weighted_test_metrics = weighted_model.evaluate(test_generator)



In [24]:
names = weighted_model.metrics_names
values = weighted_test_metrics

print("Final Metrics:")
for metric_name, metric_value in zip(names, values):
    print(f"{metric_name}: {metric_value}")

Final Metrics:
loss: 0.5843783617019653
true_positives_1: 0.0
false_positives_1: 0.0
true_negatives_1: 880.0
false_negatives_1: 114.0
binary_accuracy: 0.8853118419647217
precision_1: 0.0
recall_1: 0.0
accuracy: 0.0
auc_1: 0.49058014154434204


### Biased Model

In [None]:
initial_bias = np.log([pos/neg])
initial_bias

In [None]:
input_dimension = (224, 224, 3)

metrics1 = [TruePositives(), FalsePositives(), TrueNegatives(), FalseNegatives(), BinaryAccuracy(), Precision(), Recall(), Accuracy(), AUC()]

output_bias = tf.keras.initializers.Constant(initial_bias)

biased_model = Sequential([
    layers.Resizing(224, 224),
    layers.Rescaling(1./255),
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
    layers.RandomRotation(0.2),
    layers.RandomContrast(factor=0.2),
    layers.RandomBrightness(factor=0.2),
    
    layers.Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=input_dimension),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.5),
    layers.Flatten(),
    
    layers.Dense(250, activation='relu'),
    layers.Dropout(0.5),
    
    layers.Dense(1, activation='sigmoid', bias_initializer=output_bias)
    
])

biased_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics1)

In [None]:
biased_history = biased_model.fit(train_generator, epochs=5, validation_data=val_generator)

In [None]:
get_metrics(biased_history)

In [None]:
biased_test_metrics = biased_model.evaluate(test_generator)

In [None]:
names = biased_model.metrics_names
values = biased_test_metrics

print("Final Metrics:")
for metric_name, metric_value in zip(names, values):
    print(f"{metric_name}: {metric_value}")

### Run over a bunch of different model weights

In [None]:
target_labels = data['POOR QUALITY']
target_labels = target_labels.astype('bool')  # Convert to boolean type if needed

# Handle missing or NaN values
target_labels[pd.isna(target_labels)] = False

class_weights = compute_class_weight("balanced", classes=[False, True], y=target_labels)

In [None]:

wdic = {0: class_weights[0], 1: class_weights[1]}
wdic11 = {0: class_weights[0], 1: class_weights[1]/1.1}
wdic12 = {0: class_weights[0], 1: class_weights[1]/1.2}
wdic13 = {0: class_weights[0], 1: class_weights[1]/1.3}
wdic14 = {0: class_weights[0], 1: class_weights[1]/1.4}
wdic15 = {0: class_weights[0], 1: class_weights[1]/1.5}
wdic16 = {0: class_weights[0], 1: class_weights[1]/1.6}
wdic17 = {0: class_weights[0], 1: class_weights[1]/1.7}

In [None]:
class_weights_list = [wdic, wdic11, wdic12, wdic13, wdic14, wdic15, wdic16, wdic17]
class_weights_list

### Class weights based on what is given

In [None]:
input_dimension = (224, 224, 3)

metrics1 = [TruePositives(), FalsePositives(), TrueNegatives(), FalseNegatives(), BinaryAccuracy(), Precision(), Recall(), Accuracy(), AUC()]

model_weight = Sequential([
    layers.Resizing(224, 224),
    layers.Rescaling(1./255),
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
    layers.RandomRotation(0.2),
    layers.RandomContrast(factor=0.2),
    layers.RandomBrightness(factor=0.2),
    
    layers.Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=input_dimension),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.5),
    layers.Flatten(),
    
    layers.Dense(250, activation='relu'),
    layers.Dropout(0.5),
    
    layers.Dense(1, activation='sigmoid')
    
])

model_weight.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics1)

In [None]:
hist_weight = model_weight.fit(train_generator, epochs=5, validation_data=val_generator, class_weight=wdic)

In [None]:
get_metrics(hist_weight)

### Weights divided by 1.2

In [None]:
input_dimension = (224, 224, 3)

metrics1 = [TruePositives(), FalsePositives(), TrueNegatives(), FalseNegatives(), BinaryAccuracy(), Precision(), Recall(), Accuracy(), AUC()]

model12 = Sequential([
    layers.Resizing(224, 224),
    layers.Rescaling(1./255),
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
    layers.RandomRotation(0.2),
    layers.RandomContrast(factor=0.2),
    layers.RandomBrightness(factor=0.2),
    
    layers.Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=input_dimension),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.5),
    layers.Flatten(),
    
    layers.Dense(250, activation='relu'),
    layers.Dropout(0.5),
    
    layers.Dense(1, activation='sigmoid')
    
])

model12.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics1)

In [None]:
history12 = model12.fit(train_generator, epochs=5, validation_data=val_generator, class_weight=wdic12)

In [None]:
get_metrics(history12)

### Weights divided by 1.5

In [None]:
input_dimension = (224, 224, 3)

metrics1 = [TruePositives(), FalsePositives(), TrueNegatives(), FalseNegatives(), BinaryAccuracy(), Precision(), Recall(), Accuracy(), AUC()]

model15 = Sequential([
    layers.Resizing(224, 224),
    layers.Rescaling(1./255),
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
    layers.RandomRotation(0.2),
    layers.RandomContrast(factor=0.2),
    layers.RandomBrightness(factor=0.2),
    
    layers.Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=input_dimension),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.5),
    layers.Flatten(),
    
    layers.Dense(250, activation='relu'),
    layers.Dropout(0.5),
    
    layers.Dense(1, activation='sigmoid')
    
])

model15.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics1)

In [None]:
history15 = model15.fit(train_generator, epochs=5, validation_data=val_generator, class_weight=wdic15)

In [None]:
get_metrics(history15)

# VGG16 Transfer Learning

In [None]:
from keras.models import Model

metrics1 = [TruePositives(), FalsePositives(), TrueNegatives(), FalseNegatives(), BinaryAccuracy(), Precision(), Recall(), Accuracy(), AUC()]

base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224,224,3))

for layer in base_model.layers:
    layer.trainable= False

x = base_model.output
# Apply the layers from model15 on top of the base model
x = layers.Resizing(224, 224)(x)
x = layers.Rescaling(1./255)(x)
x = layers.RandomFlip(mode="horizontal_and_vertical")(x)
x = layers.RandomTranslation(height_factor=0.2, width_factor=0.2)(x)
x = layers.RandomRotation(0.2)(x)
#x = layers.RandomContrast(factor=0.2)(x)
x = layers.RandomBrightness(factor=0.2)(x)

x = layers.Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=(224, 224, 3))(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Dropout(0.25)(x)

x = layers.Conv2D(64, (3, 3), activation='relu')(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Dropout(0.5)(x)
x = layers.Flatten()(x)

x = layers.Dense(250, activation='relu')(x)
x = layers.Dropout(0.5)(x)

predictions = layers.Dense(1, activation='sigmoid')(x)

# Create the final model
vgg_model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
vgg_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics1)
vgg_model.fit(train_generator, epochs=5, validation_data=val_generator)