In [76]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.applications import VGG16, ResNet50, InceptionV3
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from PIL import Image
import tensorflow as tf


In [78]:
no_df = df[df['labels'] == 'No']
yes_df = df[df['labels'] == 'Yes']

augmentor = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

augmented_images = []
augmented_labels = []

n_generate = len(yes_df) - len(no_df)

for i in range(n_generate):
    img_path = no_df.iloc[i % len(no_df)]['filepaths']
    img = load_img(img_path, target_size=(224, 224))
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    
    aug_iter = augmentor.flow(img_array, batch_size=1)
    aug_img = next(aug_iter)[0].astype(np.uint8)
    
    augmented_images.append(aug_img)
    augmented_labels.append('No')


In [79]:
augmented_folder = "C:/Users/aniru/OneDrive/Desktop/Capstone/Augmented_No"
os.makedirs(augmented_folder, exist_ok=True)

augmented_filepaths = []

for idx, img in enumerate(augmented_images):
    img_pil = Image.fromarray(img)
    path = os.path.join(augmented_folder, f"aug_no_{idx}.jpg")
    img_pil.save(path)
    augmented_filepaths.append(path)


In [80]:
augmented_df = pd.DataFrame({
    'filepaths': augmented_filepaths,
    'labels': augmented_labels
})

final_df = pd.concat([df, augmented_df], axis=0).reset_index(drop=True)


In [81]:
train_df, test_df = train_test_split(final_df, test_size=0.2, stratify=final_df['labels'], random_state=42)

train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='filepaths',
    y_col='labels',
    target_size=(224, 224),
    batch_size=64,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='filepaths',
    y_col='labels',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)


Found 1198 validated image filenames belonging to 2 classes.
Found 300 validated image filenames belonging to 2 classes.


In [82]:
model2 = models.Sequential([
    layers.Input(shape=(224, 224, 3)),
    
    # First Convolutional Block
    layers.Conv2D(25, (5, 5), activation='relu', padding='same'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.BatchNormalization(),
    
    # Second Convolutional Block
    layers.Conv2D(50, (5, 5), activation='relu', padding='same'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.BatchNormalization(),
    
    # Third Convolutional Block
    layers.Conv2D(70, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.BatchNormalization(),
    
    layers.Conv2D(70, (3, 3), activation='relu', padding='valid'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.BatchNormalization(),
    
    # ANN Block
    layers.Flatten(),
    layers.Dense(100, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(100, activation='relu'),
    layers.Dropout(0.25),
    
    # Output Layer
    layers.Dense(1, activation='sigmoid')  # for binary classification
])

# Compile the model
model2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Show the model architecture
model2.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 224, 224, 25)      1900      
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 112, 112, 25)     0         
 2D)                                                             
                                                                 
 batch_normalization_4 (Batc  (None, 112, 112, 25)     100       
 hNormalization)                                                 
                                                                 
 conv2d_7 (Conv2D)           (None, 112, 112, 50)      31300     
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 56, 56, 50)       0         
 2D)                                                             
                                                      

In [83]:
model2.compile(
    optimizer='adam',                 # Adam optimizer
    loss='binary_crossentropy',        # Because it's a binary classification
    metrics=['accuracy']               # Track accuracy during training
)

In [84]:
epochs = 30
batch_size = 32

# Train the model
history = model2.fit(
    train_generator,
    validation_data=test_generator,
    epochs=epochs,
    batch_size=batch_size
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [85]:
true_labels = test_generator.classes
class_labels = list(test_generator.class_indices.keys())

# Predict probabilities
pred_probs = model2.predict(test_generator)
# Convert probabilities to binary class (threshold = 0.5)
pred_labels = (pred_probs > 0.5).astype(int).reshape(-1)


# Generate the classification report
report = classification_report(true_labels, pred_labels, target_names=class_labels)
print(report)

              precision    recall  f1-score   support

          No       0.81      0.68      0.74       150
         Yes       0.72      0.84      0.78       150

    accuracy                           0.76       300
   macro avg       0.77      0.76      0.76       300
weighted avg       0.77      0.76      0.76       300



In [86]:
# Number of images per class after augmentation
print(final_df['labels'].value_counts())

labels
No     749
Yes    749
Name: count, dtype: int64


In [90]:
print(test_generator.class_indices)


{'No': 0, 'Yes': 1}
