In [1]:
import os
import pandas as pd
from PIL import Image

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image

from sklearn.model_selection import train_test_split

import numpy as np
import matplotlib.pyplot as plt

In [None]:
dataset_path = 'C:/Users/Jon/Documents/DAT 490 Capstone/clean_datasets'

file_paths = []
labels = []

for root, dirs, files in os.walk(dataset_path):
    for file in files:
        if file.endswith(('.jpg', '.jpeg', '.png')):
            file_path = os.path.join(root, file)
            try:
                
                with Image.open(file_path) as img:
                    img.verify()  
                file_paths.append(file_path)
                labels.append(os.path.basename(root))
            except (IOError, SyntaxError) as e:
                print(f"Invalid image file: {file_path} - {e}")

df = pd.DataFrame({
    'file_path': file_paths,
    'label': labels
})

print(df['label'].value_counts())

Invalid image file: C:/Users/Jon/Documents/DAT 490 Capstone/clean_datasets\normal\Tr-no_1011.jpg - cannot identify image file 'C:\\Users\\Jon\\Documents\\DAT 490 Capstone\\clean_datasets\\normal\\Tr-no_1011.jpg'
Invalid image file: C:/Users/Jon/Documents/DAT 490 Capstone/clean_datasets\normal\Tr-no_1012.jpg - cannot identify image file 'C:\\Users\\Jon\\Documents\\DAT 490 Capstone\\clean_datasets\\normal\\Tr-no_1012.jpg'
Invalid image file: C:/Users/Jon/Documents/DAT 490 Capstone/clean_datasets\normal\Tr-no_1019.jpg - cannot identify image file 'C:\\Users\\Jon\\Documents\\DAT 490 Capstone\\clean_datasets\\normal\\Tr-no_1019.jpg'


In [3]:
X_train, X_val, y_train, y_val = train_test_split(df['file_path'], df['label'], test_size = 0.3, random_state = 42)

train_df = pd.DataFrame({'file_path': X_train, 'label': y_train})
val_df = pd.DataFrame({'file_path': X_val, 'label': y_val})

In [None]:
img_width, img_height = 250, 250
batch_size = 32

train_datagen = ImageDataGenerator(
    rescale = 1.0 / 255.0, 
    rotation_range = 20,    
    width_shift_range = 0.2,  
    height_shift_range = 0.2,  
    shear_range = 0.2,      
    zoom_range = 0.2,       
    horizontal_flip = True,  
    fill_mode = 'nearest'   
)

val_datagen = ImageDataGenerator(rescale = 1.0 / 255.0)


train_generator = train_datagen.flow_from_dataframe(
    dataframe = train_df,
    x_col = 'file_path',
    y_col = 'label',
    target_size = (img_width, img_height),
    batch_size = batch_size,
    class_mode = 'categorical' 
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe = val_df,
    x_col = 'file_path',
    y_col = 'label',
    target_size = (img_width, img_height),
    batch_size = batch_size,
    class_mode = 'categorical' 
)

Found 10864 validated image filenames belonging to 4 classes.
Found 4656 validated image filenames belonging to 4 classes.


In [None]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_width, img_height, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation='softmax')
])

model.compile(optimizer = 'adam',
              loss = 'categorical_crossentropy',  
              metrics = ['accuracy'])


model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
early_stopping = EarlyStopping(
    monitor = 'val_loss', 
    patience = 3,        
    restore_best_weights = True   
)

batch_size = 16  
epochs = 5 

history = model.fit(
    train_generator,
    steps_per_epoch = len(train_generator),
    epochs = epochs,
    validation_data = val_generator,
    validation_steps = len(val_generator),
    callbacks = [early_stopping] 
)

  self._warn_if_super_not_called()


Epoch 1/5
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m233s[0m 682ms/step - accuracy: 0.4167 - loss: 1.2681 - val_accuracy: 0.6327 - val_loss: 0.9283
Epoch 2/5
[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/5


  self.gen.throw(value)
  current = self.get_monitor_value(logs)


[1m 31/340[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m3:07[0m 608ms/step - accuracy: 0.6027 - loss: 0.9998

KeyboardInterrupt: 

In [None]:
val_loss, val_accuracy = model.evaluate(val_generator)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")


plt.plot(history.history['accuracy'], label = 'Training Accuracy')
plt.plot(history.history['val_accuracy'], label = 'Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()


plt.plot(history.history['loss'], label = 'Training Loss')
plt.plot(history.history['val_loss'], label = 'Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()