In [None]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, callbacks, regularizers
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import pandas as pd
import os
import shutil
import time

In [None]:
# Root folder of the image dataset (one sub-folder per class).
# The location can be overridden without editing the notebook by setting the
# DATASET_DIR environment variable; when it is unset, the original author's
# local path is used so existing runs keep working unchanged.
dataset_dir = os.environ.get(
    'DATASET_DIR',
    r'C:\Users\AE AL Emran\Documents\Thesis_paper_1_Driver_Fatic_topic_8_type_clasification\dataset2\dataset\training_set',
)

In [None]:
# Output folders for the train/test split.
# NOTE(review): both folders are created *inside* dataset_dir, next to the
# class folders; they are cleared before each split so that leftovers from a
# previous run do not mix with the new split.
train_dir, test_dir = (
    os.path.join(dataset_dir, subset_name) for subset_name in ('train', 'test')
)

In [None]:
# Error callback for shutil.rmtree that copes with read-only entries
def handle_remove_readonly(func, path, exc):
    """Make ``path`` writable and call ``func`` on it once more.

    Has the ``(func, path, exc)`` signature that ``shutil.rmtree`` uses for
    its error callback.

    Args:
        func: The callable to invoke again with ``path`` as its only argument.
        path: The filesystem entry whose permissions are changed.
        exc: Error information supplied by the caller; not used here.

    Raises:
        Whatever ``os.chmod`` or the second ``func(path)`` call raises.
    """
    from stat import S_IWRITE

    # Grant write permission first, then repeat the operation.
    os.chmod(path, S_IWRITE)
    func(path)

In [None]:
# Helper function to retry directory deletion if a PermissionError occurs
def remove_dir_with_retry(path, retries=3, delay=2):
    """Delete the directory tree at ``path``, retrying on ``PermissionError``.

    Read-only entries are handled by ``handle_remove_readonly``, which is
    passed to ``shutil.rmtree`` as its error callback. If the deletion still
    raises ``PermissionError``, the whole deletion is attempted again after a
    pause.

    Args:
        path: Directory to delete.
        retries: Maximum number of deletion attempts (default 3).
        delay: Seconds to wait between two attempts (default 2).

    Returns:
        True if the directory was deleted, False if every attempt failed with
        a ``PermissionError``.

    Raises:
        Any exception other than ``PermissionError`` raised by
        ``shutil.rmtree`` or the error callback propagates unchanged.
    """
    for attempt in range(1, retries + 1):
        try:
            # NOTE: ``onerror`` is deprecated since Python 3.12 in favour of
            # ``onexc``; it is kept here so older interpreters keep working.
            shutil.rmtree(path, onerror=handle_remove_readonly)
        except PermissionError as e:
            if attempt < retries:
                print(f"PermissionError: {e}. Retrying in {delay} seconds... ({attempt}/{retries})")
                time.sleep(delay)
            else:
                # Last attempt: nothing left to retry, so do not sleep.
                print(f"PermissionError: {e}. No retries left. ({attempt}/{retries})")
        else:
            print(f"Successfully deleted: {path}")
            return True

    print(f"Could not delete {path} after {retries} retries.")
    return False

In [None]:
# Delete split folders left over from a previous run, skipping any that are absent
for stale_dir in filter(os.path.exists, (train_dir, test_dir)):
    remove_dir_with_retry(stale_dir)

In [None]:
# Helper function to split data into train and test
def split_data(data_dir, train_dir, test_dir, train_ratio=0.8, seed=None):
    """Copy every class folder's files into per-class train and test folders.

    Each immediate sub-directory of ``data_dir`` is treated as one class,
    except ``train_dir`` and ``test_dir`` themselves: they may live inside
    ``data_dir`` and must not be mistaken for classes when they already exist.
    Within a class, only regular files are considered; they are shuffled and
    the first ``int(n * train_ratio)`` go to ``train_dir/<class>``, the rest to
    ``test_dir/<class>``. Files are copied, never moved.

    Args:
        data_dir: Folder containing one sub-folder per class.
        train_dir: Destination root for the training subset.
        test_dir: Destination root for the test subset.
        train_ratio: Fraction of each class assigned to training (0..1).
        seed: Optional seed for a private random generator, giving a
            reproducible split. When None, NumPy's global random state is
            used, as before.

    Raises:
        ValueError: If ``train_ratio`` is outside the range [0, 1].
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    def _canonical(path):
        # Normalise so the same folder compares equal regardless of spelling.
        return os.path.normcase(os.path.abspath(path))

    output_dirs = {_canonical(train_dir), _canonical(test_dir)}

    # Sorted so the result does not depend on the OS directory listing order.
    classes = sorted(
        cls for cls in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, cls))
        and _canonical(os.path.join(data_dir, cls)) not in output_dirs
    )

    shuffle = np.random.shuffle if seed is None else np.random.default_rng(seed).shuffle

    for cls in classes:
        cls_dir = os.path.join(data_dir, cls)
        # Skip nested folders: shutil.copy only handles regular files.
        images = sorted(
            name for name in os.listdir(cls_dir)
            if os.path.isfile(os.path.join(cls_dir, name))
        )

        # Shuffle and split images
        shuffle(images)
        train_count = int(len(images) * train_ratio)

        train_images = images[:train_count]
        test_images = images[train_count:]

        # Copy images to new directories
        for subset, subset_images in zip([train_dir, test_dir], [train_images, test_images]):
            subset_cls_dir = os.path.join(subset, cls)
            os.makedirs(subset_cls_dir, exist_ok=True)
            for img in subset_images:
                shutil.copy(os.path.join(cls_dir, img), os.path.join(subset_cls_dir, img))

In [None]:
# Perform data splitting
# The split shuffles files with NumPy's global random state, so that state is
# seeded first; otherwise every run would yield a different train/test
# partition and the reported metrics would not be reproducible.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)
split_data(dataset_dir, train_dir, test_dir)

In [None]:
# Create instances of ImageDataGenerator for training and testing.
# The ImageNet weights of MobileNetV2 were trained on inputs scaled to [-1, 1],
# which is what mobilenet_v2.preprocess_input produces. Rescaling by 1/255
# would feed [0, 1] inputs to the frozen backbone instead, so the dedicated
# preprocessing function is used for both generators (and `rescale` is omitted
# so the pixels are not scaled twice).
mobilenet_preprocess = tf.keras.applications.mobilenet_v2.preprocess_input

train_datagen = ImageDataGenerator(
    preprocessing_function=mobilenet_preprocess,
    zoom_range=0.2,  # Keep essential augmentations
    horizontal_flip=True,
    fill_mode='nearest'
)

# Test images get the same input scaling but no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=mobilenet_preprocess)

In [None]:
# Build the directory iterators that feed batches to Keras.
# Author's tuning note (translated, meaning unclear): the batch size was being
# switched between 64 and 32 -- TODO confirm which value is intended.
batch_size = 64

# Options shared by both iterators: image size, batch size and one-hot labels.
flow_options = dict(
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

# shuffle=False keeps the file order fixed, so predictions line up with
# test_generator.classes when the two are compared later.
test_generator = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)

Found 3451 images belonging to 7 classes.
Found 868 images belonging to 7 classes.


In [None]:
# MobileNetV2 backbone with ImageNet weights and without its classification
# head; the input shape matches the 224x224 RGB images from the generators.
base_model = MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
)

# Freeze the pre-trained layers so that only the new head is trained.
base_model.trainable = False

In [None]:
# Size the output layer from the data instead of hard-coding it, so the model
# always matches the number of class folders found by the training generator.
num_classes = train_generator.num_classes

# Classification head on top of the frozen backbone.
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    # Author's tuning note (translated): the L2 factor was lowered --
    # presumably by one decimal place; TODO confirm.
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.BatchNormalization(),
    layers.Dropout(0.3),  # Reduce dropout rate
    layers.Dense(num_classes, activation='softmax')  # One output per class
])

In [None]:
# Configure training: Adam optimiser, categorical cross-entropy (labels are
# one-hot encoded by the generators) and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=Adam(learning_rate=0.001),
)

# Callbacks, all driven by the validation loss.
# NOTE(review): the checkpoint file name mentions "vgg19" although the
# backbone built above is MobileNetV2 -- consider renaming it.
model_checkpoint = callbacks.ModelCheckpoint(
    'best_model_vgg19.keras',
    monitor='val_loss',
    save_best_only=True,
)
# Halve the learning rate after 3 epochs without improvement, down to 1e-6.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.5,
    min_lr=1e-6,
)
# Stop after 5 epochs without improvement and roll back to the best weights.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Fit the model for up to 50 epochs and keep the per-epoch metrics in `history`.
# NOTE(review): the test generator doubles as validation data, so checkpointing,
# learning-rate scheduling and early stopping are all tuned on the same images
# that are later reported as "test" results. A separate validation split would
# give an unbiased test score.
training_callbacks = [model_checkpoint, reduce_lr, early_stopping]

history = model.fit(
    train_generator,
    epochs=50,
    validation_data=test_generator,
    callbacks=training_callbacks,
)

In [None]:
# Score the trained model on the test generator. With a single 'accuracy'
# metric configured at compile time, evaluate() yields the loss followed by
# the accuracy.
evaluation_scores = model.evaluate(test_generator)
test_loss, test_accuracy = evaluation_scores
print(f'Test accuracy: {test_accuracy:.2f}')

In [None]:
# Tabulate the per-epoch training and validation metrics recorded by fit().
metrics_by_epoch = history.history
epoch_count = len(metrics_by_epoch['accuracy'])

history_df = pd.DataFrame({
    "Epoch": range(1, epoch_count + 1),  # 1-based epoch numbers
    "Train Accuracy": metrics_by_epoch['accuracy'],
    "Validation Accuracy": metrics_by_epoch['val_accuracy'],
    "Train Loss": metrics_by_epoch['loss'],
    "Validation Loss": metrics_by_epoch['val_loss']
})

print("\nTrain and Validation Accuracy and Loss by Epoch:")
print(history_df)

In [None]:
# Collect model predictions and ground-truth labels for the test set.
# The iterator is rewound first so predictions start at the first test image.
test_generator.reset()
predictions = model.predict(test_generator)

# Most probable class index per image, alongside the iterator's true indices.
predicted_labels = predictions.argmax(axis=1)
true_labels = test_generator.classes

# Class names, in the order of the class_indices mapping.
class_labels = list(test_generator.class_indices)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 2s/step


In [None]:
# Classification report: per-class precision, recall, F1-score and support,
# plus the overall accuracy and the macro/weighted averages.
# `labels` is passed explicitly so every class in class_labels gets a row (and
# the names stay aligned with the indices) even if a class never occurs in
# the true or predicted labels.
classification_report_dict = classification_report(
    true_labels,
    predicted_labels,
    labels=list(range(len(class_labels))),
    target_names=class_labels,
    output_dict=True,
)
classification_df = pd.DataFrame(classification_report_dict).transpose()
# The heading names what the table actually contains (it does not hold
# per-class accuracy or loss).
print("\nClass-wise Precision, Recall and F1-score:")
print(classification_df)

In [None]:
# Plot confusion matrix
# `labels` fixes the matrix to one row/column per known class so it always
# matches display_labels.
conf_matrix = confusion_matrix(
    true_labels,
    predicted_labels,
    labels=list(range(len(class_labels))),
)

# Create the axes up front and hand them to the display object. Without `ax`,
# ConfusionMatrixDisplay.plot() opens its own figure, which would leave a
# separately created figure empty and ignore the requested size.
fig, ax = plt.subplots(figsize=(8, 6))
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=class_labels)
disp.plot(ax=ax, cmap=plt.cm.Blues, values_format='d', xticks_rotation=45)
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
# Plot training and validation accuracy per epoch, with the final test
# accuracy as a horizontal reference line.
# Epochs are numbered from 1 so the x axis agrees with the history table and
# with the epoch numbers Keras prints during training.
accuracy_epochs = range(1, len(history.history['accuracy']) + 1)

fig, ax = plt.subplots()
ax.plot(accuracy_epochs, history.history['accuracy'], label='Train Accuracy')
ax.plot(accuracy_epochs, history.history['val_accuracy'], label='Validation Accuracy')
ax.axhline(y=test_accuracy, color='r', linestyle='--', label='Final Test Accuracy')
ax.set_title('Model Accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epochs')
ax.legend()
plt.show()

In [None]:
# Plot training and validation loss per epoch, with the final test loss as a
# horizontal reference line.
# Epochs are numbered from 1 so the x axis agrees with the history table and
# with the epoch numbers Keras prints during training.
loss_epochs = range(1, len(history.history['loss']) + 1)

fig, ax = plt.subplots()
ax.plot(loss_epochs, history.history['loss'], label='Train Loss')
ax.plot(loss_epochs, history.history['val_loss'], label='Validation Loss')
ax.axhline(y=test_loss, color='r', linestyle='--', label='Final Test Loss')
ax.set_title('Model Loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epochs')
ax.legend()
plt.show()