# In [None]:
import os
import numpy as np
import random
import shutil
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Seed the Python, NumPy and TensorFlow RNGs with the same value so that the
# shuffle-based split and the weight initialisation are repeatable.
_SEED = 42
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(_SEED)

# Dataset locations: the labelled source images and the two split targets.
source_dir, train_dir, val_dir = 'processed_data', 'train', 'val'

# Image height/width fed to the network, and the number of images per batch.
img_size = (224, 224)
batch_size = 32

# Function to split data into train and val directories
def split_data(source, train, val, split_size):
    """Copy the files of ``source`` into ``train`` and ``val`` at random.

    Args:
        source: Directory whose regular files are to be split.
        train: Existing directory receiving the first ``split_size`` share.
        val: Existing directory receiving the remaining files.
        split_size: Fraction (0..1) of the files that go to ``train``; the
            count is truncated with ``int()``.

    Sub-directories and other non-file entries in ``source`` are ignored,
    because ``shutil.copy`` cannot copy them.
    """
    # os.listdir() returns entries in arbitrary order, so sort before
    # shuffling: otherwise the seeded shuffle would still yield different
    # splits on different filesystems.
    files = sorted(
        name for name in os.listdir(source)
        if os.path.isfile(os.path.join(source, name))
    )
    random.shuffle(files)
    split_index = int(split_size * len(files))

    partitions = ((train, files[:split_index]), (val, files[split_index:]))
    for destination, subset in partitions:
        for name in subset:
            shutil.copy(os.path.join(source, name), os.path.join(destination, name))

# Create the train and val root directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Every sub-directory of source_dir is one class label. Iterate in sorted
# order so the shared random generator is consumed in the same sequence on
# every run, and skip stray files (which are not class folders and would make
# os.listdir() fail inside split_data).
# NOTE: files copied by a previous run are not removed; clear train_dir and
# val_dir first if the source data or the split ratio has changed.
for label in sorted(os.listdir(source_dir)):
    label_source = os.path.join(source_dir, label)
    if not os.path.isdir(label_source):
        continue
    label_train = os.path.join(train_dir, label)
    label_val = os.path.join(val_dir, label)
    os.makedirs(label_train, exist_ok=True)
    os.makedirs(label_val, exist_ok=True)
    split_data(label_source, label_train, label_val, split_size=0.8)

# Create the data generators.
# flow_from_directory() returns a Keras DirectoryIterator, not a
# tf.data.Dataset, so it has no .prefetch() method; calling it raised
# AttributeError. The iterator is also needed as-is further down, where its
# .samples attribute is read.
# NOTE(review): pixels are scaled to [0, 1] here, whereas the ImageNet weights
# of MobileNetV2 are documented as expecting mobilenet_v2.preprocess_input
# ([-1, 1]). Switching would change the input contract of the saved model, so
# it is left unchanged - confirm against the inference code.

# Training images: rescaled and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

# Validation images: rescaled only, and served in a fixed order.
val_datagen = ImageDataGenerator(rescale=1.0/255)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

# Number of output classes: one per class sub-directory of train_dir.
# Only directories are counted (flow_from_directory does the same), so a stray
# file in train_dir cannot inflate the size of the softmax layer.
n_classes = sum(
    os.path.isdir(os.path.join(train_dir, entry))
    for entry in os.listdir(train_dir)
)

# Load MobileNetV2 (ImageNet weights, no classification head) as a feature
# extractor for img_size RGB inputs.
base_model = MobileNetV2(input_shape=(img_size[0], img_size[1], 3), weights='imagenet', include_top=False)

# Freeze the backbone so it really acts as a fixed feature extractor: without
# this, the large gradients from the randomly initialised head would also
# update the pretrained weights.
base_model.trainable = False

# Classification head: pooled features -> 128-unit ReLU layer -> dropout
# (to reduce overfitting) -> softmax over the classes.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(n_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Compile for one-hot (categorical) labels
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model and collect history for learning curves.
# len(generator) is the number of batches needed to cover every sample,
# including the final partial batch. The previous `samples // batch_size`
# dropped that batch and evaluated to 0 steps when a split held fewer images
# than one batch.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=30,
    validation_data=val_generator,
    validation_steps=len(val_generator)
)

# Draw the learning curves side by side: accuracy on the left, loss on the
# right, each with its training and validation series.
_panels = (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
)

plt.figure(figsize=(12, 6))
for _position, (_train_key, _val_key, _title, _ylabel) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _position)
    plt.plot(history.history[_train_key])
    plt.plot(history.history[_val_key])
    plt.title(_title)
    plt.ylabel(_ylabel)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
plt.tight_layout()
plt.show()

# Persist the trained network to disk
model.save('model_mobilenetv2.keras')

# Print the layer-by-layer architecture
model.summary()

# Score the model on the validation generator; evaluate() yields the loss
# followed by the compiled metric (accuracy).
_eval_results = model.evaluate(val_generator)
val_loss, val_accuracy = _eval_results
print(f'Validation Loss: {val_loss:.4f}')
print(f'Validation Accuracy: {val_accuracy:.4f}')
