In [None]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout

# Define the directory containing the images (one sub-directory per class label)
image_dir = 'extracted_images'

# File-name suffixes treated as images. The leading dot stops names such as
# "notes_png" from matching, and the check below is done on the lower-cased
# file name so "IMG_001.JPG" is accepted as well.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# Create a DataFrame with image paths and labels.
# Both listings are sorted because os.listdir() returns entries in arbitrary
# order; a stable row order keeps any seeded split of this frame reproducible.
data = []
for label in sorted(os.listdir(image_dir)):
    label_dir = os.path.join(image_dir, label)
    if not os.path.isdir(label_dir):
        continue  # stray files next to the class folders are ignored
    for file in sorted(os.listdir(label_dir)):
        if file.lower().endswith(IMAGE_EXTENSIONS):
            data.append([os.path.join(label_dir, file), label])

df = pd.DataFrame(data, columns=['image_path', 'label'])

# Fail here with a clear message instead of further downstream on an empty frame.
if df.empty:
    raise ValueError(f"No image files found under '{image_dir}'.")

print(df.head())


In [None]:
# Encode labels.
# The original string labels are preserved in 'label_name' and the encoder is
# always fitted on that column. Re-running this cell therefore does not fit the
# encoder on already-encoded integers, which would replace
# label_encoder.classes_ with integers and lose the class names.
label_encoder = LabelEncoder()
if 'label_name' not in df.columns:
    df['label_name'] = df['label']
df['label'] = label_encoder.fit_transform(df['label_name'])

# Create train and test sets (80/20 split, stratified on the encoded label)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label'])

# Display the distribution of labels in train and test sets
print(train_df['label'].value_counts())
print(test_df['label'].value_counts())

# Data augmentation and normalization (applied to training images only)
# NOTE(review): horizontal_flip=True assumes a mirrored image still belongs to
# the same class -- confirm this holds for the classes in this dataset.
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=20, width_shift_range=0.2,
                                   height_shift_range=0.2, shear_range=0.2, zoom_range=0.2,
                                   horizontal_flip=True, fill_mode='nearest')

# Test images are only rescaled, never augmented
test_datagen = ImageDataGenerator(rescale=1./255)

# Create generators.
# class_mode='raw' yields the values of the 'label' column as targets.
# The training shuffle is seeded so the batch order is repeatable.
train_generator = train_datagen.flow_from_dataframe(dataframe=train_df, x_col='image_path', y_col='label',
                                                    class_mode='raw', target_size=(224, 224), batch_size=32,
                                                    seed=42)

# shuffle=False keeps batches in test_df row order, so predictions made from
# this generator line up with test_df; aggregate metrics are unaffected.
test_generator = test_datagen.flow_from_dataframe(dataframe=test_df, x_col='image_path', y_col='label',
                                                  class_mode='raw', target_size=(224, 224), batch_size=32,
                                                  shuffle=False)


In [None]:
# Seed the global NumPy and TensorFlow RNGs so repeated runs of this cell start
# from the same random state. (Some GPU ops may still be non-deterministic.)
np.random.seed(42)
tf.random.set_seed(42)

# Model 1: two convolution + max-pooling stages, then a dense classifier head.
model_1 = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One softmax output per class known to the label encoder
    Dense(len(label_encoder.classes_), activation='softmax')
])

# The sparse loss takes integer class ids as targets (not one-hot vectors).
model_1.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_1.summary()

# Train for 10 epochs, scoring test_generator after every epoch.
history_1 = model_1.fit(train_generator, epochs=10, validation_data=test_generator)


In [None]:
# Seed the global NumPy and TensorFlow RNGs so repeated runs of this cell start
# from the same random state. (Some GPU ops may still be non-deterministic.)
np.random.seed(42)
tf.random.set_seed(42)

# Model 2: three convolution + max-pooling stages and a wider dense layer.
model_2 = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    # One softmax output per class known to the label encoder
    Dense(len(label_encoder.classes_), activation='softmax')
])

# The sparse loss takes integer class ids as targets (not one-hot vectors).
model_2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_2.summary()

# Train for 10 epochs, scoring test_generator after every epoch.
history_2 = model_2.fit(train_generator, epochs=10, validation_data=test_generator)


In [None]:
# Score model 1 on the test generator; the result unpacks into the loss
# followed by the single compiled metric (accuracy).
eval_result_1 = model_1.evaluate(test_generator)
loss_1, acc_1 = eval_result_1
print(f"Model 1 Accuracy: {acc_1}")

# Same measurement for model 2.
eval_result_2 = model_2.evaluate(test_generator)
loss_2, acc_2 = eval_result_2
print(f"Model 2 Accuracy: {acc_2}")

# Plotting the training and validation accuracy and loss
import matplotlib.pyplot as plt

def plot_history(history, model_num):
    """Plot per-epoch accuracy and loss curves for one training run.

    Draws a two-panel figure (accuracy on the left, loss on the right) and
    displays it with ``plt.show()``.

    Parameters
    ----------
    history : object with a ``history`` dict attribute
        The dict must map 'accuracy' and 'loss' to per-epoch sequences and may
        also contain 'val_accuracy' / 'val_loss'.
    model_num : int or str
        Identifier interpolated into both panel titles.

    Raises
    ------
    KeyError
        If 'accuracy' or 'loss' is missing from ``history.history``.
    """
    metrics = history.history
    acc = metrics['accuracy']
    loss = metrics['loss']
    # Validation curves are optional: skip them when the keys are absent
    # instead of failing with KeyError.
    val_acc = metrics.get('val_accuracy')
    val_loss = metrics.get('val_loss')
    # Number epochs from 1 so the x axis reads as "epoch 1, 2, ...".
    epochs_range = range(1, len(acc) + 1)

    # Wide canvas: two side-by-side panels need room for their long titles.
    fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 5))

    ax_acc.plot(epochs_range, acc, label='Training Accuracy')
    if val_acc is not None:
        ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
    ax_acc.set_xlabel('Epoch')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend(loc='lower right')
    ax_acc.set_title(f'Training and Validation Accuracy (Model {model_num})')

    ax_loss.plot(epochs_range, loss, label='Training Loss')
    if val_loss is not None:
        ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend(loc='upper right')
    ax_loss.set_title(f'Training and Validation Loss (Model {model_num})')

    fig.tight_layout()
    plt.show()

# Draw the learning curves of each training run, numbering the models from 1.
for model_num, model_history in enumerate((history_1, history_2), start=1):
    plot_history(model_history, model_num)


In [None]:
# Keep whichever model scored the higher accuracy; a tie goes to model_2.
if acc_1 > acc_2:
    best_model = model_1
else:
    best_model = model_2
print("Best model selected based on validation accuracy.")


In [None]:
# Write the selected model to 'best_model.h5' in the working directory,
# then confirm on stdout.
best_model.save(filepath='best_model.h5')
print("Best model saved as best_model.h5")
