## CNN

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2

import random
import shutil
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img, img_to_array
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model

from albumentations.pytorch import ToTensorV2
from albumentations import Compose, HorizontalFlip, RandomBrightnessContrast, Rotate, Resize

from ultralytics import YOLO
import math

In [None]:
# Root folder of the original training images. The balancing step reads it
# with os.listdir() and uses the name of each entry as the class id.
train_input_path = 'data/Train'

In [None]:
def get_augmentation_pipeline():
    """Build the albumentations pipeline used to synthesise extra training images.

    Returns:
        Compose: a random horizontal flip followed by a random rotation of at
        most +/-10 degrees, each applied with probability 0.5.
    """
    return Compose([
        # NOTE(review): mirroring changes the meaning of direction-dependent
        # classes (e.g. left/right arrows) -- confirm it is safe for this dataset.
        HorizontalFlip(p=0.5),
        # The first positional argument of Rotate is `limit` (maximum angle in
        # degrees), not a probability: `Rotate(0.1)` rotated by at most
        # +/-0.1 degrees, i.e. it produced near-exact duplicates. Use the same
        # +/-10 degree range as the Keras training generator (rotation_range=10).
        Rotate(limit=10, p=0.5),
    ])


In [None]:
def balance_classes_with_augmentation(input_path, output_path, transform, target_count, csv_output_path):
    """Write a class-balanced copy of an image dataset plus a CSV index of it.

    Every sub-directory of ``input_path`` is treated as one class whose id is
    the directory name. Classes with at least ``target_count`` images are
    randomly down-sampled to ``target_count``; smaller classes are copied in
    full and topped up with augmented variants of randomly chosen originals.
    All images end up directly inside ``output_path`` (flat layout).

    Args:
        input_path: Directory containing one sub-directory per class.
        output_path: Destination directory; created if missing. Files already
            present there from an earlier run are overwritten, not renamed.
        transform: Callable invoked as ``transform(image=rgb_array)`` that
            returns a mapping whose ``'image'`` entry is the augmented RGB
            array. Only used for classes that need topping up.
        target_count: Desired number of images per class.
        csv_output_path: Where the index CSV (columns ``ClassId``, ``Path``;
            ``Path`` is relative to ``output_path``) is written.
    """
    os.makedirs(output_path, exist_ok=True)

    data_for_csv = []
    # File names handed out during this run. The output folder is flat, so two
    # classes that contain the same file name would otherwise overwrite each
    # other while both rows stay in the CSV.
    used_names = set()

    def _reserve_name(class_name, file_name):
        """Return ``file_name``, prefixed with the class only if it is already taken."""
        candidate = file_name
        attempt = 0
        while candidate in used_names:
            attempt += 1
            prefix = class_name if attempt == 1 else f"{class_name}_{attempt}"
            candidate = f"{prefix}_{file_name}"
        used_names.add(candidate)
        return candidate

    def _record(class_name, save_path):
        """Append one CSV row for an image that was just written."""
        data_for_csv.append({
            "ClassId": class_name,
            "Path": os.path.relpath(save_path, start=output_path)
        })

    # Sorted so the CSV row order does not depend on the filesystem.
    for folder in sorted(os.listdir(input_path)):
        folder_path = os.path.join(input_path, folder)
        if not os.path.isdir(folder_path):
            # Stray files next to the class folders are not classes.
            continue

        print(f"Started processing '{folder}'")

        images = sorted(
            name for name in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, name))
        )
        current_count = len(images)
        if current_count == 0:
            # Nothing to copy and nothing to augment from.
            print(f"Skipping '{folder}': no images found")
            continue

        if target_count <= current_count:
            # Randomly pick images up to the target count
            selected_images = random.sample(images, target_count)
            augmentation_needed = 0
        else:
            # Use all images and augment additional images
            selected_images = list(images)
            augmentation_needed = target_count - current_count

        for image_file in selected_images:
            image_path = os.path.join(folder_path, image_file)
            save_path = os.path.join(output_path, _reserve_name(folder, image_file))

            # Plain copy: the originals are not decoded or re-encoded
            shutil.copy(image_path, save_path)
            _record(folder, save_path)

        # Top up with augmented variants. Sources that cannot be decoded are
        # removed from the pool so a corrupt file cannot abort the whole run.
        candidates = list(images)
        produced = 0
        while produced < augmentation_needed and candidates:
            image_file = random.choice(candidates)
            image_path = os.path.join(folder_path, image_file)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread returns None instead of raising on failure
                print(f"Skipping unreadable image '{image_path}'")
                candidates.remove(image_file)
                continue

            # OpenCV loads BGR; the transform is fed RGB and the result is
            # converted back before writing.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            augmented_image = transform(image=image)['image']
            augmented_file_name = _reserve_name(
                folder, f"aug_{current_count + produced}_{image_file}"
            )
            save_path = os.path.join(output_path, augmented_file_name)
            cv2.imwrite(save_path, cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR))
            _record(folder, save_path)
            produced += 1

        if produced < augmentation_needed:
            print(f"Warning: '{folder}' has only {current_count + produced} of {target_count} images")

        print(f"Completed processing '{folder}'")

    # Explicit columns keep the CSV header even when no image was written.
    df = pd.DataFrame(data_for_csv, columns=["ClassId", "Path"])
    df.to_csv(csv_output_path, index=False)

In [None]:
# Settings for the balanced dataset: where the images and their CSV index are
# written, and how many images every class should end up with.
train_output_path = 'data/Augmented2_Train'
csv_output_path = 'data/aug2_train.csv'
target_count = 500

balance_classes_with_augmentation(
    input_path=train_input_path,
    output_path=train_output_path,
    transform=get_augmentation_pipeline(),
    target_count=target_count,
    csv_output_path=csv_output_path,
)

In [None]:
# Load the index of the balanced dataset
df = pd.read_csv('data/aug2_train.csv')

# Prepend the directory path to the image filenames
base_dir = 'data/Augmented2_Train/'
df['Path'] = base_dir + df['Path']

# Ensure ClassId is a string (required by class_mode='categorical')
df['ClassId'] = df['ClassId'].astype(str)

# One label order shared by both generators. Left to themselves, each generator
# derives its class index from its own dataframe, so a class missing from one
# split would silently shift the one-hot columns between training and validation.
class_names = sorted(df['ClassId'].unique())

# 80/20 split, stratified so every class keeps the same share in both parts.
# NOTE(review): augmented files are derived from training images
# ("aug_<n>_<original name>"), so a source image and its augmented copy can land
# on opposite sides of this row-level split -- validation scores may be optimistic.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['ClassId'],
)

# On-the-fly augmentation for training; pixels are scaled to [0, 1]
# NOTE(review): horizontal_flip mirrors direction-dependent classes -- confirm
# it is appropriate for this dataset.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=10,
    zoom_range=0.2,
    fill_mode='nearest'
)

# Validation images are only rescaled, never augmented
val_datagen = ImageDataGenerator(rescale=1./255)

# Setup generators
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='Path',
    y_col='ClassId',
    target_size=(180, 180),
    batch_size=32,
    classes=class_names,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='Path',
    y_col='ClassId',
    target_size=(180, 180),
    batch_size=32,
    classes=class_names,
    class_mode='categorical'
)

# Learning-rate schedule: hold for the first epochs, then decay exponentially
def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    Args:
        epoch: Zero-based epoch index.
        lr: Learning rate currently in effect.

    Returns:
        ``lr`` unchanged while ``epoch`` is below 10; from epoch 10 onwards
        ``lr`` multiplied by ``exp(-0.1)``.
    """
    hold_epochs = 10
    return lr if epoch < hold_epochs else lr * np.exp(-0.1)




In [None]:
# Model architecture
# The data generators already scale pixels to [0, 1] (rescale=1./255) and the
# single-image prediction cell divides by 255 as well, so the network consumes
# its input as-is. A Rescaling(1./255) layer here would normalise a second time
# and squash every input into [0, 1/255].
num_classes = len(train_generator.class_indices)  # one output unit per label

inputs = tf.keras.Input(shape=(180, 180, 3))
x = inputs
# Four Conv -> BatchNorm -> MaxPool stages with a doubling number of filters
for filters in (32, 64, 128, 256):
    x = layers.Conv2D(filters, 3, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.5)(x)
x = layers.Flatten()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(num_classes, activation='softmax')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once val_loss has not improved for 4 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
# Keep the full model (not just weights) with the lowest val_loss on disk
model_checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    mode='min',
    verbose=1
)

# Train the model with learning rate scheduler
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    callbacks=[tf.keras.callbacks.LearningRateScheduler(scheduler), early_stopping, model_checkpoint]
)

# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

In [None]:
import matplotlib.pyplot as plt

# Training curves side by side: accuracy on the left, loss on the right.
# Each entry: (train key, validation key, panel title, y-axis label)
panels = (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
)

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
for ax, (train_key, val_key, title, y_label) in zip(axes, panels):
    ax.plot(history.history[train_key], label='Train')
    ax.plot(history.history[val_key], label='Validation')
    ax.set_title(title)
    ax.set_ylabel(y_label)
    ax.set_xlabel('Epoch')
    ax.legend(loc='upper left')

plt.tight_layout()
plt.show()


In [None]:
# Rebuild the "output index -> class label" table from the balanced-dataset CSV
df = pd.read_csv('data/aug2_train.csv')

# flow_from_dataframe numbers the classes by sorting the label strings, so the
# labels must be sorted the same way here. Enumerating them in CSV order (which
# follows os.listdir() and is filesystem-dependent) can attach the wrong label
# to a predicted index.
class_labels = sorted(df['ClassId'].astype(str).unique())
class_mapping = dict(enumerate(class_labels))

In [None]:
# Restore the checkpoint written during training
model = load_model('best_model.keras')

# Example: classify one specific test image
image_path = 'data/Test/00000.png'
image = load_img(image_path, target_size=(180, 180))
# Scale to [0, 1] and add the leading batch axis expected by predict()
image_array = np.expand_dims(img_to_array(image) / 255.0, axis=0)

# Make prediction: the most probable output unit is looked up in class_mapping
predictions = model.predict(image_array)
predicted_class_index = predictions[0].argmax()
predicted_class_name = class_mapping[predicted_class_index]
predicted_probability = predictions[0][predicted_class_index]

print(f"Predicted class for image: {predicted_class_name}")
print(f"Probability of predicted class: {predicted_probability:.4f}")

## YOLO

In [None]:
def validation_Data(input_path1, val_path='./data/val', val_fraction=0.1):
    """Move a random share of every class folder into a validation directory.

    Each sub-directory of ``input_path1`` is treated as one class. A random
    ``val_fraction`` of its files (rounded up, so at least one file per
    non-empty class) is MOVED into ``val_path/<class>``. Because files are
    moved, calling this again takes a further share out of the training data.

    Args:
        input_path1: Directory containing one sub-directory per class.
        val_path: Destination root for the validation images; created if missing.
        val_fraction: Share of each class to move, in the range (0, 1].

    Raises:
        ValueError: If ``input_path1`` does not exist or ``val_fraction`` is
            outside (0, 1].
    """
    if not os.path.exists(input_path1):
        raise ValueError(f'{input_path1} Path not exists...!')
    if not 0 < val_fraction <= 1:
        raise ValueError(f'val_fraction must be in (0, 1], got {val_fraction}')

    os.makedirs(val_path, exist_ok=True)

    for folder in os.listdir(input_path1):
        folder_path = os.path.join(input_path1, folder)
        if not os.path.isdir(folder_path):
            # Stray files next to the class folders are not classes.
            continue

        save_folder = os.path.join(val_path, folder)
        os.makedirs(save_folder, exist_ok=True)

        images = [
            name for name in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, name))
        ]
        current_count = len(images)
        print(f'{folder_path} count {current_count}')

        if current_count == 0:
            # random.sample() cannot draw from an empty population.
            print(f'{folder_path} is empty, nothing to move')
            continue

        # Rounded up, so every non-empty class contributes at least one image
        num_to_select = math.ceil(val_fraction * current_count)
        val_images = random.sample(images, num_to_select)

        # Report how many images are selected for validation (not the class size)
        print(f'val path {save_folder} count {len(val_images)}')

        for img in val_images:
            src_path = os.path.join(folder_path, img)
            dest_path = os.path.join(save_folder, img)

            # Handle filename conflicts: append _1, _2, ... until the name is free
            base, ext = os.path.splitext(img)
            counter = 1
            while os.path.exists(dest_path):
                dest_path = os.path.join(save_folder, f"{base}_{counter}{ext}")
                counter += 1

            shutil.move(src_path, dest_path)

        print(f'Images on {folder_path} after val dataset is {len(os.listdir(folder_path))} ...!')

In [None]:
# Start from the 'yolo11n-cls.pt' checkpoint
model = YOLO('yolo11n-cls.pt')

# Training configuration, kept in one place so it is easy to tweak
train_settings = dict(
    data='./data',
    epochs=20,
    imgsz=640,
    patience=4,
    batch=16,
    save=True,
    project='yolov11-gtsb',
    name='initial_run',
)

# Train the model
results = model.train(**train_settings)

In [None]:
# Load the trained weights
model = YOLO('yolov11-gtsb/initial_run2/weights/best.pt')

# Read one test image
# NOTE(review): the CNN section reads from 'data/Test/' (capital T) -- confirm
# the folder name, it matters on case-sensitive filesystems.
image_path = 'data/test/00500.png'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread returns None for a missing or unreadable file instead of
    # raising; fail here with a clear message rather than inside cv2.resize.
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Stretch the image to 640x640 (this does NOT preserve the aspect ratio)
resized_image = cv2.resize(image, (640, 640), interpolation=cv2.INTER_LINEAR)

# Run the model directly on the in-memory array
result = model(resized_image)

# Display the first result and dump the raw result objects
result[0].show()
print(result)

In [None]:
# Trained classifier to evaluate
model = YOLO('yolov11-gtsb/initial_run2/weights/best.pt')

# Location of the test index and the folder its paths are relative to
test_csv_path = 'Test.csv'
test_folder_path = 'data'

# The loop below reads the 'Path' and 'ClassId' columns of this file
test_df = pd.read_csv(test_csv_path)

# Running tallies
correct_predictions = 0
total_predictions = 0

for relative_path, ground_truth_class_id in zip(test_df['Path'], test_df['ClassId']):
    image_path = os.path.join(test_folder_path, relative_path)

    # Run prediction
    results = model(image_path)

    probs = results[0].probs
    if probs is None:
        # Results without class probabilities are not counted at all
        continue

    # Index of the most probable class, translated to its name
    predicted_class_name = results[0].names[int(probs.top1)]

    # Names and ground truth are compared as integers
    if int(predicted_class_name) == int(ground_truth_class_id):
        correct_predictions += 1
    total_predictions += 1

# Accuracy in percent; 0 when nothing could be scored
if total_predictions > 0:
    accuracy = (correct_predictions / total_predictions) * 100
else:
    accuracy = 0
print(f"Accuracy: {accuracy:.2f}%")