Splitting

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

def split_data(data_dir, output_dir, val_ratio=0.2):
    """
    Split the DATA folder into train and validation sets.
    Args:
        data_dir: Path to the original DATA folder
        output_dir: Path to save the split datasets
        val_ratio: Fraction of data to use for validation
    """
    train_dir = os.path.join(output_dir, "train")
    val_dir = os.path.join(output_dir, "val")
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    for class_name in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_dir):
            continue
        
        images = os.listdir(class_dir)
        train_imgs, val_imgs = train_test_split(images, test_size=val_ratio, random_state=42)
        
        # class subdirectories
        train_class_dir = os.path.join(train_dir, class_name)
        val_class_dir = os.path.join(val_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(val_class_dir, exist_ok=True)
        
        # Move images to train and val folders
        for img in train_imgs:
            shutil.copy(os.path.join(class_dir, img), os.path.join(train_class_dir, img))
        for img in val_imgs:
            shutil.copy(os.path.join(class_dir, img), os.path.join(val_class_dir, img))

# Paths
data_dir = r"C:\Users\Lenovo\Desktop\AI_Project\dataset\DATA"
output_dir = r"C:\Users\Lenovo\Desktop\AI_Project\balanced_data"


split_data(data_dir, output_dir, val_ratio=0.2)


Augmentation

In [9]:
import os
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter
from tqdm import tqdm
import random



def add_salt_and_pepper_noise(image, amount=0.04, salt_vs_pepper=0.5):
    """Add salt-and-pepper noise to an image."""
    image = image.convert('RGB')
    img_array = np.array(image)
    h, w = img_array.shape[:2]
    num_salt = int(amount * h * w * salt_vs_pepper)
    num_pepper = int(amount * h * w * (1 - salt_vs_pepper))

    # Add salt (white)
    coords = [np.random.randint(0, i - 1, num_salt) for i in img_array.shape[:2]]
    img_array[coords[0], coords[1]] = 255

    # Add pepper (black)
    coords = [np.random.randint(0, i - 1, num_pepper) for i in img_array.shape[:2]]
    img_array[coords[0], coords[1]] = 0

    return Image.fromarray(img_array)

def add_gaussian_noise(image, mean=0, std=0.05):
    """Add Gaussian noise to an image."""
    image = image.convert('RGB')
    img_array = np.array(image) / 255.0  # Normalize to [0, 1]
    noise = np.random.normal(mean, std, img_array.shape)
    noisy_img_array = img_array + noise
    noisy_img_array = np.clip(noisy_img_array, 0, 1)  # Ensure values are between 0 and 1
    return Image.fromarray((noisy_img_array * 255).astype(np.uint8))

def random_zoom(image, crop_percentage=0.1):
    """Randomly zoom into an image by cropping 10% from each side (top, left, bottom, right)."""
    width, height = image.size
    
    # Calculate the cropping area
    left = int(width * crop_percentage)
    top = int(height * crop_percentage)
    right = width - int(width * crop_percentage)
    bottom = height - int(height * crop_percentage)
    
    # Crop the image
    cropped_img = image.crop((left, top, right, bottom))
    
    # Optionally resize back to original size (or you can keep the zoomed-in size)
    return cropped_img.resize((width, height), Image.ANTIALIAS)


def adjust_brightness_contrast(image, brightness_factor=1.2, contrast_factor=1.2):
    """Adjust the brightness and contrast of an image."""
    image = image.convert("RGB")  # Ensure the image is in RGB mode
    enhancer = ImageEnhance.Brightness(image)
    image = enhancer.enhance(brightness_factor)
    
    enhancer = ImageEnhance.Contrast(image)
    image = enhancer.enhance(contrast_factor)
    
    return image

def add_gaussian_blur(image, radius=2):
    return image.filter(ImageFilter.GaussianBlur(radius))

def darken(image, factor=0.6):
    # Ensure image is in RGB mode
    if image.mode != 'RGB':
        image = image.convert('RGB')
    
    enhancer = ImageEnhance.Brightness(image)
    return enhancer.enhance(factor)



MAX_CLASS_SIZE = 600

def apply_augmentation_to_class(class_images, class_size, class_name):
    augmented_images = []
    
    if class_size >= MAX_CLASS_SIZE:
        print(f"Skipping augmentation for {class_name} as class size has reached the limit of {MAX_CLASS_SIZE}.")
        return augmented_images  # No augmentation for this class

    for img in class_images:
        count = 0 
        if class_size >= MAX_CLASS_SIZE:
            print(f"Reached max class size for {class_name}, stopping augmentation.")
            break  # Stop augmentation if class size reaches the limit
        
        original_image_name = os.path.basename(img.filename)  # Get the original image name

        if class_size < 35:
            # Apply all augmentations and combinations of two
            augmented_images.append((add_salt_and_pepper_noise(img), f'{original_image_name}_salt_and_pepper_noise'))
            augmented_images.append((add_gaussian_noise(img), f'{original_image_name}_gaussian_noise'))
            augmented_images.append((random_zoom(img), f'{original_image_name}_random_zoom'))
            augmented_images.append((adjust_brightness_contrast(img), f'{original_image_name}_brightness_contrast'))

            # Convert to RGB before applying Gaussian blur
            img_rgb = img.convert("RGB")
            augmented_images.append((img_rgb.filter(ImageFilter.GaussianBlur(radius=2)), f'{original_image_name}_gaussian_blur'))

            augmented_images.append((darken(img), f'{original_image_name}_darken'))  # Darken the image

            # Apply combinations of two augmentations
            augmented_images.append((add_gaussian_noise(random_zoom(img)), f'{original_image_name}_gaussian_noise_zoom'))
            augmented_images.append((add_salt_and_pepper_noise(adjust_brightness_contrast(img)), f'{original_image_name}_salt_brightness_contrast'))
            augmented_images.append((add_salt_and_pepper_noise(random_zoom(img)), f'{original_image_name}_salt_zoom'))
            augmented_images.append((add_gaussian_noise(adjust_brightness_contrast(img)), f'{original_image_name}_gaussian_noise_contrast'))

        elif 35 <= class_size < 50:
            # Apply all augmentations and combinations of two
            augmented_images.append((add_salt_and_pepper_noise(img), f'{original_image_name}_salt_and_pepper_noise'))
            augmented_images.append((add_gaussian_noise(img), f'{original_image_name}_gaussian_noise'))
            augmented_images.append((random_zoom(img), f'{original_image_name}_random_zoom'))
            augmented_images.append((adjust_brightness_contrast(img), f'{original_image_name}_brightness_contrast'))
            augmented_images.append((img.filter(ImageFilter.GaussianBlur(radius=2)), f'{original_image_name}_gaussian_blur'))
            augmented_images.append((img, f'{original_image_name}_darken'))  # Implement darken function if needed

            # Apply combinations of two augmentations
            augmented_images.append((add_gaussian_noise(random_zoom(img)), f'{original_image_name}_gaussian_noise_zoom'))
            augmented_images.append((add_salt_and_pepper_noise(adjust_brightness_contrast(img)), f'{original_image_name}_salt_brightness_contrast'))
            augmented_images.append((add_salt_and_pepper_noise(random_zoom(img)), f'{original_image_name}_salt_zoom'))
        
        elif 50 <= class_size < 100:
            # Apply 6 different augmentations
            augmented_images.append((add_salt_and_pepper_noise(img), f'{original_image_name}_salt_and_pepper_noise'))
            augmented_images.append((add_gaussian_noise(img), f'{original_image_name}_gaussian_noise'))
            augmented_images.append((random_zoom(img), f'{original_image_name}_random_zoom'))
            augmented_images.append((adjust_brightness_contrast(img), f'{original_image_name}_brightness_contrast'))
            augmented_images.append((img.filter(ImageFilter.GaussianBlur(radius=2)), f'{original_image_name}_gaussian_blur'))


        elif 100 <= class_size < 120:
            # Apply 4 different augmentations
            augmented_images.append((add_salt_and_pepper_noise(img), f'{original_image_name}_salt_and_pepper_noise'))
            augmented_images.append((add_gaussian_noise(img), f'{original_image_name}_gaussian_noise'))
            augmented_images.append((img.filter(ImageFilter.GaussianBlur(radius=2)), f'{original_image_name}_gaussian_blur'))
            augmented_images.append((adjust_brightness_contrast(random_zoom(img)), f'{original_image_name}_brightness_contrast_random_zoom'))

        elif 120 <= class_size < 150:
            # Apply 4 different augmentations
            augmented_images.append((add_salt_and_pepper_noise(img), f'{original_image_name}_salt_and_pepper_noise'))
            augmented_images.append((add_gaussian_noise(img), f'{original_image_name}_gaussian_noise'))
            augmented_images.append((img.filter(ImageFilter.GaussianBlur(radius=2)), f'{original_image_name}_gaussian_blur'))

        elif 150 <= class_size < 200:
            # Apply 3 different augmentations
            augmented_images.append((add_salt_and_pepper_noise(img), f'{original_image_name}_salt_and_pepper_noise'))
            augmented_images.append((add_gaussian_noise(img), f'{original_image_name}_gaussian_noise'))
            augmented_images.append((img.filter(ImageFilter.GaussianBlur(radius=2)), f'{original_image_name}_gaussian_blur'))
            
            if MAX_CLASS_SIZE - class_size < len(augmented_images): break
        
        
            
        elif class_size>= 200:
            augmented_images.append((add_salt_and_pepper_noise(img), f'{original_image_name}_salt_and_pepper_noise'))
            augmented_images.append((add_gaussian_noise(img), f'{original_image_name}_gaussian_noise'))
            class_size += 2
            if class_size >= MAX_CLASS_SIZE: break
        
        '''if class_size >= MAX_CLASS_SIZE:
            print(f"Reached max class size for {class_name}, stopping augmentation.")
            break'''
        
    return augmented_images

def save_augmented_images(class_images, augmented_images, class_path):
    for img, img_name in augmented_images:
        # Convert image to RGB before saving if not already in RGB mode
        if img.mode != 'RGB':
            img = img.convert('RGB')
        
        img.save(f"{class_path}/{img_name}.jpg", 'JPEG')
# Example usage
def process_images_in_dataset(train_dir):
    class_names = os.listdir(train_dir)
    
    for class_name in class_names:
        class_path = os.path.join(train_dir, class_name)
        
        if os.path.isdir(class_path):
            class_images = [Image.open(os.path.join(class_path, img)) for img in os.listdir(class_path) if img.lower().endswith(('png', 'jpg', 'jpeg'))]
            class_size = len(class_images)
            
            # Apply augmentations based on class size
            augmented_images = apply_augmentation_to_class(class_images, class_size, class_name)
            
            # Save augmented images
            save_augmented_images(class_images, augmented_images, class_path)
            
            print(f"Processed {class_name} with {class_size} original images.")

# Run the processing
train_dir = r"C:\Users\Lenovo\Desktop\AI_Project\balanced_data\train"
process_images_in_dataset(train_dir)

  return cropped_img.resize((width, height), Image.ANTIALIAS)


Processed 0 with 102 original images.
Processed 1 with 36 original images.
Processed 10 with 94 original images.
Processed 11 with 181 original images.
Processed 12 with 87 original images.
Processed 13 with 93 original images.
Processed 14 with 110 original images.
Processed 15 with 34 original images.
Processed 16 with 165 original images.
Processed 17 with 144 original images.
Processed 18 with 168 original images.
Processed 19 with 392 original images.
Processed 2 with 83 original images.
Processed 20 with 19 original images.
Processed 21 with 22 original images.
Processed 22 with 24 original images.
Processed 23 with 23 original images.
Processed 24 with 86 original images.
Processed 25 with 19 original images.
Processed 26 with 198 original images.
Processed 27 with 32 original images.
Processed 28 with 408 original images.
Processed 29 with 44 original images.
Processed 3 with 251 original images.
Processed 30 with 136 original images.
Processed 31 with 38 original images.
Proce

In [None]:
import os
import pandas as pd

def rename_folders(base_dir, csv_file):
    """
    Rename folders based on a CSV mapping and replace spaces, parentheses, and slashes with underscores.

    Args:
    - base_dir (str): The base directory containing folders to rename.
    - csv_file (str): Path to the CSV file containing the old and new folder names.
    """
    # Load CSV file into a pandas DataFrame
    df = pd.read_csv(csv_file)

    # Iterate through each row in the DataFrame
    for _, row in df.iterrows():
        old_folder = str(row['ClassId'])  # Current folder name
        new_folder = row['Name']  # New folder name

        # Replace spaces, parentheses, and slashes with underscores
        old_folder = old_folder
        new_folder = new_folder.replace(" ", "_").replace("(", "_").replace(")", "_").replace("/", "_")

        # Ensure that consecutive underscores are replaced with a single underscore
        old_folder = old_folder.replace("__", "_")
        new_folder = new_folder.replace("__", "_")

        # Create the full path to the folder (make sure the paths are correct)
        old_folder_path = os.path.join(base_dir, old_folder)
        new_folder_path = os.path.join(base_dir, new_folder)

        # Debugging: Print paths to verify them
        #print(f"Attempting to rename: '{old_folder_path}' -> '{new_folder_path}'")

        # Check if the old folder exists before renaming
        if os.path.exists(old_folder_path):
            try:
                # Rename the folder
                os.rename(old_folder_path, new_folder_path)
                print(f"Renamed '{old_folder}' to '{new_folder}' in '{base_dir}'")
            except Exception as e:
                print(f"Error renaming '{old_folder}' to '{new_folder}': {e}")
        else:
            print(f"Folder '{old_folder}' not found in '{base_dir}'.")


base_dir1 = r"C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST"  # Use raw string for Windows paths
base_dir2 = r"C:\Users\Lenovo\Desktop\AI_Project\balanced_data/train" 
base_dir3 = r"C:\Users\Lenovo\Desktop\AI_Project\balanced_data/val" 

csv_file = "labels.csv"


rename_folders(base_dir1, csv_file)
rename_folders(base_dir2, csv_file)
rename_folders(base_dir3, csv_file)


Renamed '0' to 'Speed_limit_5km_h_' in 'C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST'
Renamed '1' to 'Speed_limit_15km_h_' in 'C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST'
Renamed '2' to 'Speed_limit_30km_h_' in 'C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST'
Renamed '3' to 'Speed_limit_40km_h_' in 'C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST'
Renamed '4' to 'Speed_limit_50km_h_' in 'C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST'
Renamed '5' to 'Speed_limit_60km_h_' in 'C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST'
Renamed '6' to 'Speed_limit_70km_h_' in 'C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST'
Renamed '7' to 'speed_limit_80km_h_' in 'C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST'
Renamed '8' to 'Dont_Go_straight_or_left' in 'C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST'
Renamed '9' to 'Unknown7' in 'C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST'
Renamed '10' to 'Dont_Go_straight' in 'C:\Users\Lenovo\Desktop\AI_Project\dataset\TEST'
Renamed '11' to 'Do