## Library


In [15]:
import os
import random
from pathlib import Path
from io import BytesIO
from urllib.request import urlopen
from tqdm import tqdm
from rembg import remove
from PIL import Image
import matplotlib.pyplot as plt


## Specify the path to your downloaded image dataset


In [16]:
import os
from PIL import Image
import matplotlib.pyplot as plt
import random

# Function to load and display a few samples from a folder
def display_samples(folder_path, num_samples=1):
    for root, dirs, files in os.walk(folder_path):
        if not dirs:  # If the current folder has no subdirectories (innermost folder)
            current_folder_path = root
            image_files = [f for f in files if f.endswith(('.jpg', '.jpeg', '.png'))]

            # Choose random images from the folder
            sample_images = random.sample(image_files, min(num_samples, len(image_files)))

            # Display sample images
            for image_name in sample_images:
                image_path = os.path.join(current_folder_path, image_name)
                image = Image.open(image_path)

                # Get image dimensions
                width, height = image.size

                # Check if the image is RGB or grayscale
                image_type = "RGB" if image.mode == "RGB" else "Grayscale"

                # Display image information
                print(f"Folder: {os.path.basename(current_folder_path)}")
                print(f"Image Dimensions: {width} x {height}")
                print(f"Image Type: {image_type}")

                # Display the image
                plt.imshow(image)
                plt.title(f"Folder: {os.path.basename(current_folder_path)}")
                plt.show()

# Specify the path to your dataset
dataset_folder_path = '/Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/RAW'

# Display a few samples from the innermost folders
#display_samples(dataset_folder_path, num_samples=1)


## Removing Background
 

In [11]:

# Function to remove background using rembg
def remove_background_with_rembg(image_path):
    with open(image_path, "rb") as input_file:
        input_data = input_file.read()

    output_data = rembg.remove(input_data)

    # Convert output_data to a PIL Image
    output_image = Image.open(BytesIO(output_data))

    # Convert to RGB mode if the image has an alpha channel
    if output_image.mode == 'RGBA':
        output_image = output_image.convert('RGB')

    return output_image



# Process all images in the directory and its subdirectories
input_directory = dataset_folder_path
output_directory = '/Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/NBRAW'

for root, dirs, files in os.walk(input_directory):
    for item in files:
        item_path = os.path.join(root, item)

        # Check if the item has a common image extension (you can extend the list if needed)
        if item.lower().endswith(('.png', '.jpg', '.jpeg')):
            processed_image = remove_background_with_rembg(item_path)

            # Create the corresponding output subdirectory
            relative_path = os.path.relpath(item_path, input_directory)
            output_subdirectory = os.path.join(output_directory, os.path.dirname(relative_path))
            os.makedirs(output_subdirectory, exist_ok=True)

            # Save the processed image with a new filename
            output_filename = os.path.join(output_subdirectory, f"processed_{item}")
            processed_image.save(output_filename)



'\n# Process all images in the directory and its subdirectories\ninput_directory = dataset_folder_path\noutput_directory = \'/Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/NBRAW\'\n\nfor root, dirs, files in os.walk(input_directory):\n    for item in files:\n        item_path = os.path.join(root, item)\n\n        # Check if the item has a common image extension (you can extend the list if needed)\n        if item.lower().endswith((\'.png\', \'.jpg\', \'.jpeg\')):\n            processed_image = remove_background_with_rembg(item_path)\n\n            # Create the corresponding output subdirectory\n            relative_path = os.path.relpath(item_path, input_directory)\n            output_subdirectory = os.path.join(output_directory, os.path.dirname(relative_path))\n            os.makedirs(output_subdirectory, exist_ok=True)\n\n            # Save the processed image with a new filename\n            output_filename = os.path.join(output_subdirectory, f"processed_{item}")\

## Pre Processing

In [25]:
import os
from PIL import Image
import random

# Function to preprocess and save samples without data augmentation
def preprocess_and_save_images(input_folder, output_folder, target_size=(256, 256)):
    total_images = 0
    total_processed_images = 0

    for root, dirs, files in os.walk(input_folder):
        for folder in dirs:
            current_folder_path = os.path.join(root, folder)
            output_folder_path = os.path.join(output_folder, folder)

            # Create the output folder for the current class if it doesn't exist
            os.makedirs(output_folder_path, exist_ok=True)

            for image_name in os.listdir(current_folder_path):
                total_images += 1

                input_image_path = os.path.join(current_folder_path, image_name)
                output_image_path = os.path.join(output_folder_path, f"{folder}_{image_name}")

                # Preprocess and save the image
                if image_process(input_image_path, output_image_path, target_size):
                    total_processed_images += 1

    # Save statistics in a text file
    save_stats(total_images, total_processed_images)

# Function to preprocess and save a single image
def image_process(input_image_path, output_image_path, target_size=(256, 256)):
    try:
        # Open the image
        image = Image.open(input_image_path)

        # Resize the image
        resized_image = image.resize(target_size)

        # Convert the image to grayscale
        grayscale_image = resized_image.convert("L")

        # Remove random noise (example: salt and pepper noise)
        noisy_image = grayscale_image.point(lambda p: p + random.choice([-50, 0, 50]) if random.random() < 0.05 else p)

        # Save the preprocessed image without compression
        noisy_image.save(output_image_path)

        return True
    except Exception as e:
        print(f"Error processing {input_image_path}: {str(e)}")
        return False

# Function to save statistics in a text file
def save_stats(total_images, total_processed_images):
    stats_path = "preprocessing_stats.txt"
    with open(stats_path, "w") as stats_file:
        stats_file.write(f"Total Images: {total_images}\n")
        stats_file.write(f"Total Processed Images: {total_processed_images}\n")

# Specify the paths to the original images and the output folder
original_images_folder_path = '/Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/NBRAW'
output_folder_path = '/Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/PRO'
# Preprocess and save all images without data augmentation and organize them into folders by class
preprocess_and_save_images(original_images_folder_path, output_folder_path)


Error processing /Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/NBRAW/Train_Alphabet/.DS_Store: cannot identify image file '/Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/NBRAW/Train_Alphabet/.DS_Store'
Error processing /Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/NBRAW/Train_Alphabet/R: [Errno 21] Is a directory: '/Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/NBRAW/Train_Alphabet/R'
Error processing /Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/NBRAW/Train_Alphabet/U: [Errno 21] Is a directory: '/Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/NBRAW/Train_Alphabet/U'
Error processing /Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/NBRAW/Train_Alphabet/I: [Errno 21] Is a directory: '/Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/NBRAW/Train_Alphabet/I'
Error processing /Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/N

# Model

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define constants
input_shape = (256, 256, 1)  # Adjust based on your preprocessed image dimensions
num_classes = 5  # Number of classes in your dataset

# Function to build the CNN model
def build_cnn_model():
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))  # Adding dropout for regularization
    model.add(layers.Dense(num_classes, activation='softmax'))

    return model

# Function to compile the model
def compile_model(model):
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

# Function to apply data augmentation
def apply_data_augmentation(train_dir):
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    return train_datagen.flow_from_directory(
        train_dir,
        target_size=(input_shape[0], input_shape[1]),
        batch_size=32,
        color_mode='grayscale',
        class_mode='categorical'
    )

# Function to train the model
def train_model(model, train_generator, epochs=10):
    model.fit(train_generator, epochs=epochs)

# Function to evaluate the model
def evaluate_model(model, test_generator):
    return model.evaluate(test_generator)

# Specify the path to the preprocessed training images
preprocessed_train_images_folder_path = '/Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/SPLIT/train'

# Apply data augmentation
train_generator = apply_data_augmentation(preprocessed_train_images_folder_path)

# Build the CNN model
cnn_model = build_cnn_model()

# Compile the model
compile_model(cnn_model)

# Train the model
train_model(cnn_model, train_generator, epochs=10)

# Specify the path to the test images
test_dataset_folder_path = '/Users/aditya/Desktop/Class/DS675Machine Learning/Project/DATASET/SPLIT/test'

# Apply data augmentation to the test set
test_generator = apply_data_augmentation(test_dataset_folder_path)

# Evaluate the model on the test set
evaluation_result = evaluate_model(cnn_model, test_generator)
print("Evaluation Result:", evaluation_result)

Found 3600 images belonging to 5 classes.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10

KeyboardInterrupt: 