# New Section

In [4]:
import os
import cv2  # For image processing
import numpy as np
import zipfile
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA  # Dimensionality reduction for image data

# Define image directory path and image size.
# Use a local path instead of Google Colab.
# The 'Images' folder should be in the same directory as this notebook.
image_folder_path = 'Images'  # Change this path if your Images folder is elsewhere

size = (128, 128)  # Target size handed to cv2.resize for every image

# File extensions accepted as images; matched case-insensitively.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def is_image_file(file_name):
    """Return True if *file_name* ends with one of IMAGE_EXTENSIONS.

    The comparison ignores case so that names such as ``photo.JPG`` are
    not silently skipped.
    """
    return file_name.lower().endswith(IMAGE_EXTENSIONS)


# Lists to store the flattened images and their labels
images = []
labels = []  # Every loaded image is labelled 1 (assumed to contain a stop sign)

# Load, process, and append images.
# isdir (rather than exists) so that a regular file at this path is reported
# as a missing folder instead of crashing inside os.listdir.
if os.path.isdir(image_folder_path):
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded train/test split) reproducible.
    for file_name in sorted(os.listdir(image_folder_path)):
        if not is_image_file(file_name):
            continue
        img_path = os.path.join(image_folder_path, file_name)
        img = cv2.imread(img_path)  # Load image
        if img is None:  # The image could not be loaded
            print(f"Warning: Could not load image: {img_path}")
            continue
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
        gray_img = cv2.resize(gray_img, size)  # Resize to 128x128
        gray_img = gray_img / 255.0  # Scale pixel values by 1/255
        images.append(gray_img.flatten())  # Flatten image into 1D vector
        labels.append(1)
else:
    print(f"Error: Image folder not found at '{image_folder_path}'")
    print(f"Current directory: {os.getcwd()}")

# Convert to numpy arrays (one row per image, one label per row)
X = np.array(images)
y = np.array(labels)

# Check that enough images were loaded to train and evaluate a model
if len(X) == 0:
    print("Error: No images were loaded. Please check the image folder path and image files.")
elif len(X) < 2:
    # train_test_split raises ValueError when the split would leave the
    # training set empty, which is what happens with a single sample.
    print("Error: At least 2 images are required to create a train/test split.")
else:
    # With a single class the classifier can only ever predict that class,
    # so the accuracy printed below is 100% by construction.
    if np.unique(y).size < 2:
        print("Warning: Only one class is present in the labels; "
              "the reported accuracy is trivially 100% and not meaningful.")

    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Dimensionality reduction with PCA (optional but recommended for high-dimensional data)
    # Cap n_components so it is at most min(n_samples, n_features) of the training set
    max_components = min(X_train.shape[0], X_train.shape[1])
    n_components = min(38, max_components)  # Use 38 or the max available, whichever is smaller

    print(f"Training set shape: {X_train.shape}")
    print(f"Using {n_components} PCA components")

    # Fit PCA on the training data only, then apply the same projection to the test data
    pca = PCA(n_components=n_components)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    # Train a Random Forest classifier on the PCA-reduced features
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train_pca, y_train)

    # Predict on the test set
    y_pred = clf.predict(X_test_pca)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

Training set shape: (26, 16384)
Using 26 PCA components
Accuracy: 100.00%
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         7

    accuracy                           1.00         7
   macro avg       1.00      1.00      1.00         7
weighted avg       1.00      1.00      1.00         7

