In [None]:

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
import time
import warnings
warnings.filterwarnings('ignore')

In [None]:
current_dir = os.getcwd()
NOISY_BASE_DIR = os.path.join(current_dir, "content", "data", "noisy")

def load_images_and_labels_from_noise_dirs(base_dir, expected_shape=(224, 224, 3), max_images_per_dir=2500):
    """Load up to `max_images_per_dir` images per noise type and label by directory name"""
    images = []
    labels = []

    for noise_label in os.listdir(base_dir):
        noise_dir = os.path.join(base_dir, noise_label)
        if not os.path.isdir(noise_dir):
            continue

        image_files = glob.glob(os.path.join(noise_dir, "*.png"))
        image_files = image_files[:max_images_per_dir]  # Limit number of images

        print(f"Loading {len(image_files)} images from {noise_label}")

        for img_path in image_files:
            try:
                img = Image.open(img_path).convert('RGB')
                img_array = np.array(img)

                if img_array.shape != expected_shape:
                    # print(f"Skipping {img_path} due to unexpected shape: {img_array.shape}")
                    continue

                images.append(img_array)
                labels.append(noise_label)

            except Exception as e:
                print(f"Error loading {img_path}: {e}")

    return np.array(images), np.array(labels)

X, y = load_images_and_labels_from_noise_dirs(NOISY_BASE_DIR, max_images_per_dir=1000)

print(f"Total images loaded: {len(X)}")
print(f"Image shape: {X[0].shape}")
print(f"Unique labels (noise types): {np.unique(y)}")

Loading 1000 images from gaussian_poisson
Loading 1000 images from poisson_speckle
Loading 1000 images from gaussian_uniform
Loading 1000 images from gaussian_salt_pepper
Loading 1000 images from poisson
Loading 1000 images from gaussian_speckle
Loading 1000 images from salt_pepper_uniform
Loading 1000 images from uniform
Loading 1000 images from gaussian_salt_pepper_speckle
Loading 1000 images from speckle_uniform
Loading 1000 images from salt_pepper
Loading 1000 images from gaussian
Loading 1000 images from salt_pepper_speckle_uniform
Loading 1000 images from poisson_uniform
Loading 1000 images from speckle
Loading 1000 images from gaussian_poisson_uniform
Loading 1000 images from salt_pepper_speckle
Total images loaded: 16949
Image shape: (224, 224, 3)
Unique labels (noise types): ['gaussian' 'gaussian_poisson' 'gaussian_poisson_uniform'
 'gaussian_salt_pepper' 'gaussian_salt_pepper_speckle' 'gaussian_speckle'
 'gaussian_uniform' 'poisson' 'poisson_speckle' 'poisson_uniform'
 'salt_

In [4]:
# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [None]:
def extract_features(images):
    """Extract features from images for ML classifiers"""
    features = []

    for img in images:
        # Flatten image

        # Calculate basic statistics
        mean_rgb = np.mean(img, axis=(0, 1))
        std_rgb = np.std(img, axis=(0, 1))

        # Calculate histogram features
        hist_r = np.histogram(img[:,:,0], bins=32, range=(0, 255))[0]
        hist_g = np.histogram(img[:,:,1], bins=32, range=(0, 255))[0]
        hist_b = np.histogram(img[:,:,2], bins=32, range=(0, 255))[0]

        # Combine features
        feature_vector = np.concatenate([
            mean_rgb, std_rgb, hist_r, hist_g, hist_b
        ])

        features.append(feature_vector)

    return np.array(features)

print("Extracting features for ML classifiers...")
X_features = extract_features(X)

Extracting features for ML classifiers...


In [9]:
# Apply PCA to reduce dimensionality
print("Applying PCA for dimensionality reduction...")
pca = PCA(n_components=50)
X_pca = pca.fit_transform(X_features)

print(f"Original feature shape: {X_features.shape}")
print(f"PCA feature shape: {X_pca.shape}")
print(f"Explained variance ratio: {pca.explained_variance_ratio_[:5]}")

Applying PCA for dimensionality reduction...
Original feature shape: (33881, 102)
PCA feature shape: (33881, 50)
Explained variance ratio: [0.27917821 0.23148427 0.11516947 0.07507437 0.03913307]


In [10]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X_pca, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training set: {X_train_scaled.shape}")
print(f"Test set: {X_test_scaled.shape}")

Training set: (27104, 50)
Test set: (6777, 50)


In [None]:
classifiers = {
    'SVM': SVC(kernel='rbf', random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42, max_depth=10),
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42, max_depth=15)
}

ml_results = {}

for name, clf in classifiers.items():
    print(f"\n{'='*50}")
    print(f"Training {name}...")
    print(f"{'='*50}")

    start_time = time.time()

    clf.fit(X_train_scaled, y_train)

    y_pred = clf.predict(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)

    training_time = time.time() - start_time

    print(f"{name} Accuracy: {accuracy:.4f}")
    print(f"Training time: {training_time:.2f} seconds")

    ml_results[name] = {
        'accuracy': accuracy,
        'training_time': training_time,
        'predictions': y_pred
    }

    print(f"\nClassification Report for {name}:")
    print(classification_report(y_test, y_pred,
                              target_names=label_encoder.classes_))



Training SVM...
SVM Accuracy: 0.8054
Training time: 40.60 seconds

Classification Report for SVM:
                              precision    recall  f1-score   support

                    gaussian       0.55      0.61      0.58       398
            gaussian_poisson       0.92      0.96      0.94       398
    gaussian_poisson_uniform       0.84      0.92      0.88       399
        gaussian_salt_pepper       0.95      0.98      0.97       399
gaussian_salt_pepper_speckle       0.91      0.98      0.94       399
            gaussian_speckle       0.64      0.68      0.66       398
            gaussian_uniform       0.80      0.74      0.77       399
                     poisson       0.96      0.90      0.93       399
             poisson_speckle       0.85      0.90      0.87       399
             poisson_uniform       0.83      0.84      0.84       399
                 salt_pepper       0.89      0.93      0.91       399
         salt_pepper_speckle       0.85      0.79      0.82 