### Performing Degradation Types on Original Images

In [None]:
import os
from PIL import Image, ImageEnhance
import numpy as np
import cv2

# Define the folder path containing the images and the output folder
folder_path = r"C:\Users\abhis\Downloads\images"
output_folder = r"C:\Users\abhis\Downloads\degraded_images"

# Create one output subfolder per degradation effect (a no-op when it already exists)
effects = [
    "low_illumination", "high_contrast", "hazy",
    "blurry", "noisy", "red_tint", "blue_tint", "green_tint"
]
for effect in effects:
    os.makedirs(os.path.join(output_folder, effect), exist_ok=True)

# Get a list of all image files in the folder.
# The extension test is case-insensitive and includes the leading dot, so
# "PHOTO.JPG" is picked up while a name that merely ends in the letters "png"
# (without being a .png file) is not.
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
image_files = [f for f in os.listdir(folder_path) if f.lower().endswith(image_extensions)]

# Low-illumination effect
def degrade_low_illumination(image):
    """Darken *image* to half of its original brightness.

    Returns a ``(darkened_image, "low_illumination")`` tuple; the string is
    the effect name paired with the result.
    """
    brightness_factor = 0.5  # 0.5 keeps 50% of the original brightness
    darkened = ImageEnhance.Brightness(image).enhance(brightness_factor)
    return darkened, "low_illumination"

# High-contrast effect
def degrade_high_contrast(image):
    """Double the contrast of *image*.

    Returns a ``(contrasted_image, "high_contrast")`` tuple; the string is
    the effect name paired with the result.
    """
    contrast_factor = 2.0  # contrast is scaled by a factor of 2
    contrasted = ImageEnhance.Contrast(image).enhance(contrast_factor)
    return contrasted, "high_contrast"

# Hazy effect
def degrade_hazy(image):
    """Blend *image* 50/50 with a uniform layer of value 255.

    Returns a ``(hazy_image, "hazy")`` tuple; the string is the effect name
    paired with the result.
    """
    pixels = np.array(image)
    # Constant layer with the same shape and dtype as the image, filled with 255
    white_layer = np.ones(pixels.shape, pixels.dtype) * 255
    blended = cv2.addWeighted(pixels, 0.5, white_layer, 0.5, 0)
    return Image.fromarray(blended), "hazy"

# Blurry effect
def degrade_blurry(image):
    """Blur *image* with a 15x15 Gaussian kernel.

    Returns a ``(blurred_image, "blurry")`` tuple; the string is the effect
    name paired with the result.
    """
    pixels = np.array(image)
    kernel_size = (15, 15)
    blurred = cv2.GaussianBlur(pixels, kernel_size, 0)  # sigma argument left at 0
    return Image.fromarray(blurred), "blurry"

# Function to apply noisy effect
def degrade_noisy(image):
    """Add zero-mean Gaussian noise (standard deviation 25) to *image*.

    Assumes 8-bit image data (values 0-255); the result is rounded, clipped
    to that range and returned as uint8.

    Returns a ``(noisy_image, "noisy")`` tuple; the string is the effect name
    paired with the result.
    """
    image_array = np.array(image)
    noise = np.random.normal(0, 25, image_array.shape)
    # Add in floating point and clip afterwards.  Casting the noise itself to
    # uint8 (as was done before) cannot represent the negative half of the
    # distribution, so the "noise" only ever brightened pixels or wrapped to
    # large values instead of being symmetric around zero.
    noisy_float = image_array.astype(np.float64) + noise
    noisy_image = np.clip(np.rint(noisy_float), 0, 255).astype(np.uint8)
    return Image.fromarray(noisy_image), "noisy"

# Red-tint effect
def degrade_red_tint(image):
    """Add 100 to channel 0 of *image* (the red channel of an RGB image).

    The addition is done with ``cv2.add`` on the image array and an overlay
    that is zero everywhere except channel 0.

    Returns a ``(tinted_image, "red_tint")`` tuple; the string is the effect
    name paired with the result.
    """
    pixels = np.array(image)
    overlay = np.zeros_like(pixels)
    overlay[:, :, 0] = 100  # only channel 0 receives the offset
    return Image.fromarray(cv2.add(pixels, overlay)), "red_tint"

# Blue-tint effect
def degrade_blue_tint(image):
    """Add 100 to channel 2 of *image* (the blue channel of an RGB image).

    The addition is done with ``cv2.add`` on the image array and an overlay
    that is zero everywhere except channel 2.

    Returns a ``(tinted_image, "blue_tint")`` tuple; the string is the effect
    name paired with the result.
    """
    pixels = np.array(image)
    overlay = np.zeros_like(pixels)
    overlay[:, :, 2] = 100  # only channel 2 receives the offset
    return Image.fromarray(cv2.add(pixels, overlay)), "blue_tint"

# Green-tint effect
def degrade_green_tint(image):
    """Add 100 to channel 1 of *image* (the green channel of an RGB image).

    The addition is done with ``cv2.add`` on the image array and an overlay
    that is zero everywhere except channel 1.

    Returns a ``(tinted_image, "green_tint")`` tuple; the string is the effect
    name paired with the result.
    """
    pixels = np.array(image)
    overlay = np.zeros_like(pixels)
    overlay[:, :, 1] = 100  # only channel 1 receives the offset
    return Image.fromarray(cv2.add(pixels, overlay)), "green_tint"

# List of degradation functions; each takes a PIL image and returns
# (degraded_image, effect_name)
degradation_functions = [
    degrade_low_illumination,
    degrade_high_contrast,
    degrade_hazy,
    degrade_blurry,
    degrade_noisy,
    degrade_red_tint,
    degrade_blue_tint,
    degrade_green_tint
]

# Apply all degradation effects to each image in the folder
for image_file in image_files:
    image_path = os.path.join(folder_path, image_file)

    # Open inside a context manager so the underlying file handle is released
    # as soon as the pixels are loaded, and normalise to RGB: the tint
    # functions index a third (channel) axis, which palette/greyscale images
    # (e.g. GIF, single-channel PNG) do not have.
    with Image.open(image_path) as opened_image:
        original_image = opened_image.convert("RGB")

    base_name, extension = os.path.splitext(image_file)
    for func in degradation_functions:
        degraded_image, effect_name = func(original_image)
        effect_folder = os.path.join(output_folder, effect_name)
        # Output name: <original stem>_<effect><original extension>
        degraded_image_path = os.path.join(effect_folder, f"{base_name}_{effect_name}{extension}")
        degraded_image.save(degraded_image_path)

print("All images have been processed and saved with degradation effects.")


This Python script applies various image degradation effects to all image files in a specified folder and saves the processed images into subfolders organized by effect type. It first sets up the directory paths for the input and output, and creates subfolders for each type of degradation effect: low illumination, high contrast, hazy, blurry, noisy, and red, blue, and green tints. The script defines functions to apply each effect using the Python Imaging Library (PIL) and OpenCV. The effects are achieved by manipulating image brightness, contrast, applying Gaussian blur, adding random noise, and overlaying color tints. The script then iterates over each image in the input folder, applying all defined degradation functions, and saving the modified images into the corresponding subfolders within the output directory. Once processed, each image is saved with a filename indicating the applied effect.

### 1. Feature Extractor using PCA

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import time
import random

# Input folders: one sub-folder of the degraded-image root per effect
_DEGRADED_ROOT = "C:/Users/abhis/Downloads/degraded_images"
_EFFECT_FOLDERS = (
    "green_tint",
    "blue_tint",
    "red_tint",
    "noisy",
    "blurry",
    "hazy",
    "high_contrast",
    "low_illumination",
)
directories = [f"{_DEGRADED_ROOT}/{effect_folder}" for effect_folder in _EFFECT_FOLDERS]

# PCA colour-feature extractor
def extract_pca_color_features(image, max_components=10):
    """Describe the colour distribution of *image* with a PCA over its pixels.

    The image is converted from BGR to RGB, flattened to one row per pixel
    with three columns scaled to [0, 1], and a PCA is fitted on those rows.

    Args:
        image: BGR image array as returned by ``cv2.imread``.
        max_components: Upper bound on the number of principal components;
            also fixes the length of the returned vector.

    Returns:
        1-D array of length ``4 * max_components``: the flattened principal
        axes zero-padded to ``3 * max_components`` values, followed by the
        explained-variance ratios zero-padded to ``max_components`` values.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pixels = rgb.reshape(-1, 3).astype(float) / 255.0

    # PCA cannot use more components than there are samples or features
    n_components = min(max_components, *pixels.shape)

    pca = PCA(n_components=n_components)
    pca.fit(pixels)

    # Zero-pad both parts so every image yields a vector of the same length
    flat_axes = pca.components_.flatten()
    variance_ratio = pca.explained_variance_ratio_
    padded_axes = np.pad(flat_axes, (0, max_components * 3 - len(flat_axes)))
    padded_ratio = np.pad(variance_ratio, (0, max_components - len(variance_ratio)))

    return np.concatenate((padded_axes, padded_ratio))

# Lists to hold the extracted features, their labels and the file each
# feature vector came from (same index in all three lists)
all_features = []
all_labels = []
all_paths = []

start_time = time.time()
processed_images = 0

for folder in directories:
    print(f"Processing folder: {folder}")
    for filename in os.listdir(folder):
        filepath = os.path.join(folder, filename)
        try:
            # Read the image
            image = cv2.imread(filepath)
            if image is not None:
                # Extract PCA features; the folder name is the class label
                pca_features = extract_pca_color_features(image)
                all_features.append(pca_features)
                all_labels.append(os.path.basename(folder))
                all_paths.append(filepath)
                processed_images += 1
                print(f"Processed {filename} successfully.")
            else:
                print(f"Failed to read {filename}.")
        except Exception as e:
            print(f"Error processing {filename}: {e}")
            continue  # Skip to the next image

        # Print progress update
        elapsed_time = time.time() - start_time
        if processed_images > 0 and elapsed_time > 0:
            print(f"Current processing speed: {processed_images / elapsed_time:.2f} images/sec")
        else:
            print("Processing speed cannot be calculated yet.")

    print(f"Finished processing folder: {folder}")

# Save the extracted features and labels
np.save("PCAextractedcolor.npy", np.array(all_features))
np.save("PCA_labels.npy", np.array(all_labels))

print("Feature extraction complete.")

# Print up to 10 random images with their PCA features
sample_indices = random.sample(range(len(all_features)), min(10, len(all_features)))
for idx in sample_indices:
    print(f"Image: {all_labels[idx]}")
    print(f"PCA Features: {all_features[idx]}")

    # Visualization (optional): show the image these features were actually
    # extracted from, rather than an arbitrary file from the same folder.
    img_path = all_paths[idx]
    sample_image = cv2.imread(img_path)
    if sample_image is None:
        print(f"Failed to read {os.path.basename(img_path)}.")
        continue
    plt.imshow(cv2.cvtColor(sample_image, cv2.COLOR_BGR2RGB))
    plt.title(f"PCA Features for {os.path.basename(img_path)}")
    plt.show()



### Classifier Using PCA 

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

# Load features and labels
features = np.load("PCAextractedcolor.npy")
labels = np.load("PCA_labels.npy")

# Check the unique labels to ensure they are correct
print("Unique labels:", np.unique(labels))

# Split data into train and test sets (stratified so every class keeps the
# same proportion in both parts)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42, stratify=labels)

# Define classifiers with hyperparameter grids.
# Grid keys carry the 'classifier__' prefix because each estimator is tuned
# as the 'classifier' step of a pipeline.
# SVC is created with probability=True: the soft-voting ensemble built from
# these models needs predict_proba from every member, which SVC only provides
# when probability estimates are enabled.
classifiers = {
    "Gaussian Naive Bayes": (GaussianNB(), {}),
    "K-Nearest Neighbors": (KNeighborsClassifier(), {'classifier__n_neighbors': [3, 5, 7, 9]}),
    "Logistic Regression": (LogisticRegression(max_iter=1000), {'classifier__C': [0.1, 1, 10]}),
    "SVM": (SVC(probability=True), {'classifier__C': [0.1, 1, 10], 'classifier__kernel': ['rbf', 'linear']}),
    "MLP": (MLPClassifier(max_iter=1000), {'classifier__hidden_layer_sizes': [(50,), (100,), (50, 50)]}),
    "Random Forest": (RandomForestClassifier(), {'classifier__n_estimators': [50, 100, 200]}),
    "Gradient Boosting": (GradientBoostingClassifier(), {'classifier__n_estimators': [50, 100, 200], 'classifier__learning_rate': [0.01, 0.1]}),
    "Decision Tree": (DecisionTreeClassifier(), {'classifier__max_depth': [5, 10, None]})
}

# Pipeline factory: feature selection -> scaling -> classifier
def create_pipeline(clf):
    """Wrap *clf* in a three-step pipeline.

    Steps, in order: ``'feature_selection'`` (``SelectKBest`` with
    ``f_classif`` keeping 20 features), ``'scaler'`` (``StandardScaler``) and
    ``'classifier'`` (the estimator passed in).
    """
    steps = [
        ('feature_selection', SelectKBest(f_classif, k=20)),
        ('scaler', StandardScaler()),
        ('classifier', clf),
    ]
    return Pipeline(steps)

# Grid-search every classifier, then score the tuned model on the test split.
# best_classifiers maps name -> (tuned pipeline, test accuracy).
best_classifiers = {}

for name, (clf, param_grid) in classifiers.items():
    print(f"Training {name}...")
    pipeline = create_pipeline(clf)
    grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Predictions come from the best parameter combination found by the search
    y_pred = grid_search.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} accuracy: {accuracy:.2f}")
    print(classification_report(y_test, y_pred))

    best_classifiers[name] = (grid_search.best_estimator_, accuracy)

# Combine the tuned pipelines in a soft-voting ensemble
voting_members = [
    (member_name, tuned_pipeline)
    for member_name, (tuned_pipeline, _) in best_classifiers.items()
]
voting_clf = VotingClassifier(estimators=voting_members, voting='soft')

voting_clf.fit(X_train, y_train)
y_pred_voting = voting_clf.predict(X_test)
voting_accuracy = accuracy_score(y_test, y_pred_voting)

print("\nVoting Classifier Results:")
print(f"Accuracy: {voting_accuracy:.2f}")
print(classification_report(y_test, y_pred_voting))

# Pick the individual classifier with the highest test accuracy;
# best_classifier is a (name, (pipeline, accuracy)) pair
best_classifier = max(best_classifiers.items(), key=lambda entry: entry[1][1])
print(f"\nBest individual classifier: {best_classifier[0]} with accuracy {best_classifier[1][1]:.2f}")

PCA extracted

* Gaussian Naive Bayes accuracy: 0.35
* K-Nearest Neighbors accuracy: 0.38
* Logistic Regression accuracy: 0.41
* SVM accuracy: 0.45
* MLP accuracy: 0.48
* Random Forest accuracy: 0.45
* Gradient Boosting accuracy: 0.47
* Decision Tree accuracy: 0.43
* Bayes accuracy: 0.32


Output Explanation

Gaussian Naive Bayes (35% accuracy): This classifier assumes feature independence, which might not hold well with PCA features as they capture complex patterns. The low accuracy suggests that this assumption limits its performance.

K-Nearest Neighbors (38% accuracy): KNN's performance is better but still low because it relies heavily on the notion of proximity in feature space, which can be distorted with PCA-transformed data.

Logistic Regression (41% accuracy): Logistic regression provides a moderate accuracy by using a linear decision boundary. The accuracy indicates some capability in handling PCA features but still struggles with non-linear patterns.

SVM (45% accuracy): Support Vector Machine offers a higher accuracy by finding optimal hyperplanes, which helps it capture more complex relationships in the data than linear classifiers.

MLP (48% accuracy): The Multi-Layer Perceptron, a type of neural network, performs relatively well as it can model non-linear relationships in the data, benefiting from deeper network architectures.

Random Forest (45% accuracy): Random Forest performs similarly to SVM by using ensemble learning to handle complex feature interactions, but it might still struggle with high-dimensional data like PCA features.

Gradient Boosting (47% accuracy): This method incrementally improves model predictions and manages to capture intricate patterns in the data, leading to better performance compared to simpler models.

Decision Tree (43% accuracy): While simple and interpretable, Decision Trees have lower accuracy because they are prone to overfitting, especially in high-dimensional spaces without ensemble techniques.

### 2. Feature Extractor using HOG 

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from skimage.feature import hog
import time
import random

# Input folders: one sub-folder of the degraded-image root per effect
_DEGRADED_ROOT = "C:/Users/abhis/Downloads/degraded_images"
_EFFECT_FOLDERS = (
    "green_tint",
    "blue_tint",
    "red_tint",
    "noisy",
    "blurry",
    "hazy",
    "high_contrast",
    "low_illumination",
)
directories = [f"{_DEGRADED_ROOT}/{effect_folder}" for effect_folder in _EFFECT_FOLDERS]

# Per-channel HOG feature extractor
def extract_hog_color_features(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), orientations=9):
    """Compute a HOG descriptor for each colour channel and join them.

    The image is resized to 128x128 and converted from BGR to RGB; HOG is
    then computed independently on each of the three channels.

    Args:
        image: BGR image array as returned by ``cv2.imread``.
        pixels_per_cell: Passed through to ``skimage.feature.hog``.
        cells_per_block: Passed through to ``skimage.feature.hog``.
        orientations: Passed through to ``skimage.feature.hog``.

    Returns:
        1-D array holding the channel-0, channel-1 and channel-2 descriptors
        back to back.
    """
    resized = cv2.resize(image, (128, 128))
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    per_channel = [
        hog(
            rgb[:, :, channel],
            pixels_per_cell=pixels_per_cell,
            cells_per_block=cells_per_block,
            orientations=orientations,
            visualize=False
        )
        for channel in range(3)
    ]

    return np.concatenate(per_channel)

# Lists to hold the extracted features, their labels and the file each
# feature vector came from (same index in all three lists)
all_features = []
all_labels = []
all_paths = []

start_time = time.time()
processed_images = 0

for folder in directories:
    print(f"Processing folder: {folder}")
    for filename in os.listdir(folder):
        filepath = os.path.join(folder, filename)
        try:
            # Read the image
            image = cv2.imread(filepath)
            if image is not None:
                # Extract HOG features
                hog_features = extract_hog_color_features(image)
                all_features.append(hog_features)
                all_labels.append(os.path.basename(folder))  # Use folder name as label
                all_paths.append(filepath)
                processed_images += 1
                print(f"Processed {filename} successfully.")
            else:
                print(f"Failed to read {filename}.")
        except Exception as e:
            print(f"Error processing {filename}: {e}")

        # Print progress update
        elapsed_time = time.time() - start_time
        if elapsed_time > 0:
            print(f"Current processing speed: {processed_images / elapsed_time:.2f} images/sec")

    print(f"Finished processing folder: {folder}")

# Save the extracted features and labels
np.save("HOGextractedcolor.npy", np.array(all_features))
np.save("HOG_labels.npy", np.array(all_labels))

print("Feature extraction complete.")

# Print up to 10 random images with their HOG features
sample_indices = random.sample(range(len(all_features)), min(10, len(all_features)))
for idx in sample_indices:
    print(f"Image: {all_labels[idx]}")
    print(f"HOG Features shape: {all_features[idx].shape}")
    print(f"First few HOG Features: {all_features[idx][:10]}")

    # Visualization (optional): show the image these features were actually
    # extracted from, rather than an arbitrary file from the same folder.
    img_path = all_paths[idx]
    sample_image = cv2.imread(img_path)
    if sample_image is None:
        print(f"Failed to read {os.path.basename(img_path)}.")
        continue
    plt.imshow(cv2.cvtColor(sample_image, cv2.COLOR_BGR2RGB))
    plt.title(f"HOG Features for {os.path.basename(img_path)}")
    plt.show()

### Classifier for HOG

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
import time

# Load features and labels
print("Loading features and labels...")
features = np.load("HOGextractedcolor.npy", mmap_mode='r')  # Memory-mapped mode
labels = np.load("HOG_labels.npy")

# Verify unique labels
unique_labels = np.unique(labels)
print(f"Unique labels: {unique_labels}")

# Encode the string labels as integers; le.classes_ keeps the original names
print("Encoding labels...")
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)

# Split data.  The split is stratified, as in the PCA experiment, so that
# every degradation class keeps the same proportion in the train and test
# parts and the reported accuracies are comparable across experiments.
print("Splitting data into training and testing sets...")
X_train, X_test, y_train, y_test = train_test_split(
    features, labels_encoded, test_size=0.2, random_state=42, stratify=labels_encoded
)

# Define classifiers (fixed hyperparameters, no search in this experiment)
classifiers = {
    "Gaussian Naive Bayes": GaussianNB(),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5, algorithm='auto', weights='distance'),
    "SGD Classifier": SGDClassifier(max_iter=1000, tol=1e-3),
    "Linear SVM": SVC(kernel='linear', max_iter=2000),
    "MLP": MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "Decision Tree": DecisionTreeClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100)
}

# Batched prediction helper
def batch_predict(clf, X, batch_size=1000):
    """Run ``clf.predict`` over *X* in consecutive slices and join the output.

    Args:
        clf: Object exposing ``predict(batch)``.
        X: Array-like with a ``shape`` attribute; sliced along its first axis.
        batch_size: Maximum number of rows handed to ``predict`` per call.

    Returns:
        NumPy array with one prediction per row of *X*, in row order.
    """
    batch_starts = range(0, X.shape[0], batch_size)
    predictions = [
        predicted
        for start in batch_starts
        for predicted in clf.predict(X[start:start + batch_size])
    ]
    return np.array(predictions)

# Fit, score and cross-validate every classifier in turn.
# results collects (name, test accuracy) pairs.
results = []
for name, clf in classifiers.items():
    print(f"\nTraining {name}...")
    start_time = time.time()

    # Each classifier is trained on standardised features
    pipeline = Pipeline(steps=[('scaler', StandardScaler()), ('classifier', clf)])
    pipeline.fit(X_train, y_train)
    print(f"Model {name} trained in {time.time() - start_time:.2f} seconds.")

    # Test-set predictions are produced batch by batch
    print(f"Predicting with {name}...")
    y_pred = batch_predict(pipeline, X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} accuracy: {accuracy:.2f}")
    print(classification_report(y_test, y_pred, target_names=le.classes_))

    # 5-fold cross-validation on the training split only
    print(f"Performing cross-validation for {name}...")
    cv_scores = cross_val_score(pipeline, X_train, y_train, cv=5)
    print(f"Cross-validation accuracy: {cv_scores.mean():.2f} (+/- {cv_scores.std() * 2:.2f})")

    results.append((name, accuracy))

print("\nClassification complete.")

# Report the classifier with the highest test accuracy
best_classifier, best_accuracy = max(results, key=lambda entry: entry[1])
print(f"\nBest classifier: {best_classifier} with accuracy {best_accuracy:.2f}")


* Gaussian Naive Bayes accuracy: 0.54
* K-Nearest Neighbors accuracy: 0.17
* SGD Classifier accuracy: 0.55
* Linear SVM accuracy: 0.61
* MLP accuracy: 0.65
* Random Forest accuracy: 0.48
* Decision Tree accuracy: 0.40
* Logistic Regression accuracy: 0.58
* Gradient Boosting accuracy: 0.69


* Output Explanation
  
Gaussian Naive Bayes (54% accuracy): This classifier assumes that features are independent. It performs moderately well because HOG features, which focus on local gradients, can somewhat meet this assumption, but it still lacks in modeling complex interactions.

K-Nearest Neighbors (17% accuracy): KNN performs poorly in this setup, likely due to the high dimensionality of HOG features, which can lead to the "curse of dimensionality," where distances become less meaningful.

SGD Classifier (55% accuracy): The Stochastic Gradient Descent Classifier shows moderate performance. It benefits from fast training on large datasets but may struggle with non-linear data patterns.

Linear SVM (61% accuracy): This classifier uses a linear decision boundary, which works well with HOG features capturing edges and gradients, resulting in good performance for this task.

MLP (65% accuracy): The Multi-Layer Perceptron, a neural network, handles non-linear patterns effectively, benefiting from its ability to learn complex feature interactions, leading to relatively high accuracy.

Random Forest (48% accuracy): Random Forest uses ensemble learning to model feature interactions but might struggle with the high dimensionality of HOG features, limiting its accuracy.

Decision Tree (40% accuracy): Decision Trees have lower accuracy due to their tendency to overfit high-dimensional data and lack ensemble methods to balance this.

Logistic Regression (58% accuracy): Logistic Regression performs better due to its ability to find linear decision boundaries that effectively separate the classes in the feature space.

Gradient Boosting (69% accuracy): This method incrementally builds models that correct previous errors, capturing complex patterns and leading to the highest accuracy among individual models.

### 3. Feature Extractor and Classifier - HOG + Colour Histogram

In [None]:
import os
import cv2
import numpy as np
from skimage.feature import hog
from concurrent.futures import ThreadPoolExecutor
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import classification_report, accuracy_score, ConfusionMatrixDisplay
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from tqdm import tqdm
from scipy.stats import zscore
import random
import matplotlib.pyplot as plt
import time
import warnings

# Input folders: one sub-folder of the degraded-image root per effect
_DEGRADED_ROOT = "C:/Users/abhis/Downloads/degraded_images"
_EFFECT_FOLDERS = (
    "green_tint",
    "blue_tint",
    "red_tint",
    "noisy",
    "blurry",
    "hazy",
    "high_contrast",
    "low_illumination",
)
directories = [f"{_DEGRADED_ROOT}/{effect_folder}" for effect_folder in _EFFECT_FOLDERS]

def extract_features(image):
    """Build a combined HOG + colour-histogram feature vector for *image*.

    The HOG part is computed on a 128x128 greyscale version of the image
    (16x16 pixels per cell, 2x2 cells per block).  The colour part is a joint
    3-D histogram over the three channels of the original, un-resized image
    with 8 bins per channel (512 values).

    Args:
        image: BGR image array as returned by ``cv2.imread``.

    Returns:
        1-D array: HOG descriptor followed by the flattened histogram.
    """
    # Shape descriptor from the resized greyscale image
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    grayscale_small = cv2.resize(grayscale, (128, 128))
    hog_vector = hog(grayscale_small, pixels_per_cell=(16, 16),
                     cells_per_block=(2, 2), feature_vector=True)

    # Colour descriptor from the full-size colour image
    histogram = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])

    return np.concatenate((hog_vector, histogram.flatten()))

def process_image(args):
    """Load one image and extract its features.

    Args:
        args: ``(image_path, label)`` pair, packed in one tuple so the
            function can be used with ``executor.map``.

    Returns:
        ``(features, label, image_path)`` on success, or ``None`` when the
        image cannot be read or feature extraction raises (the error is
        printed).
    """
    image_path, label = args
    try:
        image = cv2.imread(image_path)
        if image is None:
            return None
        return extract_features(image), label, image_path
    except Exception as e:
        print(f"Error processing {image_path}: {str(e)}")
        return None

def process_directory(directory, label):
    """Extract features for every entry of *directory* using a thread pool.

    Args:
        directory: Folder whose entries are passed to ``process_image``.
        label: Label attached to every result from this folder.

    Returns:
        List of ``(features, label, image_path)`` tuples, in directory-listing
        order, with entries for which ``process_image`` returned ``None``
        dropped.
    """
    tasks = [(os.path.join(directory, entry), label) for entry in os.listdir(directory)]
    progress_label = f"Processing {os.path.basename(directory)}"

    with ThreadPoolExecutor() as pool:
        outcomes = list(tqdm(pool.map(process_image, tasks), total=len(tasks), desc=progress_label))

    return [outcome for outcome in outcomes if outcome is not None]

def augment_data(features, labels):
    """Triple a feature set by adding two perturbed copies of every sample.

    For each ``(feature, label)`` pair the output contains, in this order:
    the original vector, the vector plus Gaussian noise (mean 0, standard
    deviation 0.1), and the vector scaled by 1.1.  All three share the label.

    Args:
        features: Iterable of NumPy feature vectors.
        labels: Iterable of labels, aligned with *features*.

    Returns:
        ``(augmented_features, augmented_labels)`` as NumPy arrays.
    """
    out_features, out_labels = [], []

    for sample, target in zip(features, labels):
        jitter = np.random.normal(0, 0.1, sample.shape)
        out_features.extend((sample, sample + jitter, sample * 1.1))
        out_labels.extend((target, target, target))

    return np.array(out_features), np.array(out_labels)

# Prepare dataset: the index of each directory is used as its class label
all_results = []
for idx, directory in enumerate(directories):
    all_results.extend(process_directory(directory, idx))

# Fail with a clear message instead of an opaque unpacking error below
if not all_results:
    raise ValueError("No images could be read from the configured directories.")

# Separate features, labels, and image paths
features, labels, image_paths = zip(*all_results)

# Convert lists to numpy arrays
features = np.array(features)
labels = np.array(labels)

# Normalize features column-wise.  A column that is constant across all
# images (e.g. a colour-histogram bin that is never populated) has zero
# standard deviation and would come out as NaN, which the classifiers reject;
# such columns carry no information, so they are set to 0.
# NOTE(review): the statistics are still computed over the whole data set,
# including the samples that later end up in the test split.
features = np.nan_to_num(zscore(features))

# Save the normalised, un-augmented features and labels
np.save('SparseHOGFeatures.npy', features)
np.save('SparseHOGLabels.npy', labels)

print(f"Saved {len(features)} feature vectors and labels")

# Split dataset BEFORE augmenting.  Augmenting first puts noisy / rescaled
# copies of the same image on both sides of the split, so the test set would
# contain near-duplicates of training samples and the measured accuracy
# would be inflated.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels
)

# Augment the training portion only; the test set stays untouched
X_train, y_train = augment_data(X_train, y_train)

# Define classifiers and their parameter grids for hyperparameter tuning
classifiers = {
    "Gaussian Naive Bayes": (GaussianNB(), {}),
    "K-Nearest Neighbors": (KNeighborsClassifier(), {'n_neighbors': [3, 5, 7, 9]}),
    "Logistic Regression": (LogisticRegression(solver='saga', max_iter=5000), {'C': [0.1, 1, 10, 100]}),
    "Support Vector Machine": (SVC(), {'C': [0.1, 1, 10, 100], 'kernel': ['linear', 'rbf']}),
    "Multilayer Perceptron": (MLPClassifier(max_iter=1000), {'hidden_layer_sizes': [(50,), (100,), (50, 50)], 'alpha': [0.0001, 0.001, 0.01]}),
    "Random Forest": (RandomForestClassifier(), {'n_estimators': [50, 100, 200], 'max_depth': [None, 10, 20, 30]}),
    "Gradient Boosting": (GradientBoostingClassifier(), {'n_estimators': [50, 100, 200], 'learning_rate': [0.01, 0.1, 0.2]}),
    "Decision Tree": (DecisionTreeClassifier(), {'max_depth': [None, 10, 20, 30]})
}

# Train and evaluate classifiers with hyperparameter tuning.
# results collects (name, test accuracy) pairs for the plot further down.
best_classifier = None
best_accuracy = 0
results = []

# Suppress library warnings emitted during the searches
warnings.filterwarnings("ignore")

for name, (clf, param_grid) in classifiers.items():
    print(f"\nTraining {name}...")
    start_time = time.time()

    # Use RandomizedSearchCV for faster hyperparameter tuning
    random_search = RandomizedSearchCV(clf, param_grid, n_iter=10, cv=5, n_jobs=-1, random_state=42)
    random_search.fit(X_train, y_train)

    best_clf = random_search.best_estimator_
    end_time = time.time()
    print(f"Training of {name} completed with best parameters: {random_search.best_params_} in {end_time - start_time:.2f} seconds")

    print(f"Evaluating {name}...")
    y_pred = best_clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results.append((name, accuracy))
    print(f"{name} accuracy: {accuracy:.2f}")

    # Track the highest test accuracy seen so far.  There is deliberately no
    # upper cap: excluding scores above 0.99 would silently report a weaker
    # model as "best" (or none at all).
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_classifier = name

    # Print classification report
    print(f"Classification report for {name}:")
    print(classification_report(y_test, y_pred))

    # Display confusion matrix
    print(f"Displaying confusion matrix for {name}...")
    ConfusionMatrixDisplay.from_estimator(best_clf, X_test, y_test)
    plt.title(f"Confusion Matrix - {name}")
    plt.show()

# Print the best classifier
print(f"\nThe best classifier is {best_classifier} with an accuracy of {best_accuracy:.2f}")

# Visualize classifier performance as a horizontal bar chart
print("Visualizing classifier performance...")
classifier_names = [result[0] for result in results]
accuracies = [result[1] for result in results]

plt.figure(figsize=(10, 5))
plt.barh(classifier_names, accuracies, color='skyblue')
plt.xlabel('Accuracy')
plt.title('Classifier Performance')
# Show the full accuracy range: with the axis starting at 0.8 any classifier
# scoring below 80% had no visible bar, and the threshold line sat on the
# left edge of the plot.
plt.xlim(0, 1)
plt.axvline(0.80, color='red', linestyle='--', label='80% Accuracy Threshold')
plt.legend()
plt.show()

# Visualize up to 5 random images with their HOG features.
# The sample size is capped at the number of available images, because
# random.sample raises ValueError when asked for more items than exist.
random_indices = random.sample(range(len(image_paths)), min(5, len(image_paths)))
for idx in random_indices:
    image_path = image_paths[idx]
    image = cv2.imread(image_path)
    if image is None:
        # The file may have been moved or become unreadable since extraction
        print(f"Failed to read {image_path}.")
        continue

    # Same preprocessing as extract_features: greyscale, 128x128
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    resized_image = cv2.resize(gray_image, (128, 128))
    hog_features, hog_image = hog(resized_image, pixels_per_cell=(16, 16),
                                  cells_per_block=(2, 2), visualize=True)

    # Original on the left, HOG rendering on the right
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title("Original Image")
    plt.subplot(1, 2, 2)
    plt.imshow(hog_image, cmap='gray')
    plt.title("HOG Features")
    plt.show()


* Gaussian Naive  Bayes     -    87%
* KNN                       -    73%
* SVM                       -    89%
* Decision Tree             -    89%
* Naive Bayes               -    82%
* Logistic Regression       -    85%
* MLP                       -    95%
* Random Forest Classifier  -    97%
* Gradient Boost            -    99%

### Output Explanation

1. **Gaussian Naive Bayes (87% accuracy):** This classifier assumes independence among features and is effective for this task due to its simplicity and ability to handle noise in the data.

2. **K-Nearest Neighbors (73% accuracy):** KNN relies on distances between data points, which can be less effective with high-dimensional data like HOG features, leading to lower accuracy.

3. **Support Vector Machine (89% accuracy):** SVM is effective in finding a hyperplane that separates different classes well, leading to high accuracy. It can handle high-dimensional data efficiently.

4. **Decision Tree (89% accuracy):** Decision Trees perform well by modeling complex decision boundaries, which is beneficial for this dataset with diverse image degradations.

5. **Naive Bayes (82% accuracy):** Similar to Gaussian Naive Bayes, this classifier is robust to noise and performs well with the independent feature assumption.

6. **Logistic Regression (85% accuracy):** Logistic Regression provides good performance by finding a linear decision boundary in the feature space, capturing the essential patterns in the data.

7. **MLP (95% accuracy):** The Multi-Layer Perceptron, a type of neural network, excels in learning complex, non-linear relationships within the data, resulting in high accuracy.

8. **Random Forest Classifier (97% accuracy):** Random Forests use ensemble learning, combining multiple decision trees to improve accuracy by reducing overfitting and capturing complex patterns.

9. **Gradient Boosting (99% accuracy):** Gradient Boosting incrementally builds models to correct previous errors, effectively capturing subtle patterns in the data, leading to the highest accuracy.

The variations in accuracy are due to each classifier's ability to model the complex patterns and feature interactions present in the image data, with ensemble methods like Random Forest and Gradient Boosting performing exceptionally well.

### 4. Feature Extraction and Classifier Using Sparse+PCA

In [None]:
import os
import cv2
import numpy as np
from sklearn.decomposition import SparsePCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt

def load_and_extract_features(degraded_folders):
    """Build SparsePCA-reduced colour-histogram features for every readable image.

    Each folder's base name is used as the class label of the images inside it.
    For every image that OpenCV can read, an 8x8x8 three-channel histogram
    (512 values) is computed; the stacked histograms are standardised and then
    projected onto 3 SparsePCA components.

    Args:
        degraded_folders: Iterable of folder paths, one per degradation class.

    Returns:
        Tuple ``(features, labels, image_paths)``: the reduced feature matrix,
        a NumPy array of label strings, and the list of paths of the images
        that were loaded, all in the same order.
    """
    histograms, class_names, kept_paths = [], [], []

    for folder in degraded_folders:
        class_name = os.path.basename(folder)
        print(f"Processing folder: {class_name}")
        for file_name in os.listdir(folder):
            full_path = os.path.join(folder, file_name)
            bgr = cv2.imread(full_path)
            print(f"Processing image: {file_name}")
            if bgr is not None:
                hist = cv2.calcHist([bgr], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
                histograms.append(hist.flatten())
                class_names.append(class_name)
                kept_paths.append(full_path)
            else:
                # cv2.imread signals an unreadable file by returning None
                print(f"Failed to load image: {file_name}")

    # Standardize the features, then apply SparsePCA
    standardized = StandardScaler().fit_transform(np.array(histograms))
    reduced = SparsePCA(n_components=3, random_state=42).fit_transform(standardized)

    return reduced, np.array(class_names), kept_paths

def visualize_features(image_paths, features):
    """Show up to 10 randomly chosen images, each titled with its feature vector.

    Args:
        image_paths: Sequence of image file paths, aligned index-for-index
            with ``features``.
        features: Per-image feature vectors.

    When fewer than 10 images are available all of them are shown; nothing is
    displayed for empty input.
    """
    # Sampling without replacement cannot draw more items than exist
    sample_size = min(10, len(image_paths))
    if sample_size == 0:
        return

    indices = np.random.choice(len(image_paths), sample_size, replace=False)
    for i in indices:
        image_path = image_paths[i]
        feature = features[i]
        print(f"Image: {os.path.basename(image_path)}, Features: {feature}")
        # OpenCV loads BGR; matplotlib expects RGB
        plt.imshow(cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB))
        plt.title(f"Features: {feature}")
        plt.show()

# One folder per degradation class; the folder's base name becomes the class label
degraded_folders = [
    "C:/Users/abhis/Downloads/degraded_images/green_tint",
    "C:/Users/abhis/Downloads/degraded_images/blue_tint",
    "C:/Users/abhis/Downloads/degraded_images/red_tint",
    "C:/Users/abhis/Downloads/degraded_images/noisy",
    "C:/Users/abhis/Downloads/degraded_images/blurry",
    "C:/Users/abhis/Downloads/degraded_images/hazy",
    "C:/Users/abhis/Downloads/degraded_images/high_contrast",
    "C:/Users/abhis/Downloads/degraded_images/low_illumination"
]

# Load features and labels (features are already standardised and SparsePCA-reduced)
features, labels, image_paths = load_and_extract_features(degraded_folders)

# Visualize 10 random images with their features
visualize_features(image_paths, features)

# Encode the string labels as integers; label_encoder.classes_ keeps the names for reports
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split the dataset into training (80%) and testing (20%) sets with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)

# Classifiers to compare, keyed by the display name used in the printed results
classifiers = {
    "Gaussian Naive Bayes": GaussianNB(),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
    "Logistic Regression": LogisticRegression(max_iter=200),
    "SVM": SVC(kernel='linear', C=1.0, random_state=42),
    "MLP": MLPClassifier(hidden_layer_sizes=(100,), max_iter=500),
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "Gradient Boost": GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42),
    "Decision Tree": DecisionTreeClassifier(max_depth=10, random_state=42),
    "Multinomial Naive Bayes": MultinomialNB()
}

# Train and evaluate classifiers
# Imported here because only the MultinomialNB branch below needs it.
from sklearn.preprocessing import MinMaxScaler

best_accuracy = 0
best_classifier_name = ""

for name, clf in classifiers.items():
    print(f"Training {name}...")
    if name == "Multinomial Naive Bayes":
        # MultinomialNB.fit rejects negative inputs, and standardised data is
        # centred on zero, so rescale to [0, 1] using the training range instead.
        scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        # Test values outside the training range would map below 0; clip them.
        X_test_scaled = np.clip(scaler.transform(X_test), 0, None)
        clf.fit(X_train_scaled, y_train)
        y_pred = clf.predict(X_test_scaled)
    else:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy}")
    print(f"Classification Report for {name}:\n", classification_report(y_test, y_pred, target_names=label_encoder.classes_))

    # Keep track of the best-scoring classifier seen so far
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_classifier_name = name

print(f"Best Classifier: {best_classifier_name} with Accuracy: {best_accuracy}")


### Sparse+PCA method

* Gaussian Naive Bayes Accuracy: 0.3848314606741573

* K-Nearest Neighbors Accuracy: 0.7591292134831461

* Logistic Regression Accuracy: 0.5639044943820225

* SVM Accuracy: 0.6144662921348315

* MLP Accuracy: 0.7191011235955056

* Random Forest Accuracy: 0.7851123595505618

* Gradient Boost Accuracy: 0.7612359550561798

* Decision Tree Accuracy: 0.7415730337078652

* Multinomial Naive Bayes Accuracy: 0.15660112359550563

### Output Explanation

1. **Gaussian Naive Bayes (38% accuracy):** This classifier assumes feature independence and may struggle due to the complex dependencies in image data, leading to lower accuracy.

2. **K-Nearest Neighbors (76% accuracy):** KNN benefits from the simplified feature space after SparsePCA, allowing it to effectively classify images based on feature similarity.

3. **Logistic Regression (56% accuracy):** Logistic Regression attempts to find linear decision boundaries, but the reduced feature space might not capture all complex patterns, resulting in moderate accuracy.

4. **SVM (61% accuracy):** SVM's linear kernel helps it find separating hyperplanes, but the reduced complexity of the data makes it less effective compared to non-linear kernels.

5. **MLP (72% accuracy):** The Multi-Layer Perceptron can learn complex, non-linear relationships even in the reduced feature space, resulting in relatively high accuracy.

6. **Random Forest (79% accuracy):** Random Forest's ensemble of decision trees captures patterns effectively, even with reduced dimensions, providing high accuracy.

7. **Gradient Boost (76% accuracy):** Gradient Boosting incrementally corrects errors and performs well in capturing patterns, leading to high accuracy despite feature reduction.

8. **Decision Tree (74% accuracy):** Decision Trees adapt well to the simplified feature space, finding decision boundaries that separate classes effectively.

9. **Multinomial Naive Bayes (16% accuracy):** This classifier expects non-negative count features (like word counts). The standardized SparsePCA components are real-valued and can be negative, so the model is a poor fit for these features, which is reflected in its near-chance accuracy.



### 5. Feature Extraction and Classifier Using Sparse+HOG

In [None]:
import os
import cv2
import numpy as np
from skimage.feature import hog
from sklearn.decomposition import SparsePCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

def process_image(image_path):
    """Load one image and return its colour-histogram + HOG feature vector.

    Args:
        image_path: Path of the image file to read.

    Returns:
        1-D array holding the 512 values of an 8x8x8 colour histogram followed
        by 3780 HOG values, or ``None`` if the image could not be read.
    """
    image = cv2.imread(image_path)

    if image is None:
        print(f"Failed to load image: {os.path.basename(image_path)}")
        return None

    # Convert to grayscale for HOG
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Resize to a fixed 64x128 (width x height) window before HOG. With 8x8
    # cells, 2x2 blocks and 9 orientations this yields 7 * 15 * 36 = 3780
    # values whose positions mean the same thing for every image. Padding or
    # truncating a native-resolution descriptor would misalign the cells.
    gray_image = cv2.resize(gray_image, (64, 128))

    # Extract HOG features
    hog_features = hog(gray_image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=False, feature_vector=True)

    # Extract color histogram features
    hist_features = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]).flatten()

    # Safety net: force the HOG part to exactly 3780 values so that all rows
    # of the feature matrix have the same length.
    if len(hog_features) < 3780:
        hog_features = np.pad(hog_features, (0, 3780 - len(hog_features)), 'constant')
    elif len(hog_features) > 3780:
        hog_features = hog_features[:3780]

    combined_features = np.hstack((hist_features, hog_features))
    return combined_features

def extract_sparse_hog_features(degraded_folders, batch_size=100):
    """Extract histogram + HOG features for every image and reduce them with SparsePCA.

    Images are processed by a thread pool, ``batch_size`` files at a time.
    Results are collected in submission order over a sorted file listing, so
    the row order of the returned matrix is the same on every run and the
    seeded train/test split downstream is reproducible.

    Args:
        degraded_folders: Folder paths; each folder's base name is the class
            label of the images inside it.
        batch_size: Number of images handed to the thread pool at a time.

    Returns:
        Tuple ``(features_spca, labels, image_paths)``: the 3-component
        SparsePCA projection, a NumPy array of label strings, and the list of
        paths of the images that were loaded, all in the same order.

    Raises:
        ValueError: If no image could be loaded from any folder.

    Side effects:
        Saves the reduced features to 'SparseHOGextractedcolor.npy' and the
        labels to 'SparseHOG_labels.npy' in the working directory.
    """
    features = []
    labels = []
    image_paths = []

    for folder in degraded_folders:
        label = os.path.basename(folder)
        print(f"Processing folder: {label}")
        start_time = time.time()

        # os.listdir order is arbitrary; sort for a stable sample order
        image_files = [os.path.join(folder, image_file) for image_file in sorted(os.listdir(folder))]

        # One pool per folder, fed batch by batch
        with ThreadPoolExecutor() as executor:
            for batch_start in range(0, len(image_files), batch_size):
                batch_files = image_files[batch_start:batch_start + batch_size]

                # executor.map yields results in submission order
                results = executor.map(process_image, batch_files)
                for offset, (image_file, result) in enumerate(zip(batch_files, results)):
                    if result is not None:
                        features.append(result)
                        labels.append(label)
                        image_paths.append(image_file)

                    # Print progress
                    done = batch_start + offset + 1
                    if done % 10 == 0:
                        elapsed_time = time.time() - start_time
                        print(f"Processed {done} images in {elapsed_time:.2f} seconds")

    if not features:
        raise ValueError("No readable images found in the given folders")

    features = np.array(features)
    labels = np.array(labels)

    # Standardize the features
    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(features)

    # Apply SparsePCA
    spca = SparsePCA(n_components=3, random_state=42)
    features_spca = spca.fit_transform(features_scaled)

    # Save features and labels
    np.save('SparseHOGextractedcolor.npy', features_spca)
    np.save('SparseHOG_labels.npy', labels)

    return features_spca, labels, image_paths

def visualize_features(image_paths, features):
    """Show up to 10 randomly chosen images, each titled with its feature vector.

    Args:
        image_paths: Sequence of image file paths, aligned index-for-index
            with ``features``.
        features: Per-image feature vectors.

    When fewer than 10 images are available all of them are shown; nothing is
    displayed for empty input.
    """
    # Sampling without replacement cannot draw more items than exist
    sample_size = min(10, len(image_paths))
    if sample_size == 0:
        return

    indices = np.random.choice(len(image_paths), sample_size, replace=False)
    for i in indices:
        image_path = image_paths[i]
        feature = features[i]
        print(f"Image: {os.path.basename(image_path)}, Features: {feature}")
        # OpenCV loads BGR; matplotlib expects RGB
        plt.imshow(cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB))
        plt.title(f"Features: {feature}")
        plt.show()

# Paths to the folders containing degraded images
# (one folder per class; the folder's base name becomes the class label)
degraded_folders = [
    "C:/Users/abhis/Downloads/degraded_images/green_tint",
    "C:/Users/abhis/Downloads/degraded_images/blue_tint",
    "C:/Users/abhis/Downloads/degraded_images/red_tint",
    "C:/Users/abhis/Downloads/degraded_images/noisy",
    "C:/Users/abhis/Downloads/degraded_images/blurry",
    "C:/Users/abhis/Downloads/degraded_images/hazy",
    "C:/Users/abhis/Downloads/degraded_images/high_contrast",
    "C:/Users/abhis/Downloads/degraded_images/low_illumination"
]

# Load features and labels (features are already standardised and SparsePCA-reduced)
features, labels, image_paths = extract_sparse_hog_features(degraded_folders)

# Visualize 10 random images with their features
visualize_features(image_paths, features)

# Encode the string labels as integers; label_encoder.classes_ keeps the names for reports
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split the dataset into training (80%) and testing (20%) sets with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)

# Classifiers to compare, keyed by the display name used in the printed results
classifiers = {
    "Gaussian Naive Bayes": GaussianNB(),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
    "Logistic Regression": LogisticRegression(max_iter=200),
    "SVM": SVC(kernel='linear', C=1.0, random_state=42),
    "MLP": MLPClassifier(hidden_layer_sizes=(100,), max_iter=500),
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "Gradient Boost": GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42),
    "Decision Tree": DecisionTreeClassifier(max_depth=10, random_state=42)
}

# Train and evaluate classifiers
best_accuracy = 0
best_classifier_name = ""

for name, clf in classifiers.items():
    print(f"Training {name}...")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.2f}")
    print(f"Classification Report for {name}:\n", classification_report(y_test, y_pred, target_names=label_encoder.classes_))

    if accuracy >= 1.0:
        # A perfect score is a valid result and must not be dropped from the
        # comparison, but it deserves a second look, so flag it explicitly.
        print(f"Note: {name} reached 100% test accuracy; check for data leakage or duplicate images")

    # Keep track of the best-scoring classifier seen so far
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_classifier_name = name

print(f"Best Classifier: {best_classifier_name} with Accuracy: {best_accuracy:.2f}")


### Sparse+HOG

* Gaussian Naive Bayes Accuracy: 0.32
* K-Nearest Neighbors Accuracy: 0.35
* Logistic Regression Accuracy: 0.34
* SVM Accuracy: 0.35
* MLP Accuracy: 0.38
* Random Forest Accuracy: 0.35
* Gradient Boost Accuracy: 0.38
* Decision Tree Accuracy: 0.35
* Bayes Accuracy: 0.30


### Output Explanation

1. **Gaussian Naive Bayes (32% accuracy):** The low accuracy reflects Gaussian Naive Bayes' assumption of feature independence, which is not well suited to the SparsePCA-reduced combination of HOG and color-histogram features.

2. **K-Nearest Neighbors (35% accuracy):** KNN struggles because the aggressive reduction to three SparsePCA components may not retain enough information to differentiate classes effectively.

3. **Logistic Regression (34% accuracy):** Logistic Regression finds linear decision boundaries, but the reduced feature space might not fully capture complex class separations.

4. **SVM (35% accuracy):** The linear kernel of SVM might not be complex enough to differentiate between classes in the reduced feature space.

5. **MLP (38% accuracy):** The Multi-Layer Perceptron can learn non-linear patterns and has slightly better performance, but the reduced complexity limits its potential.

6. **Random Forest (35% accuracy):** While Random Forest is typically robust, the feature reduction limits its ability to capture diverse patterns across many trees.

7. **Gradient Boost (38% accuracy):** Similar to Random Forest, Gradient Boosting performs better than some models but is limited by the reduced complexity of the features.

8. **Decision Tree (35% accuracy):** Decision Trees perform similarly to Random Forest, as they might overfit to the reduced, potentially incomplete feature set.

9. **Bayes (30% accuracy):** This score suggests challenges in capturing dependencies between features in the reduced dataset, leading to low performance.

The relatively low accuracy across classifiers is due to the feature extraction and reduction process that might not preserve enough distinguishing information for effective classification across complex image degradations.

### 6. Feature Extractor Using HOG+PCA

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from skimage.feature import hog
import time
import random

# Define directories: one folder per degradation class.
# The folder's base name is later used as the class label of its images.
directories = [
    "C:/Users/abhis/Downloads/degraded_images/green_tint",
    "C:/Users/abhis/Downloads/degraded_images/blue_tint",
    "C:/Users/abhis/Downloads/degraded_images/red_tint",
    "C:/Users/abhis/Downloads/degraded_images/noisy",
    "C:/Users/abhis/Downloads/degraded_images/blurry",
    "C:/Users/abhis/Downloads/degraded_images/hazy",
    "C:/Users/abhis/Downloads/degraded_images/high_contrast",
    "C:/Users/abhis/Downloads/degraded_images/low_illumination"
]

# Function to extract PCA color features
def extract_pca_color_features(image, max_components=10):
    """Describe an image's colour distribution by a PCA of its RGB pixels.

    The pixels are scaled to [0, 1] and a PCA is fitted with as many components
    as the data allows (at most ``max_components``). The flattened principal
    axes are zero-padded to ``max_components * 3`` values and the explained
    variance ratios to ``max_components`` values.

    Args:
        image: Colour image as loaded by OpenCV (converted from BGR to RGB here).
        max_components: Upper bound on the number of PCA components; also fixes
            the length of the returned vector.

    Returns:
        1-D array of length ``max_components * 4``: padded axes followed by
        padded explained-variance ratios.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pixels = rgb.reshape(-1, 3).astype(float) / 255.0

    # PCA cannot have more components than samples or features
    n_components = min(max_components, *pixels.shape)
    pca = PCA(n_components=n_components).fit(pixels)

    flat_axes = pca.components_.flatten()
    variance = pca.explained_variance_ratio_

    # Zero-pad both parts so every image yields a vector of the same length
    padded_axes = np.pad(flat_axes, (0, max_components * 3 - flat_axes.size))
    padded_variance = np.pad(variance, (0, max_components - variance.size))
    return np.concatenate((padded_axes, padded_variance))

# Function to extract HOG color features
def extract_hog_color_features(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), orientations=9):
    """Compute a HOG descriptor for each colour channel and join them.

    The image is resized to 128x128 so the descriptor length does not depend on
    the input resolution, converted from BGR to RGB, and HOG is run separately
    on each of the three channels.

    Args:
        image: Colour image as loaded by OpenCV.
        pixels_per_cell: HOG cell size, passed through to ``hog``.
        cells_per_block: HOG block size, passed through to ``hog``.
        orientations: Number of orientation bins, passed through to ``hog``.

    Returns:
        1-D array with the three per-channel descriptors back to back.
    """
    rgb = cv2.cvtColor(cv2.resize(image, (128, 128)), cv2.COLOR_BGR2RGB)
    per_channel = [
        hog(
            rgb[:, :, ch],
            pixels_per_cell=pixels_per_cell,
            cells_per_block=cells_per_block,
            orientations=orientations,
            visualize=False
        )
        for ch in range(3)  # one descriptor per color channel
    ]
    return np.concatenate(per_channel)

# Lists to hold all extracted features, labels and source file paths.
# The three lists are kept parallel: entry i of each refers to the same image.
all_features = []
all_labels = []
all_paths = []

start_time = time.time()
processed_images = 0

for folder in directories:
    print(f"Processing folder: {folder}")
    for filename in os.listdir(folder):
        filepath = os.path.join(folder, filename)
        try:
            # Read the image
            image = cv2.imread(filepath)
            if image is not None:
                # Extract PCA and HOG features and concatenate them
                pca_features = extract_pca_color_features(image)
                hog_features = extract_hog_color_features(image)
                combined_features = np.concatenate((pca_features, hog_features))
                all_features.append(combined_features)
                all_labels.append(os.path.basename(folder))
                all_paths.append(filepath)
                processed_images += 1
                print(f"Processed {filename} successfully.")
            else:
                print(f"Failed to read {filename}.")
        except Exception as e:
            # Best effort: report the problem and continue with the next file
            print(f"Error processing {filename}: {e}")

        # Print progress update
        elapsed_time = time.time() - start_time
        if processed_images > 0 and elapsed_time > 0:
            print(f"Current processing speed: {processed_images / elapsed_time:.2f} images/sec")
        else:
            print("Processing speed cannot be calculated yet.")

    print(f"Finished processing folder: {folder}")

# Save the extracted features and labels
np.save("Combined_PCA_HOG_color_features.npy", np.array(all_features))
np.save("Combined_PCA_HOG_color_labels.npy", np.array(all_labels))

print("Feature extraction complete.")

# Print 10 random images with their combined PCA and HOG features
sample_indices = random.sample(range(len(all_features)), min(10, len(all_features)))
for idx in sample_indices:
    img_path = all_paths[idx]
    print(f"Image: {all_labels[idx]}")
    print(f"Combined PCA and HOG Features: {all_features[idx]}")

    # Visualization (optional): show the very image these features came from,
    # not just an arbitrary file of the same class.
    preview = cv2.imread(img_path)
    if preview is not None:
        plt.imshow(cv2.cvtColor(preview, cv2.COLOR_BGR2RGB))
        plt.title(f"Combined Features for {os.path.basename(img_path)}")
        plt.show()
    else:
        print(f"Could not reload {img_path}")


### Classifier for HOG+PCA

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

# Load features and labels
# NOTE(review): these are read from D:/IIT, while the extraction cell saves
# files of the same name to the working directory; confirm they are the same files.
features_path = "D:/IIT/Combined_PCA_HOG_color_features.npy"
labels_path = "D:/IIT/Combined_PCA_HOG_color_labels.npy"
features = np.load(features_path)
labels = np.load(labels_path)

# Check the unique labels to ensure they are correct
print("Unique labels:", np.unique(labels))

# Reduce dimensionality with PCA
# NOTE(review): the PCA is fitted on all samples before the train/test split,
# so the projection has seen the test rows; consider fitting it on the training part only.
pca = PCA(n_components=500)  # Adjust the number of components based on memory limitations and performance
features_reduced = pca.fit_transform(features)

# Split data into train and test sets (80/20, stratified by label, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(features_reduced, labels, test_size=0.2, random_state=42, stratify=labels)

# Define classifiers with hyperparameter grids.
# Each entry is (estimator, param_grid); grid keys use the 'classifier__' prefix
# because the estimator is the pipeline step named 'classifier'.
classifiers = {
    "Gaussian Naive Bayes": (GaussianNB(), {}),
    "K-Nearest Neighbors": (KNeighborsClassifier(), {'classifier__n_neighbors': [3, 5, 7, 9]}),
    "Logistic Regression": (LogisticRegression(max_iter=1000), {'classifier__C': [0.1, 1, 10]}),
    "SVM": (SVC(), {'classifier__C': [0.1, 1, 10], 'classifier__kernel': ['rbf', 'linear']}),
    "MLP": (MLPClassifier(max_iter=1000), {'classifier__hidden_layer_sizes': [(50,), (100,), (50, 50)]}),
    "Random Forest": (RandomForestClassifier(), {'classifier__n_estimators': [50, 100, 200]}),
    "Gradient Boosting": (GradientBoostingClassifier(), {'classifier__n_estimators': [50, 100, 200], 'classifier__learning_rate': [0.01, 0.1]}),
    "Decision Tree": (DecisionTreeClassifier(), {'classifier__max_depth': [5, 10, None]})
}

# Create a pipeline with scaling and classifier
def create_pipeline(clf):
    """Wrap ``clf`` in a two-step pipeline: standardisation, then the classifier.

    The step names ('scaler', 'classifier') are fixed; hyperparameter grids
    address the estimator through the 'classifier__' prefix.

    Args:
        clf: The estimator to place in the final pipeline step.

    Returns:
        The assembled ``Pipeline``.
    """
    steps = [('scaler', StandardScaler())]
    steps.append(('classifier', clf))
    return Pipeline(steps)

# Train and evaluate each classifier
# best_classifiers maps display name -> (tuned pipeline, test accuracy)
best_classifiers = {}

for name, (clf, param_grid) in classifiers.items():
    print(f"Training {name}...")
    pipeline = create_pipeline(clf)
    # 5-fold cross-validated grid search on the training split, using all cores
    grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    y_pred = grid_search.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} accuracy: {accuracy:.2f}")
    print(classification_report(y_test, y_pred))

    best_classifiers[name] = (grid_search.best_estimator_, accuracy)

# Create a voting classifier
# Soft voting averages the members' predict_proba outputs, so only estimators
# that expose predict_proba can take part (an SVC without probability=True
# does not, and would make voting_clf.predict fail).
soft_voters = []
for name, (clf, _) in best_classifiers.items():
    if hasattr(clf, "predict_proba"):
        soft_voters.append((name, clf))
    else:
        print(f"Skipping {name} in soft voting: no predict_proba available")

voting_clf = VotingClassifier(
    estimators=soft_voters,
    voting='soft'
)

voting_clf.fit(X_train, y_train)
y_pred_voting = voting_clf.predict(X_test)
voting_accuracy = accuracy_score(y_test, y_pred_voting)

print("\nVoting Classifier Results:")
print(f"Accuracy: {voting_accuracy:.2f}")
print(classification_report(y_test, y_pred_voting))

# Find the best individual classifier (highest test accuracy)
best_classifier = max(best_classifiers.items(), key=lambda x: x[1][1])
print(f"\nBest individual classifier: {best_classifier[0]} with accuracy {best_classifier[1][1]:.2f}")


### HOG + PCA method

* Gaussian Naive Bayes accuracy: 0.42
* K-Nearest Neighbors accuracy: 0.13
* Logistic Regression accuracy: 0.55
* SVM accuracy: 0.51
* MLP accuracy: 0.58
* Random Forest accuracy: 0.37
* Gradient Boosting accuracy: 0.67
* Decision Tree accuracy: 0.56
* Bayes accuracy: 0.32

### Output Explanation

1. **Gaussian Naive Bayes (42% accuracy):** This classifier performs moderately well because it assumes feature independence, which can be a reasonable approximation when using features that capture distinct aspects of the data.

2. **K-Nearest Neighbors (13% accuracy):** The low accuracy is due to KNN's sensitivity to feature scaling and to the sparsity of data points in high-dimensional space, making it less effective with these PCA-reduced HOG features.

3. **Logistic Regression (55% accuracy):** Logistic Regression performs decently as it models linear decision boundaries, which capture part of the class structure in this feature space.

4. **SVM (51% accuracy):** Support Vector Machine uses linear or RBF kernels to find a hyperplane that separates classes; while useful, it achieves only moderate accuracy on these features.

5. **MLP (58% accuracy):** The Multi-Layer Perceptron, a type of neural network, shows good performance as it can learn non-linear patterns in the data, benefiting from both PCA and HOG features.

6. **Random Forest (37% accuracy):** The lower accuracy is likely due to Random Forest's dependence on multiple decision trees, which may overfit to noise in the reduced feature set.

7. **Gradient Boosting (67% accuracy):** This classifier achieves the highest accuracy because it iteratively builds an ensemble of weak learners that effectively capture complex patterns, leveraging the PCA and HOG features.

8. **Decision Tree (56% accuracy):** Decision Trees can model non-linear boundaries and work well with reduced feature sets, but can also overfit without careful pruning.

9. **Bayes (32% accuracy):** This reflects a simpler model's limitations in capturing complex patterns inherent in the dataset with reduced feature representation.

The use of PCA and HOG features together enhances the classifiers' ability to capture color and texture information, but the effectiveness varies across models depending on how well they can leverage this combined feature space.

### 7. Feature Extraction using Colour Histogram

In [None]:
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

def load_features_and_labels(degraded_folders):
    """Compute a colour histogram for every readable image in the given folders.

    Each folder's base name is the class label of the images inside it. Files
    that OpenCV cannot read are skipped silently.

    Args:
        degraded_folders: Iterable of folder paths, one per degradation class.

    Returns:
        Tuple ``(features, labels)``: an array with one flattened 8x8x8
        histogram (512 values) per image, and an array with the matching
        label strings.
    """
    histograms, class_names = [], []
    for folder in degraded_folders:
        class_name = os.path.basename(folder)
        for entry in os.listdir(folder):
            bgr = cv2.imread(os.path.join(folder, entry))
            if bgr is not None:
                hist = cv2.calcHist([bgr], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
                histograms.append(hist.flatten())
                class_names.append(class_name)

    return np.array(histograms), np.array(class_names)

# One folder per degradation class; the folder's base name becomes the class label
degraded_folders = [
    "C:\\Users\\abhis\\Downloads\\degraded_images\\green_tint",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\blue_tint",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\red_tint",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\noisy",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\blurry",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\hazy",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\high_contrast",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\low_illumination"
]

# Raw (unscaled) colour-histogram features and their string labels
features, labels = load_features_and_labels(degraded_folders)

# Encode labels as integers; note that `labels` is overwritten with the encoded values
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the dataset into training (80%) and testing (20%) sets with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)


### Classifier using Colour Histogram

In [None]:
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

def load_features_and_labels(degraded_folders):
    """Compute a colour histogram for every readable image in the given folders.

    Each folder's base name is the class label of the images inside it. Files
    that OpenCV cannot read are skipped silently.

    Args:
        degraded_folders: Iterable of folder paths, one per degradation class.

    Returns:
        Tuple ``(features, labels)``: an array with one flattened 8x8x8
        histogram (512 values) per image, and an array with the matching
        label strings.
    """
    histograms, class_names = [], []
    for folder in degraded_folders:
        class_name = os.path.basename(folder)
        for entry in os.listdir(folder):
            bgr = cv2.imread(os.path.join(folder, entry))
            if bgr is not None:
                hist = cv2.calcHist([bgr], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
                histograms.append(hist.flatten())
                class_names.append(class_name)

    return np.array(histograms), np.array(class_names)

# Define folders: one per degradation class; the base name becomes the class label
degraded_folders = [
    "C:\\Users\\abhis\\Downloads\\degraded_images\\green_tint",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\blue_tint",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\red_tint",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\noisy",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\blurry",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\hazy",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\high_contrast",
    "C:\\Users\\abhis\\Downloads\\degraded_images\\low_illumination"
]

# Load features and labels (raw, unscaled colour histograms)
features, labels = load_features_and_labels(degraded_folders)

# Encode labels as integers; note that `labels` is overwritten with the encoded values
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the dataset into training (80%) and testing (20%) sets with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Classifiers to compare, keyed by the display name used in the printed results
classifiers = {
    "Gaussian Naive Bayes": GaussianNB(),
    "Bernoulli Naive Bayes": BernoulliNB(),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
    "Logistic Regression": LogisticRegression(max_iter=200),
    "MLP Classifier": MLPClassifier(hidden_layer_sizes=(100,), max_iter=500),
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "Decision Tree": DecisionTreeClassifier(),
    "Gradient Boosting": GradientBoostingClassifier()
}

# Train and evaluate each classifier on the same train/test split
for name, clf in classifiers.items():
    print(f"\nTraining {name}...")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f"{name} Accuracy: {accuracy_score(y_test, y_pred)}")
    # label_encoder.classes_ restores the folder names in the per-class report
    print(f"Classification Report:\n{classification_report(y_test, y_pred, target_names=label_encoder.classes_)}")


* Gaussian Naive Bayes - 90.38%
* KNN - 90.24%
* SVM - 93.11%
* Decision Tree - 92.69%
* Naive Bayes - 89.46%
* Logistic Regression - 94.03%
* MLP - 91.92%
* Random Forest Classifier - 97.40%
* Gradient Boost - 96.83%


### Output Explanation

1. **Gaussian Naive Bayes (90.38% accuracy):** This classifier performs well with the color histogram data as it assumes feature independence, effectively using the distributions of pixel values.

2. **K-Nearest Neighbors (90.24% accuracy):** KNN works well by finding similarities in color distributions between images, which is why it achieves relatively high accuracy with this type of feature.

3. **Support Vector Machine (SVM) (93.11% accuracy):** The SVM is effective at finding the optimal hyperplane for classification, making it a robust choice for the given histogram features.

4. **Decision Tree (92.69% accuracy):** The Decision Tree can capture complex interactions between color features, allowing it to perform well in classifying the images based on their histograms.

5. **Multinomial Naive Bayes (89.46% accuracy):** This classifier is slightly less effective with continuous data but still achieves good performance due to the feature representation's discreteness.

6. **Logistic Regression (94.03% accuracy):** Logistic Regression models linear relationships between features and labels well, resulting in high accuracy with the color histogram data.

7. **MLP Classifier (91.92% accuracy):** The MLP's ability to learn non-linear patterns helps it to effectively classify the images based on the histograms.

8. **Random Forest Classifier (97.40% accuracy):** Random Forest achieves the highest accuracy due to its ensemble approach, combining multiple decision trees to capture a wide range of patterns in the data.

9. **Gradient Boosting (96.83% accuracy):** This model is highly effective due to its iterative approach to improving predictions, capturing complex patterns within the histogram features.

Overall, the color histogram effectively represents the distribution of pixel intensities, which these classifiers can leverage to distinguish between different types of image degradations. Ensemble methods like Random Forest and Gradient Boosting are particularly effective because they combine the strengths of multiple models to achieve high accuracy.

### 8. Feature Extractor using Different Color extraction methods

In [None]:
import os
import random
import cv2
import numpy as np
from glob import glob
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Define feature extraction methods
def extract_dominant_color_descriptor(image, num_colors=5):
    """Return the image's dominant colors as a flat RGB feature vector.

    Pixels are clustered with k-means and the cluster centers are returned
    ordered from the most to the least populated cluster, so that position
    ``i`` of the vector always means "i-th most dominant color".

    Args:
        image: BGR image array (the reshape below assumes 3 channels).
        num_colors: Number of k-means clusters, i.e. dominant colors.

    Returns:
        1-D float array of length ``num_colors * 3`` (R, G, B per color).
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pixels = image.reshape(-1, 3)
    kmeans = KMeans(n_clusters=num_colors, random_state=42)
    kmeans.fit(pixels)
    # k-means numbers its clusters arbitrarily; rank them by pixel count so
    # the layout of the feature vector is comparable from image to image.
    cluster_sizes = np.bincount(kmeans.labels_, minlength=num_colors)
    dominance_order = np.argsort(-cluster_sizes, kind="stable")
    return kmeans.cluster_centers_[dominance_order].flatten()

def extract_color_coherence_vector(image, num_bins=64, threshold=0.1):
    """Count coherent / incoherent pixels per intensity bin and channel.

    Every channel value is quantized into ``num_bins`` equal bins over
    [0, 256). A pixel is *coherent* when the number of pixels in its 3x3
    neighbourhood (clipped at the image border, the pixel itself included)
    that share its bin is at least ``threshold`` times the neighbourhood size.

    NOTE(review): because the pixel always matches itself, any
    ``threshold <= 1/9`` (including the default 0.1) marks every pixel as
    coherent; pass a larger threshold for a discriminative descriptor.

    Args:
        image: Array of shape (H, W, C) with values in [0, 256).
        num_bins: Number of quantization bins per channel.
        threshold: Minimum fraction of same-bin pixels in the neighbourhood.

    Returns:
        1-D int array of length ``num_bins * C * 2`` laid out as
        (bin, channel, [incoherent, coherent]).
    """
    h, w, c = image.shape
    bins = np.linspace(0, 256, num_bins + 1, endpoint=True)
    quantized = np.digitize(image, bins) - 1

    # Pad with a sentinel that can never equal a bin index (digitize - 1 is
    # always >= -1), so out-of-image neighbours never count as matches.
    padded = np.pad(quantized, ((1, 1), (1, 1), (0, 0)),
                    mode="constant", constant_values=-2)
    inside = np.pad(np.ones((h, w), dtype=int), 1,
                    mode="constant", constant_values=0)

    matches = np.zeros((h, w, c), dtype=int)
    region_size = np.zeros((h, w), dtype=int)
    for dy in range(3):
        for dx in range(3):
            matches += padded[dy:dy + h, dx:dx + w] == quantized
            region_size += inside[dy:dy + h, dx:dx + w]

    coherent = matches >= region_size[:, :, np.newaxis] * threshold

    ccv = np.zeros((num_bins, c, 2), dtype=int)
    channel_index = np.broadcast_to(np.arange(c), quantized.shape)
    # The flag must be an *integer* 0/1 index: a NumPy boolean scalar used as
    # an index acts as a mask, which would bump both slots when True and
    # neither when False.
    np.add.at(ccv, (quantized, channel_index, coherent.astype(int)), 1)
    return ccv.flatten()

def extract_color_layout_descriptor(image, grid_size=(8, 8)):
    """Return the mean color of each cell of a fixed grid laid over the image.

    The image is always split into exactly ``grid_size[0] x grid_size[1]``
    cells, so the descriptor length does not depend on the image size. When a
    dimension is an exact multiple of the grid the cells are the same equal
    tiles as a plain ``h // rows`` stride would give.

    Args:
        image: Array of shape (H, W, C).
        grid_size: (rows, cols) of the grid.

    Returns:
        1-D float array of length ``rows * cols * C`` in row-major cell order.

    Raises:
        ValueError: If the image has no pixels.
    """
    h, w, _ = image.shape
    rows, cols = grid_size
    if h == 0 or w == 0:
        raise ValueError("image must contain at least one pixel")

    # Integer cell boundaries: exactly rows/cols cells even when the image
    # size is not a multiple of the grid.
    row_edges = [(i * h) // rows for i in range(rows + 1)]
    col_edges = [(j * w) // cols for j in range(cols + 1)]

    cld = []
    for top, bottom in zip(row_edges[:-1], row_edges[1:]):
        # Images smaller than the grid would give empty cells; widen them to
        # one pixel so the mean is always defined.
        bottom = max(bottom, top + 1)
        for left, right in zip(col_edges[:-1], col_edges[1:]):
            right = max(right, left + 1)
            cell = image[top:bottom, left:right]
            cld.extend(np.mean(cell, axis=(0, 1)))
    return np.array(cld)

def extract_hsv_histogram(image, bins=(16, 16, 16)):
    """Build a flattened, normalized 3-D histogram of the image in HSV space.

    Args:
        image: BGR image array.
        bins: Number of histogram bins for the H, S and V channels.

    Returns:
        1-D array with ``bins[0] * bins[1] * bins[2]`` entries, normalized
        with ``cv2.normalize``'s default settings.
    """
    # Value ranges handed to calcHist: hue in [0, 180), S and V in [0, 256).
    channel_ranges = [0, 180, 0, 256, 0, 256]
    hsv_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    histogram = cv2.calcHist([hsv_pixels], [0, 1, 2], None, bins, channel_ranges)
    histogram = cv2.normalize(histogram, histogram)
    return histogram.flatten()

def extract_color_name_descriptor(image):
    """Describe an image by which named colors its k-means centers resemble.

    The pixels are clustered into as many groups as there are named colors;
    each cluster center then votes for the named color nearest to it in RGB
    (Euclidean distance).

    Args:
        image: BGR image array (the reshape below assumes 3 channels).

    Returns:
        1-D float array with one vote count per named color, in the order
        the named colors are listed below.
    """
    named_colors = [
        ("red", (255, 0, 0)),
        ("green", (0, 255, 0)),
        ("blue", (0, 0, 255)),
        ("yellow", (255, 255, 0)),
        ("magenta", (255, 0, 255)),
        ("cyan", (0, 255, 255)),
        ("black", (0, 0, 0)),
        ("white", (255, 255, 255)),
        ("gray", (128, 128, 128)),
        ("orange", (255, 165, 0)),
        ("brown", (165, 42, 42)),
        ("pink", (255, 192, 203)),
    ]
    reference_rgb = [np.array(rgb) for _, rgb in named_colors]

    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).reshape(-1, 3)
    clustering = KMeans(n_clusters=len(named_colors), random_state=42).fit(rgb_pixels)

    votes = np.zeros(len(named_colors))
    for center in clustering.cluster_centers_:
        nearest = np.argmin([np.linalg.norm(center - ref) for ref in reference_rgb])
        votes[nearest] += 1
    return votes

def extract_opponency_color_features(image):
    """Return per-pixel red-green and yellow-blue opponent channels.

    Computes ``rg = R - G`` and ``yb = 0.5 * (R + G) - B`` for every pixel.
    The arithmetic is done in float64: on the uint8 arrays an image loader
    returns, ``R - G`` would wrap around for negative differences and
    ``R + G`` would overflow above 255.

    Args:
        image: BGR image array of shape (H, W, C) with C >= 3.

    Returns:
        1-D float64 array of length ``2 * H * W``: all ``rg`` values in
        row-major order followed by all ``yb`` values.

    Raises:
        ValueError: If ``image`` is not a 3-D array with at least 3 channels.
    """
    pixels = np.asarray(image, dtype=np.float64)
    if pixels.ndim != 3 or pixels.shape[2] < 3:
        raise ValueError("image must have shape (H, W, C) with at least 3 channels")
    # Channel order is BGR, so red is the last of the three color planes.
    b, g, r = pixels[..., 0], pixels[..., 1], pixels[..., 2]
    rg = r - g
    yb = 0.5 * (r + g) - b
    return np.concatenate((rg.flatten(), yb.flatten()))

# Load images from folder
def load_images(folder_path, num_images=50, seed=None):
    """Load a random sample of images from a folder.

    Args:
        folder_path: Directory to read from.
        num_images: Maximum number of files to sample.
        seed: Optional seed for the sample. With a seed the same files are
            chosen on every run; with ``None`` the module-level ``random``
            state is used, as before.

    Returns:
        List of images as returned by ``cv2.imread``. Files that fail to
        decode are reported and skipped, so the list may be shorter than
        ``num_images``.
    """
    print(f"Loading images from {folder_path}")
    # glob order depends on the filesystem; sort so a seeded sample is
    # reproducible, and drop sub-directories, which can never be decoded.
    image_paths = sorted(
        path for path in glob(os.path.join(folder_path, '*')) if os.path.isfile(path)
    )
    rng = random if seed is None else random.Random(seed)
    selected_images = rng.sample(image_paths, min(num_images, len(image_paths)))
    images = []
    for image_path in selected_images:
        print(f"Processing image: {os.path.basename(image_path)}")
        image = cv2.imread(image_path)
        if image is not None:
            images.append(image)
        else:
            print(f"Failed to load image: {os.path.basename(image_path)}")
    return images

# Run one feature extractor over every image loaded from a folder
def process_folder(folder_path, feature_extraction_function):
    """Load the folder's images and extract one feature vector per image.

    Args:
        folder_path: Directory passed on to ``load_images``.
        feature_extraction_function: Callable taking one image and returning
            its feature vector.

    Returns:
        List of feature vectors, in the order the images were loaded.
    """
    images = load_images(folder_path)
    total = len(images)
    features = []
    for position, image in enumerate(images, start=1):
        features.append(feature_extraction_function(image))
        print(f"Extracted features from image {position}/{total}")
    return features

# Bring every feature vector to a common length
def pad_features(features, max_length):
    """Zero-pad or truncate each feature vector to ``max_length`` entries.

    Args:
        features: Iterable of 1-D feature vectors.
        max_length: Target length of every vector.

    Returns:
        NumPy array with one row per input vector.
    """
    def _fit(vector):
        shortfall = max_length - len(vector)
        if shortfall > 0:
            # Too short: append trailing zeros.
            return np.pad(vector, (0, shortfall), 'constant')
        # Long enough: keep the leading entries only.
        return vector[:max_length]

    return np.array([_fit(vector) for vector in features])

# Classifiers to evaluate
# The estimators that use randomness are pinned to the same seed as the
# train/test split and the k-means extractors, so accuracies are repeatable
# from run to run.
classifiers = {
    "GaussianNB": GaussianNB(),
    "KNeighborsClassifier": KNeighborsClassifier(n_neighbors=3),
    "LogisticRegression": LogisticRegression(max_iter=1000),
    "SVC": SVC(kernel='linear'),
    "MLPClassifier": MLPClassifier(max_iter=1000, random_state=42),
    "RandomForestClassifier": RandomForestClassifier(n_estimators=100, random_state=42),
    "GradientBoostingClassifier": GradientBoostingClassifier(n_estimators=100, random_state=42),
    "DecisionTreeClassifier": DecisionTreeClassifier(random_state=42),
}

# Define feature extraction methods
# Maps the display name used in the printed reports to the extractor function
# that is called with a single image. main() iterates this dict in insertion
# order, so the order here is the order in which the methods are evaluated.
feature_methods = {
    "Dominant Color Descriptor": extract_dominant_color_descriptor,
    "Color Coherence Vector": extract_color_coherence_vector,
    "Color Layout Descriptor": extract_color_layout_descriptor,
    "HSV Histogram": extract_hsv_histogram,
    "Color Name Descriptor": extract_color_name_descriptor,
    "Opponency Color Features": extract_opponency_color_features
}

# Degradation sub-folders; each folder name doubles as the class label.
# The three color tints come first, followed by the remaining effects.
folders = [f"{channel}_tint" for channel in ("green", "blue", "red")] + [
    "noisy",
    "blurry",
    "hazy",
    "high_contrast",
    "low_illumination",
]

# Load data, extract features, and classify
def main(base_path="C:/Users/abhis/Downloads/degraded_images/"):
    """Compare every classifier on every feature extraction method.

    For each entry of ``feature_methods`` the images of all degradation
    ``folders`` under ``base_path`` are turned into feature vectors, split
    80/20 into train and test sets, and each entry of ``classifiers`` is
    fitted and scored. Progress and accuracies are printed.

    Args:
        base_path: Directory that contains one sub-folder per degradation.

    Returns:
        Tuple ``(best_results, accuracy_results)``: ``best_results`` maps a
        method name to ``(classifier_name, accuracy)`` of its best
        classifier; ``accuracy_results`` maps method name -> classifier
        name -> accuracy.
    """
    best_results = {method_name: (None, 0) for method_name in feature_methods}
    accuracy_results = {method_name: {} for method_name in feature_methods}

    for method_name, feature_method in feature_methods.items():
        print(f"\nExtracting features using {method_name}...")

        raw_features = []
        all_labels = []

        for folder in folders:
            folder_path = os.path.join(base_path, folder)
            print(f"  Processing folder: {folder_path}")

            # Extract features
            features = process_folder(folder_path, feature_method)
            raw_features.extend(features)
            all_labels.extend([folder] * len(features))

        if not raw_features:
            print(f"  No features extracted for {method_name}; skipping.")
            continue

        # Pad against the longest vector across *all* folders. Padding each
        # folder to its own maximum leaves rows of different lengths whenever
        # the descriptor size depends on the image size, and an empty folder
        # would make max() fail.
        max_length = max(len(feature) for feature in raw_features)
        all_features = pad_features(raw_features, max_length)
        all_labels = np.array(all_labels)

        # Split data
        print("  Splitting data into training and testing sets...")
        X_train, X_test, y_train, y_test = train_test_split(all_features, all_labels, test_size=0.2, random_state=42)

        # Evaluate each classifier
        for clf_name, classifier in classifiers.items():
            print(f"    Training and evaluating {clf_name}...")
            classifier.fit(X_train, y_train)

            # Evaluate classifier
            y_pred = classifier.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            print(f"    Accuracy using {method_name} with {clf_name}: {accuracy * 100:.2f}%")

            # Keep track of the best classifier for this feature extraction method
            if accuracy > best_results[method_name][1]:
                best_results[method_name] = (clf_name, accuracy)

            accuracy_results[method_name][clf_name] = accuracy

    # Output best classifiers for each method
    for method_name, (best_clf, accuracy) in best_results.items():
        print(f"\nThe best classifier for {method_name} is {best_clf} with accuracy: {accuracy * 100:.2f}%")

    return best_results, accuracy_results

if __name__ == "__main__":
    main()



### 1. Dominant Color Descriptor

* Accuracy using Dominant Color Descriptor with GaussianNB: 88.75%
  
    Training and evaluating KNeighborsClassifier...

* Accuracy using Dominant Color Descriptor with KNeighborsClassifier: 86.25%
  
    Training and evaluating LogisticRegression...

* Accuracy using Dominant Color Descriptor with LogisticRegression: 77.50%
  
    Training and evaluating SVC...

* Accuracy using Dominant Color Descriptor with SVC: 80.00%
  
  Training and evaluating MLPClassifier...

* Accuracy using Dominant Color Descriptor with MLPClassifier: 65.00%
  
    Training and evaluating RandomForestClassifier...

* Accuracy using Dominant Color Descriptor with RandomForestClassifier: 91.25%
  
    Training and evaluating GradientBoostingClassifier...
  
* Accuracy using Dominant Color Descriptor with GradientBoostingClassifier: 92.50%
  
    Training and evaluating DecisionTreeClassifier...
  
* Accuracy using Dominant Color Descriptor with DecisionTreeClassifier: 71.25%

  The best classifier for Dominant Color Descriptor is GradientBoostingClassifier with accuracy: 92.50%



### 2. Color Coherence Vector

* Accuracy using Color Coherence Vector with GaussianNB: 85.75%
  
    Training and evaluating KNeighborsClassifier...

* Accuracy using Color Coherence Vector with KNeighborsClassifier: 82.00%
  
    Training and evaluating LogisticRegression...

* Accuracy using Color Coherence Vector with LogisticRegression: 83.50%
  
    Training and evaluating SVC...

* Accuracy using Color Coherence Vector with SVC: 87.00%
  
  Training and evaluating MLPClassifier...

* Accuracy using Color Coherence Vector with MLPClassifier: 75.50%
  
    Training and evaluating RandomForestClassifier...

* Accuracy using Color Coherence Vector with RandomForestClassifier: 90.75%
  
    Training and evaluating GradientBoostingClassifier...
  
* Accuracy using Color Coherence Vector with GradientBoostingClassifier: 97.25%
  
    Training and evaluating DecisionTreeClassifier...
  
* Accuracy using Color Coherence Vector with DecisionTreeClassifier: 81.25%

  The best classifier for Color Coherence Vector is GradientBoostingClassifier with accuracy: 97.25%



### 3. HSV Histogram


* Training and evaluating GaussianNB...
  
    Accuracy using HSV Histogram with GaussianNB: 82.50%
        
* Training and evaluating KNeighborsClassifier...
  
    Accuracy using HSV Histogram with KNeighborsClassifier: 92.50%
        
* Training and evaluating LogisticRegression...
  
    Accuracy using HSV Histogram with LogisticRegression: 95.00%
        
* Training and evaluating SVC...
  
    Accuracy using HSV Histogram with SVC: 93.75%
        
* Training and evaluating MLPClassifier...
  
    Accuracy using HSV Histogram with MLPClassifier: 92.50%
        
* Training and evaluating RandomForestClassifier...
  
    Accuracy using HSV Histogram with RandomForestClassifier: 85.00%
        
* Training and evaluating GradientBoostingClassifier...
  
    Accuracy using HSV Histogram with GradientBoostingClassifier: 75.00%
        
* Training and evaluating DecisionTreeClassifier...
  
    Accuracy using HSV Histogram with DecisionTreeClassifier: 75.00%

  

The best classifier for HSV Histogram is LogisticRegression with accuracy: 95.00%



### 4. Color Layout Descriptor


* Training and evaluating GaussianNB...
  
    Accuracy using Color Layout Descriptor with GaussianNB: 81.25%

        
* Training and evaluating KNeighborsClassifier...

    Accuracy using Color Layout Descriptor with KNeighborsClassifier: 72.50%


* Training and evaluating LogisticRegression...

    Accuracy using Color Layout Descriptor with LogisticRegression: 56.25%


* Training and evaluating SVC...

    Accuracy using Color Layout Descriptor with SVC: 76.25%


* Training and evaluating MLPClassifier...

    Accuracy using Color Layout Descriptor with MLPClassifier: 55.00%


* Training and evaluating RandomForestClassifier...

    Accuracy using Color Layout Descriptor with RandomForestClassifier: 81.25%


* Training and evaluating GradientBoostingClassifier...

    Accuracy using Color Layout Descriptor with GradientBoostingClassifier: 77.50%


* Training and evaluating DecisionTreeClassifier...

    Accuracy using Color Layout Descriptor with DecisionTreeClassifier: 72.50%



The best classifier for Color Layout Descriptor is GaussianNB with accuracy: 81.25%



### 5. Color Name Descriptor

* Training and evaluating GaussianNB...

    Accuracy using Color Name Descriptor with GaussianNB: 70.00%


* Training and evaluating KNeighborsClassifier...

    Accuracy using Color Name Descriptor with KNeighborsClassifier: 80.00%


* Training and evaluating LogisticRegression...

    Accuracy using Color Name Descriptor with LogisticRegression: 77.50%


* Training and evaluating SVC...

    Accuracy using Color Name Descriptor with SVC: 76.25%


* Training and evaluating MLPClassifier...

    Accuracy using Color Name Descriptor with MLPClassifier: 80.00%


* Training and evaluating RandomForestClassifier...

    Accuracy using Color Name Descriptor with RandomForestClassifier: 77.50%


* Training and evaluating GradientBoostingClassifier...

    Accuracy using Color Name Descriptor with GradientBoostingClassifier: 75.00%
    

* Training and evaluating DecisionTreeClassifier...

    Accuracy using Color Name Descriptor with DecisionTreeClassifier: 76.25%

The best classifier for Color Name Descriptor is KNeighborsClassifier with accuracy: 80.00%


### 6. Opponency Color Features

* Training and evaluating GaussianNB...

    Accuracy using Opponency Color Features with GaussianNB: 62.50%


* Training and evaluating KNeighborsClassifier...

    Accuracy using Opponency Color Features with KNeighborsClassifier: 45.00%
        
* Training and evaluating LogisticRegression...

    Accuracy using Opponency Color Features with LogisticRegression: 30.00%
        
* Training and evaluating SVC...

    Accuracy using Opponency Color Features with SVC: 47.50%
        
* Training and evaluating MLPClassifier...

    Accuracy using Opponency Color Features with MLPClassifier: 32.50%
        
* Training and evaluating RandomForestClassifier...

    Accuracy using Opponency Color Features with RandomForestClassifier: 65.00%


* Training and evaluating GradientBoostingClassifier...

    Accuracy using Opponency Color Features with GradientBoostingClassifier: 72.00%

* Training and evaluating DecisionTreeClassifier...

    Accuracy using Opponency Color Features with DecisionTreeClassifier: 70.25%

The best classifier for Opponency Color Features is GradientBoostingClassifier with accuracy: 72.00%




.

### Accuracy Explanation

1. **Dominant Color Descriptor**
   - **Gradient Boosting Classifier (92.50%)**: This method is effective because it combines multiple weak learners to improve performance, which works well with complex features like dominant colors.
   - **Random Forest Classifier (91.25%)**: Also a tree ensemble, but it averages many independently trained trees (bagging) instead of fitting them sequentially as Gradient Boosting does; it captures patterns well in dominant color features.
   - **GaussianNB (88.75%)**: Performs reasonably well, but might struggle with the multi-dimensional color features compared to ensemble methods.
   - **Other classifiers**: Generally lower due to less effective handling of the dominant color feature set.

2. **Color Coherence Vector**
   - **Gradient Boosting Classifier (97.25%)**: Excels due to its robustness in handling complex, high-dimensional features like color coherence.
   - **KNeighborsClassifier (82.00%)**: Works well with local feature patterns but less effective with high-dimensional data compared to boosting methods.
   - **GaussianNB (85.75%)**: Performs decently but might be limited by assumptions about feature distribution.
   - **Other classifiers**: Generally less effective due to their inability to capture the complex relationships in coherence features.

3. **HSV Histogram**
   - **Logistic Regression (95.00%)**: Effective in handling the HSV histogram data, providing high accuracy by finding a good linear decision boundary.
   - **KNeighborsClassifier (92.50%)**: Good at capturing local patterns in the histogram but not as strong as Logistic Regression.
   - **GaussianNB (82.50%)**: Adequate but less effective in the high-dimensional HSV space.
   - **Other classifiers**: Show varied performance, often struggling with the complexity of the HSV histogram features.

4. **Color Layout Descriptor**
   - **GaussianNB (81.25%)**: Performs well with the average color information but may not capture more complex patterns.
   - **Random Forest Classifier (81.25%)**: Handles average color information well but might not capture detailed patterns.
   - **Other classifiers**: Generally perform lower due to less effective handling of color layout features which may lack detailed discrimination.

5. **Color Name Descriptor**
   - **KNeighborsClassifier (80.00%)**: Good at handling discrete color names and their frequencies, making it effective for this feature set.
   - **MLPClassifier (80.00%)**: Performs similarly by learning complex relationships between color names and categories.
   - **GaussianNB (70.00%)**: Less effective due to assumptions about feature distribution and the discrete nature of color names.
   - **Other classifiers**: Lower performance due to less effective handling of the color name feature distribution.

6. **Opponency Color Features**
   - **Gradient Boosting Classifier (72.00%)**: Handles the opponency features well by combining multiple weak learners.
   - **Random Forest Classifier (65.00%)**: Better than most but still struggles with complex opponency features.
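   - **GaussianNB (62.50%)**: Lands mid-table, well ahead of Logistic Regression (30.00%) and the MLP Classifier (32.50%), which perform the worst, but it may still not capture the nuances of color opponency effectively.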
   - **Other classifiers**: Struggle with the unique nature of opponency features, leading to lower accuracy.