In [None]:
import os
import random
import cv2
import numpy as np
from glob import glob
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

# Feature extraction methods: each maps an image array to a 1-D feature vector
def extract_dominant_color_descriptor(image):
    """Return the five dominant colours of ``image`` as one flat vector.

    The pixels are clustered with k-means (k=5). The cluster centres are
    returned ordered from the most to the least populated cluster, so the
    position of a colour inside the vector is meaningful and repeatable.

    Args:
        image: Array whose elements can be reshaped into rows of three
            channel values (e.g. an H x W x 3 image).

    Returns:
        1-D array of length 15 (five centres x three channels).
    """
    n_colors = 5
    pixels = image.reshape(-1, 3)
    # A fixed seed and an explicit n_init make the clustering reproducible
    # across runs and across scikit-learn versions (the n_init default
    # changed between releases).
    kmeans = KMeans(n_clusters=n_colors, n_init=10, random_state=42)
    kmeans.fit(pixels)
    # k-means numbers its clusters arbitrarily; sorting by cluster size gives
    # the descriptor a stable layout (largest colour first).
    counts = np.bincount(kmeans.labels_, minlength=n_colors)
    order = np.argsort(-counts, kind="stable")
    return kmeans.cluster_centers_[order].flatten()

def extract_color_coherence_vector(image):
    """Return the normalised 256-bin histogram of the Otsu-binarised image.

    The BGR image is converted to grayscale and thresholded with an inverted
    binary Otsu threshold; the histogram of the thresholded result is then
    scaled so that its bins sum to 1.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    threshold_mode = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    # cv2.threshold returns (threshold_used, thresholded_image); only the
    # image is needed here.
    binary = cv2.threshold(grayscale, 0, 255, threshold_mode)[1]
    counts, _ = np.histogram(binary, bins=256, range=[0, 256])
    return counts / counts.sum()

def extract_color_layout_descriptor(image):
    """Return the image shrunk to 8x8 as a flat vector divided by 255."""
    thumbnail = cv2.resize(image, (8, 8))
    flat_values = thumbnail.flatten()
    return flat_values / 255.0

def extract_opponency_color_features(image):
    """Return a normalised histogram of two opponent-colour channels.

    Two signed channels are derived from the image's B, G, R planes:

    * ``rg = R - G``
    * ``yb = 0.5 * (R + G) - B``

    Each is binned into 16 bins over [-255, 255]; the two histograms are
    concatenated and scaled so that all 32 bins together sum to 1.

    Args:
        image: Array whose last axis holds the channels in B, G, R order
            (the first three channels are used).

    Returns:
        1-D float array of length 32. If the histograms are empty (an image
        with no pixels) an all-zero vector is returned instead of NaNs.
    """
    # NumPy indexing yields channel views; cv2.split would copy every plane.
    # Casting to int prevents uint8 wrap-around in the subtractions below.
    blue = image[..., 0].astype(int)
    green = image[..., 1].astype(int)
    red = image[..., 2].astype(int)

    rg = red - green
    yb = 0.5 * (red + green) - blue

    rg_hist = np.histogram(rg, bins=16, range=[-255, 255])[0]
    yb_hist = np.histogram(yb, bins=16, range=[-255, 255])[0]

    combined = np.concatenate([rg_hist, yb_hist])
    total = combined.sum()
    if total == 0:
        # Nothing was counted; avoid a 0/0 division.
        return np.zeros(combined.shape, dtype=float)
    return combined / total

def extract_color_name_descriptor(image):
    """Return a normalised 8x8 histogram over HSV channels 0 and 1.

    The BGR image is converted to HSV, channels 0 and 1 are binned jointly
    (8 bins each, ranges [0, 180) and [0, 256)), and the flattened 64-bin
    histogram is scaled so that it sums to 1.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    channel_indices = [0, 1]
    bins_per_channel = [8, 8]
    value_ranges = [0, 180, 0, 256]
    joint_hist = cv2.calcHist(
        [hsv_image], channel_indices, None, bins_per_channel, value_ranges
    )
    total = np.sum(joint_hist)
    return joint_hist.flatten() / total

def extract_hsv_histogram(image):
    """Return a normalised 8x8x8 histogram over all three HSV channels.

    The BGR image is converted to HSV and its three channels are binned
    jointly (8 bins each, ranges [0, 180), [0, 256) and [0, 256)); the
    flattened 512-bin histogram is scaled so that it sums to 1.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    channel_indices = [0, 1, 2]
    bins_per_channel = [8, 8, 8]
    value_ranges = [0, 180, 0, 256, 0, 256]
    joint_hist = cv2.calcHist(
        [hsv_image], channel_indices, None, bins_per_channel, value_ranges
    )
    total = np.sum(joint_hist)
    return joint_hist.flatten() / total

# Load and preprocess images
def load_and_preprocess_images(folder_path, num_images=890, target_size=(224, 224)):
    """Load a random sample of PNG images from a folder and resize them.

    Args:
        folder_path: Directory searched (non-recursively) for ``*.png`` files.
        num_images: Maximum number of files to try after shuffling.
        target_size: Size passed to ``cv2.resize`` for every loaded image.

    Returns:
        List of resized images. Files that ``cv2.imread`` cannot read are
        skipped, so the list may hold fewer than ``num_images`` entries.
    """
    png_paths = glob(os.path.join(folder_path, "*.png"))
    random.shuffle(png_paths)
    selected_paths = png_paths[:num_images]

    # Lazily decode each file; imread signals failure by returning None.
    decoded = (cv2.imread(path) for path in selected_paths)
    return [
        cv2.resize(frame, target_size)
        for frame in decoded
        if frame is not None
    ]

# Map each degradation folder to the feature extractor applied to its images.
# Insertion order matters: a folder's position becomes its class label when
# this mapping is enumerated further down.
folders = {
    # The three tint classes share the dominant-colour descriptor.
    **dict.fromkeys(
        ("green_tint", "blue_tint", "red_tint"),
        extract_dominant_color_descriptor,
    ),
    "noisy": extract_color_coherence_vector,
    "blurry": extract_color_layout_descriptor,
    "hazy": extract_opponency_color_features,
    "high_contrast": extract_color_name_descriptor,
    "low_illumination": extract_hsv_histogram,
}

# Extract features for all folders
print("Extracting features for all folders...")

# Root directory of the dataset. Set the DEGRADED_IMAGES_DIR environment
# variable to run on another machine; the default is the original location.
dataset_root = os.environ.get(
    "DEGRADED_IMAGES_DIR", "C:/Users/abhis/Downloads/degraded_images"
)

X = []  # one feature vector per image (length depends on the extractor)
y = []  # integer class label = position of the folder in `folders`

# NOTE(review): every folder is described by a *different* extractor, so the
# length of the raw vector (before the zero-padding done later in this script)
# differs between most classes. A classifier can pick up on that pattern
# instead of the image content - confirm this is intended before trusting
# the reported accuracies.
for label, (folder, extraction_method) in enumerate(folders.items()):
    folder_path = f"{dataset_root}/{folder}"
    images = load_and_preprocess_images(folder_path)
    if not images:
        # An empty class would otherwise go unnoticed until evaluation.
        print(f"Warning: no readable PNG images found in {folder_path}")
    for img in images:
        features = extraction_method(img)
        X.append(features)
        y.append(label)

# Right-pad every feature vector with zeros up to the longest vector's length
# so that all rows can be stacked into a single 2-D matrix.
max_length = max(map(len, X))
X_padded = np.stack([
    np.pad(vector, (0, max_length - len(vector)), mode='constant')
    for vector in X
])

# Labels as a NumPy array
y = np.asarray(y)

# Debug output: matrix shapes and the classes actually present
print(f"Shape of X_padded: {X_padded.shape}")
print(f"Shape of y: {y.shape}")

unique_classes = np.unique(y)
print(f"Unique classes in the dataset: {unique_classes}")
print(f"Number of unique classes: {len(unique_classes)}")

# Split the data
print("Splitting the data into train and test sets...")
# stratify=y keeps each class's share the same in the train and test sets,
# so no class is over- or under-represented in the 20 % hold-out.
X_train, X_test, y_train, y_test = train_test_split(
    X_padded, y, test_size=0.2, random_state=42, stratify=y
)

# Define preprocessor: fill missing values with the column mean, then
# standardise every feature.
preprocessor = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Define classifiers. Models with internal randomness get the same seed as
# the split above so that repeated runs give the same scores.
classifiers = {
    "GaussianNB": GaussianNB(),
    "KNeighborsClassifier": KNeighborsClassifier(n_neighbors=3),
    "LogisticRegression": LogisticRegression(max_iter=1000),
    "SVC": SVC(kernel='rbf'),
    "MLPClassifier": MLPClassifier(max_iter=1000, random_state=42),
    "RandomForestClassifier": RandomForestClassifier(n_estimators=100, random_state=42),
    "GradientBoostingClassifier": GradientBoostingClassifier(n_estimators=100, random_state=42),
    "DecisionTreeClassifier": DecisionTreeClassifier(random_state=42),
}

# Train and evaluate classifiers
print("Training and evaluating classifiers...")

# Class names and their integer labels, built once. Passing the labels
# explicitly keeps classification_report working even when a class is
# missing from the test data (e.g. an empty folder); without them the
# report raises because target_names no longer matches the classes found.
target_names = list(folders)
class_labels = list(range(len(target_names)))

for name, clf in classifiers.items():
    print(f"\nTraining {name}...")
    # Preprocessing is fitted inside the pipeline, i.e. on training data only.
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', clf)
    ])
    pipeline.fit(X_train, y_train)

    print(f"Evaluating {name}...")
    y_pred = pipeline.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy of {name}: {accuracy:.4f}")

    print("Classification Report:")
    print(classification_report(
        y_test,
        y_pred,
        labels=class_labels,
        target_names=target_names,
        zero_division=0,  # report 0 instead of warning for never-predicted classes
    ))

print("Classification process completed.")


Output:

* Accuracy of GaussianNB: 0.9792
* Accuracy of KNeighborsClassifier: 0.9583
* Accuracy of LogisticRegression: 0.9752
* Accuracy of SVC: 0.8333
* Accuracy of MLPClassifier: 0.9722
* Accuracy of RandomForestClassifier: 1.0000
* Accuracy of GradientBoostingClassifier: 1.0000
* Accuracy of DecisionTreeClassifier: 0.8958
* Accuracy of Bayes Classifier: 0.8712




**Output Explanation:**

1. **GaussianNB: Accuracy of 0.9792**
   - **Reason:** GaussianNB assumes normally distributed features and performs well with this dataset, resulting in high accuracy.

2. **KNeighborsClassifier: Accuracy of 0.9583**
   - **Reason:** KNN captures local data structures effectively but can be less accurate with noisy data.

3. **LogisticRegression: Accuracy of 0.9752**
   - **Reason:** Logistic Regression performs well on linearly separable data; its high accuracy here indicates that the classes' features are distinct and well separated.

4. **SVC: Accuracy of 0.8333**
   - **Reason:** The RBF-kernel SVM is powerful, but it is used here with its default hyperparameters (`C` and `gamma` are not tuned), so it may overfit or fail to capture the structure of the data, leading to lower accuracy.

5. **MLPClassifier: Accuracy of 0.9722**
   - **Reason:** MLP can model complex patterns in the data through its hidden layers, resulting in high accuracy.

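6. **RandomForestClassifier: Accuracy of 1.0000**
   - **Reason:** Random Forests are robust and handle the mix of heterogeneous features well, achieving perfect accuracy with enough trees. Note that most classes are described by a different feature extractor, so the position at which the zero-padding begins in a feature vector already reveals the class; a perfect score should be interpreted with that in mind.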

7. **GradientBoostingClassifier: Accuracy of 1.0000**
   - **Reason:** Gradient Boosting builds its trees sequentially, each one correcting the errors of the previous ones, which leads to perfect accuracy on this test set.

8. **DecisionTreeClassifier: Accuracy of 0.8958**
   - **Reason:** Single Decision Trees may overfit the training data, resulting in lower accuracy compared to ensemble methods.

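9. **Bayes Classifier: Accuracy of 0.8712**
   - **Reason:** The Bayes Classifier relies on an explicit probabilistic model and is sensitive to violations of its feature-independence assumption, resulting in lower accuracy. (This model is not among the classifiers defined in the code above; its score was obtained separately.)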