# Assignment 4: Deep learning and unsupervised learning
For this assignment, you are allowed to use data augmentation.

# Task 1
Pick any image-based dataset from the list. Implement the preprocessing and justify the preprocessing steps, extract features and justify the methods used, and select features and justify the methods used. Some of this was already done in one of the previous assignments; you can reuse that work.

- [ ] Implement (using the selected features) one basic machine learning algorithm for classification and justify your choice (20; 10 without justification).

- [ ] Implement (using the selected features) one advanced machine learning algorithm for classification and justify your choice (20; 10 without justification).

- [ ] Implement a CNN with hyperparameter tuning; for this you can directly use the data after preprocessing (30).

- [ ] Compare and explain the results in terms of both the computation time and the performance of the classification algorithms (30).

In [None]:
# Preprocessing, extract features, select features.
import os
import cv2
import numpy as np

# Locations of the raw images and of the cached preprocessed arrays
dataset_path = "image_dataset"
preprocessed_path = "preprocessed_arrays"

# Target size handed to cv2.resize in preprocess_image
image_size = (224, 224)

# One sub-folder per class is expected under dataset_path
class_names = [
    "hatchback",
    "motorcycle",
    "pickup",
    "sedan",
    "suv",
]

# Make sure the output root exists before any array is written
os.makedirs(preprocessed_path, exist_ok=True)

def preprocess_image(img):
    """
    Convert a BGR image to grayscale, resize it, and scale it by 1/255.

    Args:
        img: Image array in BGR channel order (the conversion below uses
            cv2.COLOR_BGR2GRAY).

    Returns:
        The grayscale image resized to the module-level ``image_size`` and
        divided by 255.0. For 8-bit input this places values in [0, 1]
        (input depth is assumed, not checked here).
    """
    single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Dividing by a float turns the result into a floating-point array
    return cv2.resize(single_channel, image_size) / 255.0

# Preprocess every image of every class and cache each result as a .npy array
# under preprocessed_path/<class_name>/
for class_name in class_names:
    folder_path = os.path.join(dataset_path, class_name)
    class_output_path = os.path.join(preprocessed_path, class_name)
    os.makedirs(class_output_path, exist_ok=True)

    if not os.path.exists(folder_path):
        print(f"Folder not found: {folder_path}")
        continue

    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)

        # Only image files are inputs; skip anything else in the folder
        if not file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        try:
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None (it does not raise) when a file
                # cannot be read or decoded; report it explicitly instead of
                # letting cvtColor fail with an opaque assertion message.
                print(f"Error processing file {file_path}: image could not be read")
                continue

            img_preprocessed = preprocess_image(img)

            # splitext strips only the final extension, so names containing
            # extra dots ("car.01.jpg", "car.02.jpg") keep distinct outputs
            # instead of all collapsing onto "car.npy".
            base_name = os.path.splitext(file_name)[0]
            save_path = os.path.join(class_output_path, base_name + ".npy")
            np.save(save_path, img_preprocessed)
        except Exception as e:
            # Best effort: one bad file must not abort the whole run
            print(f"Error processing file {file_path}: {e}")

print(f"Preprocessing completed. Preprocessed arrays saved in '{preprocessed_path}'")

In [None]:
# Feature Extraction
# Root folder for the edge-detected arrays; the loop further down writes one
# sub-folder per class beneath it
edge_detected_path = "edge_detected_arrays"

# Create the output root up front (no error if it already exists)
os.makedirs(edge_detected_path, exist_ok=True)

def apply_edge_detection(img):
    """
    Blur a grayscale image and return its Canny edge map scaled by 1/255.

    Args:
        img: Grayscale image whose intensities are scaled to [0, 1]; it is
            multiplied by 255 and cast to uint8 before edge detection.

    Returns:
        The Canny output divided by 255.0, i.e. normalized to [0, 1].
    """
    # Smooth first so that noise does not produce spurious edges
    smoothed = cv2.GaussianBlur(img, (5, 5), 0)
    # Canny is fed an 8-bit image, so bring the values back to the 0-255 scale
    eight_bit = (smoothed * 255).astype(np.uint8)
    edge_map = cv2.Canny(eight_bit, threshold1=50, threshold2=150)
    return edge_map / 255.0

# Run edge detection over every cached preprocessed array, mirroring the
# per-class folder layout under edge_detected_path
for class_name in class_names:
    class_input_path = os.path.join(preprocessed_path, class_name)
    class_output_path = os.path.join(edge_detected_path, class_name)
    os.makedirs(class_output_path, exist_ok=True)

    if not os.path.exists(class_input_path):
        print(f"Folder not found: {class_input_path}")
        continue

    for file_name in os.listdir(class_input_path):
        file_path = os.path.join(class_input_path, file_name)

        # Only .npy arrays are inputs; ignore anything else in the folder
        if not file_name.lower().endswith('.npy'):
            continue

        try:
            img_preprocessed = np.load(file_path)
            img_edges = apply_edge_detection(img_preprocessed)

            # Reuse the input file name so outputs map one-to-one onto inputs
            save_path = os.path.join(class_output_path, file_name)
            np.save(save_path, img_edges)
        except Exception as e:
            # Best effort: report the failing file and move on to the next
            print(f"Error processing file {file_path}: {e}")

print(f"Edge detection completed. Edge-detected arrays saved in '{edge_detected_path}'")

In [None]:
# Feature Selection
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Define paths
# Read the edge-detected arrays through their own name. Rebinding
# preprocessed_path here would make a re-run of the edge-detection cell read
# its own output, and would hide the preprocessed data from later cells.
edge_detected_path = "edge_detected_arrays"
feature_selected_path = "feature_selected_arrays"

# Create directory to save feature-selected arrays
os.makedirs(feature_selected_path, exist_ok=True)

# Load the edge-detected data as flat feature vectors
X = []  # Feature data: one flattened image per row
y = []  # Labels: the class folder name of each row

# Sorting the folders and files makes the row order reproducible across runs
for class_name in sorted(os.listdir(edge_detected_path)):
    class_input_path = os.path.join(edge_detected_path, class_name)

    # Entries come from listdir, so they always exist; what matters is that
    # they are directories (stray files such as .DS_Store would otherwise
    # crash the os.listdir call below).
    if not os.path.isdir(class_input_path):
        print(f"Skipping non-directory entry: {class_input_path}")
        continue

    for file_name in sorted(os.listdir(class_input_path)):
        if not file_name.lower().endswith('.npy'):
            continue

        file_path = os.path.join(class_input_path, file_name)
        try:
            img_array = np.load(file_path).flatten()  # Flatten image array
        except Exception as e:
            # Best effort: skip unreadable arrays without adding a label
            print(f"Error loading file {file_path}: {e}")
            continue

        X.append(img_array)
        y.append(class_name)  # Label is the name of the class folder

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

# Split BEFORE fitting PCA: fitting the projection on all samples would let
# test-set statistics leak into the features the classifiers are trained on.
# The split indices depend only on y and random_state, so the train/test
# membership is the same as when splitting the reduced data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the 20-component projection on the training images only; random_state
# keeps the result reproducible when a randomized SVD solver is selected.
pca = PCA(n_components=20, random_state=42)
X_train = pca.fit_transform(X_train_raw)
X_test = pca.transform(X_test_raw)

# Kept for cells that still refer to the reduced full dataset; it is
# projected with the train-fitted PCA, so it carries no test-set leakage.
image_data_reduced = pca.transform(X)

In [None]:
# Implement Basic Classification

In [None]:
# Implement Advanced Classification

In [None]:
# Implement a CNN with hyperparameter tuning (use data directly after preprocessing)

# Task 2
Pick any dataset from the list. Implement the preprocessing and justify the preprocessing steps, extract features and justify the methods used, and select features and justify the methods used. Some of this was already done in one of the previous assignments; you can reuse that work.

Implement three clustering methods out of the following and justify your choices (30)

- K-means
- Hierarchical Clustering
- Fuzzy-C-means
- DBSCAN
- Gaussian mixture models
- Self-organizing maps

Compare and explain the results (30).

In [None]:
# Preprocessing, extract features, select features. (Can reuse)

In [None]:
# Implement cluster method 1

In [None]:
# Implement cluster method 2

In [None]:
# Implement cluster method 3