## Data Download and Preprocessing
Download the dataset from the provided Google Drive link.
Implement preprocessing techniques to prepare the images for feature extraction.

In [1]:
import os
import cv2
import numpy as np
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import pickle

# Function to load images and labels from subdirectories
def load_images_from_folder(folder):
    images = []
    labels = []

    if not os.path.exists(folder):
        print(f"Folder path '{folder}' does not exist.")
        return np.array(images), np.array(labels)

    for sub in os.listdir(folder):
        sub_path = os.path.join(folder, sub)
        if not os.path.isdir(sub_path):
            print(f"'{sub_path}' is not a directory.")
            continue
        for filename in os.listdir(sub_path):
            img_path = os.path.join(sub_path, filename)
            if not os.path.isfile(img_path):
                print(f"File '{img_path}' is not a valid file.")
                continue
            img = cv2.imread(img_path)
            if img is None:
                print(f"Failed to read image '{img_path}'.")
                continue
            img = cv2.resize(img, (128, 128))  # Resize images to 128x128
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            images.append(img_gray)
            labels.append(sub)  # Use the subdirectory name as the label
            print(f"Loaded image '{img_path}' with label '{sub}'.")

    if not images:
        print(f"No images were loaded from the folder '{folder}'.")
        
    return np.array(images), np.array(labels)

# Load dataset 
images, labels = load_images_from_folder('C:\\Users\\91990\\Downloads\\dataset\\data\\dataset_full')

# Check if images and labels are loaded correctly
print(f"Number of images loaded: {len(images)}")
print(f"Number of labels loaded: {len(labels)}")

Failed to read image 'C:\Users\91990\Downloads\dataset\data\dataset_full\Building\.DS_Store'.
File 'C:\Users\91990\Downloads\dataset\data\dataset_full\Building\.ipynb_checkpoints' is not a valid file.
Loaded image 'C:\Users\91990\Downloads\dataset\data\dataset_full\Building\0.jpg' with label 'Building'.
Loaded image 'C:\Users\91990\Downloads\dataset\data\dataset_full\Building\10014.jpg' with label 'Building'.
Loaded image 'C:\Users\91990\Downloads\dataset\data\dataset_full\Building\10144.jpg' with label 'Building'.
Loaded image 'C:\Users\91990\Downloads\dataset\data\dataset_full\Building\10191.jpg' with label 'Building'.
Loaded image 'C:\Users\91990\Downloads\dataset\data\dataset_full\Building\10210.jpg' with label 'Building'.
Loaded image 'C:\Users\91990\Downloads\dataset\data\dataset_full\Building\10228.jpg' with label 'Building'.
Loaded image 'C:\Users\91990\Downloads\dataset\data\dataset_full\Building\10251.jpg' with label 'Building'.
Loaded image 'C:\Users\91990\Downloads\dataset\

# Feature Extraction

Extract multiple sets of handcrafted features, with at least three features per set.
Examples of features to extract:
Low-level Vision: Histogram, Histogram Equalization, Gray-scale Transformation, Image Smoothing, Connected Components in Images.
Mid-level Vision: Edge Detection (using Gradients, Sobel, Canny), Line Detection (using Hough Transforms), Semantic Information (using RANSAC), Image Region Descriptors (using SIFT).

In [2]:
# Function to calculate histogram and flatten it
def calc_histogram(img):
    hist = cv2.calcHist([img], [0], None, [256], [0, 256])
    return hist.flatten()

# Function to detect edges using Canny and flatten the result
def canny_edge_detection(img):
    edges = cv2.Canny(img, 100, 200)
    return edges.flatten()

# Function to extract SIFT features and ensure the length is consistent
def sift_features(img, num_features=128):
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(img, None)
    if descriptors is None:
        descriptors = np.zeros((num_features, 128))
    if len(descriptors) < num_features:
        descriptors = np.vstack([descriptors, np.zeros((num_features - len(descriptors), 128))])
    elif len(descriptors) > num_features:
        descriptors = descriptors[:num_features]
    return descriptors.flatten()

# Apply feature extraction to all images
features_hist = np.array([calc_histogram(img) for img in images])
features_canny = np.array([canny_edge_detection(img) for img in images])
features_sift = np.array([sift_features(img) for img in images])

print(f"Histogram features shape: {features_hist.shape}")
print(f"Canny edge features shape: {features_canny.shape}")
print(f"SIFT features shape: {features_sift.shape}")

Histogram features shape: (5245, 256)
Canny edge features shape: (5245, 16384)
SIFT features shape: (5245, 16384)


# Dimensionality Reduction
Apply dimensionality reduction techniques if the feature set size is too large.

# Classification Algorithm
Choose and implement a classification algorithm.

# Evaluation Components
Evaluate the trained models using appropriate metrics.

In [3]:
# Function to apply PCA
def apply_pca(features, n_components=50):
    pca = PCA(n_components=n_components)
    reduced_features = pca.fit_transform(features)
    return pca, reduced_features

# Apply PCA
pca_hist, features_hist_pca = apply_pca(features_hist, n_components=50)
pca_canny, features_canny_pca = apply_pca(features_canny, n_components=50)
pca_sift, features_sift_pca = apply_pca(features_sift, n_components=50)

# Save PCA transformers
with open('pca_transformer_hist.pkl', 'wb') as pca_file:
    pickle.dump(pca_hist, pca_file)

with open('pca_transformer_canny.pkl', 'wb') as pca_file:
    pickle.dump(pca_canny, pca_file)

with open('pca_transformer_sift.pkl', 'wb') as pca_file:
    pickle.dump(pca_sift, pca_file)

# Combine PCA features for SVM training
combined_features = np.hstack((features_hist_pca, features_canny_pca, features_sift_pca))

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(combined_features, labels, test_size=0.2, random_state=42)

# Train the SVM classifier
classifier = SVC()
classifier.fit(X_train, y_train)

# Predict and evaluate
y_pred = classifier.predict(X_test)
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

# Save the trained model
with open('svm_algo_model.pkl', 'wb') as model_file:
    pickle.dump(classifier, model_file)

              precision    recall  f1-score   support

    Building       0.56      0.39      0.46       112
      Forest       0.87      0.96      0.92       539
     Glacier       0.47      0.52      0.49        94
   Mountains       0.57      0.48      0.52       107
         Sea       0.47      0.49      0.48        91
     Streets       0.69      0.57      0.62       106

    accuracy                           0.73      1049
   macro avg       0.60      0.57      0.58      1049
weighted avg       0.72      0.73      0.72      1049

Accuracy: 0.7311725452812202
