In [2]:
import os
import cv2
import numpy as np
from skimage import exposure
from sklearn.utils import resample
categories = ['Building', 'Forest', 'Glacier', 'Mountains', 'Sea', 'Streets']
def load_and_preprocess_images(data_folder, image_size=(300, 300)):
    categories = ['Building', 'Forest', 'Glacier', 'Mountains', 'Sea', 'Streets']
    images = []
    labels = []
    
    for category in categories:
        folder_path = os.path.join(data_folder, category)
        label = categories.index(category)  # Numeric label for each category
        image_files = os.listdir(folder_path)
        
        # Downsample the over-represented category to 500 images
        if len(image_files) > 500:
            image_files = resample(image_files, n_samples=500, random_state=42)
        
        for filename in image_files:
            file_path = os.path.join(folder_path, filename)
            image = cv2.imread(file_path)
            if image is not None:
                image = cv2.resize(image, image_size)
                gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                normalized_image = gray_image / 255.0
                equalized_image = exposure.equalize_hist(normalized_image)
                images.append(equalized_image)
                labels.append(label)
    
    images = np.array(images)
    labels = np.array(labels)
    return images, labels

data_folder = r'data'
images, labels = load_and_preprocess_images(data_folder)


## Feature Extraction

In [3]:
from skimage.feature import hog, canny

def extract_features(images):
    feature_list = []
    
    for image in images:
        # Histogram of Oriented Gradients (HOG)
        hog_features = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm='L2-Hys')
        
        # Canny Edges
        canny_edges = canny(image).flatten()
        
        # Combine features into a single feature vector
        features = np.hstack((hog_features, canny_edges))
        feature_list.append(features)
    
    feature_array = np.array(feature_list)
    return feature_array

features = extract_features(images)

### Dimensionality Reduction

In [4]:
from sklearn.decomposition import PCA

# Apply PCA to reduce the dimensionality of the feature set
pca = PCA(n_components=50)
reduced_features = pca.fit_transform(features)

### Classification

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(reduced_features, labels, test_size=0.2, random_state=42)

# Train an SVM classifier
svm_classifier = SVC(kernel='linear', class_weight='balanced')  # Use balanced class weights
svm_classifier.fit(X_train, y_train)

# Evaluate the classifier
y_pred = svm_classifier.predict(X_test)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.60      0.59      0.60       118
           1       0.87      0.86      0.86        99
           2       0.38      0.44      0.41        96
           3       0.56      0.52      0.54       101
           4       0.54      0.56      0.55        91
           5       0.65      0.58      0.61        95

    accuracy                           0.59       600
   macro avg       0.60      0.59      0.59       600
weighted avg       0.60      0.59      0.60       600

[[70  5 14  4  3 22]
 [ 4 85  3  3  0  4]
 [ 8  3 42 20 21  2]
 [ 7  2 17 53 20  2]
 [ 4  1 21 14 51  0]
 [23  2 14  1  0 55]]


In [7]:
import joblib
# Save the trained model
joblib.dump(svm_classifier, 'svm_classifier.pkl')
joblib.dump(pca, 'pca_model.pkl')

['pca_model.pkl']