In [1]:
import os
import cv2
import numpy as np
from skimage.feature import hog, local_binary_pattern
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from sklearn.svm import SVC

In [2]:
# Directory paths
dataset_path = '/content/drive/MyDrive/Bombay-Softwares'   #I've trained the model on google colab, the the dataset path is of google drive
categories = ['Building', 'Sea', 'Mountains', 'Streets', 'Glacier', 'Forest']

In [3]:
# Function to load and preprocess data
def load_and_preprocess_data(dataset_path, categories):
    data = []
    labels = []

    for category in categories:
        category_path = os.path.join(dataset_path, category)
        class_label = categories.index(category)

        if not os.path.isdir(category_path):
            print(f"Warning: {category_path} does not exist or is not a directory. Skipping...")
            continue

        for img_name in os.listdir(category_path):
            img_path = os.path.join(category_path, img_name)
            image = cv2.imread(img_path)

            if image is None:
                print(f"Warning: Could not read image {img_path}. Skipping...")
                continue

            try:
                image = cv2.resize(image, (150, 150))
                gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                equalized_image = cv2.equalizeHist(gray_image)

                data.append(equalized_image)
                labels.append(class_label)
            except Exception as e:
                print(f"Error processing image {img_path}: {e}")

    return np.array(data), np.array(labels)


In [4]:
# Load and preprocess data
data, labels = load_and_preprocess_data(dataset_path, categories)



In [5]:
# Function to extract combined features
def extract_features(images):
    features = []
    for image in images:
        hist = cv2.calcHist([image], [0], None, [256], [0, 256]).flatten()

        sobelx = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5).flatten()
        sobely = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=5).flatten()

        hog_feature, _ = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True)

        lbp = local_binary_pattern(image, P=8, R=1, method='uniform')
        lbp_hist = np.histogram(lbp, bins=np.arange(0, 27), range=(0, 26))[0]

        combined_features = np.hstack((hist, sobelx, sobely, hog_feature, lbp_hist))
        features.append(combined_features)

    return np.array(features)

In [6]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42, stratify=labels)

In [7]:
# Extract features
X_train_features = extract_features(X_train)
X_test_features = extract_features(X_test)


In [8]:
# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_features)
X_test_scaled = scaler.transform(X_test_features)

In [9]:
# Apply PCA for dimensionality reduction
pca = PCA(n_components=150)  # Adjust the number of components as needed
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [10]:
# Balance the dataset using SMOTE
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train_pca, y_train)

In [11]:
# Hyperparameter tuning for RandomForest using GridSearchCV
param_grid_rf = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

In [12]:
rf = RandomForestClassifier(class_weight='balanced', random_state=42)
grid_search_rf = GridSearchCV(rf, param_grid_rf, cv=5, n_jobs=-1, verbose=2)
grid_search_rf.fit(X_train_balanced, y_train_balanced)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [13]:
# Best RandomForest model
best_rf_classifier = grid_search_rf.best_estimator_
print(f"Best parameters found for RandomForest: {grid_search_rf.best_params_}")

Best parameters found for RandomForest: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 300}


In [17]:
# Create an ensemble classifier
svm = SVC(class_weight='balanced', kernel='rbf', gamma='scale', C=1, random_state=42)
svm.fit(X_train_balanced, y_train_balanced)

In [18]:
ensemble_clf = VotingClassifier(estimators=[('rf', best_rf_classifier), ('svm', svm)], voting='hard')
ensemble_clf.fit(X_train_balanced, y_train_balanced)

In [19]:
# Predict on test data
y_pred = ensemble_clf.predict(X_test_pca)

In [20]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=categories)

In [21]:
print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')

Accuracy: 0.8007626310772163
Classification Report:
              precision    recall  f1-score   support

    Building       0.66      0.80      0.72       100
         Sea       0.54      0.69      0.61       100
   Mountains       0.59      0.58      0.58       100
     Streets       0.82      0.68      0.74       100
     Glacier       0.53      0.43      0.48       100
      Forest       0.97      0.95      0.96       549

    accuracy                           0.80      1049
   macro avg       0.69      0.69      0.68      1049
weighted avg       0.81      0.80      0.80      1049



# **Inference**

In [22]:
# Function to preprocess a single image
def preprocess_image1(image):
    # Resize image
    image = cv2.resize(image, (150, 150))
    # Convert to gray-scale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Apply histogram equalization
    equalized_image = cv2.equalizeHist(gray_image)
    return equalized_image

In [23]:
# Function to extract combined features from a single image
def extract_features1(image):
    # Histogram features
    hist = cv2.calcHist([image], [0], None, [256], [0, 256]).flatten()

    # Sobel edge detection
    sobelx = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5).flatten()
    sobely = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=5).flatten()

    # HOG features
    hog_feature, _ = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True)

    # LBP features
    lbp = local_binary_pattern(image, P=8, R=1, method='uniform')
    lbp_hist = np.histogram(lbp, bins=np.arange(0, 27), range=(0, 26))[0]

    # Combine all features
    combined_features = np.hstack((hist, sobelx, sobely, hog_feature, lbp_hist))
    return combined_features

In [24]:
# Mapping numerical labels to class names
class_names = ['Building', 'Sea', 'Mountains', 'Streets', 'Glacier', 'Forest']

# Function for inference
def classify_image1(image_path, scaler, pca, classifier):
    # Read the image
    image = cv2.imread(image_path)
    if image is None:
        print("Error: Could not read the image.")
        return

    # Preprocess the image
    preprocessed_image = preprocess_image1(image)

    # Extract features
    features = extract_features1(preprocessed_image)

    # Scale the features
    scaled_features = scaler.transform([features])

    # Apply PCA
    pca_features = pca.transform(scaled_features)

    # Perform classification
    prediction = classifier.predict(pca_features)

    # Map numerical label to class name
    predicted_class = class_names[prediction[0]]

    return predicted_class

In [28]:
image_path = '/content/drive/MyDrive/Bombay-Softwares/Glacier/9778.jpg'
prediction = classify_image1(image_path, scaler, pca, ensemble_clf)

print(f"Predicted class: {prediction}")

Predicted class: Sea


In [30]:
import joblib

# Save the scaler, PCA, and classifier to files
joblib.dump(scaler, 'ens_scaler.pkl')
joblib.dump(pca, 'ens_pca.pkl')
joblib.dump(ensemble_clf, 'ens_classifier.pkl')


['ens_classifier.pkl']

In [31]:
import shutil

# Define the paths
scaler_path = '/content/ens_scaler.pkl'
pca_path = '/content/ens_pca.pkl'
classifier_path = '/content/ens_classifier.pkl'

# Destination paths in Google Drive
drive_scaler_path = '/content/drive/MyDrive/Bombay-Softwares/saved_models/ens_scaler.pkl'
drive_pca_path = '/content/drive/MyDrive/Bombay-Softwares/saved_models/ens_pca.pkl'
drive_classifier_path = '/content/drive/MyDrive/Bombay-Softwares/saved_models/ens_classifier.pkl'

# Copy files to Google Drive
shutil.copy(scaler_path, drive_scaler_path)
shutil.copy(pca_path, drive_pca_path)
shutil.copy(classifier_path, drive_classifier_path)

print("Files moved to Google Drive successfully.")

Files moved to Google Drive successfully.


In [32]:
!pip freeze > requirements.txt
