In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
# ---Data download and preprocessing.
# here, i have used cv2 for image processing & i am making a  user defined function to load images and labels
def load_images_from_folders(folders):
    images = []
    labels = []
    for label, folder in enumerate(folders):
        for filename in tqdm(os.listdir(folder), desc=f"Processing folder {label}"):
            img_path = os.path.join(folder, filename)
            img = cv2.imread(img_path)
# here, i am  Resizing to a fixed size if needed
            if img is not None:
                img = cv2.resize(img, (32, 32))
                images.append(img)
                labels.append(label)
    print(f"Loaded {len(images)} images.")
    return np.array(images), np.array(labels)

# Updating the folder paths based on where my images are stored
folders = [
    '/content/drive/MyDrive/Bombaysoftware/dataset/dataset_full/Building',
    '/content/drive/MyDrive/Bombaysoftware/dataset/dataset_full/Forest',
    '/content/drive/MyDrive/Bombaysoftware/dataset/dataset_full/Glacier',
    '/content/drive/MyDrive/Bombaysoftware/dataset/dataset_full/Mountains',
    '/content/drive/MyDrive/Bombaysoftware/dataset/dataset_full/Sea',
    '/content/drive/MyDrive/Bombaysoftware/dataset/dataset_full/Streets'
]

X, y = load_images_from_folders(folders)

if len(X) == 0:
    raise ValueError("No images loaded. Check the folder paths and structure.")
# Normalizing the images for overfitting problem
X = X.astype('float32') / 255.0

# Spliting the dataset
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# i am making the user defined function to extract features
def extract_features(images):
    features = []
    for img in images:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        if gray.dtype != np.uint8:
            gray = gray.astype(np.uint8)

        hist_eq = cv2.equalizeHist(gray).flatten()

        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=5).flatten()
        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=5).flatten()

        canny = cv2.Canny(gray, 100, 200).flatten()

        combined_features = np.hstack([hist_eq, sobelx, sobely, canny])
        features.append(combined_features)

    return np.array(features)



# here, i am doing feature extraction
X_train_features = extract_features(X_train)
X_val_features = extract_features(X_val)
X_test_features = extract_features(X_test)

# dimensionality reduction using PCA
pca = PCA(n_components=100)
X_train_pca = pca.fit_transform(X_train_features)
X_val_pca = pca.transform(X_val_features)
X_test_pca = pca.transform(X_test_features)

# Classification algorithm of your choice with explanation.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_pca, y_train)

# evaluating on the validation set
val_accuracy = clf.score(X_val_pca, y_val)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

# Evaluation components.
y_pred = clf.predict(X_test_pca)
test_accuracy = clf.score(X_test_pca, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

# flask app: One should be able to upload an image and get the classification result.
app = Flask(__name__)

@app.route('/classify', methods=['POST'])
def classify_image():
    if 'image' not in request.files:
        return jsonify({'error': 'No image uploaded'}), 400

    file = request.files['image']
    filename = secure_filename(file.filename)
    file_path = os.path.join('uploads', filename)
    file.save(file_path)

    img = cv2.imread(file_path)
    img = cv2.resize(img, (32, 32))
    features = extract_features([img])
    features_pca = pca.transform(features)
    prediction = clf.predict(features_pca)

    return jsonify({'class': int(prediction[0])})

if __name__ == '__main__':
    os.makedirs('uploads', exist_ok=True)
    app.run(debug=True)


Processing folder 0: 100%|██████████| 501/501 [00:13<00:00, 37.25it/s] 
Processing folder 1: 100%|██████████| 2745/2745 [01:40<00:00, 27.31it/s] 
Processing folder 2: 100%|██████████| 501/501 [00:14<00:00, 35.60it/s] 
Processing folder 3: 100%|██████████| 501/501 [00:11<00:00, 45.23it/s] 
Processing folder 4: 100%|██████████| 501/501 [00:13<00:00, 36.34it/s] 
Processing folder 5: 100%|██████████| 501/501 [00:11<00:00, 42.92it/s] 


Loaded 5245 images.
Validation Accuracy: 52.72%
Test Accuracy: 51.48%
              precision    recall  f1-score   support

           0       0.21      0.03      0.05       103
           1       0.52      0.98      0.68       534
           2       0.00      0.00      0.00       104
           3       0.00      0.00      0.00        87
           4       1.00      0.01      0.02       119
           5       0.39      0.13      0.19       102

    accuracy                           0.51      1049
   macro avg       0.36      0.19      0.16      1049
weighted avg       0.44      0.51      0.37      1049

[[  3  95   0   0   0   5]
 [  3 523   2   0   0   6]
 [  1 100   0   0   0   3]
 [  2  81   1   0   0   3]
 [  1 114   0   0   1   3]
 [  4  85   0   0   0  13]]
 * Serving Flask app '__main__'
 * Debug mode: on


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat
