In [2]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from skimage.feature import hog
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Dataset locations. The loader joins each category name onto these
# directories, so each one must contain one sub-folder per category.
# The defaults are the original local paths; set SCENE_TRAIN_DIR /
# SCENE_TEST_DIR in the environment to run the notebook on another machine
# without editing this cell.
train_data = os.environ.get(
    "SCENE_TRAIN_DIR",
    r"D:\Project\PG Project\Data\Image Data\seg_train\seg_train",
)
test_data = os.environ.get(
    "SCENE_TEST_DIR",
    r"D:\Project\PG Project\Data\Image Data\seg_test\seg_test",
)

In [4]:
# Class names, which double as the sub-folder names inside the dataset
# directories. The position of a name in this list is the integer label
# assigned to its images, so do not reorder it once a model has been trained.
categories = [
    'buildings',
    'forest',
    'glacier',
    'mountain',
    'sea',
    'street',
]

In [8]:
def load_data_and_extract_RF(data_dir, categories, image_size=(150, 150)):
    """Load images from per-category folders and compute a HOG descriptor for each.

    For every name in ``categories`` the folder ``data_dir/<name>`` is listed.
    Each entry is read with ``cv2.imread``, converted to grayscale, resized to
    ``image_size`` and passed to ``skimage.feature.hog``.

    Parameters
    ----------
    data_dir : str
        Directory that contains one sub-folder per category.
    categories : sequence of str
        Sub-folder names. The index of a name in this sequence is the integer
        label given to every image found in that folder.
    image_size : tuple of int, optional
        Target size handed to ``cv2.resize`` (OpenCV reads it as
        ``(width, height)``). Defaults to ``(150, 150)``.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``features`` holds one HOG vector per successfully read image and
        ``labels`` holds the matching integer labels, in the same order.

    Notes
    -----
    * Files are visited in sorted name order. ``os.listdir`` makes no ordering
      guarantee, and the row order of the training matrix influences a seeded
      random forest, so sorting keeps runs reproducible across machines.
    * Entries that ``cv2.imread`` cannot decode (it returns ``None``) are
      skipped; if any were skipped, a single ``UserWarning`` reports how many.
    """
    import warnings  # local import: only needed for the skipped-file notice

    features = []
    labels = []
    skipped = 0
    for label, category in enumerate(categories):
        folder_path = os.path.join(data_dir, category)
        # Sort the listing so the sample order is deterministic.
        file_names = sorted(os.listdir(folder_path))
        for file in tqdm(file_names, desc=f"Loading {category}"):
            img_path = os.path.join(folder_path, file)
            img = cv2.imread(img_path)
            if img is None:
                # Not a decodable image (or not a file at all): count and move on.
                skipped += 1
                continue
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            resized = cv2.resize(gray, image_size)
            hog_feat = hog(resized, orientations=9, pixels_per_cell=(8, 8),
                           cells_per_block=(2, 2), block_norm='L2-Hys')
            features.append(hog_feat)
            labels.append(label)
    if skipped:
        warnings.warn(
            f"Skipped {skipped} unreadable file(s) under {data_dir}",
            stacklevel=2,
        )
    return np.array(features), np.array(labels)

In [10]:
# Build the HOG feature matrix and label vector for the training split ...
X_train, y_train = load_data_and_extract_RF(data_dir=train_data, categories=categories)
# ... and for the held-out test split, using the same category order so the
# integer labels mean the same thing in both.
X_test, y_test = load_data_and_extract_RF(data_dir=test_data, categories=categories)

Loading buildings: 100%|██████████| 2190/2190 [00:47<00:00, 45.89it/s]
Loading forest: 100%|██████████| 2263/2263 [00:50<00:00, 44.55it/s]
Loading glacier: 100%|██████████| 2387/2387 [00:51<00:00, 45.96it/s]
Loading mountain: 100%|██████████| 2495/2495 [00:53<00:00, 46.68it/s]
Loading sea: 100%|██████████| 2270/2270 [00:49<00:00, 46.16it/s]
Loading street: 100%|██████████| 2381/2381 [00:54<00:00, 43.39it/s]
Loading buildings: 100%|██████████| 437/437 [00:09<00:00, 45.76it/s]
Loading forest: 100%|██████████| 473/473 [00:10<00:00, 45.69it/s]
Loading glacier: 100%|██████████| 549/549 [00:11<00:00, 46.03it/s]
Loading mountain: 100%|██████████| 523/523 [00:10<00:00, 48.35it/s]
Loading sea: 100%|██████████| 510/510 [00:10<00:00, 46.78it/s]
Loading street: 100%|██████████| 501/501 [00:10<00:00, 46.27it/s]


In [12]:
# Random forest over the HOG descriptors.
# random_state pins the seed used for bootstrapping and feature sampling.
# n_jobs=-1 builds the trees in parallel on all available cores, which cuts
# the training time on this wide feature matrix.
RF1 = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
RF1.fit(X_train, y_train)