In [1]:
import cv2
import os
from typing import List, Tuple
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from skimage.feature import hog


In [48]:
def process_images(folder_path: str) -> List:
    """
    Process images from the given folder path and return a list of processed images.
    :param folder_path: folder path
    :return: list of processed images
    """
    # get all the images from the folder
    images = [cv2.imread(folder_path + '/' + image)
              for image in os.listdir(folder_path)]
    # resize the images
    images = [cv2.resize(image, (224, 224)) for image in images]
    # convert the images to grayscale
    images = [cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in images]
    return images


def extract_features(images: List) -> List:
    """
    Extract features from the given images and return a list of features.
    :param images: list of images
    :return: list of features
    """
    # create a list to store the features
    features = []
    # loop through the images
    for image in images:
        # extract the features
        hog_features = hog(image, orientations=8, pixels_per_cell=(
            16, 16), cells_per_block=(1, 1))  # Extract HOG features
        # append the features to the features list
        features.append(hog_features)
    return features


In [33]:
# preprocess the images
images = process_images(
    '/content/drive/MyDrive/Satellite_Imagery/dataset/train/flooded')
images += process_images(
    '/content/drive/MyDrive/Satellite_Imagery/dataset/train/non-flooded')


In [49]:
# extract the features
features = extract_features(images)


In [50]:
# split the data into train and validation
flooded: int = 1
non_flooded: int = -1
X_train, X_test, y_train, y_test = train_test_split(
    features, [flooded] * 365 + [non_flooded] * 365, test_size=0.2)


In [51]:
# train the model
model = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=1)
model.fit(X_train, y_train)


In [52]:
# predict the test data
y_pred = model.predict(X_test)
# calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy: %.2f' % (accuracy * 100))


Accuracy: 75.34


In [53]:
# get the classification report
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

          -1       0.88      0.63      0.73        78
           1       0.68      0.90      0.77        68

    accuracy                           0.75       146
   macro avg       0.78      0.76      0.75       146
weighted avg       0.78      0.75      0.75       146



In [54]:
# preprocess the validation images
validation_images = process_images(
    '/content/drive/MyDrive/Satellite_Imagery/dataset/validation/flooded')
# preprocess the images
validation_images += process_images(
    '/content/drive/MyDrive/Satellite_Imagery/dataset/validation/non-flooded')
validation_features = extract_features(validation_images)


In [57]:
labels = [flooded]*95 + [non_flooded]*95
val = model.predict(validation_features)
print(classification_report(labels, val))


              precision    recall  f1-score   support

          -1       0.84      0.72      0.77        95
           1       0.75      0.86      0.80        95

    accuracy                           0.79       190
   macro avg       0.80      0.79      0.79       190
weighted avg       0.80      0.79      0.79       190



In [None]:
# preprocess the validation images
validation_images = process_images(
    '/content/drive/MyDrive/Satellite_Imagery/dataset/validation/flooded')
# preprocess the images
validation_images += process_images(
    '/content/drive/MyDrive/Satellite_Imagery/dataset/validation/non-flooded')
validation_features = extract_features(validation_images)


In [None]:
labels = [flooded]*95 + [non_flooded]*95
val = model.predict(validation_features)
print(classification_report(labels, val))


In [None]:
import joblib
joblib.dump(model, 'randomforest.joblib')
