In [3]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
from skimage.feature import hog, local_binary_pattern
from typing import List, Tuple

## Pre-Processing

In [4]:
def process_images(folder_path: str) -> List:
    """
    Load every readable image of a folder, resize it to 224x224 and convert it to grayscale.

    Directory entries that OpenCV cannot decode (``cv2.imread`` returns ``None``
    for them, e.g. hidden system files or sub-directories) are skipped instead of
    crashing the resize step.
    :param folder_path: path of the folder that contains the images
    :return: list of processed images, one per readable entry, in sorted file-name order
    """
    processed = []
    # sort the listing so the order of the result does not depend on the file system
    for file_name in sorted(os.listdir(folder_path)):
        image = cv2.imread(os.path.join(folder_path, file_name))
        if image is None:
            # not a decodable image -> nothing to resize or convert
            continue
        # resize first, then convert the BGR image to grayscale
        image = cv2.resize(image, (224, 224))
        processed.append(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    return processed


def extract_HOG_features(images: List) -> List:
    """
    Compute the HOG features of every image.

    Each image is passed to ``hog`` with 8 orientations, 16x16-pixel cells and
    1x1-cell blocks.
    :param images: list of images
    :return: list with the HOG features of each image, in input order
    """
    hog_settings = dict(
        orientations=8,
        pixels_per_cell=(16, 16),
        cells_per_block=(1, 1),
    )
    return [hog(picture, **hog_settings) for picture in images]

def extract_LBP_features(images: List, radius: int = 1) -> List:
    """
    Describe each image by a normalized histogram of its uniform LBP codes.
    :param images: list of grayscale images
    :param radius: LBP radius; ``8 * radius`` sampling points are used
        (defaults to 1, the value that used to be hard-coded)
    :return: list with one histogram per image; each histogram is a float numpy
        array with ``8 * radius + 2`` bins whose entries sum to (almost exactly) 1
    """
    n_points = 8 * radius  # number of LBP sampling points
    # one unit-wide bin per code in 0 .. n_points + 1; identical for every image
    bin_edges = np.arange(0, n_points + 3)
    hists = []
    for image in images:
        lbp = local_binary_pattern(image, n_points, radius, method='uniform')
        # histogram of the LBP codes of this image
        hist, _ = np.histogram(lbp.ravel(), bins=bin_edges, range=(0, n_points + 2))
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-7)  # normalize; the epsilon avoids a division by zero
        hists.append(hist)
    return hists


In [5]:
# Load images and corresponding labels
def load_images_and_labels():
    """
    Load the flooded / non-flooded training images as 512x512 grayscale images.

    Files ``0.jpg`` .. ``364.jpg`` are read from the ``flooded`` folder (label 0)
    and ``578.jpg`` .. ``942.jpg`` from the ``non-flooded`` folder (label 1).
    NOTE(review): this encoding (flooded = 0) is the opposite of the
    ``flooded = 1`` encoding used by the train/validation split cell of this
    notebook -- confirm which one is intended before mixing both code paths.
    :return: tuple ``(images, labels)`` of two parallel lists
    :raises OSError: if an expected image cannot be read
    """
    base_dir = '/content/drive/MyDrive/Satellite_Imagery/dataset/train'
    # (sub-folder, file indices, class label)
    sources = [
        ('flooded', range(365), 0),
        ('non-flooded', range(578, 943), 1),
    ]
    images = []
    labels = []
    for folder, indices, label in sources:
        for i in indices:
            path = f'{base_dir}/{folder}/{i}.jpg'
            img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread reports failure by returning None; fail with the
                # offending path instead of an opaque error inside cv2.resize
                raise OSError(f'Could not read image: {path}')
            images.append(cv2.resize(img, (512, 512)))
            labels.append(label)
    return images, labels

# Extract SIFT features from images
def extract_SIFT_features(images, max_descriptors=100):
    """
    Detect SIFT keypoints and compute their descriptors for every image.

    When an image yields more than ``max_descriptors`` keypoints, only the first
    ``max_descriptors`` keypoints and descriptors are kept; otherwise the detector
    output is stored untouched.
    :param images: list of images
    :param max_descriptors: maximum number of keypoints/descriptors kept per image
    :return: tuple ``(keypoints_list, descriptors_list)`` with one entry per image
    """
    detector = cv2.SIFT_create()
    all_keypoints, all_descriptors = [], []
    for picture in images:
        kps, descs = detector.detectAndCompute(picture, None)
        too_many = len(kps) > max_descriptors
        # slice only when the cap is exceeded, exactly one decision for both outputs
        all_keypoints.append(kps[:max_descriptors] if too_many else kps)
        all_descriptors.append(descs[:max_descriptors] if too_many else descs)
    return all_keypoints, all_descriptors


# Flatten descriptors and prepare feature matrix
def prepare_feature_matrix(descriptors_list, limit=300):
    """
    Flatten the descriptors of each image into a fixed-length feature vector.

    Every returned vector has exactly 12800 entries (presumably 100 SIFT
    descriptors x 128 values -- confirm against the extractor in use): shorter
    vectors are zero-padded at the end, longer ones are truncated, and an image
    without descriptors (``None``) becomes an all-zero vector, so the result can
    always be stacked into a rectangular matrix.
    :param descriptors_list: list with one descriptor array (or ``None``) per image
    :param limit: unused; kept only so that existing callers keep working
    :return: list of 1-D numpy arrays of length 12800
    """
    feature_length = 12800
    features = []
    for descriptor in descriptors_list:
        if descriptor is None:
            # no descriptors for this image; float32 is assumed to match the
            # dtype of real descriptors -- TODO confirm
            flat = np.zeros(feature_length, dtype=np.float32)
        else:
            flat = np.asarray(descriptor).flatten()[:feature_length]
            if len(flat) < feature_length:
                flat = np.pad(flat, (0, feature_length - len(flat)), 'constant')
        features.append(flat)
    return features

## Building model pipeline

In [6]:
def model_pipeline(X_train, Y_train, X_valid, Y_valid, model):
    """
    Fit a model, score it on the validation data and display its confusion matrix.
    :param X_train: training features
    :param Y_train: training labels
    :param X_valid: validation features
    :param Y_valid: validation labels
    :param model: estimator exposing ``fit``, ``score`` and ``predict``
    :return: tuple ``(model, report, train_acc, val_acc, weighted_f1)`` -- the fitted
        model, the classification report of the validation predictions, the score
        on the training data, the validation accuracy and the weighted F1 score
    """
    model.fit(X_train, Y_train)
    train_acc = model.score(X_train, Y_train)

    y_pred = model.predict(X_valid)
    metrics = sklearn.metrics
    val_acc = metrics.accuracy_score(Y_valid, y_pred)
    display = ConfusionMatrixDisplay(
        confusion_matrix=confusion_matrix(Y_valid, y_pred))
    report = metrics.classification_report(Y_valid, y_pred)

    # render the confusion matrix of the validation predictions
    display.plot()
    plt.show()

    return model, report, train_acc, val_acc, f1_score(
        Y_valid, y_pred, average='weighted')


## Grid Search for hyperparameter tuning

In [7]:
def grid_search(X_train, Y_train, X_validation, Y_validation, algorithm='gbtree'):
    """
    Grid-search ``n_estimators`` and ``eta`` of an XGBClassifier and plot the scores.

    The search is fitted and evaluated through ``model_pipeline``; afterwards the
    mean cross-validated score of every candidate is drawn as a heat map
    (rows: ``n_estimators``, columns: ``eta``) and the best parameters are printed.
    :param X_train: training features
    :param Y_train: training labels
    :param X_validation: validation features
    :param Y_validation: validation labels
    :param algorithm: XGBoost booster to tune (for example 'gbtree' or 'gblinear')
    :return: the fitted ``GridSearchCV`` object as returned by ``model_pipeline``
    """
    param_grid = {
        'n_estimators': [5, 50, 100, 200],
        'eta': [1, 0.1, 0.01, 0.001],
        'booster': [algorithm],
    }

    grid = GridSearchCV(XGBClassifier(), param_grid, refit=True, verbose=0)
    model = model_pipeline(
        X_train, Y_train, X_validation, Y_validation, grid)[0]

    n_estimators_values = param_grid['n_estimators']
    eta_values = param_grid['eta']
    row_of = {value: row for row, value in enumerate(n_estimators_values)}
    col_of = {value: col for col, value in enumerate(eta_values)}

    # Place every score by the parameters it belongs to instead of reshaping the
    # flat score array: GridSearchCV enumerates candidates with the parameter
    # names sorted ('eta' before 'n_estimators'), so a plain
    # reshape(n_estimators, eta) would silently swap the two axes.
    scores = np.full((len(n_estimators_values), len(eta_values)), np.nan)
    results = grid.cv_results_
    for params, score in zip(results['params'], results['mean_test_score']):
        scores[row_of[params['n_estimators']], col_of[params['eta']]] = score

    plt.imshow(scores, interpolation='nearest', cmap=plt.cm.hot)
    plt.xlabel('eta')
    plt.ylabel('n_estimators')
    plt.colorbar()
    plt.xticks(np.arange(len(eta_values)), eta_values, rotation=45)
    plt.yticks(np.arange(len(n_estimators_values)), n_estimators_values)
    plt.title('Validation accuracy')
    plt.show()

    print(grid.best_params_)
    return model


In [None]:
# preprocess the images: the flooded folder first, then the non-flooded one
images = (
    process_images(
        '/content/drive/MyDrive/Satellite_Imagery/dataset/train/flooded')
    + process_images(
        '/content/drive/MyDrive/Satellite_Imagery/dataset/train/non-flooded')
)


In [None]:
# extract LBP features: one normalized LBP histogram per preprocessed image,
# in the same order as `images`
LBP_features = extract_LBP_features(images)

In [None]:
# split the data into train and validation
from sklearn.model_selection import train_test_split

flooded: int = 1
non_flooded: int = 0
# `images` (and therefore `LBP_features`) holds the flooded images first,
# followed by the non-flooded ones.
# NOTE(review): assumes each folder contributes exactly 365 images -- confirm
labels = [flooded] * 365 + [non_flooded] * 365
# fixed seed so the split, and every score computed from it, is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    LBP_features, labels, test_size=0.2, random_state=42)

In [None]:
# baseline: an XGBClassifier with default settings, fitted on the training
# split and evaluated on the held-out split
model, report, train_acc, val_acc, weighted_f1 = model_pipeline(
    X_train, y_train, X_test, y_test, XGBClassifier())
# validation accuracy first, weighted F1 score second (one value per line)
print(val_acc, weighted_f1, sep='\n')

In [None]:
# one tuned model per XGBoost booster type; entries stay None until tuned
boosters = dict.fromkeys(('gbtree', 'gblinear'))
for booster in boosters:
    boosters[booster] = grid_search(
        X_train, y_train, X_test, y_test, algorithm=booster)