<a href="https://colab.research.google.com/github/AhmadJamal01/Floodead-Inside/blob/main/Machine_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [None]:
import numpy as np
from sklearn.model_selection import cross_validate
from sklearn.svm import SVC
import pandas as pd
import numpy as np
import glob
import os
import cv2
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from osgeo import gdal
import cv2
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score


# Fold count kept as a notebook-level constant. No cell visible in this notebook reads it
# (presumably intended for the imported `cross_validate` -- TODO confirm or remove).
NUM_FOLDS = 10

## Read the Data

In [None]:
# Install the GDAL Python bindings; pip's stdout is redirected to /dev/null to keep the cell quiet.
# NOTE(review): the imports cell above already runs `from osgeo import gdal`, so on a fresh
# kernel that import is attempted before this install -- confirm the intended cell order.
!pip install gdal > /dev/null

In [None]:
import zipfile

import gdown

# Google Drive location of the zipped dataset and the local file it is saved to.
DATASET_URL = "https://drive.google.com/uc?id=1och-QmNa3FAiS-wssgzCwISbmpSezIi_"
DATASET_ARCHIVE = "dataset.zip"

gdown.download(DATASET_URL, DATASET_ARCHIVE, quiet=False)

# The download can leave behind a file that is not the archive (the recorded run of this
# cell fetched only ~75 kB and then failed with BadZipFile). Check before extracting so the
# failure points at the download instead of at the unzip step.
if not zipfile.is_zipfile(DATASET_ARCHIVE):
    raise RuntimeError(
        "'{}' is missing or is not a valid zip archive; the download from {} "
        "probably failed (check the file's sharing permissions / download quota).".format(
            DATASET_ARCHIVE, DATASET_URL
        )
    )

gdown.extractall(DATASET_ARCHIVE)
path = 'dataset/'


Downloading...
From: https://drive.google.com/file/d/1och-QmNa3FAiS-wssgzCwISbmpSezIi_
To: /content/dataset.zip
75.5kB [00:00, 34.8MB/s]


BadZipFile: ignored

## Prepare the Data

In [None]:
# Build the image index: one row per .jpg file, labelled by the folder it was found in.
# The records are collected first and the DataFrame is created once, instead of
# enlarging the frame one row at a time (which copies the frame on every insert).
records = [
    {'image_path': image_path, 'label': label}
    for label in ('flooded', 'non-flooded')
    for image_path in sorted(glob.glob('dataset/{}/*.jpg'.format(label)))
]

# Passing `columns` keeps both columns present even when no image was found.
df = pd.DataFrame(records, columns=['image_path', 'label'])

In [None]:
# Show the first rows of the image index followed by its (rows, columns) shape.
preview_rows = df.head()
dataset_shape = df.shape
print(preview_rows)
print("Dataset shape:", dataset_shape)

### Extract Features

### HOG

In [None]:
def calculate_hog_features(image):
    """Return the HOG descriptor of a BGR image.

    The image is converted to grayscale and passed to skimage's ``hog`` with
    9 orientation bins, 8x8-pixel cells and 2x2-cell blocks.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return hog(
        grayscale,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
    )


### Color-based Features

In [None]:
import cv2
import numpy as np

def calculate_average_color(image):
    """Return the mean of ``image`` taken over its first two axes.

    For a (rows, cols, channels) image this yields one mean value per channel.
    """
    return np.mean(image, axis=(0, 1))

def calculate_color_histogram(image):
    """Return a flattened, normalized joint histogram over the image's three channels.

    Each channel is split into 8 bins over the value range [0, 256), giving
    8 * 8 * 8 entries after flattening.
    """
    channel_indices = [0, 1, 2]
    bins_per_channel = [8, 8, 8]
    value_ranges = [0, 256, 0, 256, 0, 256]
    histogram = cv2.calcHist([image], channel_indices, None, bins_per_channel, value_ranges)
    # The histogram is passed as both source and destination, as in cv2.normalize(src, dst).
    normalized = cv2.normalize(histogram, histogram)
    return normalized.flatten()


### Texture-based Features

In [None]:
from skimage.feature import graycomatrix, graycoprops
from skimage.feature import local_binary_pattern


# Gray-Level Co-occurrence Matrix (GLCM): Computes the distribution of co-occurring pixel values in different directions.
def calculate_glcm_features(image):
    """Return GLCM texture statistics of a BGR image.

    The image is converted to grayscale and a symmetric, normalized co-occurrence
    matrix is built at distance 1 for the angles 0, 45, 90 and 135 degrees.

    Returns:
        A 5-tuple ``(contrast, correlation, energy, homogeneity, dissimilarity)``,
        each value being the mean of that property over the four angles.
    """
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Use the `gray*` spelling for both functions: the cell previously mixed the
    # deprecated `greycomatrix` with the new `graycoprops`, which cannot be imported
    # together on scikit-image releases that dropped the old name.
    glcm = graycomatrix(
        gray_image,
        [1],
        [0, np.pi/4, np.pi/2, 3*np.pi/4],
        levels=256,
        normed=True,
        symmetric=True,
    )
    property_names = ('contrast', 'correlation', 'energy', 'homogeneity', 'dissimilarity')
    return tuple(graycoprops(glcm, name).mean() for name in property_names)

# Local Binary Patterns (LBP): Captures the patterns in the texture of the image.
def calculate_lbp_features(image, num_points=8, radius=1):
    """Return the normalized histogram of uniform Local Binary Pattern codes.

    Args:
        image: BGR image; it is converted to grayscale before the LBP is computed.
        num_points: Number of neighbour points sampled on the circle (default 8).
        radius: Radius of the sampling circle in pixels (default 1).

    Returns:
        A float array with ``num_points + 2`` entries that sum to (almost) 1.
    """
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    lbp = local_binary_pattern(gray_image, num_points, radius, method='uniform')
    # The 'uniform' method produces the integer codes 0 .. num_points + 1, i.e.
    # num_points + 2 distinct values, so num_points + 3 bin edges are needed.
    # With one edge fewer, the last bin is closed on both sides and silently merges
    # the two highest codes into a single count.
    hist, _ = np.histogram(
        lbp.ravel(),
        bins=np.arange(0, num_points + 3),
        range=(0, num_points + 2),
    )
    hist = hist.astype("float")
    # The small epsilon guards against division by zero.
    hist /= (hist.sum() + 1e-7)
    return hist


### Shape-based Features

In [None]:
def calculate_contour_area(image):
    """Return the summed area of the external contours of the Otsu-thresholded image.

    The BGR image is converted to grayscale, binarized with Otsu's threshold and
    only the outermost contours are retrieved.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    threshold_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, mask = cv2.threshold(grayscale, 0, 255, threshold_mode)
    outlines, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return sum(cv2.contourArea(outline) for outline in outlines)

def calculate_aspect_ratio(image):
    """Return the width / height ratio of an image.

    Only the first two entries of ``image.shape`` are read, so both colour
    images (rows, cols, channels) and single-channel images (rows, cols) work.
    """
    height, width = image.shape[:2]
    return width / height


### Run Feature Extraction

In [None]:
# Take only a portion of the dataframe
newLen = 50
X_sampled = df.sample(n=newLen, random_state=42)

# Drop the remaining rows
X_dropped = df.drop(X_sampled.index)

# Verify the shapes of the dataframes
print("Sampled Data Shape:", X_sampled.shape)
print("Dropped Data Shape:", X_dropped.shape)

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler


# Numeric class for each known folder label; any other label is mapped to 0.
LABEL_TO_TARGET = {'flooded': 1, 'non-flooded': -1}

# Per-image feature rows and their target classes, filled in the same order.
features = []
targets = []

# Walk over every image listed in df and extract its features.
for i, (index, row) in enumerate(df.iterrows()):
    image_path = row['image_path']
    image = cv2.imread(image_path)

    # cv2.imread signals an unreadable or missing file by returning None rather than
    # raising; fail here with the offending path instead of deep inside cvtColor.
    if image is None:
        raise FileNotFoundError('Could not read image: {}'.format(image_path))

    # Only the HOG descriptor is used as the feature vector for now.
    hog_features = calculate_hog_features(image)
    features.append([*hog_features])

    targets.append(LABEL_TO_TARGET.get(row['label'], 0))

    # Progress indicator: zero-based position of the image just processed.
    print(i)



In [None]:
# Stack the per-image feature rows into one numeric matrix. Rows of different lengths
# cannot be stacked and make this conversion raise.
feature_matrix = np.asarray(features, dtype=float)
if feature_matrix.ndim != 2 or feature_matrix.shape[1] == 0:
    raise ValueError(
        'Expected a non-empty 2-D feature matrix, got shape {}'.format(feature_matrix.shape)
    )

# One column per feature value. Each row currently holds only the flattened HOG vector,
# so the names are generated from its length instead of being hard-coded (a fixed list
# of names does not match the row width and makes the DataFrame constructor fail).
X_columns = ['hog_{}'.format(k) for k in range(feature_matrix.shape[1])]

# Target labels live in their own frame; X never contains a 'target' column,
# so there is nothing to drop from it.
y = pd.DataFrame(targets, columns=['target'])

# Standardize every feature column.
# NOTE(review): the scaler is fitted on all rows before the train/test split in the next
# section, so held-out rows influence the scaling statistics -- consider fitting on the
# training rows only.
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(feature_matrix), columns=X_columns)


## Split the Data

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## Model

In [None]:
# Seed the classifier like the other random steps in this notebook so that
# repeated runs give the same model.
svm = LinearSVC(random_state=42)

# The targets are stored as a one-column frame; flatten them to the 1-D label vector
# the estimator expects (avoids the column-vector conversion warning).
svm.fit(X_train, np.ravel(y_train))


## Evaluation

In [None]:
# Predict a class for every held-out row with the fitted classifier.
y_pred = svm.predict(X_test)


In [None]:
# Fraction of held-out rows whose predicted class matches the true class.
accuracy = accuracy_score(y_test, y_pred)

# Report it as a percentage with two decimals.
accuracy_percent = accuracy * 100
print('Accuracy: {:.2f}%'.format(accuracy_percent))
