In [38]:
# use GLCM from skimage.feature
import numpy as np
import matplotlib.pyplot as plt
from skimage.feature import greycomatrix, greycoprops
from skimage import data
from skimage import io
import os
# PCA from sklearn
from sklearn.decomposition import PCA
# train test split from sklearn
from sklearn.model_selection import train_test_split
# lazy predict
from lazypredict.Supervised import LazyClassifier
# brightness correction
from skimage import exposure
# HOG
from skimage.feature import hog, local_binary_pattern
# resize
from skimage.transform import resize

# score report
from sklearn.metrics import classification_report


In [5]:
def get_patches(image, n = 10):
    # width and height of the image
    w, h = image.shape
    # patch size = w/n, h/n
    patches = []
    for i in range(n):
        for j in range(n):
            patch = image[int(i*w/n):int((i+1)*w/n), int(j*h/n):int((j+1)*h/n)]
            patches.append(patch)
    return patches

In [6]:
def glcm_properties(image, distances, angles, properties, n = 8):
    w, h = image.shape

    # make the image intensity to be integer from 0 to 255 instead of float from 0 to 1
    image_int = (image*255).astype('uint8')

    patches = get_patches(image_int, n)
    # calculate the GLCM for each patch
    glcm = [greycomatrix(patch, distances, angles, normed=True, symmetric=True) for patch in patches]

    # calculate the properties for each patch
    property_values = [np.hstack([greycoprops(g, prop).ravel() for prop in properties]) for g in glcm]

    # make property_values a numpy array
    property_values = np.array(property_values)

    # flatten the array
    property_values = property_values.flatten()

    return property_values


In [7]:
def get_features(image):
    # brightness correction
    image_corr = exposure.equalize_adapthist(image, clip_limit=0.03)

    # scale image to 400x300
    image_corr = resize(image_corr, (320, 400), anti_aliasing=True)

    # HOG
    fd= hog(image_corr, orientations=8, pixels_per_cell=(40, 50),
                        cells_per_block=(1, 1), multichannel=False)

    # GLCM
    distances = [1, 3]
    angles = [0, np.pi/4, np.pi/2]
    properties = ['energy', 'homogeneity', 'contrast']

    glcm = glcm_properties(image_corr, distances, angles, properties, n = 8)

    # concatenate HOG and GLCM
    features = np.concatenate((fd, glcm))

    return features



In [8]:
features_flattened_2 = []
label_2 = []

In [9]:

for i in range(0, 461):
    image = io.imread('flooded/'+str(i)+'.jpg', as_gray=True)
    features = get_features(image)
    features_flattened_2.append(features)
    label_2.append(1)


In [10]:
for i in range(578, 1039):
    image = io.imread('non-flooded/'+str(i)+'.jpg', as_gray=True)
    features = get_features(image)
    features_flattened_2.append(features)
    label_2.append(0)

In [11]:
features_flattened_2_np = np.array(features_flattened_2)
print(features_flattened_2_np.shape)

(922, 1664)


In [13]:
features_np = np.reshape(features_flattened_2_np, (len(features_flattened_2), 1664))
print(features_np.shape)

(922, 1664)


In [14]:
label_2_np = np.array(label_2)
print(label_2_np.shape)

(922,)


In [15]:
# save features and label to npy files
np.save('features_2.npy', features_np)
np.save('label_2.npy', label_2_np)

In [16]:
# shuffle the features and labels
indices = np.arange(len(label_2))
np.random.shuffle(indices)
features_np_shuffled = features_np[indices]
label_np_shuffled = label_2_np[indices]

In [17]:
# split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features_np_shuffled, label_np_shuffled, test_size=0.2, random_state=42)

In [18]:
# lazy predict classifier
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

100%|██████████| 29/29 [00:33<00:00,  1.16s/it]


In [19]:
models.head(20)

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
SVC,0.92,0.92,0.92,0.92,0.45
NuSVC,0.91,0.91,0.91,0.91,0.48
LGBMClassifier,0.9,0.9,0.9,0.9,5.2
RandomForestClassifier,0.9,0.9,0.9,0.9,2.11
ExtraTreesClassifier,0.89,0.89,0.89,0.89,0.45
LogisticRegression,0.89,0.89,0.89,0.89,0.2
SGDClassifier,0.88,0.88,0.88,0.88,0.13
XGBClassifier,0.88,0.88,0.88,0.88,3.18
CalibratedClassifierCV,0.88,0.87,0.87,0.88,1.99
PassiveAggressiveClassifier,0.86,0.86,0.86,0.86,0.18


In [137]:
from sklearn.svm import SVC, NuSVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
# LGBMClassifier
from lightgbm import LGBMClassifier
# grid search for SVM
from sklearn.model_selection import GridSearchCV
# hard voting of classifiers
from sklearn.ensemble import VotingClassifier
# logistic regression for classifier
from sklearn.linear_model import LogisticRegression
# SGDClassifier
from sklearn.linear_model import SGDClassifier
# XGBClassifier
from xgboost import XGBClassifier

In [112]:
# train SVC classifier 

clf = SVC(kernel='rbf', C=10, gamma="scale")
clf.fit(X_train, y_train)

# predict the test set
y_pred = clf.predict(X_test)

# print the classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.79      0.85      0.82        95
           1       0.83      0.77      0.80        90

    accuracy                           0.81       185
   macro avg       0.81      0.81      0.81       185
weighted avg       0.81      0.81      0.81       185



In [135]:
# random forest classifier
clf = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=0)
clf.fit(X_train, y_train)

# predict the test set
y_pred = clf.predict(X_test)

# print the classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91        95
           1       0.90      0.91      0.91        90

    accuracy                           0.91       185
   macro avg       0.91      0.91      0.91       185
weighted avg       0.91      0.91      0.91       185



In [133]:
clf_et = ExtraTreesClassifier(n_estimators=100, max_depth=10, random_state=0)
clf_et.fit(X_train, y_train)

# predict the test set
y_pred = clf_et.predict(X_test)

# print the classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.93      0.89      0.91        95
           1       0.89      0.93      0.91        90

    accuracy                           0.91       185
   macro avg       0.91      0.91      0.91       185
weighted avg       0.91      0.91      0.91       185



In [132]:
# LGBMClassifier
clf = LGBMClassifier(max_depth=10, n_estimators=200, learning_rate=0.2, random_state=0)
clf.fit(X_train, y_train)

# predict the test set
y_pred = clf.predict(X_test)

# print the classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.92      0.89      0.91        95
           1       0.89      0.92      0.91        90

    accuracy                           0.91       185
   macro avg       0.91      0.91      0.91       185
weighted avg       0.91      0.91      0.91       185



In [98]:
# NU SVC
clf = NuSVC(nu=0.01, kernel='rbf', gamma='scale')
clf.fit(X_train, y_train)

# predict the test set
y_pred = clf.predict(X_test)

# print the classification report
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.85      0.83      0.84        95
           1       0.83      0.84      0.84        90

    accuracy                           0.84       185
   macro avg       0.84      0.84      0.84       185
weighted avg       0.84      0.84      0.84       185



In [147]:
# SGDClassifier
clf = SGDClassifier(loss="log_loss", penalty="elasticnet", random_state=0)
clf.fit(X_train, y_train)

# predict the test set
y_pred = clf.predict(X_test)

# print the classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.73      0.69      0.71        95
           1       0.69      0.73      0.71        90

    accuracy                           0.71       185
   macro avg       0.71      0.71      0.71       185
weighted avg       0.71      0.71      0.71       185



In [152]:
# XGBClassifier
clf = XGBClassifier(max_depth=10, n_estimators=100, learning_rate=0.1, random_state=0)
clf.fit(X_train, y_train)

# predict the test set
y_pred = clf.predict(X_test)

# print the classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.90      0.87      0.89        95
           1       0.87      0.90      0.89        90

    accuracy                           0.89       185
   macro avg       0.89      0.89      0.89       185
weighted avg       0.89      0.89      0.89       185



In [154]:
#VotingClassifier
clf_rf = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=0)
clf_et = ExtraTreesClassifier(n_estimators=100, max_depth=10, random_state=0)
clf_lgbm = LGBMClassifier(max_depth=10, n_estimators=200, learning_rate=0.2, random_state=0)

clf = VotingClassifier(estimators=[('rf', clf_rf), ('et', clf_et), ('lgbm', clf_lgbm)], voting='hard')

clf.fit(X_train, y_train)

# predict the test set
y_pred = clf.predict(X_test)

# print the classification report
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.95      0.91      0.92        95
           1       0.90      0.94      0.92        90

    accuracy                           0.92       185
   macro avg       0.92      0.92      0.92       185
weighted avg       0.93      0.92      0.92       185



In [87]:
# get parameters for the best 

0.9245614035087719
