In [7]:
#Imports 
import cv2
import numpy as np
import math
import glob
import time
import os
from os import listdir
from os.path import isfile, join
import matplotlib.pyplot as plt
from tqdm import tqdm
from feature_extraction import *
from PIL import Image, ImageEnhance

%matplotlib inline

In [2]:
# extract features from images
is_load = False
contours = []
labels   = []  # 1 for males  , 0 for females

if ( not is_load):
    # read male images
    flooded_path   = 'dataset_resized/flooded'
    flooded_files   = [ f for f in listdir(flooded_path) if isfile(join(flooded_path,f)) ]
    for i in tqdm(range(0, len(flooded_files))):
        img  = Image.open(join(flooded_path,flooded_files[i]))
        img, _  = preprocess_image( img )
        contour = get_contour_pixels(img)
        contours.append(contour)
        labels.append(1)
        
    ## read female images
    non_flooded_path = 'dataset_resized/non-flooded'
    non_flooded_files = [ f for f in listdir(non_flooded_path) if isfile(join(non_flooded_path,f)) ]
    for i in tqdm(range(0, len(non_flooded_files))):
        img  = Image.open( join(non_flooded_path,non_flooded_files[i]))
        img, _  = preprocess_image( img )
        contour = get_contour_pixels(img)
        contours.append(contour)
        labels.append(0)

    contours = np.asarray(contours , dtype= object)
    labels   = np.asarray(labels   , dtype= int )
    #save lables to dataset
    np.save('features/labels.npy', labels)

100%|██████████| 461/461 [00:19<00:00, 23.88it/s]
100%|██████████| 461/461 [00:25<00:00, 18.11it/s]


In [3]:
hinge_features = []
cold_features  = []
if(not is_load):
    for i in tqdm(range( len(contours)) ):
        feature  = get_hinge_features( contours[i] )
        hinge_features.append(feature)  
    hinge_features = np.asarray(hinge_features , dtype=object)
    np.save('features/hinge_features.npy', hinge_features)

    for i in tqdm(range( len(contours)) ):
        feature  = get_cold_features( contours[i] )
        cold_features.append(feature)  
    cold_features = np.asarray(cold_features , dtype=object)
    np.save('features/cold_features.npy', cold_features)

100%|██████████| 922/922 [03:34<00:00,  4.30it/s]
  rhos_log_space = np.log10(rhos)
 63%|██████▎   | 585/922 [14:30<08:21,  1.49s/it]


KeyboardInterrupt: 

In [4]:
if(is_load):
    hinge_features = np.load('features/hinge_features.npy' , allow_pickle= True)
    cold_features = np.load('features/cold_features.npy' ,  allow_pickle= True)
    labels = np.load('features/labels.npy' , allow_pickle= True)

FileNotFoundError: [Errno 2] No such file or directory: 'features/hinge_features.npy'

## concate features

In [5]:
# concatenate features in one flattened array
features = np.concatenate( (hinge_features, cold_features) , axis=1)
cold_features.shape

(922, 420)

In [6]:
# Split dataset into training set and test set
from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split( hinge_features , labels , test_size=0.2 ,  random_state=109) # 80% training and 20% test
# X_train, X_test, y_train, y_test = train_test_split( hinge_features , labels , test_size=0.2 ,  random_state=50)
X_train, X_test, y_train, y_test = train_test_split( features , labels , test_size=0.2 ,  random_state=175)

In [7]:
from sklearn.preprocessing import StandardScaler

# Create a based model
scaler = StandardScaler()
X_train  = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)


from sklearn.decomposition import PCA

pca = PCA(n_components=40)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)


## SVM

In [8]:
#Import svm model
from sklearn import svm
from sklearn import metrics
from sklearn.metrics import classification_report

#Create a svm Classifier
t0 = time.time()
SVM_clf = svm.SVC(kernel= "linear" ) # Linear Kernel

#Train the model using the training sets
SVM_clf.fit(X_train, y_train)

#Predict the response for test dataset
y_pred = SVM_clf.predict(X_test)
t1 = time.time()
# clf.score(X_test, y_test)


print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.8486486486486486
              precision    recall  f1-score   support

           0       0.78      0.86      0.82        72
           1       0.90      0.84      0.87       113

    accuracy                           0.85       185
   macro avg       0.84      0.85      0.84       185
weighted avg       0.85      0.85      0.85       185



## Random forest tree


In [9]:
#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import GridSearchCV

param_grid = {
    'max_depth': [10, 50, 100],
    'min_samples_leaf': [1, 2,5],
    'min_samples_split': [ 2, 5],
    'n_estimators': [10,20, 100, 200]
}




# Instantiate the grid search model
grid_search = GridSearchCV(estimator = RandomForestClassifier(random_state=42), param_grid = param_grid, cv = 3, n_jobs = -1, verbose = 1)

# Fit the grid search to the data
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)
print(grid_search.best_score_)

y_pred = grid_search.predict(X_test)

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Fitting 3 folds for each of 72 candidates, totalling 216 fits
{'max_depth': 50, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 200}
0.8629721807422156
Accuracy: 0.9027027027027027
              precision    recall  f1-score   support

           0       0.86      0.89      0.88        72
           1       0.93      0.91      0.92       113

    accuracy                           0.90       185
   macro avg       0.90      0.90      0.90       185
weighted avg       0.90      0.90      0.90       185



In [10]:
from sklearn.ensemble import GradientBoostingClassifier

# params for GradientBoostingClassifier
param_grid = {
    # 'max_depth': [10, 50, 100],
    # 'min_samples_leaf': [1, 2,5],
    # 'min_samples_split': [ 2, 5],
    'n_estimators': [ 200 , 500 , 1000]

}




# Instantiate the grid search model
grid_search =GridSearchCV(estimator =  GradientBoostingClassifier(random_state=42), param_grid = param_grid, cv = 3, n_jobs = -1, verbose = 1)

# Fit the grid search to the data
grid_search.fit(X_train, y_train)


y_pred = grid_search.predict(X_test)

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Fitting 3 folds for each of 3 candidates, totalling 9 fits
Accuracy: 0.8918918918918919
              precision    recall  f1-score   support

           0       0.86      0.86      0.86        72
           1       0.91      0.91      0.91       113

    accuracy                           0.89       185
   macro avg       0.89      0.89      0.89       185
weighted avg       0.89      0.89      0.89       185



In [11]:
import pickle

# save the model to disk
filename = 'rfc.pkl'
pickle.dump(grid_search, open(filename, 'wb'))

