# Imports

In [147]:
import zipfile
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from IPython.core.pylabtools import figsize
from sklearn.model_selection import train_test_split
import torch
import torchvision
from torch.utils.data import TensorDataset, DataLoader,Dataset
from torchvision import datasets, transforms,models
import torch.nn as nn
from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import lr_scheduler
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import calinski_harabasz_score
from torchvision.datasets import ImageFolder
import albumentations
from sklearn.metrics import f1_score

In [148]:
# Mount to Google Drive
from google.colab import drive
drive.mount('/content/drive')

# change directory to the folder where the data is stored
%cd /content/drive/My Drive/SAT/project

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/SAT/project


# Helpers

In [149]:
def extract_file(zip_path,specified_directory):
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(specified_directory)

In [150]:
def read_images(flooded_path, non_flooded_path, width, height):
    flooded = np.empty((0, width, height, 3))
    non_flooded = np.empty((0, width, height, 3))

    for filename in os.listdir(flooded_path):
        img = cv2.imread(os.path.join(flooded_path, filename), cv2.IMREAD_COLOR)
        img = cv2.resize(img, (width, height)) 
        img = img.astype('float32') / 255.0 
        img = np.expand_dims(img, axis=0)
        flooded = np.concatenate((flooded, img), axis=0)

    for filename in os.listdir(non_flooded_path):
        img = cv2.imread(os.path.join(non_flooded_path, filename), cv2.IMREAD_COLOR)
        img = cv2.resize(img, (width, height)) 
        img = img.astype('float32') / 255.0  
        img = np.expand_dims(img, axis=0) 
        non_flooded = np.concatenate((non_flooded, img), axis=0)
    return flooded,non_flooded

In [151]:
def show_img(img,title=""):
   plt.imshow(img)
   plt.title(title)
   plt.show()

In [152]:
def show_images(images,images_labels):
  nrows = 4
  ncols = 4
  fig, ax = plt.subplots(nrows,ncols,figsize = (10,10))
  ax = ax.flatten()
  for i in range(nrows*ncols):
      pic = images[i%8]
      ax[i].imshow(pic)
      ax[i].set_title(images_labels[i%8])
      ax[i].set_axis_off()
  plt.show()

# Constants

In [153]:
CFG = {
    'img_size': 256,
    'epochs': 10,
    'train_bs': 16,
    'val_bs': 32,
    'test_bs': 32,
    'lr': 1e-4,
    'freeze': False,
    'out_features': 1,
    'dataset_path': 'dataset/dataset',
    'flooded_path': 'dataset/dataset/flooded',
    'non_flooded_path': 'dataset/dataset/non-flooded',
    'val_ratio': 0.4,
    }

# Load the data

In [154]:
# extract_file("./dataset.zip","./dataset")

In [155]:
# flooded, non_flooded = read_images(CFG['flooded_path'], CFG['non_flooded_path'], CFG['img_size'], CFG['img_size'])

In [156]:
# print("flooded images shapes",flooded.shape)
# print("non_flooded images shapes",non_flooded.shape)

In [157]:
# X = np.concatenate((flooded, non_flooded), axis=0)
# Y = np.concatenate((np.ones(flooded.shape[0]), np.zeros(non_flooded.shape[0])), axis=0)

# Split the dataset

In [158]:
# # Save the array to a file
# np.save('X.npy', X)
# # Save the array to a file
# np.save('Y.npy', Y)

In [159]:
# Load the array from the file
X = np.load('X.npy')
# Load the array from the file
Y = np.load('Y.npy')

In [160]:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=CFG['val_ratio'], stratify=Y, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_val, Y_val, test_size=0.5, stratify=Y_val, random_state=42)

In [161]:
  print(X_train.shape, Y_train.shape)  
  print(X_val.shape, Y_val.shape)
  print(X_test.shape, Y_test.shape)

(553, 256, 256, 3) (553,)
(184, 256, 256, 3) (184,)
(185, 256, 256, 3) (185,)


# Augmentation

In [162]:
# train_aug = albumentations.Compose([
#             albumentations.RandomResizedCrop(256, 256),
#             albumentations.Transpose(p=0.5),
#             albumentations.HorizontalFlip(p=0.5),
#             albumentations.VerticalFlip(p=0.5),
#             albumentations.ShiftScaleRotate(p=0.5),
#             albumentations.HueSaturationValue(
#                 hue_shift_limit=0.2, 
#                 sat_shift_limit=0.2, 
#                 val_shift_limit=0.2, 
#                 p=0.5
#             ),
#             albumentations.RandomBrightnessContrast(
#                 brightness_limit=(-0.1,0.1), 
#                 contrast_limit=(-0.1, 0.1), 
#                 p=0.5
#             ),
#             # albumentations.Normalize(
#             #     mean=[0.485, 0.456, 0.406], 
#             #     std=[0.229, 0.224, 0.225], 
#             #     max_pixel_value=255.0, 
#             #     p=1.0
#             # ),
#             albumentations.CoarseDropout(p=0.5),
#             albumentations.Cutout(p=0.5)
#             ], p=1.)
  
        
# val_aug = albumentations.Compose([
#             albumentations.CenterCrop(256, 256, p=1.),
#             albumentations.Resize(256, 256),
#             # albumentations.Normalize(
#             #     mean=[0.485, 0.456, 0.406], 
#             #     std=[0.229, 0.224, 0.225], 
#             #     max_pixel_value=255.0, 
#             #     p=1.0
#             # )
#             ], p=1.)

In [163]:
train_aug = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.RandomRotation(degrees=(-45, 45)),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            )
        ])

val_aug = None

# Dataset and DataLoader

In [164]:
# class FloodDataset(Dataset):
#     def __init__(self, X, Y, transform=None):
#         self.X = torch.from_numpy(X).float()
#         self.Y = torch.from_numpy(Y).float()
#         # self.X = X
#         # self.Y = Y
#         self.transform = transform
    
#     def __len__(self):
#         return len(self.X)

#     def __getitem__(self, idx):
#         x = self.X[idx]
#         y = self.Y[idx]
#         if self.transform:
#             x = self.transform(x.permute(2, 0, 1))
#             # x = self.transform(image=x)['image']
#         # x = torch.from_numpy(x).float()
#         # y = torch.tensor(y).float()
#         return x, y

In [165]:
# train_dataset = FloodDataset(X_train, Y_train, train_aug)
# val_dataset = FloodDataset(X_val, Y_val, val_aug)
# # test_dataset = FloodDataset(X_test, Y_test, None)

In [166]:
X_train_tensor = torch.from_numpy(X_train).float()
Y_train_tensor = torch.from_numpy(Y_train).float()
X_val_tensor = torch.from_numpy(X_val).float()
Y_val_tensor = torch.from_numpy(Y_val).float()
X_test_tensor = torch.from_numpy(X_test).float()
Y_test_tensor = torch.from_numpy(Y_test).float()

In [167]:
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
train_dataset.transforms = train_aug

val_dataset = TensorDataset(X_val_tensor, Y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)

In [168]:
  train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=CFG['train_bs'], shuffle=True)
  val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=CFG['val_bs'], shuffle=True)
  test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=CFG['test_bs'], shuffle=True)

# Model

In [169]:
class Resnet(nn.Module):
    def __init__(self, pretrained=True, freeze=CFG['freeze'], out_features=CFG['out_features']):
        super(Resnet, self).__init__()
        self.model = models.resnet18(pretrained=pretrained)
        self.model.fc = nn.Linear(self.model.fc.in_features, out_features)

        if freeze:
            for param in self.model.parameters():
                param.requires_grad = False
    
    def forward(self, x):
        return self.model(x)

# Train

In [170]:
def train(model, train_loader, optimizer, criterion, device):
    y_true = []
    y_pred = []
    running_loss = 0.0
    for batch_idx, (input, target) in enumerate(tqdm(train_loader)):
            # Get data to cuda if possible
            input = input.permute(0, 3, 1, 2)
            input = input.to(device=device)
            target = target.unsqueeze(1).float()
            target = target.to(device=device)

            # forward
            scores = model(input)
            loss = criterion(scores, target)

            # backward
            optimizer.zero_grad()
            loss.backward()

            running_loss += loss.item()
            # gradient descent or adam step
            optimizer.step()

            predicted = torch.round(torch.sigmoid(scores))
            y_true.extend(target.flatten().tolist())
            y_pred.extend(predicted.flatten().tolist())

    return running_loss/len(train_loader), f1_score(y_true, y_pred, average='macro')

In [171]:
def evaluate(model, val_loader, criterion, device):
    model.eval()
    y_true = []
    y_pred = []
    running_loss = 0.0
    with torch.no_grad():
      for batch_idx, (input, target) in enumerate(tqdm(val_loader)):
            input = input.permute(0, 3, 1, 2)
            input = input.to(device)
            target = target.unsqueeze(1).float()
            target = target.to(device)

            scores = model(input)
            loss = criterion(scores, target)

            running_loss += loss.item()

            predicted = torch.round(torch.sigmoid(scores))
            y_true.extend(target.flatten().tolist())
            y_pred.extend(predicted.flatten().tolist())

    return running_loss/len(val_loader), f1_score(y_true, y_pred, average='macro')

In [172]:
def run(model, train_loader, val_loader, epochs, device):
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=CFG['lr'])

    best_f1 = 0
    for epoch in range(epochs):
        train_loss, train_f1 = train(model, train_loader, optimizer, criterion, device)
        val_loss, val_f1 = evaluate(model, val_loader, criterion, device)
        if val_f1 > best_f1:
            best_f1 = val_f1
            torch.save(model.state_dict(), f"best_resnet18.pt")
        print(f"    Training loss: {train_loss} Training macro f1: {100*train_f1:.2f}% Validation loss: {val_loss} Validation macro f1: {100*val_f1:.2f}%")

In [173]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [174]:
  model = Resnet()
  model = model.to(device)
  acc = run(model, train_loader, val_loader, epochs=20, device=device)

100%|██████████| 35/35 [00:02<00:00, 12.90it/s]
100%|██████████| 6/6 [00:00<00:00, 15.25it/s]


    Training loss: 0.24819060316575425 Training macro f1: 92.04% Validation loss: 0.09695737436413765 Validation macro f1: 97.28%


100%|██████████| 35/35 [00:02<00:00, 12.75it/s]
100%|██████████| 6/6 [00:00<00:00, 15.50it/s]


    Training loss: 0.22556061205520694 Training macro f1: 93.31% Validation loss: 0.05898566544055939 Validation macro f1: 98.91%


100%|██████████| 35/35 [00:02<00:00, 12.61it/s]
100%|██████████| 6/6 [00:00<00:00, 15.87it/s]


    Training loss: 0.22142943779804877 Training macro f1: 90.05% Validation loss: 0.1582625893255075 Validation macro f1: 94.00%


100%|██████████| 35/35 [00:02<00:00, 12.26it/s]
100%|██████████| 6/6 [00:00<00:00, 15.96it/s]


    Training loss: 0.03603482555897374 Training macro f1: 99.10% Validation loss: 0.03386334054327259 Validation macro f1: 98.37%


100%|██████████| 35/35 [00:02<00:00, 12.76it/s]
100%|██████████| 6/6 [00:00<00:00, 15.95it/s]


    Training loss: 0.005037260562260469 Training macro f1: 99.82% Validation loss: 0.10964824624049167 Validation macro f1: 97.28%


100%|██████████| 35/35 [00:02<00:00, 12.69it/s]
100%|██████████| 6/6 [00:00<00:00, 15.31it/s]


    Training loss: 0.03154889778067757 Training macro f1: 99.10% Validation loss: 0.07236332601557176 Validation macro f1: 96.19%


100%|██████████| 35/35 [00:02<00:00, 12.49it/s]
100%|██████████| 6/6 [00:00<00:00, 15.74it/s]


    Training loss: 0.04589555152883155 Training macro f1: 98.55% Validation loss: 0.12448070663958788 Validation macro f1: 96.19%


100%|██████████| 35/35 [00:02<00:00, 12.77it/s]
100%|██████████| 6/6 [00:00<00:00, 15.78it/s]


    Training loss: 0.015196281071881198 Training macro f1: 99.82% Validation loss: 0.061035146083061896 Validation macro f1: 97.83%


100%|██████████| 35/35 [00:02<00:00, 12.76it/s]
100%|██████████| 6/6 [00:00<00:00, 15.72it/s]


    Training loss: 0.02272185810096354 Training macro f1: 99.46% Validation loss: 0.2972410172224045 Validation macro f1: 90.68%


100%|██████████| 35/35 [00:02<00:00, 12.70it/s]
100%|██████████| 6/6 [00:00<00:00, 15.55it/s]


    Training loss: 0.02627021613817695 Training macro f1: 99.10% Validation loss: 0.26463715452700853 Validation macro f1: 91.24%


100%|██████████| 35/35 [00:02<00:00, 12.45it/s]
100%|██████████| 6/6 [00:00<00:00, 14.93it/s]


    Training loss: 0.019186255098143843 Training macro f1: 99.64% Validation loss: 0.034388295685251556 Validation macro f1: 98.37%


100%|██████████| 35/35 [00:02<00:00, 12.57it/s]
100%|██████████| 6/6 [00:00<00:00, 15.72it/s]


    Training loss: 0.013799697973549232 Training macro f1: 99.46% Validation loss: 0.0931824103317922 Validation macro f1: 97.28%


100%|██████████| 35/35 [00:02<00:00, 12.69it/s]
100%|██████████| 6/6 [00:00<00:00, 15.52it/s]


    Training loss: 0.00022901500323127622 Training macro f1: 100.00% Validation loss: 0.0522595943948545 Validation macro f1: 97.83%


100%|██████████| 35/35 [00:02<00:00, 12.64it/s]
100%|██████████| 6/6 [00:00<00:00, 15.58it/s]


    Training loss: 6.089128119777243e-05 Training macro f1: 100.00% Validation loss: 0.04922806704416871 Validation macro f1: 97.83%


100%|██████████| 35/35 [00:02<00:00, 12.35it/s]
100%|██████████| 6/6 [00:00<00:00, 14.89it/s]


    Training loss: 3.1168333611982946e-05 Training macro f1: 100.00% Validation loss: 0.04635544405997886 Validation macro f1: 98.37%


100%|██████████| 35/35 [00:02<00:00, 12.55it/s]
100%|██████████| 6/6 [00:00<00:00, 15.46it/s]


    Training loss: 2.3188011163759776e-05 Training macro f1: 100.00% Validation loss: 0.04445327438755461 Validation macro f1: 98.37%


100%|██████████| 35/35 [00:02<00:00, 12.60it/s]
100%|██████████| 6/6 [00:00<00:00, 15.69it/s]


    Training loss: 1.97855359396775e-05 Training macro f1: 100.00% Validation loss: 0.04455397931936508 Validation macro f1: 98.37%


100%|██████████| 35/35 [00:02<00:00, 12.61it/s]
100%|██████████| 6/6 [00:00<00:00, 15.53it/s]


    Training loss: 1.8346669512985566e-05 Training macro f1: 100.00% Validation loss: 0.04425854476100236 Validation macro f1: 98.37%


100%|██████████| 35/35 [00:02<00:00, 12.47it/s]
100%|██████████| 6/6 [00:00<00:00, 14.85it/s]


    Training loss: 1.4488969522931582e-05 Training macro f1: 100.00% Validation loss: 0.04273349430938348 Validation macro f1: 98.37%


100%|██████████| 35/35 [00:02<00:00, 12.52it/s]
100%|██████████| 6/6 [00:00<00:00, 15.43it/s]

    Training loss: 1.3098946326956756e-05 Training macro f1: 100.00% Validation loss: 0.048282538734686874 Validation macro f1: 98.37%





In [175]:
model = Resnet().to(device)
model.load_state_dict(torch.load('best_resnet18.pt'))



<All keys matched successfully>

In [176]:
criterion = nn.BCEWithLogitsLoss()
val_loss, val_f1 = evaluate(model, val_loader, criterion, device)
print(f"Validation loss: {val_loss} Validation macro f1: {100*val_f1:.2f}%")

100%|██████████| 6/6 [00:00<00:00, 15.68it/s]

Validation loss: 0.060799707348148026 Validation macro f1: 98.91%





In [177]:
criterion = nn.BCEWithLogitsLoss()
test_loss, test_f1 = evaluate(model, test_loader, criterion, device)
print(f"test loss: {test_loss} test macro f1: {100*test_f1:.2f}%")

100%|██████████| 6/6 [00:00<00:00, 15.93it/s]

test loss: 0.054245576883355774 test macro f1: 98.92%



