In [1]:
## Useful libraries
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import os
import copy
import pickle
from urllib.request import urlretrieve
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
from sklearn.preprocessing import MinMaxScaler
from matplotlib.colors import TwoSlopeNorm
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

# Additional input
import networkx as nx
from tqdm import tqdm
!pip install torch_geometric
from torch_geometric.data import Data
!pip install perlin-noise
from perlin_noise import PerlinNoise
import random

from cycler import cycler
import seaborn as sns
import time

# Set the color scheme
sns.set_theme()
# Custom palette installed as matplotlib's default color cycle for all later plots
colors = ['#0076C2', '#EC6842', '#A50034', '#009B77', '#FFB81C', '#E03C31', '#6CC24A', '#EF60A3', '#0C2340', '#00B8C8', '#6F1D77']
plt.rcParams['axes.prop_cycle'] = cycler(color=colors)

# Use the GPU when torch reports that CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Collecting torch_geometric
  Downloading torch_geometric-2.4.0-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.4.0
Collecting perlin-noise
  Downloading perlin_noise-1.12-py3-none-any.whl (5.3 kB)
Installing collected packages: perlin-noise
Successfully installed perlin-noise-1.12


In [2]:
def center_grid_graph(dim1, dim2):
    '''
    Build a directed lattice graph for a rectangular grid of dim1 x dim2 cells
    Returns the networkx graph with integer node labels together with a dict
    mapping every integer label to the (x, y) centre of its cell
    ------
    dim1: int
        number of grids in the x direction
    dim2: int
        number of grids in the y direction
    '''
    lattice = nx.grid_2d_graph(dim1, dim2, create_using=nx.DiGraph)

    pos = {}
    mapping = {}
    for idx, node in enumerate(lattice.nodes()):
        x, y = node
        # cell centres sit half a cell away from the cell's lower corner
        pos[idx] = (x + 0.5, y + 0.5)
        # replace the (x, y) node key by its running integer index
        mapping[node] = idx

    return nx.relabel_nodes(lattice, mapping), pos

def create_grid_dataset(dataset_folder, n_sim, start_sim=1, number_grids=64):
    '''
    Build a list of pytorch geometric Data objects, one per simulation,
    all sharing the same regular grid graph
    ------
    dataset_folder: str, path-like
        path to raw dataset location; must contain DEM/DEM_<id>.txt and WD/WD_<id>.txt
    n_sim: int
        number of simulations used in the dataset creation
    start_sim: int
        id of the first simulation to load; ids start_sim .. start_sim+n_sim-1 are read
    number_grids: int
        number of grid cells along each side of the (square) grid
    '''
    assert os.path.exists(dataset_folder), "There is no raw dataset folder"

    # The grid topology does not depend on the simulation, so it is built only once
    graph, pos = center_grid_graph(number_grids, number_grids)

    simulation_ids = range(start_sim, start_sim + n_sim)
    grid_dataset = []
    for sim_id in tqdm(simulation_ids):
        # only the third column of the DEM file is used
        elevation = np.loadtxt(f"{dataset_folder}/DEM/DEM_{sim_id}.txt")[:,2]
        water_depth = np.loadtxt(f"{dataset_folder}/WD/WD_{sim_id}.txt")
        # velocity files (VX, VY) are currently not loaded

        grid_dataset.append(convert_to_pyg(graph, pos, elevation, water_depth))

    return grid_dataset


def convert_to_pyg(graph, pos, DEM, WD):  # VX, VY
    '''
    Pack a graph and its simulation variables into a PyTorch Geometric Data object
    Stores connectivity, per-edge geometry (offset, length, slope) and the
    per-node position, DEM and water depth arrays
    ------
    graph: networkx graph
        graph whose edges define the connectivity
    pos: dict
        node positions, as returned by center_grid_graph
    DEM: np.ndarray
        elevation values, flattened to one value per node
    WD: np.ndarray
        water depth array, stored transposed
    '''
    elevation = DEM.reshape(-1)

    # connectivity as a (2, n_edges) tensor: first row sources, second row targets
    edge_index = torch.LongTensor(list(graph.edges)).t().contiguous()
    src, dst = edge_index

    # geometric edge features
    xy = torch.FloatTensor(get_coords(pos))
    offset = xy[dst] - xy[src]
    length = torch.norm(offset, dim=1)
    elevation_drop = torch.FloatTensor(elevation[dst]-elevation[src])

    data = Data()

    data.edge_index = edge_index
    data.edge_distance = length
    data.edge_slope = elevation_drop/length
    data.edge_relative_distance = offset

    data.num_nodes = graph.number_of_nodes()
    data.pos = torch.tensor(list(pos.values()))
    data.DEM = torch.FloatTensor(elevation)
    data.WD = torch.FloatTensor(WD.T)
    # velocities (VX, VY) are currently not stored

    return data

def get_coords(pos):
    '''
    Stack the values of a position dictionary into a numpy array,
    one row per node in the dictionary's iteration order
    ------
    pos: dict
        keys: identifier of every node
        values: spatial x and y positions of each node
    '''
    coordinates = list(pos.values())
    return np.array(coordinates)


def save_database(dataset, name, out_path='datasets'):
    '''
    This function saves the geometric database into a pickle file
    The file is written to <out_path>/<name>.pkl; an existing file with the
    same name is overwritten
    ------
    dataset: list
        list of geometric datasets for grid and mesh
    name: str
        name of saved dataset (without the .pkl extension)
    out_path: str, path-like
        output file location; created (including parent folders) if missing
    '''
    # makedirs also creates missing parent folders, e.g. 'datasets/train'
    os.makedirs(out_path, exist_ok=True)
    path = f"{out_path}/{name}.pkl"

    # "wb" truncates any previous file; the context manager guarantees the
    # handle is flushed and closed even if pickling fails
    with open(path, "wb") as file:
        pickle.dump(dataset, file)

    return None

In [3]:
# Connect to Google Colab
from google.colab import drive

# This will prompt for authorization to access your Google Drive from Colab.
drive.mount('/content/drive', force_remount=True)

# After mounting, you can navigate to a specific folder using the usual UNIX cd command.
# Set `folder_path` to the location of the raw dataset folder inside your Google Drive.
folder_path = '/content/drive/MyDrive/DSAIE/FLOOD/raw_datasets/'  # Example path

# Change the working directory, so relative paths used in later cells
# (e.g. 'datasets') resolve inside this folder.
%cd "$folder_path"

Mounted at /content/drive
/content/drive/MyDrive/DSAIE/FLOOD/raw_datasets


In [None]:
# Location of the raw simulation files (DEM/ and WD/ sub-folders)
data_folder = folder_path
dataset_folder = data_folder

# Simulations start_sim .. start_sim+n_sim-1 form the training set
n_sim = 80
start_sim = 1
dataset_name = 'grid'

# Pickled datasets are written below 'datasets/', relative to the working directory
datasets_folder = 'datasets'
dataset_dir = datasets_folder + '/train'
# Create the full output path up front (also creates 'datasets' itself)
os.makedirs(dataset_dir, exist_ok=True)

##################### Use this code to create local pickle file #####################
# start_sim is passed explicitly so the range of simulations read always
# matches the configuration above
pyg_dataset = create_grid_dataset(dataset_folder, n_sim=n_sim, start_sim=start_sim)
save_database(pyg_dataset, name=dataset_name, out_path=dataset_dir)

def load_dataset(dataset_name, dataset_folder='datasets/'):
    '''
    Loads dataset, composed by a list of pytorch geometric data objects
    only accepts files of .pkl format
    ------
    dataset_name: str
        name of the dataset to be loaded (without the .pkl extension)
    dataset_folder: str, path-like
        folder containing the pickle file
    '''
    # os.path.join avoids the doubled separator produced by a trailing slash
    # and keeps an empty folder relative instead of pointing at '/'
    path = os.path.join(dataset_folder, f"{dataset_name}.pkl")

    # WARNING: unpickling can execute arbitrary code; only load files that
    # were created by this notebook or come from a trusted source
    with open(path, 'rb') as file:
        dataset = pickle.load(file)

    return dataset

# Reload the training set from the pickle file written earlier in this cell
train_dataset = load_dataset(dataset_name=dataset_name, dataset_folder=dataset_dir)

100%|██████████| 80/80 [01:14<00:00,  1.07it/s]


In [None]:
# Inspect the first training sample: lists the stored attributes and their shapes
print(train_dataset[0])

Data(edge_index=[2, 16128], edge_distance=[16128], edge_slope=[16128], edge_relative_distance=[16128, 2], num_nodes=4096, pos=[4096, 2], DEM=[4096], WD=[4096, 97])


In [None]:
# Location of the raw simulation files (DEM/ and WD/ sub-folders)
data_folder = folder_path
dataset_folder = data_folder

# Simulations start_sim .. start_sim+n_sim-1 form the test set
n_sim = 20
start_sim = 501
dataset_name = 'grid_test'

# Pickled datasets are written below 'datasets/', relative to the working directory
datasets_folder = 'datasets'
dataset_dir = datasets_folder + '/test'
# Create the full output path up front (also creates 'datasets' itself)
os.makedirs(dataset_dir, exist_ok=True)

##################### Use this code to create local pickle file #####################
# start_sim MUST be forwarded: without it create_grid_dataset falls back to its
# default (1) and the "test" set would be built from simulations 1..20, which
# are part of the training data
pyg_dataset = create_grid_dataset(dataset_folder, n_sim=n_sim, start_sim=start_sim)
save_database(pyg_dataset, name=dataset_name, out_path=dataset_dir)

test_dataset = load_dataset(dataset_name=dataset_name, dataset_folder=dataset_dir)

100%|██████████| 20/20 [00:02<00:00,  8.48it/s]


In [None]:
def normalize_dataset(dataset, scaler_DEM, scaler_WD):
    '''
    Min-max scale the DEM and WD entries of every sample in a dataset
    Returns a list with one (normalized DEM, normalized WD) tuple per sample
    ------
    dataset: sequence
        samples that can be indexed with the keys 'DEM' and 'WD'
    scaler_DEM: fitted scaler
        provides data_min_ / data_max_ for the DEM (first entry is used)
    scaler_WD: fitted scaler
        provides data_min_ / data_max_ for the water depth (first entry is used)
    '''
    min_DEM, max_DEM = scaler_DEM.data_min_[0], scaler_DEM.data_max_[0]
    min_WD, max_WD = scaler_WD.data_min_[0], scaler_WD.data_max_[0]
    # velocities (VX, VY) are currently not part of the dataset

    def _rescale(values, lower, upper):
        # map [lower, upper] linearly onto [0, 1]
        return (values - lower) / (upper - lower)

    samples = (dataset[idx] for idx in range(len(dataset)))
    return [
        (_rescale(sample['DEM'], min_DEM, max_DEM), _rescale(sample['WD'], min_WD, max_WD))
        for sample in samples
    ]


# Normalize the inputs and outputs using statistics of the training dataset only,
# so that no information from the test simulations leaks into the scaling
scaler_DEM = MinMaxScaler()
scaler_WD = MinMaxScaler()

for sample in train_dataset:
    # flatten to a single column so each scaler tracks one global min / max
    scaler_DEM.partial_fit(sample['DEM'].reshape(-1, 1).cpu())
    scaler_WD.partial_fit(sample['WD'].reshape(-1, 1).cpu())

normalized_train_dataset = normalize_dataset(train_dataset, scaler_DEM, scaler_WD)
# The test simulations are scaled with the *training* scalers; this also defines
# the variable consumed by the test DataLoader further down
normalized_test_dataset = normalize_dataset(test_dataset, scaler_DEM, scaler_WD)

# Split the training simulations into training and validation subsets
# (the test set comes from separate simulations, see above)
train_percnt = 0.8
train_size = int(train_percnt * len(train_dataset))
val_size = len(train_dataset) - train_size
# A seeded generator makes the split identical on every Restart & Run All
split_generator = torch.Generator().manual_seed(42)
training_dataset, val_dataset = random_split(normalized_train_dataset, [train_size, val_size],
                                             generator=split_generator)

# Dataset has two variables, training data DEM, target WD
print('Amount of variables', len(normalized_train_dataset[0]))
print('Size of DEM data', len(normalized_train_dataset[0][0]))
print(f'Size of WD data ({len(normalized_train_dataset[0][1])}, {len(normalized_train_dataset[0][1][0])})')
print(normalized_train_dataset[0][0])

Amount of variables 2
Size of DEM data 4096
Size of WD data (4096, 97)
tensor([0.4818, 0.4313, 0.3685,  ..., 0.5115, 0.5253, 0.5234])


In [None]:
import numpy as np
import torch
import torch.nn as nn

class CNNBlock(nn.Module):
    '''
    Two stacked 2D convolutions: conv -> (optional batch norm) -> PReLU -> conv
    The second convolution keeps the number of channels at out_channels and is
    not followed by an activation
    ------
    in_channels: int
        channels of the input feature map
    out_channels: int
        channels produced by both convolutions
    kernel_size, padding, bias:
        forwarded to both nn.Conv2d layers
    batch_norm: bool
        insert a BatchNorm2d layer after the first convolution
    '''
    def __init__(self, in_channels, out_channels, kernel_size=3, padding=1, bias=False, batch_norm=True):
        super().__init__()

        conv_kwargs = dict(kernel_size=kernel_size, padding=padding, bias=bias)

        # layers are instantiated in the same order in which they are applied
        first_conv = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
        normalisation = [nn.BatchNorm2d(num_features=out_channels)] if batch_norm else []
        activation = nn.PReLU()
        second_conv = nn.Conv2d(out_channels, out_channels, **conv_kwargs)

        self.cnnblock = nn.Sequential(first_conv, *normalisation, activation, second_conv)

    def forward(self, x):
        '''Apply the layer stack to x'''
        return self.cnnblock(x)

class Encoder(nn.Module):
    '''
    Contracting path: a chain of CNNBlocks, each followed by 2x2 max pooling
    ------
    channels: list of int
        channels[i] -> channels[i+1] defines the i-th CNNBlock
    kernel_size, padding, bias, batch_norm:
        forwarded to every CNNBlock
    '''
    def __init__(self, channels=[32, 64, 128], kernel_size=3, padding=1, bias=False, batch_norm=True):
        super().__init__()

        stages = [
            CNNBlock(c_in, c_out, kernel_size, padding, bias, batch_norm=batch_norm)
            for c_in, c_out in zip(channels[:-1], channels[1:])
        ]
        self.enc_blocks = nn.ModuleList(stages)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        '''Return the list of un-pooled outputs of every block, first block first'''
        outs = []
        current = x
        for stage in self.enc_blocks:
            current = stage(current)
            # the feature map is stored before pooling
            outs.append(current)
            current = self.pool(current)
        return outs

class Decoder(nn.Module):
    '''
    Expanding path: transposed-convolution upsampling, concatenation with a
    skip feature map and a CNNBlock per level, followed by a final CNNBlock
    ------
    channels: list of int
        channels[i] -> channels[i+1] defines the i-th upconv and CNNBlock
    kernel_size, padding, bias, batch_norm:
        forwarded to every CNNBlock
    '''
    def __init__(self, channels=[128, 64, 32], kernel_size=3, padding=1, bias=False, batch_norm=True):
        super().__init__()
        self.channels = channels

        level_channels = list(zip(channels[:-1], channels[1:]))
        self.upconvs = nn.ModuleList([
            nn.ConvTranspose2d(c_in, c_out, kernel_size=2, padding=0, stride=2)
            for c_in, c_out in level_channels
        ])
        self.dec_blocks = nn.ModuleList([
            CNNBlock(c_in, c_out, kernel_size, padding, bias, batch_norm=batch_norm)
            for c_in, c_out in level_channels
        ])

    def forward(self, x, x_skips):
        '''
        x: deepest feature map
        x_skips: skip feature maps ordered shallow -> deep; they are consumed
            from the last (deepest) to the first
        '''
        for level, skip in enumerate(reversed(x_skips)):
            x = self.upconvs[level](x)
            # concatenate along the channel axis
            x = torch.cat((x, skip), dim=1)
            x = self.dec_blocks[level](x)

        # the last block is always applied once more after the loop, without upsampling
        return self.dec_blocks[-1](x)

class CNN(nn.Module):
    '''
    Encoder-decoder CNN with skip connections and a sigmoid output
    ------
    node_features: int
        number of input channels
    out_dim: int
        number of output channels
    n_downsamples: int
        number of encoder blocks; the hidden width doubles at every block
    initial_hid_dim: int
        number of channels produced by the first encoder block
    batch_norm, bias:
        forwarded to the encoder and the decoder
    '''
    def __init__(self, node_features, out_dim=1, n_downsamples=3, initial_hid_dim=64, batch_norm=True,
                 bias=True):
        super().__init__()

        widths = [initial_hid_dim * 2**level for level in range(n_downsamples)]
        shared = dict(kernel_size=3, padding=1, bias=bias, batch_norm=batch_norm)

        self.encoder = Encoder([node_features] + widths, **shared)
        self.decoder = Decoder(widths[::-1] + [out_dim], **shared)

    def forward(self, x):
        '''Encode x, decode using the intermediate feature maps as skips, squash to (0, 1)'''
        features = self.encoder(x)
        deepest, skips = features[-1], features[:-1]
        return torch.sigmoid(self.decoder(deepest, skips))

# Instantiate the encoder-decoder CNN.
# NOTE(review): node_features is the length of the flattened DEM vector (4096 in the
# dataset summary printed above) and becomes the number of input channels of the
# first Conv2d. Conv2d normally expects (batch, channels, height, width) input, so
# presumably the DEM should be reshaped to a 1-channel 64x64 image with
# node_features=1 — TODO confirm, and check that out_dim matches the WD target shape.
node_features = train_dataset[0]['DEM'].shape[0]
model = CNN(node_features=node_features, n_downsamples=4, initial_hid_dim=32,
            batch_norm=True, bias=True)

In [None]:
def train_epoch(model, loader, optimizer, device='cpu'):
    '''
    Train the model for one pass over the loader using the MSE loss
    Returns the mean of the per-batch losses
    ------
    model: nn.Module
        model to be trained; moved to `device`
    loader: iterable
        yields batches where batch[0] is the input and batch[1] the target
    optimizer: torch.optim.Optimizer
        optimizer holding the model parameters
    device: str or torch.device
        device on which the model and every batch are placed
    '''
    model.to(device)
    model.train() # specifies that the model is in training mode

    criterion = nn.MSELoss()  # built once instead of once per batch
    losses = []

    for batch in loader:
        # inputs and targets must live on the same device as the model,
        # otherwise the forward pass fails as soon as device is not the CPU
        x = batch[0].to(device)
        y = batch[1].to(device)

        # Model prediction
        preds = model(x)

        # MSE loss function
        loss = criterion(preds, y)

        # .item() extracts a plain Python float, detached from the graph
        losses.append(loss.item())

        # Backpropagate and update weights
        loss.backward()   # compute the gradients using backpropagation
        optimizer.step()  # update the weights with the optimizer
        optimizer.zero_grad(set_to_none=True)   # reset the computed gradients

    losses = np.array(losses).mean()

    return losses

def evaluation(model, loader, device='cpu'):
    '''
    Evaluate the model on the loader using the MSE loss, without gradients
    Returns the mean of the per-batch losses
    ------
    model: nn.Module
        model to be evaluated; moved to `device` and switched to eval mode
    loader: iterable
        yields batches where batch[0] is the input and batch[1] the target
    device: str or torch.device
        device on which the model and every batch are placed
    '''
    model.to(device)
    model.eval() # specifies that the model is in evaluation mode

    criterion = nn.MSELoss()  # built once instead of once per batch
    losses = []

    with torch.no_grad():
        for batch in loader:
            # inputs and targets must live on the same device as the model,
            # otherwise the forward pass fails as soon as device is not the CPU
            x = batch[0].to(device)
            y = batch[1].to(device)

            # Model prediction
            preds = model(x)

            # MSE loss function
            loss = criterion(preds, y)
            losses.append(loss.item())

    losses = np.array(losses).mean()

    return losses

In [None]:
# Set training parameters
learning_rate = 0.001
batch_size = 8
num_epochs = 10

# Create the optimizer to train the neural network via back-propagation
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Create the training and validation dataloaders to "feed" data to the model in batches.
# The training loader must wrap `training_dataset` (the normalized training split);
# `train_dataset` is the raw, un-normalized list of graph objects and also still
# contains the validation samples.
train_loader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
# `normalized_test_dataset` is expected to hold the test simulations scaled with
# the scalers fitted on the training data
test_loader = DataLoader(normalized_test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Loss history, one entry per epoch
train_losses, val_losses = [], []

for epoch in range(1, num_epochs+1):
    # One optimisation pass over the training data, then a validation pass
    train_loss = train_epoch(model, train_loader, optimizer, device=device)
    val_loss = evaluation(model, val_loader, device=device)

    train_losses.append(train_loss)
    val_losses.append(val_loss)

    # The first epoch initialises the reference loss
    if epoch == 1:
        best_loss = val_loss

    # Keep a snapshot of the model whenever the validation loss does not get worse
    if val_loss<=best_loss:
        best_model, best_loss, best_epoch = copy.deepcopy(model), val_loss, epoch

    # Report progress every 10 epochs
    if epoch%10 == 0:
        print("epoch:",epoch, "\t training loss:", np.round(train_loss,4),
                            "\t validation loss:", np.round(val_loss,4))

# Continue with the weights of the best epoch instead of the last one
model = copy.deepcopy(best_model)