In [8]:
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import torch
import torch_geometric
from torch_geometric.loader import DataLoader
from torch_geometric.nn import summary
import yaml

import Dataset
import Models
from utils import time_func

In [9]:
# Report the library versions and CUDA status this notebook runs against.
cuda_ready = torch.cuda.is_available()

print(f"Torch version: {torch.__version__}")
print(f"Cuda available: {cuda_ready}")
if cuda_ready:
    # The device name is only queried when a CUDA device is available.
    print(f"Cuda device: {torch.cuda.get_device_name()}")
print(f"Cuda version: {torch.version.cuda}")
print(f"Torch geometric version: {torch_geometric.__version__}")

Torch version: 2.0.1+cu117
Cuda available: False
Cuda version: 11.7
Torch geometric version: 2.3.1


In [10]:
# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
DEVICE

device(type='cpu')

In [11]:
# Load the pipeline configuration. The context manager closes the file handle
# as soon as parsing is done instead of leaving it open.
with open('./config/pipeline.yaml') as config_file:
    params = yaml.safe_load(config_file)

# Input locations
DATA_PATH = params['input_subset_pre_processed']
MESH_PATH = params['input_subset_grid']

# Train / validation / test proportions (their sum is checked further down)
TRAIN_PROP = params['train_prop']
VAL_PROP = params['val_prop']
TEST_PROP = params['test_prop']

# Batch sizes used by the three data loaders
TRAIN_BATCH_SIZE = params['train_batch_size']
VAL_BATCH_SIZE = params['val_batch_size']
TEST_BATCH_SIZE = params['test_batch_size']

# TODO use these
N_FEATURES = params['n_features']
HID_CHANNELS = params['hid_channels']
N_CLASSES = params['n_classes']

# Final activation, selected by name. Any unsupported name leaves FINAL_ACT as
# None, which the sanity-check cell turns into a ValueError.
FINAL_ACT = None
if params['final_act'] == "sigmoid":
    FINAL_ACT = torch.sigmoid
elif params['final_act'] == "linear":
    # NOTE(review): this creates a new Linear(1, 1) layer here, outside the
    # model. Whether its weights get trained depends on how Models.GUNet
    # handles final_act - confirm, or consider an identity activation.
    FINAL_ACT = torch.nn.Linear(1, 1)

# Loss function, selected by name (None when the name is unsupported)
LOSS_OP = None
if params['loss_op'] == "BCE":
    LOSS_OP = torch.nn.BCELoss()

# Optimizer *class*, selected by name; it is instantiated once the model exists
OPTIMIZER = None
if params['optimizer'] == "Adam":
    OPTIMIZER = torch.optim.Adam

LEARN_RATE = params['learn_rate']

# TODO use these
PLOT_SHOW = params['plot_show']
PLOT_VERTICAL = params['plot_vertical']

#TIMESTAMP = time_func.start_time() # TODO test for performances

### Dataset creation

In [12]:
# Build the three dataset splits in order (train, val, test) and time the whole step.
timestamp = time_func.start_time()
train_dataset, val_dataset, test_dataset = (
    Dataset.EddyDataset(root=DATA_PATH, mesh_path=MESH_PATH, split=split_name)
    for split_name in ('train', 'val', 'test')
)
time_func.stop_time(timestamp, "Datasets creation")

train:  256  val:  73  test:  36
(EddyDataset(256), tensor([245,  76, 144,  66,  34,  78,  62, 177, 184,  80, 252, 161, 148, 133,
         91,  46, 158, 146,  59, 238, 106, 190, 134,   8,  61,  33, 174, 242,
        232, 130,  52, 180, 152,  36,  10, 157, 233,  86, 187,  23,  35, 192,
        163,   2,  68, 143, 243, 122, 109, 239, 231,  43, 105, 118, 203, 188,
        202,  39, 179,  41,   1,  21, 244,  85, 166,  65,  97,  83,  67, 223,
         63,  71, 121, 162, 175, 116, 120,  99,  88, 126, 213,  27,  51, 218,
         28, 201,  20,  38, 251, 226,  69, 136, 186,  93,  90,  22,  49,  70,
         17, 189,  98, 171,  72,  48, 160,  74, 247, 159, 167,  55, 183, 237,
         16, 108, 209, 197, 200,  95, 129,  26,  87, 205, 229, 103,  24,   9,
         19, 123,  45, 156,  79, 164, 222, 224,  44,  13, 119, 248, 194, 241,
        199, 207, 221,  42,  73, 139, 254, 182, 168, 181, 142,  37,   7, 110,
        137, 234, 150, 125, 127, 250, 176,  89,  40, 124, 219, 135,   3, 172,
         54,

In [13]:
# Number of samples in each split, printed as: train val test
print(*(split.len() for split in (train_dataset, val_dataset, test_dataset)))

256 73 36


In [15]:
# Spot-check: load one validation sample (index 25 is arbitrary) and display it.
val_dataset[25]

Get(val): year_2015_month_11_day_2.pt


Data(x=[757747, 1], edge_index=[2, 4537526], y=[757747])

### Testing some parameters and orientation of graph edges

In [16]:
# Sanity checks on the configuration and on one sample graph.

# The three proportions are expected to add up to 100.
prop_sum = TRAIN_PROP + VAL_PROP + TEST_PROP
if prop_sum != 100:
    raise ValueError(f"Sum of train-val-test proportions with value {prop_sum} is different from 100")

# Each of these is still None when the config value was not recognised.
if FINAL_ACT is None:
    raise ValueError(f"Parameter 'final_act' is invalid with value {params['final_act']}")

if LOSS_OP is None:
    raise ValueError(f"Parameter 'loss_op' is invalid with value {params['loss_op']}")

if OPTIMIZER is None:
    raise ValueError(f"Parameter 'optimizer' is invalid with value {params['optimizer']}")

# A single training graph is used to check the data against the config;
# it is reused later when the model is instantiated.
dummy_graph = train_dataset[0]

if dummy_graph.num_features != N_FEATURES:
    raise ValueError(f"Graph num_features is different from parameter N_FEATURES: ({dummy_graph.num_features} != {N_FEATURES})")

# The pipeline expects undirected graphs.
if dummy_graph.is_directed():
    raise ValueError("Graph edges are directed!")

Get(train): year_2015_month_11_day_9.pt


### Train-validation-test split

In [None]:
# NOTE: disabled legacy code kept as a string literal; nothing in it is executed.
# It splits the data by month through a `dataset` object that is not defined in
# the cells shown here. The loaders actually used are created in the next cell.
'''
train_set = []
val_set = []
test_set = []
train_months = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
val_months = [11]
test_months = [12]

for m in train_months:
    train_set += dataset.get_all(year=2015, month=m)
    print(m)
    
for m in val_months:
    val_set += dataset.get_all(year=2015, month=m)
    print(m)

for m in test_months:
    test_set += dataset.get_all(year=2015, month=m)
    print(m)

print(len(train_set), len(val_set), len(test_set))

train_loader = DataLoader(train_set, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=VAL_BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_set, batch_size=TEST_BATCH_SIZE, shuffle=False)

print(len(train_loader), len(val_loader), len(test_loader))
'''

In [18]:
# One loader per split; only the training loader shuffles its samples.
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=TRAIN_BATCH_SIZE,
)
val_loader = DataLoader(
    val_dataset,
    shuffle=False,
    batch_size=VAL_BATCH_SIZE,
)
test_loader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=TEST_BATCH_SIZE,
)

# Number of samples behind each loader, printed as: train val test
print(*(len(loader.dataset) for loader in (train_loader, val_loader, test_loader)))

256 73 36


### Model instantiation

In [24]:
# Architecture to use for this run.
Model = Models.GUNet

# Build the network from the configured sizes, then move it to the selected device.
model = Model(
    in_channels=N_FEATURES,
    hidden_channels=HID_CHANNELS,
    out_channels=N_CLASSES,
    num_nodes=dummy_graph.num_nodes,   # TODO can put these in Dataset.py
    final_act=FINAL_ACT,
)
model = model.to(DEVICE)

model

GUNet instantiated!
	Middle act: relu
	Final act: torch


GUNet(
  (unet): GraphUNet(1, 32, 1, depth=3, pool_ratios=[0.002639403389257892, 0.5, 0.5])
)

In [25]:
# summary() returns the table as a plain string; print it so the line breaks
# render as a table instead of showing up as literal "\n" in the cell output.
print(summary(model, dummy_graph))

  C = torch.sparse.mm(A, B)


'+-------------------------------+---------------------------------------------------+---------------------------------------------------------+----------+\n| Layer                         | Input Shape                                       | Output Shape                                            | #Param   |\n|-------------------------------+---------------------------------------------------+---------------------------------------------------------+----------|\n| GUNet                         | [757747, 757747]                                  | [757747, 1]                                             | 5,473    |\n| ├─(unet)GraphUNet             | [757747, 1], [2, 4537526]                         | [757747, 1]                                             | 5,473    |\n| │    └─(down_convs)ModuleList | --                                                | --                                                      | 3,232    |\n| │    │    └─(0)GCNConv        | [757747, 1], [2, 4537526], [4

### Optimizer

In [26]:
# Build the optimizer from the configuration (optimizer class and LEARN_RATE)
# instead of hard-coded values. After this cell OPTIMIZER holds an optimizer
# *instance*; going through type() when it is already an instance keeps the
# cell safe to re-run.
optimizer_cls = OPTIMIZER if isinstance(OPTIMIZER, type) else type(OPTIMIZER)
OPTIMIZER = optimizer_cls(model.parameters(), lr=LEARN_RATE)

### Train function

In [28]:
def train():
    """Run one training pass over ``train_loader`` and return the mean loss.

    Relies on the notebook-level ``model``, ``train_loader``, ``DEVICE``,
    ``OPTIMIZER`` (an optimizer instance) and ``LOSS_OP``.

    Returns:
        float: the per-batch losses, each weighted by ``batch.num_graphs``,
        summed and divided by ``len(train_loader.dataset)``.
    """
    model.train()
    total_loss = 0

    for batch in train_loader:
        batch = batch.to(DEVICE)

        # zero the parameter gradients
        OPTIMIZER.zero_grad()

        # forward + loss; squeeze() drops size-1 dimensions from the prediction
        pred = model(batch)
        pred = pred.squeeze()

        # NOTE(review): with BCELoss every prediction must lie in [0, 1]; the
        # recorded run failed on exactly this check - confirm the final
        # activation and that the predictions contain no NaN values.
        loss = LOSS_OP(pred, batch.y)

        # If you try the Soft Dice Score, use this (even if the loss stays constant)
        #loss.requires_grad = True
        #loss = torch.tensor(loss.item(), requires_grad=True)

        # weight the batch loss by the number of graphs in the batch
        total_loss += loss.item() * batch.num_graphs

        # backward + optimize (same optimizer object that was zeroed above)
        loss.backward()
        OPTIMIZER.step()

    # average loss = total_loss / number of training graphs
    total_loss = total_loss / len(train_loader.dataset)
    return total_loss

# Run a single training pass and keep the mean loss it returns.
loss = train()
#print("Train loss, debug: ", loss)


Get(train): year_2015_month_11_day_17.pt
Get(train): year_2015_month_8_day_11.pt


RuntimeError: all elements of input should be between 0 and 1