In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import torch
import torch_geometric
from torch_geometric.loader import DataLoader
from torch_geometric.nn import summary
import yaml

import Dataset
import Models
from utils import time_func

In [2]:
# Report the library and runtime versions this notebook is running against.
cuda_available = torch.cuda.is_available()

print(f"Torch version: {torch.__version__}")
print(f"Cuda available: {cuda_available}")
if cuda_available:
    # The device name is only queried when a CUDA device is present.
    print(f"Cuda device: {torch.cuda.get_device_name()}")
print(f"Cuda version: {torch.version.cuda}")
print(f"Torch geometric version: {torch_geometric.__version__}")

Torch version: 2.0.1+cu117
Cuda available: False
Cuda version: 11.7
Torch geometric version: 2.3.1


In [3]:
# Pick the compute device: CUDA when available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
DEVICE

device(type='cpu')

In [4]:
# Load the pipeline configuration. The context manager guarantees the file
# handle is closed even if YAML parsing raises.
with open('./config/pipeline.yaml') as config_file:
    params = yaml.safe_load(config_file)

# Input locations.
DATA_PATH = params['input_subset_pre_processed']
MESH_PATH = params['input_subset_grid']

# Split proportions (validated later against a total of 100).
TRAIN_PROP = params['train_prop']
VAL_PROP = params['val_prop']
TEST_PROP = params['test_prop']

# Batch sizes for the three data loaders.
TRAIN_BATCH_SIZE = params['train_batch_size']
VAL_BATCH_SIZE = params['val_batch_size']
TEST_BATCH_SIZE = params['test_batch_size']

# TODO use these
N_FEATURES = params['n_features']
HID_CHANNELS = params['hid_channels']
N_CLASSES = params['n_classes']

# Map the configured names onto torch objects. Unknown names leave the value
# as None so that the validation cell can report them.
FINAL_ACT = None
if params['final_act'] == "sigmoid":
    FINAL_ACT = torch.sigmoid
elif params['final_act'] == "linear":
    # NOTE(review): this creates a fresh Linear layer with its own randomly
    # initialised parameters; it is not moved to DEVICE here, and whether the
    # model registers/trains it is not visible from this notebook - confirm
    # that an identity activation was not intended instead.
    FINAL_ACT = torch.nn.Linear(1, 1)

LOSS_OP = None
if params['loss_op'] == "BCE":
    LOSS_OP = torch.nn.BCELoss()

# Holds the optimizer *class* at this point; it is instantiated once the
# model parameters exist.
OPTIMIZER = None
if params['optimizer'] == "Adam":
    OPTIMIZER = torch.optim.Adam

LEARN_RATE = params['learn_rate']

# TODO use these
PLOT_SHOW = params['plot_show']
PLOT_VERTICAL = params['plot_vertical']

#TIMESTAMP = time_func.start_time() # TODO test for performances

### Dataset creation

In [5]:
# Build the dataset from the configured pre-processed data and mesh paths,
# timing the construction with the project's time_func helpers.
timestamp = time_func.start_time()
dataset = Dataset.PilotDataset(root=DATA_PATH, mesh_path=MESH_PATH)
# Reports the elapsed time under the given label (see the cell output).
time_func.stop_time(timestamp, "Dataset creation")

  ---  Dataset creation  ---  0.421 seconds.


### Testing some parameters and orientation of graph edges

In [6]:
# Sanity-check the configuration and one sample graph before any training.
# Every failed check raises ValueError with the offending value.

# The proportions are compared against 100, i.e. they are percentages.
total_prop = TRAIN_PROP + VAL_PROP + TEST_PROP
if total_prop != 100:
    raise ValueError(f"Sum of train-val-test proportions with value {total_prop} is different from 100")

# The config cell leaves these as None when the configured name is unknown.
if FINAL_ACT is None:
    raise ValueError(f"Parameter 'final_act' is invalid with value {params['final_act']}")

if LOSS_OP is None:
    raise ValueError(f"Parameter 'loss_op' is invalid with value {params['loss_op']}")

if OPTIMIZER is None:
    raise ValueError(f"Parameter 'optimizer' is invalid with value {params['optimizer']}")

# One sample graph, used here for structural checks and later for model setup.
dummy_graph = dataset.get(year=2015, month=1, day=1)

if dummy_graph.num_features != N_FEATURES:
    raise ValueError(f"Graph num_features is different from parameter N_FEATURES: ({dummy_graph.num_features} != {N_FEATURES})")

# The pipeline expects undirected graphs.
if dummy_graph.is_directed():
    raise ValueError("Graph edges are directed!")

### Train-validation-test split

In [7]:
# These bookkeeping files are useless (for now?), so remove them from the
# processed directory when they are present.
# NOTE: graph_names is the directory listing taken *before* the removal, so it
# still contains the names of any files deleted below.
graph_names = os.listdir(dataset.processed_dir)
for stale_name in ('pre_filter.pt', 'pre_transform.pt'):
    if stale_name in graph_names:
        # os.path.join builds the path portably instead of concatenating '/'.
        os.remove(os.path.join(dataset.processed_dir, stale_name))

In [8]:
# Month-based split of the 2015 samples: months 1-10 for training,
# month 11 for validation and month 12 for testing.
train_months = list(range(1, 11))
val_months = [11]
test_months = [12]

train_set, val_set, test_set = [], [], []

# Fill each split in turn, echoing every month as it is loaded.
for months, split in (
    (train_months, train_set),
    (val_months, val_set),
    (test_months, test_set),
):
    for m in months:
        # In-place extend, so the named split list itself receives the graphs.
        split.extend(dataset.get_all(year=2015, month=m))
        print(m)

print(len(train_set), len(val_set), len(test_set))

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_set, shuffle=True, batch_size=TRAIN_BATCH_SIZE)
val_loader = DataLoader(val_set, shuffle=False, batch_size=VAL_BATCH_SIZE)
test_loader = DataLoader(test_set, shuffle=False, batch_size=TEST_BATCH_SIZE)

# Number of batches per loader.
print(len(train_loader), len(val_loader), len(test_loader))

1
2
3
4
5
6
7
8
9
10
11
12
304 30 31
5 1 1


In [9]:
# Disabled snippet, kept as a bare string literal so none of it executes:
# a proportion-based split that treats TRAIN_PROP / VAL_PROP as percentages
# of dataset.len() and assigns every remaining sample to the test set.
'''
# Snippet for dataset split
# To use every available sample, n_test gets all the remanining
n_train = round(dataset.len()*TRAIN_PROP/100)
n_val = round(dataset.len()*VAL_PROP/100)
n_test = dataset.len()-n_train-n_val
print(n_train, n_val, n_test, (n_train+n_val+n_test))
'''

'\n# Snippet for dataset split\n# To use every available sample, n_test gets all the remanining\nn_train = round(dataset.len()*TRAIN_PROP/100)\nn_val = round(dataset.len()*VAL_PROP/100)\nn_test = dataset.len()-n_train-n_val\nprint(n_train, n_val, n_test, (n_train+n_val+n_test))\n'

### Model instantiation

In [10]:
# Alias for the model class being instantiated.
Model = Models.GUNet

# Constructor arguments, taken from the configuration and the sample graph.
model_kwargs = dict(
    in_channels=N_FEATURES,
    hidden_channels=HID_CHANNELS,
    out_channels=N_CLASSES,
    num_nodes=dummy_graph.num_nodes,   # TODO can put these in Dataset.py
    final_act=FINAL_ACT,
)

# Build the model and move it to the selected device.
model = Model(**model_kwargs).to(DEVICE)

# Display the module structure as the cell output.
model

GUNet instantiated!
	Middle act: relu
	Final act: torch


GUNet(
  (unet): GraphUNet(1, 32, 1, depth=3, pool_ratios=[0.002639403389257892, 0.5, 0.5])
)

In [11]:
# Per-layer summary of the model (input/output shapes and parameter counts)
# for the sample graph.
# NOTE(review): the result is displayed as a raw string repr with escaped
# newlines; wrapping the call in print() would presumably render the table
# properly - confirm and consider switching.
summary(model, dummy_graph)

  C = torch.sparse.mm(A, B)


'+-------------------------------+---------------------------------------------------+---------------------------------------------------------+----------+\n| Layer                         | Input Shape                                       | Output Shape                                            | #Param   |\n|-------------------------------+---------------------------------------------------+---------------------------------------------------------+----------|\n| GUNet                         | [757747, 757747]                                  | [757747, 1]                                             | 5,473    |\n| ├─(unet)GraphUNet             | [757747, 1], [2, 4537526]                         | [757747, 1]                                             | 5,473    |\n| │    └─(down_convs)ModuleList | --                                                | --                                                      | 3,232    |\n| │    │    └─(0)GCNConv        | [757747, 1], [2, 4537526], [4

### Optimizer

In [12]:
# Build the optimizer for the current model's parameters.
# OPTIMIZER starts out as the optimizer *class* chosen in the config cell and
# is rebound here to an *instance*. Recovering the class from either form keeps
# this cell re-runnable (e.g. after re-creating the model) instead of failing
# on an attempt to call an optimizer instance.
OPTIMIZER_CLASS = OPTIMIZER if isinstance(OPTIMIZER, type) else type(OPTIMIZER)
OPTIMIZER = OPTIMIZER_CLASS(model.parameters(), lr=LEARN_RATE)

### Train function