In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import torch
from torch.utils.data import random_split
import torch_geometric
from torch_geometric.loader import DataLoader
import yaml

import Dataset
import Models
from utils import time_func

In [2]:
# Summarise the library versions and CUDA status of the running environment.
cuda_ready = torch.cuda.is_available()
env_report = [
    f"Torch version: {torch.__version__}",
    f"Cuda available: {cuda_ready}",
]
if cuda_ready:
    # The device name is only queried when CUDA is reported as available.
    env_report.append(f"Cuda device: {torch.cuda.get_device_name()}")
env_report += [
    f"Cuda version: {torch.version.cuda}",
    f"Torch geometric version: {torch_geometric.__version__}",
]
print("\n".join(env_report))

Torch version: 2.0.1+cu117
Cuda available: False
Cuda version: 11.7
Torch geometric version: 2.3.1


In [3]:
# Select the compute device: CUDA when torch reports it as available, CPU otherwise.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
DEVICE

device(type='cpu')

In [4]:
# Load the pipeline configuration. The context manager closes the file handle
# as soon as parsing is done instead of leaving it open for the kernel lifetime.
with open('./config/pipeline.yaml') as config_file:
    params = yaml.safe_load(config_file)

# Input locations, later passed to the dataset as `root` and `mesh_path`.
DATA_PATH = params['input_subset_pre_processed']
MESH_PATH = params['input_subset_grid']

# Fractions of the dataset assigned to each split (checked to sum to 1 further down).
TRAIN_PROP = params['train_prop']
VAL_PROP = params['val_prop']
TEST_PROP = params['test_prop']

# Batch size used by the DataLoader of each split.
TRAIN_BATCH_SIZE = params['train_batch_size']
VAL_BATCH_SIZE = params['val_batch_size']
TEST_BATCH_SIZE = params['test_batch_size']

# TODO use these
N_FEATURES = params['n_features']
HID_CHANNELS = params['hid_channels']
N_CLASSES = params['n_classes']

# Map the configured name to a callable. Any unrecognised name leaves FINAL_ACT
# as None, which the parameter checks below reject with a ValueError.
FINAL_ACT = None
if params['final_act'] == "sigmoid":
    FINAL_ACT = torch.sigmoid
elif params['final_act'] == "linear":
    # NOTE(review): torch.nn.Linear(1, 1) is a freshly initialised learnable
    # layer, not an identity mapping. If "linear" is meant as "no activation",
    # torch.nn.Identity() may be what is wanted; confirm how Models uses FINAL_ACT.
    FINAL_ACT = torch.nn.Linear(1, 1)

# TODO use these
PLOT_SHOW = params['plot_show']
PLOT_VERTICAL = params['plot_vertical']

#TIMESTAMP = time_func.start_time() # TODO test for performances

### Dataset instantiation

In [5]:
# Build the dataset and time its construction. The start_time()/stop_time()
# pair brackets the constructor call; judging by the recorded output,
# stop_time prints the given label followed by the elapsed seconds.
timestamp = time_func.start_time()
# DATA_PATH and MESH_PATH are the input locations read from config/pipeline.yaml.
dataset = Dataset.PilotDataset(root=DATA_PATH, mesh_path=MESH_PATH)
time_func.stop_time(timestamp, "Dataset creation")

  ---  Dataset creation  ---  14.271 seconds.


### Testing some parameters and orientation of graph edges

In [6]:
# Fail fast on configuration errors before any split or training code runs.

# FINAL_ACT stays None when 'final_act' names an unsupported activation.
if FINAL_ACT is None:
    raise ValueError(f"Parameter 'final_act' is invalid with value {params['final_act']}")

# Compare with a tolerance: fractions such as 0.7 + 0.2 + 0.1 do not add up to
# exactly 1.0 in binary floating point, so an exact `!= 1` test would reject a
# perfectly valid configuration.
prop_sum = TRAIN_PROP + VAL_PROP + TEST_PROP
if not np.isclose(prop_sum, 1.0, rtol=0.0, atol=1e-9):
    raise ValueError(f"Sum of train-val-test proportions with value {prop_sum} is different from 1")

# Fetch one sample graph and use it to validate the dataset against the config.
dummy_graph = dataset.get(year=2015, month=1, day=1)

# The per-node feature count of the data must match the configured N_FEATURES.
if dummy_graph.num_features != N_FEATURES:
    raise ValueError(f"Graph num_features is different from parameter N_FEATURES: ({dummy_graph.num_features} != {N_FEATURES})")

# The graph is required to be undirected; a directed graph is treated as an error.
if dummy_graph.is_directed():
    raise ValueError("Graph edges are directed!")

### Train-validation-test split

In [7]:
# Remove auxiliary files from the processed directory; they are not needed (for now?).
# NOTE(review): presumably these are the bookkeeping files torch_geometric writes
# next to the processed graphs; confirm nothing downstream relies on them.
graph_names = os.listdir(dataset.processed_dir)
for stale_name in ('pre_filter.pt', 'pre_transform.pt'):
    if stale_name in graph_names:
        # os.path.join builds the path portably instead of concatenating with '/'.
        os.remove(os.path.join(dataset.processed_dir, stale_name))

In [8]:
# Seed the split so the same graphs land in train/val/test on every run.
# Without an explicit generator, random_split draws from the global RNG and the
# partition changes after each kernel restart, making results incomparable.
# The seed can be overridden with an optional 'split_seed' entry in the config.
SPLIT_SEED = params.get('split_seed', 42)
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_set, val_set, test_set = random_split(
    dataset,
    [TRAIN_PROP, VAL_PROP, TEST_PROP],
    generator=split_generator,
)

# Report the size of each split.
print(len(train_set))
print(len(val_set))
print(len(test_set))

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_set, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=VAL_BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_set, batch_size=TEST_BATCH_SIZE, shuffle=False)

256
55
54
