In this notebook, I will use torch_geometric to predict the developpement of a graph of positions through time

In [1]:
"""
I realized I am leaning towards this approach https://doi.org/10.1016/j.trc.2020.102635
"""

'\nI realized I am leaning towards this approach https://doi.org/10.1016/j.trc.2020.102635\n'

In [2]:
import torch
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

import pickle

import sys
import os
from genericpath import exists

model_path = "models/model"
loss_path = "models/loss"

from cell_dataset import CellGraphDataset
from cell_model import GraphEvolution
from cell_utils import GraphingLoss
from cell_training import train, test_single, test_recursive, run_single, run_single_recursive

import os, psutil
process = psutil.Process(os.getpid())
print("Using : ", process.memory_info().rss // 1000000)  # in megabytes 
print("Available : ", process.memory_info().vms  // 1000000)  # in megabytes 

print(torch.cuda.is_available())

#https://github.com/clovaai/AdamP
from adamp import AdamP

sys.path.append('/home/nstillman/1_sbi_activematter/cpp_model')
try :
    import allium
except :
    print("Could not import allium")

Using :  361
Available :  3475
True
Cannot import zuko. Continuing without prior
No module named 'pycapmd'
Cannot import simulator


The data is a graph of cells having their own positions and velocity.

In the graph, we will first start by connecting all the edges, then maybe later make radius_graphs to reduce the cost of the pass through the model

In [3]:
load_all = True

if load_all : 
    if os.path.exists("data/training.pkl") :
        with open("data/training.pkl", "rb") as f:
            data_train = pickle.load(f)
    if os.path.exists("data/testing.pkl") :
        with open("data/testing.pkl", "rb") as f:
            data_test = pickle.load(f)
else :
    #path = "data/" #local
    path = "/scratch/users/nstillman/data-cpp/" #remote

    data_train = CellGraphDataset(root=path + 'train', max_size=1000, rdts=True, inmemory=True, bg_load=True, wrap=True, T_limit=16)
    print("Training data length : ", data_train.len())

    data_test = CellGraphDataset(root=path + 'test', max_size=50, inmemory=True, bg_load=True, wrap=True, T_limit=16)
    print("Test data length : ", data_test.len())
    
    data_val = CellGraphDataset(root=path + 'valid', max_size=50, inmemory=True, bg_load=True, wrap=True, T_limit=8)
    print("Validation data length : ", data_val.len())

    override = True #make this true to always use the same ones

    if override :
        data_train.save_or_load_if_exists("train_paths.pkl")
        data_test.save_or_load_if_exists("test_paths.pkl")
        data_val.save_or_load_if_exists("val_paths.pkl")
    else :
        torch.autograd.set_detect_anomaly(True)

In [4]:
#INFO : if bg_load is True, this starts the loading, if skipped, bg_loading will take place as soon as a get is called
rval, edge_index, edge_attr, batch_edge, border, params = data_train.get(0)

Next we need to define the model that will be used :
    > input 
        (1) Graph at a particular time t (nodes having x,y,dx,dy as attributes)
        (2) Graphs up to a particular time [t-a, t] (nodes having x,y as attributes)
    > output
        (a) Graph at the immediate next time step t+1
        (b) Graph [t, t+b]
        (c) Graph at t+b
    > graph size
        (x) Fixed graph size to the most nodes possible (or above)
        (y) Unbounded graph size
            >> idea : graph walks
            >> idea : sampler

The following model will do (1ax)

In [5]:
global losses
losses = []

In [6]:
def start(model : GraphEvolution, optimizer : torch.optim.Optimizer, scheduler  : torch.optim.lr_scheduler._LRScheduler,\
          data_train : CellGraphDataset, data_test : CellGraphDataset, device : torch.device, epoch : int, offset : int, save=0, save_datasets=True):
    for e in range(offset, offset + epoch):
        
        recursive = True

        model = train(model, optimizer, scheduler, data_train, device, e, process, max_epoch=offset+epoch, recursive=recursive)

        test_loss_s = test_single(model, data_test, device, duration=8)
        test_loss_r = test_recursive(model, data_test, device, duration=8)
        
        if(e == 0 and save_datasets) :
            with open("data/training.pkl", 'wb') as f:
                pickle.dump(data_train, f)
            with open("data/testing.pkl", 'wb') as f:
                pickle.dump(data_test, f)
            print("Saved datasets")
        
        if (e%10 == 0) :
            print("Epoch : ", e, "Test loss : ", test_loss_s, "Test loss recursive : ", test_loss_r)
        
        losses.append(test_loss_r)
        losses.append(test_loss_s)
        
        if (save and (e%save == 0 or e == epoch-1)) :
            torch.save(model.state_dict(), model_path + str(e) + ".pt")
            with open(loss_path + str(e) + ".pkl", 'wb') as f:
                pickle.dump(losses, f)

In [7]:
load = True

epoch_to_load = 225

model = GraphEvolution(in_channels=9, out_channels=4, hidden_channels=32, dropout=0.01, edge_dim=2, messages=5)

if exists(model_path + str(epoch_to_load) + ".pt") and load :
    with open(loss_path + str(epoch_to_load) + ".pkl", 'rb') as f:
        losses = pickle.load(f)
    model.load_state_dict(torch.load(model_path + str(epoch_to_load) + ".pt"))
    print("Loaded model")

In [8]:
print("Using : ", process.memory_info().rss // 1000000)  # in megabytes
print("Losses : ", len(losses) // 2)
print("Model : ", model)

Using :  3436
Losses :  0
Model :  GraphEvolution(
  (transformer_decoder): TransformerDecoder(
    (layers): ModuleList(
      (0): TransformerDecoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)
        )
        (multihead_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)
        )
        (linear1): Linear(in_features=32, out_features=128, bias=True)
        (dropout): Dropout(p=0.01, inplace=False)
        (linear2): Linear(in_features=128, out_features=32, bias=True)
        (norm1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (norm3): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.01, inplace=False)
        (dropout2): Dropout(p=0.01, inplace=False)
        (dropout3): Dropout(p=0.01,

In [9]:
#might want to investigate AdamP 
optimizer = AdamP(model.parameters(), lr=5e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=5e-3, delta=0.1, wd_ratio=0.1, nesterov=True)
scheduler = CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=10, T_mult=2, eta_min=1e-12)

In [None]:
epochs = 630
grapher = GraphingLoss(losses)
scheduler.step(len(losses) // 2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
try :
    grapher.gstart(20)
    start(model, optimizer, scheduler, data_train, data_test, device, \
          epochs, len(losses) // 2, save=10, save_datasets=False)
finally :
    grapher.gstop()

Current probability of recursive training :  tensor(0.1080)
Epoch :  0 Test loss :  0.07798135828226804 Test loss recursive :  1.260199143886566 
Current loss : 1.08, ... 278, / 1000, Current memory usage : 4774 MB, loaded 1000    