In this notebook, I will use torch_geometric to predict the developpement of a graph of positions through time

In [1]:
import torch
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
import torch.nn.functional as F

import numpy as np

import torch_geometric
from torch_geometric.utils import to_networkx, from_networkx
from torch_geometric.nn import radius_graph
from torch_geometric.data import Data

import networkx as nx

import pickle

import time

import sys
import os
from genericpath import exists

import random

model_path = "models/model"
loss_path = "models/loss"

from cell_dataset import CellGraphDataset
from cell_model import GraphEvolution
from cell_utils import GraphingLoss, make_animation
from cell_training import train, test, run_single, run_single_recursive, evaluate

import threading
import matplotlib.pyplot as plt

import os, psutil
process = psutil.Process(os.getpid())
print("Using : ", process.memory_info().rss // 1000000)  # in megabytes 
print("Available : ", process.memory_info().vms  // 1000000)  # in megabytes 

print(torch.cuda.is_available())

#https://github.com/clovaai/AdamP
from adamp import AdamP

sys.path.append('/home/nstillman/1_sbi_activematter/cpp_model')
import allium

Using :  365
Available :  3482
True
No module named 'pycapmd'
Cannot import simulator


The data is a graph of cells having their own positions and velocity.

In the graph, we will first start by connecting all the edges, then maybe later make radius_graphs to reduce the cost of the pass through the model

In [2]:
#path = "data/" #local
path = "/scratch/users/nstillman/data-cpp/" #remote

data_train = CellGraphDataset(root=path + 'train', max_size=500, rdts=True, inmemory=True, bg_load=True, wrap=True, T_limit=32)
print("Training data length : ", data_train.len())

data_test = CellGraphDataset(root=path + 'test', max_size=50, inmemory=True, bg_load=True, wrap=True, T_limit=32)
print("Test data length : ", data_test.len())
 
data_val = CellGraphDataset(root=path + 'valid', max_size=50, inmemory=True, bg_load=True, wrap=True, T_limit=32)
print("Validation data length : ", data_val.len())

Training data length :  500
Test data length :  50
Validation data length :  50


In [3]:
override = True #make this true to always use the same ones

if override :
    data_train.save_or_load_if_exists("train_paths.pkl")
    data_test.save_or_load_if_exists("test_paths.pkl")
    data_val.save_or_load_if_exists("val_paths.pkl")


In [4]:
#INFO : if bg_load is True, this starts the loading, if skipped, bg_loading will take place as soon as a get is called
rval, edge_index, edge_attr, batch_edge, border, params = data_train.get(0)

Next we need to define the model that will be used :
    > input 
        (1) Graph at a particular time t (nodes having x,y,dx,dy as attributes)
        (2) Graphs up to a particular time [t-a, t] (nodes having x,y as attributes)
    > output
        (a) Graph at the immediate next time step t+1
        (b) Graph [t, t+b]
        (c) Graph at t+b
    > graph size
        (x) Fixed graph size to the most nodes possible (or above)
        (y) Unbounded graph size
            >> idea : graph walks
            >> idea : sampler

The following model will do (1ax)

In [5]:
global losses
losses = []

In [6]:
def start(model : GraphEvolution, optimizer : torch.optim.Optimizer, scheduler  : torch.optim.lr_scheduler._LRScheduler,\
          data_train : CellGraphDataset, data_test : CellGraphDataset, device : torch.device, epoch : int, offset : int, save=0):
    for e in range(offset, offset + epoch):
        
        recursive = e > 70

        model = train(model, optimizer, scheduler, data_train, device, e, process, max_epoch=offset+epoch, recursive=recursive)

        test_loss_r = test(model, data_test, device, method=run_single_recursive, duration=8)
        test_loss_s = test(model, data_test, device, method=run_single, duration=8)
        
        test_loss = test_loss_r + test_loss_s
        
        if (e%10 == 0) :
            print("Epoch : ", e, "Test loss : ", test_loss, "                                                         ")
        
        losses.append(test_loss_r)
        losses.append(test_loss_s)
        
        if (save and (e%save == 0 or e == epoch-1)) :
            with open(model_path + str(e) + ".pkl", 'wb') as f:
                pickle.dump(model.cpu(), f)
            with open(loss_path + str(e) + ".pkl", 'wb') as f:
                pickle.dump(losses, f)

In [7]:
load = True

epoch_to_load = 0

if (load and exists(model_path)) :
    with open(model_path + str(epoch_to_load) + ".pkl", 'rb') as f:
        model = pickle.load(f)
    with open(loss_path + str(epoch_to_load) + ".pkl", 'rb') as f:
        losses = pickle.load(f)
else :
    model = GraphEvolution(in_channels=4, out_channels=4, hidden_channels=64, dropout=0.05, edge_dim=16, messages=5)
    losses = []
    
assert isinstance(model, GraphEvolution)

In [8]:
#might want to investigate AdamP 
optimizer = AdamP(model.parameters(), lr=2e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=5e-3, delta=0.1, wd_ratio=0.1, nesterov=True)
scheduler = CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=10, T_mult=2, eta_min=1e-12)

In [None]:
epochs = 620
grapher = GraphingLoss(losses)
scheduler.step(len(losses))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
try :
    grapher.gstart(20)
    start(model, optimizer, scheduler, data_train, data_test, device, \
          epochs, len(losses), save=20)
finally :
    grapher.gstop()

Current probability of recursive training :  0
Current loss :  96.31526119285253  ...  54 / 500 . Current memory usage :  2915  MB, loaded  112      

In [None]:
mean, std, sim_mean, sim_std = evaluate(model, data_val, device, method=run_single_recursive, duration=16)
print("Mean : ", mean, "Std : ", std, "Sim mean : ", sim_mean, "Sim std : ", sim_std)