# 2.1.3 Rerunning PredNet on Fractal Data

The last results were only run for 25 epochs, so let's see if we get the expected behavior at 200 epochs.

## Jupyter Extensions

Load [watermark](https://github.com/rasbt/watermark) to see the state of the machine and environment that's running the notebook. To make sense of the options, take a look at the [usage](https://github.com/rasbt/watermark#usage) section of the readme.

In [1]:
# Load `watermark` extension
%load_ext watermark
# Display the status of the machine and other non-code related info:
# -n date, -t time, -m machine info, -h host name, -g git hash, -b git branch
%watermark -n -m -g -b -t -h

Mon Sep 07 2020 21:32:54 

compiler   : GCC 7.3.0
system     : Linux
release    : 4.15.0-112-generic
machine    : x86_64
processor  : x86_64
CPU cores  : 16
interpreter: 64bit
host name  : serrep5
Git hash   : 63debd6c1aae64186354ca82868f3dad80b7e525
Git branch : master


Load [autoreload](https://ipython.org/ipython-doc/3/config/extensions/autoreload.html) which will always reload modules marked with `%aimport`.

This behavior can be changed by running `%autoreload 2`, which auto-reloads every module *except* those explicitly excluded with `%aimport -module`.

In [2]:
# Load `autoreload` extension
%load_ext autoreload
# Set autoreload behavior: mode 1 reloads only the modules registered with
# `%aimport` (see the imports cell) before each cell is executed
%autoreload 1

Load `matplotlib` in one of the more `jupyter`-friendly [rich-output modes](https://ipython.readthedocs.io/en/stable/interactive/plotting.html). Some options (that may or may not have worked) are `inline`, `notebook`, and `gtk`.

In [3]:
# Set the matplotlib mode: `inline` embeds figures in the cell output
%matplotlib inline

## Imports

In [4]:
import gc
import logging
from argparse import Namespace
from pathlib import Path

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import torch
import pytorch_lightning as pl
from PIL import Image, ImageOps
from torch.utils.data import IterableDataset, DataLoader

%aimport prevseg.constants
import prevseg.constants as const
%aimport prevseg.index
import prevseg.index as index
%aimport prevseg.dataloaders.schapiro
import prevseg.dataloaders.schapiro as sch
%aimport prevseg.schapiro
from prevseg.schapiro import walk, graph
%aimport prevseg.models.prednet
import prevseg.models.prednet as pn

# Keep track of versions of everything
%watermark -v -iv

prevseg           0+untagged.87.g63debd6.dirty
PIL.Image         7.2.0
numpy             1.19.1
logging           0.5.1.2
torch             1.4.0
networkx          2.4
pytorch_lightning 0.8.5
CPython 3.8.5
IPython 7.16.1


## Training the Model

In [5]:
# Start from a *copy* of the project defaults. Assigning
# `const.DEFAULT_HPARAMS` directly would alias the module-level object, so
# every override below would silently mutate the shared defaults for the rest
# of the session (and for any other code that reads them).
hparams = Namespace(**vars(const.DEFAULT_HPARAMS))

# Model class used for both training and checkpoint reloading
ModelClass = pn.PredNetTrackedSchapiro

# Architecture / data
hparams.n_layers = 2
hparams.batch_size = 256 + 128
hparams.max_steps = 128
hparams.n_paths = 16
hparams.n_pentagons = 3
# Must be set after `max_steps`: the two are kept equal
hparams.time_steps = hparams.max_steps

# Experiment naming (used for the log and checkpoint directories)
hparams.exp_name = 'schapiro_test'
hparams.name = f'{ModelClass.name}_{hparams.exp_name}'

# Run / optimisation settings
hparams.debug = False
hparams.n_workers = 1
hparams.lr = 0.001
hparams.n_epochs = 200
hparams.seed = 1
hparams.layer_loss_mode = 'first'

# True: train a new model. False: reload the best checkpoint of the newest run.
train_model = True

In [6]:
if train_model:
    # Drop references left over from a previous run so their GPU memory can be
    # reclaimed before a new model is built.
    model, trainer = None, None
    train_dataloader, val_dataloader = None, None
    errors, optimizer = None, None
    ckpt = None
    train_errors, val_errors = None, None
    res = None
    gc.collect()
    torch.cuda.empty_cache()

    # TensorBoard logs go to <dir_logs>/<name>/version_<n>
    log_dir = Path(hparams.dir_logs) / f'{hparams.name}'
    # `exist_ok=True` replaces the check-then-create pattern and stays safe if
    # the directory appears between the check and the creation.
    log_dir.mkdir(parents=True, exist_ok=True)
    logger = pl.loggers.TensorBoardLogger(str(log_dir.parent), name=hparams.name)

    # One checkpoint directory per logger version: <dir_checkpoints>/<name>_v<n>
    ckpt_dir = Path(hparams.dir_checkpoints) / f'{hparams.name}_v{logger.version}'
    ckpt_dir.mkdir(parents=True, exist_ok=True)

    # Keep only the single best checkpoint; the file name records the global
    # step, the epoch and the validation loss.
    ckpt = pl.callbacks.ModelCheckpoint(
        filepath=str(ckpt_dir / (hparams.exp_name+'_{global_step:05d}_{epoch:03d}_{val_loss:.3f}')),
        verbose=True,
        save_top_k=1,
    )

    trainer = pl.Trainer(checkpoint_callback=ckpt,
                         max_epochs=hparams.n_epochs,
                         logger=logger,
                         gpus=1
                         )

    model = ModelClass(hparams)
    # NOTE(review): presumably the dataset is (re)built later, e.g. during
    # `trainer.fit` - confirm against the model implementation.
    model.ds = None

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [1]


In [7]:
from pprint import pprint

# Dump every hyperparameter (name -> value) for the record
pprint(hparams.__dict__)

{'batch_size': 384,
 'debug': False,
 'device': 'cuda',
 'dir_checkpoints': '/home/abdullah/work/predictive-event-segmentation/models/checkpoints',
 'dir_logs': '/home/abdullah/work/predictive-event-segmentation/logs/tensorboard',
 'dir_weights': '/home/abdullah/work/predictive-event-segmentation/models/weights',
 'exp_name': 'schapiro_test',
 'input_size': 2048,
 'layer_loss_mode': 'first',
 'lr': 0.001,
 'max_steps': 128,
 'model_name': 'prednet',
 'n_epochs': 200,
 'n_layers': 2,
 'n_paths': 16,
 'n_pentagons': 3,
 'n_test': 16,
 'n_val': 256,
 'n_workers': 1,
 'name': 'prednet_tracked_schapiro_schapiro_test',
 'output_mode': 'error',
 'seed': 1,
 'time_steps': 128}


In [None]:
if train_model:
    # Train the model. The previous `exit()` call that followed has been
    # removed: in a notebook it shuts the kernel down, which discards `model`
    # and makes every analysis cell below impossible to run in the same
    # session (breaking Restart & Run All).
    trainer.fit(model)


  | Name                         | Type       | Params
------------------------------------------------------------
0 | predcell_tracked_0_recurrent | LSTM       | 67 M  
1 | predcell_tracked_0_dense     | Sequential | 4 M   
2 | predcell_tracked_0_update_a  | Sequential | 4 M   
3 | predcell_tracked_1_recurrent | LSTM       | 12 M  
4 | predcell_tracked_1_dense     | Sequential | 1 M   


Created mapping as follows:
{0: '26', 1: '44', 2: '97', 3: '13', 4: '87', 5: '9', 6: '68', 7: '4', 8: '28', 9: '1', 10: '51', 11: '24', 12: '38', 13: '42', 14: '59'}
Created mapping as follows:
{0: '26', 1: '44', 2: '97', 3: '13', 4: '87', 5: '9', 6: '68', 7: '4', 8: '28', 9: '1', 10: '51', 11: '24', 12: '38', 13: '42', 14: '59'}
Validation sanity check: 0it [00:00, ?it/s]



Epoch 1: : 0it [00:00, ?it/s]                                         



Epoch 1: : 16it [01:11,  4.46s/it, loss=0.176, v_num=5]
Validating: 0it [00:00, ?it/s][A
Epoch 1: : 17it [01:12,  4.25s/it, loss=0.176, v_num=5]


Epoch 00000: val_loss reached 0.15429 (best 0.15429), saving model to /home/abdullah/work/predictive-event-segmentation/models/checkpoints/prednet_tracked_schapiro_schapiro_test_v5/schapiro_test_global_step=00015_epoch=000_val_loss=0.154.ckpt as top 1


Epoch 1: : 17it [01:14,  4.36s/it, loss=0.176, v_num=5]
Epoch 2: : 16it [01:11,  4.50s/it, loss=0.153, v_num=5]
Validating: 0it [00:00, ?it/s][A
Epoch 2: : 17it [01:12,  4.28s/it, loss=0.153, v_num=5]


Epoch 00001: val_loss reached 0.14991 (best 0.14991), saving model to /home/abdullah/work/predictive-event-segmentation/models/checkpoints/prednet_tracked_schapiro_schapiro_test_v5/schapiro_test_global_step=00031_epoch=001_val_loss=0.150.ckpt as top 1


Epoch 2: : 17it [01:14,  4.41s/it, loss=0.153, v_num=5]
Epoch 3: : 16it [01:12,  4.55s/it, loss=0.149, v_num=5]
Validating: 0it [00:00, ?it/s][A
Epoch 3: : 17it [01:13,  4.33s/it, loss=0.149, v_num=5]


Epoch 00002: val_loss reached 0.14645 (best 0.14645), saving model to /home/abdullah/work/predictive-event-segmentation/models/checkpoints/prednet_tracked_schapiro_schapiro_test_v5/schapiro_test_global_step=00047_epoch=002_val_loss=0.146.ckpt as top 1


Epoch 3: : 17it [01:15,  4.46s/it, loss=0.149, v_num=5]
Epoch 4: : 16it [01:12,  4.55s/it, loss=0.146, v_num=5]
Validating: 0it [00:00, ?it/s][A
Epoch 4: : 17it [01:13,  4.33s/it, loss=0.146, v_num=5]


Epoch 00003: val_loss reached 0.14500 (best 0.14500), saving model to /home/abdullah/work/predictive-event-segmentation/models/checkpoints/prednet_tracked_schapiro_schapiro_test_v5/schapiro_test_global_step=00063_epoch=003_val_loss=0.145.ckpt as top 1


Epoch 4: : 17it [01:15,  4.45s/it, loss=0.146, v_num=5]
Epoch 5: : 4it [00:19,  4.94s/it, loss=0.146, v_num=5] 

In [9]:
if not train_model:
    # Get all the experiments with the name hparams.name*
    experiments = list(index.DIR_CHECKPOINTS.glob(f'{hparams.name}*'))
    if not experiments:
        raise FileNotFoundError(
            f'No checkpoint directory matching {hparams.name}* '
            f'found in {index.DIR_CHECKPOINTS}')
    # Get the newest exp by v number (directory names end in `_v<version>`)
    experiment_newest = max(
        experiments,
        key=lambda path: int(path.stem.split('_')[-1][1:]))

    # Get the model with the best (lowest) val_loss; checkpoint file names end
    # in `val_loss=<float>` before the extension
    checkpoints = list(experiment_newest.iterdir())
    if not checkpoints:
        raise FileNotFoundError(f'No checkpoints found in {experiment_newest}')
    experiment_newest_best_val = min(
        checkpoints,
        key=lambda path: float(path.stem.split('val_loss=')[-1]))

    # Drop references left over from a previous run so their GPU memory can be
    # reclaimed before the checkpoint is loaded.
    model, trainer = None, None
    train_dataloader, val_dataloader = None, None
    errors, optimizer = None, None
    ckpt = None
    train_errors, val_errors = None, None
    res = None
    gc.collect()
    torch.cuda.empty_cache()

    # Reload with the same class that was trained. The original referenced
    # `LSTMStacked`, which is never imported or defined in this notebook and
    # would raise a NameError.
    model = ModelClass.load_from_checkpoint(str(experiment_newest_best_val))
    model.prepare_data()
    model.cuda()
    # Use the hyperparameters stored with the checkpoint from here on
    hparams = model.hparams

    log_dir = Path(hparams.dir_logs) / f'{hparams.name}'
    log_dir.mkdir(parents=True, exist_ok=True)
    logger = pl.loggers.TensorBoardLogger(str(log_dir.parent),
                                          name=hparams.name,
                                          version=0)
    model.logger = logger

In [None]:
# Single-path dataset that reuses the mapping stored on the model's dataset
dataset_kwargs = dict(
    batch_size=1,
    max_steps=hparams.max_steps,
    n_paths=1,
    mapping=model.ds.mapping,
    mode='euclidean',
)
iter_ds = sch.ShapiroResnetEmbeddingDataset(**dataset_kwargs)
# batch_size=None: the DataLoader yields the dataset's items without re-batching
loader = DataLoader(iter_ds, batch_size=None)

# Exhaust the loader; `data` and `nodes` keep the last item it yielded
for data, nodes in loader:
    continue

In [None]:
# Reverse the sequence along axes 0 and 1, then drop its first step along
# axis 1 (it repeats the final step of `data`) before appending it to the
# forward sequence.
data_reversed = torch.flip(data, (0, 1))
data_all = torch.cat((data, data_reversed[:, 1:, :]), 1)
data_all.shape

In [None]:
# Run the forward+reverse sequence through the model in 'eval' output mode
outs = model.forward(
    data_all,
    output_mode='eval',
    run_num='fwd_rev',
    tb_labels=['nodes'],
)

In [None]:
# Flatten the visited-node labels into a 1-D array of exactly 30 entries
# (NOTE(review): 30 is presumably the length of the walk produced by the
# 'euclidean' mode on the 3-pentagon graph - confirm against the dataset).
nodes = np.reshape(np.array(nodes), 30)
nodes

In [None]:
# Same forward+reverse construction as for the data: append the reversed walk
# minus its first entry (the node shared with the end of the forward walk).
nodes_reversed = nodes[::-1]
nodes_all = np.concatenate((nodes, nodes_reversed[1:]))
nodes_all.shape

In [None]:
# List each step index next to the node visited at that step
for step, node in enumerate(nodes_all):
    print(step, node)

In [None]:
# Build the 3-pentagon Schapiro graph and draw it with labelled nodes
G = graph.schapiro_graph(n_pentagons=3)
draw_options = {'with_labels': True, 'font_weight': 'bold'}
nx.draw(G, **draw_options)
plt.show()

In [None]:
# Step indices at which a dotted vertical line is drawn in the plots below.
# NOTE(review): presumably the community-boundary crossings of the
# forward+reverse walk - these values are hard-coded for this specific walk;
# verify against the printed node sequence above.
borders = [9, 19, 29, 30, 40, 50]

### Hidden State Differences

In [None]:
hidden_diffs = outs['hidden_diff']
n_panels = len(hidden_diffs)

fig = plt.figure()
# Full-figure axes kept only as an outer frame behind the per-layer panels
ax_large = fig.add_subplot(111)

for i, out in enumerate(hidden_diffs):
    # (rows, cols, index) form instead of the packed integer code
    # `11 + i + n*100`, which stops working at 10 or more layers.
    ax = fig.add_subplot(n_panels, 1, i + 1)
    # Flatten whatever length the sequence has (no hard-coded 59) and skip
    # step 0; detach so grad-tracking tensors can be converted as well.
    ax.plot(np.array(out.detach().cpu()).reshape(-1)[1:])
    ax.set_ylabel(f'Layer {i+1}')
    # Mark the border steps with dotted vertical lines
    for b in borders:
        ax.axvline(b, ls=':')
    # Only the bottom panel gets the x-axis label
    if i == n_panels - 1:
        ax.set_xlabel('Step')

# The frame axes carry no data, so hide their ticks
ax_large.axes.xaxis.set_ticks([])
ax_large.axes.yaxis.set_ticks([])
fig.set_size_inches(16, 9)

### Representation Differences

In [None]:
representation_diffs = outs['representation_diff']
n_panels = len(representation_diffs)

fig = plt.figure()
# Full-figure axes kept only as an outer frame behind the per-layer panels
ax_large = fig.add_subplot(111)

for i, out in enumerate(representation_diffs):
    # (rows, cols, index) form instead of the packed integer code
    # `11 + i + n*100`, which stops working at 10 or more layers.
    ax = fig.add_subplot(n_panels, 1, i + 1)
    # Flatten whatever length the sequence has (no hard-coded 59) and skip
    # step 0; detach so grad-tracking tensors can be converted as well.
    ax.plot(np.array(out.detach().cpu()).reshape(-1)[1:])
    ax.set_ylabel(f'Layer {i+1}')
    # Mark the border steps with dotted vertical lines
    for b in borders:
        ax.axvline(b, ls=':')
    # Only the bottom panel gets the x-axis label
    if i == n_panels - 1:
        ax.set_xlabel('Step')

# The frame axes carry no data, so hide their ticks
ax_large.axes.xaxis.set_ticks([])
ax_large.axes.yaxis.set_ticks([])
fig.set_size_inches(16, 9)

### Alternating Within vs Between Communities

In [None]:
# Hand-picked node sequence for the border-walk test, three nodes per row
test_nodes = [
    6, 8, 9,
    10, 9, 10,
    13, 12, 14,
    0, 14, 0,
    1, 2, 4,
    5, 4, 5,
]
# Look up each node's entry in the dataset and arrange the result as a single
# sequence of shape (1, n_steps, 2048)
node_embeddings = [iter_ds.array_data[n] for n in test_nodes]
test_data = np.array(node_embeddings).reshape(1, len(test_nodes), 2048)

In [None]:
# Convert the hand-built walk to a tensor and run it through the model in
# 'eval' output mode under its own run label
test_tensor = torch.Tensor(test_data)
border_outs = model.forward(
    test_tensor,
    output_mode='eval',
    run_num='border_walk_3',
    tb_labels=['nodes'],
)

In [None]:
border_hidden_diffs = border_outs['hidden_diff']
n_panels = len(border_hidden_diffs)

fig = plt.figure()
# Full-figure axes kept only as an outer frame behind the per-layer panels
ax_large = fig.add_subplot(111)

for i, out in enumerate(border_hidden_diffs):
    # (rows, cols, index) form instead of the packed integer code
    # `11 + i + n*100`, which stops working at 10 or more layers.
    ax = fig.add_subplot(n_panels, 1, i + 1)
    # One value per step of the test walk, skipping step 0; detach so
    # grad-tracking tensors can be converted as well.
    ax.plot(np.array(out.detach().cpu()).reshape(len(test_nodes))[1:])
    ax.set_ylabel(f'Layer {i+1}')
    # Only the bottom panel gets the x-axis label
    if i == n_panels - 1:
        ax.set_xlabel('Step')

# The frame axes carry no data, so hide their ticks
ax_large.axes.xaxis.set_ticks([])
ax_large.axes.yaxis.set_ticks([])
fig.set_size_inches(16, 9)