# Environment Setup

In [1]:
# Enable autoreload of libraries on execution
%load_ext autoreload
%autoreload 2

%load_ext tensorboard

In [2]:
from IPython import get_ipython

# Smoke-test switch: set to True to only check that a model runs end-to-end,
# without a proper training
TEST_MODE = False

def is_using_colab() -> bool:
    """Tell whether the notebook is executing inside Google Colab.

    The check looks for the substring ``google.colab`` in the string form of
    the object returned by ``get_ipython()``.
    """
    shell_description = str(get_ipython())
    return "google.colab" in shell_description

if is_using_colab():
    import os
    import zipfile
    from google.colab import drive

    if TEST_MODE:
        print("⚠️ Running on TEST_MODE, if that's what you wanted to do, ignore this. ⚠️")

    # Install required libraries
    os.system("pip -q install torchinfo")
    os.system("pip -q install pytorch-lightning")
    os.system("pip -q install torchmetrics[image]")
    os.system("pip -q install bagpy")
    
    # Create folders
    !mkdir Experiments
    !mkdir Datasets
    
    # Copy code and datasets zips from Drive
    drive.mount('/content/drive')
    !cp -r "/content/drive/MyDrive/Master Thesis/02 - Code/." "."
    # !cp -r "/content/drive/MyDrive/Master Thesis/01 - Datasets/." "Datasets/."
    # !cp -r "/content/drive/MyDrive/Master Thesis/01 - Datasets/DIV2K_800_5.zip" "Datasets/DIV2K_800_5.zip"
    !cp -r "/content/drive/MyDrive/Master Thesis/01 - Datasets/DIV2K_5_fix.zip" "Datasets/DIV2K_5_fix.zip"

    # Extract datasets zips
    for zip in os.listdir("Datasets"):
        with zipfile.ZipFile(os.path.join("Datasets", zip), 'r') as zip_ref:
            zip_ref.extractall(os.path.join("Datasets", zip.replace(".zip", "")))
            os.remove(os.path.join("Datasets", zip))

# Runtime configuration: compute device plus experiments/datasets locations
device = "cuda"  # same value on Colab and on the local machine
if is_using_colab():
    experiments_dir, datasets_path = "Experiments", r"Datasets/"
else:
    experiments_dir, datasets_path = "../05 - Experiments", r"C:\datasets"

# Import required libraries
import os
import time
import gc
import json

import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
from IPython.display import display

from media_utils import plot_img

# Datasets Definition


In [3]:
# Keyword arguments forwarded to the dataset constructors
DATASET_PARAMS = dict(
    limit=None,
    preload_to_RAM=True,
    crop_size=(128, 128),
)

# Keyword arguments forwarded to torch.utils.data.DataLoader
DATALOADER_PARAMS = dict(
    batch_size=8,
    num_workers=0,
    pin_memory=True,
)

# Colab-specific overrides
if is_using_colab():
    DATASET_PARAMS.update(preload_to_RAM=True)
    DATALOADER_PARAMS.update(batch_size=64, num_workers=0)

## CED Dataset

In [None]:
CED_DATASET_PATH = os.path.join(datasets_path, "CEDDataset")
# os.listdir returns the entries in arbitrary order; sorting makes the printed
# indices identical on every run and machine
available_sequences = sorted(os.listdir(CED_DATASET_PATH))
print("Available sequences:")
for i, seq in enumerate(available_sequences):
    print(f"{i:<5}{seq}")

In [None]:
from dataset import CEDDataset

# DATASET_PARAMS is extended further down (DataLoader section) with the
# "train_datasets_names"/"valid_datasets_names" bookkeeping keys. Strip them so
# that re-running this cell afterwards does not forward them to the constructor.
ced_kwargs = {
    key: value
    for key, value in DATASET_PARAMS.items()
    if key not in ("train_datasets_names", "valid_datasets_names")
}
ced_kwargs["ignore_input_image"] = True

# Sequences used for each split
train_datasets_names = ["simple_color_keyboard_1", "simple_fruit"]
train_dataset = CEDDataset(CED_DATASET_PATH, sequences=train_datasets_names, **ced_kwargs)

valid_datasets_names = ["simple_rabbits"]
valid_dataset = CEDDataset(CED_DATASET_PATH, sequences=valid_datasets_names, **ced_kwargs)

test_datasets_names = ["simple_color_keyboard_2", "simple_jenga_1", "simple_wires_1"]
test_dataset = CEDDataset(CED_DATASET_PATH, sequences=test_datasets_names, **ced_kwargs)


## Synthetic Dataset from DIV2K

In [14]:
from dataset import DIV2KDataset
DIV2K_DATASET_PATH = os.path.join(datasets_path, "DIV2K_5_fix")

# DATASET_PARAMS is extended further down (DataLoader section) with the
# "train_datasets_names"/"valid_datasets_names" bookkeeping keys. Strip them so
# that re-running this cell afterwards does not forward them to the constructor.
div2k_kwargs = {
    key: value
    for key, value in DATASET_PARAMS.items()
    if key not in ("train_datasets_names", "valid_datasets_names")
}

# Sequences are named with zero-padded numbers: 0001-0800 for training and
# 0801-0900 for validation; TEST_MODE keeps only the first 25 / 5 of them
n_train, n_valid = (25, 5) if TEST_MODE else (800, 100)

train_datasets_names = ["{:04}".format(i) for i in range(1, n_train + 1)]
train_dataset = DIV2KDataset(DIV2K_DATASET_PATH, sequences=train_datasets_names, **div2k_kwargs)

valid_datasets_names = ["{:04}".format(i) for i in range(801, 801 + n_valid)]
valid_dataset = DIV2KDataset(DIV2K_DATASET_PATH, sequences=valid_datasets_names, **div2k_kwargs)

In [15]:
def generate_batch_from_files(paths, load_batch_fn, pre_process_fn):
    """Build one ``(X, y)`` batch from samples stored on disk.

    Args:
        paths: Iterable of sample paths. A path that does not exist as given is
            retried with the ``.pt`` and then the ``.npz`` extension appended.
        load_batch_fn: Callable that receives the resolved path and returns the
            raw sample.
        pre_process_fn: Callable that receives the raw sample and returns an
            indexable whose items 0 and 1 are the input and the target.

    Returns:
        Tuple ``(X, y)`` of float32 tensors, obtained by stacking the inputs and
        the targets of all samples along a new leading dimension.
    """
    inputs, targets = [], []

    for path in paths:
        # Probe the known extensions only when the path is not usable as given
        if not os.path.exists(path):
            for ext in (".pt", ".npz"):
                if os.path.exists(path + ext):
                    path = path + ext
                    break

        sample = pre_process_fn(load_batch_fn(path))
        # as_tensor converts lists/arrays and casts existing tensors without the
        # copy-construct warning of torch.tensor(tensor); detach() keeps the
        # result outside of any autograd graph, as torch.tensor did
        inputs.append(torch.as_tensor(sample[0], dtype=torch.float32).detach())
        targets.append(torch.as_tensor(sample[1], dtype=torch.float32).detach())

    return torch.stack(inputs), torch.stack(targets)


# "batch_0000" of sequences 0001-0005 (train) and 0801-0805 (valid); the file
# extension is resolved by generate_batch_from_files
train_batch_paths = [
    os.path.join(DIV2K_DATASET_PATH, f"{seq:04}", "batch_0000") for seq in range(1, 6)
]
valid_batch_paths = [
    os.path.join(DIV2K_DATASET_PATH, f"{seq:04}", "batch_0000") for seq in range(801, 806)
]

# Fixed sample batches, loaded and pre-processed with each dataset's own methods
train_batch = generate_batch_from_files(
    train_batch_paths, train_dataset._load_batch, train_dataset.pre_process
)
valid_batch = generate_batch_from_files(
    valid_batch_paths, valid_dataset._load_batch, valid_dataset.pre_process
)

# PyTorch Dataset and DataLoader

In [16]:
from dataset import CEDDataset, ConcatBatchSampler

# Check that there is no intersection between train and valid dataset: the
# number of distinct names equals the total number of names only if none repeats
all_datasets_names = train_datasets_names + valid_datasets_names
assert len(set(all_datasets_names)) == len(all_datasets_names), "Some datasets are in common"

# Do not batch events coming from different datasets
# samplers = [torch.utils.data.RandomSampler(ds) for ds in train_datasets]
# sampler = ConcatBatchSampler(samplers=samplers, batch_size=TRAIN_DS_PARAMS["batch_size"], drop_last=False)
# train_dataloader = torch.utils.data.DataLoader(
#     concat_ds, batch_sampler=sampler,
# )

# Only the training loader shuffles its samples
DataLoader = torch.utils.data.DataLoader
train_dataloader = DataLoader(train_dataset, shuffle=True, **DATALOADER_PARAMS)
valid_dataloader = DataLoader(valid_dataset, shuffle=False, **DATALOADER_PARAMS)

# Record which sequences were used, so they end up in the logged parameters
DATASET_PARAMS["train_datasets_names"] = train_datasets_names
DATASET_PARAMS["valid_datasets_names"] = valid_datasets_names

print("Train samples: {} \t Train batches: {:<10}".format(len(train_dataset), len(train_dataloader)))
print("Valid samples: {} \t Valid batches: {:<10}".format(len(valid_dataset), len(valid_dataloader)))

Train samples: 125 	 Train batches: 16        
Valid samples: 25 	 Valid batches: 4         


# Inspect datasets

In [19]:
# Inspect train dataloader
from media_utils import save_video_tensors

# Old dataset format
# for (i, e), o in tqdm(train_dataloader):

# Gather, item by item, the second element of every batch and save the
# collection as a video
frames = []
for _first, second in tqdm(train_dataloader):
    frames.extend(second)

save_video_tensors("train_frames.mp4", frames, 5)

100%|██████████| 16/16 [00:00<00:00, 134.89it/s]


In [20]:
# Inspect valid dataloader
from media_utils import save_video_tensors

# Old dataset format
# for (i, e), o in tqdm(valid_dataloader):

# Gather, item by item, the second element of every batch and save the
# collection as a video
frames = []
for _first, second in tqdm(valid_dataloader):
    frames.extend(second)

save_video_tensors("valid_frames.mp4", frames, 5)

100%|██████████| 4/4 [00:00<00:00, 363.31it/s]


# UNet AutoEncoder Model


In [None]:
from torchinfo import summary
from models import EventsToImagesUNet

# Model hyper-parameters; the training cell adds its own entries to this dict
PARAMS = dict(input_channels=3 + 10)

model = EventsToImagesUNet(PARAMS["input_channels"])
# summary(model, input_size=(4, input_channels, 256, 336), device=device)


## Model Training


In [None]:
from train import train_unet

# Training settings, stored together with the dataset configuration
PARAMS.update(
    n_epochs=3,
    learning_rate=0.0001,
    comment="",
    train_dataset_params=DATASET_PARAMS,
)

train_unet(
    model,
    device,
    train_dataloader,
    PARAMS,
    log_path=experiments_dir,
    valid_ds=valid_dataloader,
    save_best_model=True,
)

In [None]:
# Save the current model's weights (state_dict only) in the working directory
torch.save(model.state_dict(), "model_thesis.pt")

In [None]:
# Save input frames as video
from media_utils import save_video_tensors

# Each batch is unpacked as ((input images, events), ground truth); only the
# input images are collected
imgs = []
for (input_images, events_tensors), ground_truth_images in tqdm(train_dataloader):
    imgs.extend(input_images)
save_video_tensors("input_video.mp4", imgs, 30)


In [None]:
# Save generated frames as video
# When True, every batch after the first one uses the frames generated at the
# previous step in place of the input images coming from the dataloader
use_prev_images_as_input = False

imgs = []
# Inference only: no_grad avoids building the autograd graph for every batch
with torch.no_grad():
    for i, batch in enumerate(tqdm(train_dataloader)):
        # Always unpack the current batch, so the events fed to the model belong
        # to it even when the previous prediction replaces the input images
        (input_images, events_tensors), ground_truth_images = batch
        if use_prev_images_as_input and i != 0:
            input_images = generated_images
        # Move the last axis to position 1 before the model call
        input_images = torch.einsum("bhwc -> bchw", input_images)

        # Images and events are joined along dimension 1
        input_tensors = torch.hstack((input_images, events_tensors))
        input_tensors = input_tensors.to(device)

        generated_images = model(input_tensors).cpu().detach()
        # Back to the axis order of the dataloader images
        generated_images = torch.einsum("bchw -> bhwc", generated_images)

        imgs += generated_images

save_video_tensors("generated_video.mp4", imgs, 30)


In [None]:
# BS = 1; Seconds for one epoch: 142.36227083206177
# BS = 2; Seconds for one epoch: 132.93124723434448
# BS = 4; Seconds for one epoch: 127.95218682289124
# BS = 8; Seconds for one epoch: 138.49955368041992
# BS = 16; Seconds for one epoch: after 180 seconds it was at 3/10 batches so I stopped it


# Transformer Model


## Model definition


In [None]:
from models import TransformerModel

# Model hyper-parameters; the training cell adds its own entries to this dict
PARAMS = dict(
    input_shape=(336, 256, 3),
    encoding_size=512,
    heads=4,
    layers_number=2,
)

# The constructor takes the four values positionally, in this order
model = TransformerModel(
    *(PARAMS[key] for key in ("input_shape", "encoding_size", "heads", "layers_number"))
)

## Model Training


In [None]:
from train import train_transformer

# Training settings, stored together with the dataset configuration
PARAMS.update(
    n_epochs=3,
    learning_rate=0.0001,
    comment="",
    train_dataset_params=DATASET_PARAMS,
)

train_transformer(
    model,
    device,
    train_dataloader,
    PARAMS,
    log_path=experiments_dir,
    valid_ds=valid_dataloader,
    save_best_model=True,
)

In [None]:
# Save model: only the state_dict is written, to "transformer.pt" in the
# working directory
torch.save(model.state_dict(), "transformer.pt")

In [None]:
# Load model
# map_location="cpu" deserializes the tensors on the CPU whatever device they
# were saved from; load_state_dict then copies them into the model's parameters
model.load_state_dict(torch.load("transformer.pt", map_location="cpu"))

# Autoencoder Model

In [11]:
from models.autoencoder import EventEncoder, EventDecoder, EventAutoEncoder
from torchinfo import summary

PARAMS = {
    "n_filters": [16, 16, 32, 32],
    "input_features": 1,
    "lr": 0.001
}

# The decoder starts from the encoder's last filter count and walks the list
# of filters in reverse order
ee = EventEncoder(PARAMS["input_features"], PARAMS["n_filters"])
ed = EventDecoder(PARAMS["n_filters"][-1], PARAMS["n_filters"][::-1])

# Keep the autoencoder under its own name: the Event Encoder Transformer
# section reads `eae.encoder`, while `model` is rebound by the other sections
eae = EventAutoEncoder(ee, ed, PARAMS["lr"])
model = eae

# summary(eae, input_size=(8, 1, 128, 128), device=device)

In [12]:
# Turn the sample batches into autoencoder batches: take index 1 along the
# second dimension of the inputs and use it both as input (with a size-1
# dimension inserted at position 1) and as target.
# NOTE: this overwrites train_batch/valid_batch, so run it only once per batch.
train_events, valid_events = (pair[0][:, 1] for pair in (train_batch, valid_batch))
train_batch = (train_events.unsqueeze(1), train_events)
valid_batch = (valid_events.unsqueeze(1), valid_events)

# Event Encoder Transformer Model

In [None]:
from models import EventEncoderTransformer
from torchinfo import summary

PARAMS = dict(
    output_shape=(336, 256, 3),
    encoding_size=336,
    heads=4,
    layers_number=6,
)

# The model is going to use the previous AutoEncoder, so be sure to use a pre-trained version of it
# NOTE(review): `eae` must hold that AutoEncoder before this cell runs; verify
# it is defined in the current session

# Every PARAMS entry matches a constructor keyword of the same name
model = EventEncoderTransformer(encoder=eae.encoder, **PARAMS)

# summary(model, input_size=(4, 10, 256, 336))


In [None]:
from train import train_transformer

# Training settings, stored together with the dataset configuration
PARAMS.update(
    n_epochs=300,
    learning_rate=0.0001,
    comment="",
    train_dataset_params=DATASET_PARAMS,
)

train_transformer(
    model,
    device,
    train_dataloader,
    PARAMS,
    log_path=experiments_dir,
    valid_ds=valid_dataloader,
    save_best_model=True,
)

In [None]:
# Save model: only the state_dict is written, to "model_tae.pt" in the
# working directory
torch.save(model.state_dict(), "model_tae.pt")

In [None]:
# Load model
# map_location="cpu" deserializes the tensors on the CPU whatever device they
# were saved from; load_state_dict then copies them into the model's parameters
model.load_state_dict(torch.load("model_tae.pt", map_location="cpu"))

# ViT-like Model

In [None]:
# Clean VRAM
import gc

# Drop the reference only if it exists, so the cell can be executed repeatedly
globals().pop("model", None)
# Collect first: objects kept alive by reference cycles are destroyed only by
# the garbage collector, and empty_cache() can release just the cached blocks
# that are unused at the moment it is called
gc.collect()
torch.cuda.empty_cache()

In [None]:
import pytorch_lightning as pl
from models.transformer import VisionTransformer

# Every entry is passed to the constructor as a keyword argument
PARAMS = dict(
    input_shape=(10, 128, 128),
    patch_size=(32, 32),
    encoding_size=256,
    heads=4,
    layers_number=3,
    use_linear_proj=True,
    learning_rate=0.0001,
    use_LPIPS=False,
    vgg_layer="features.35",
)
model = VisionTransformer(**PARAMS)

In [None]:
if is_using_colab():
    from IPython import get_ipython
    # InteractiveShell.magic() is deprecated; run_line_magic receives the magic
    # name and its argument string separately
    get_ipython().run_line_magic("tensorboard", '--logdir "Experiments/lightning_logs"')

In [None]:
from utils import LogImagesCallback, KerasProgressBar
from pytorch_lightning.callbacks import ModelCheckpoint

# Run settings; "comment" doubles as the TensorBoard version name
PARAMS.update(
    n_epochs=2,
    comment="Test with VGG54",
    train_dataset_params=DATASET_PARAMS,
)

# First batch of each loader, handed to the image-logging callback
train_batch = next(iter(train_dataloader))
valid_batch = next(iter(valid_dataloader))
callbacks = [
    LogImagesCallback(train_batch, valid_batch, n=5, n_epochs=5),
    KerasProgressBar(),
]

# Checkpoint selected on the minimum "val_loss", plus the last epoch
checkpoint_callback = ModelCheckpoint(monitor="val_loss", mode="min", save_last=True)
checkpoint_callback.CHECKPOINT_NAME_LAST = "{epoch}-last"
callbacks.append(checkpoint_callback)

logger = pl.loggers.TensorBoardLogger(experiments_dir, version=PARAMS["comment"])
# Log at every step when an epoch has fewer than 50 batches
log_every = 1 if len(train_dataloader) < 50 else 50

# Store the parameters as a JSON code block in TensorBoard's text tab
params_json = json.dumps(PARAMS, indent=2).replace("\n","  \n")
params_markdown = "```json\n" + params_json + "\n```"
logger.experiment.add_text("params", params_markdown)

trainer = pl.Trainer(
    max_epochs=PARAMS["n_epochs"],
    callbacks=callbacks,
    accelerator="gpu",
    profiler=None,
    logger=logger,
    log_every_n_steps=log_every,
)
trainer.fit(model, train_dataloader, valid_dataloader)

In [None]:
# Save model: only the state_dict is written, to "model_vit.pt" in the
# working directory
torch.save(model.state_dict(), "model_vit.pt")

In [None]:
# Load model
# map_location="cpu" deserializes the tensors on the CPU whatever device they
# were saved from; load_state_dict then copies them into the model's parameters
model.load_state_dict(torch.load("model_vit.pt", map_location="cpu"))

# Conv ViT

In [None]:
# Clean VRAM
import gc

# Drop the reference only if it exists, so the cell can be executed repeatedly
globals().pop("model", None)
# Collect first: objects kept alive by reference cycles are destroyed only by
# the garbage collector, and empty_cache() can release just the cached blocks
# that are unused at the moment it is called
gc.collect()
torch.cuda.empty_cache()

In [None]:
import pytorch_lightning as pl
from models.transformer import VisionTransformerConv

# Every entry is passed to the constructor as a keyword argument
PARAMS = dict(
    input_shape=(10, 128, 128),
    patch_size=(32, 32),
    heads=4,
    layers_number=1,
    learning_rate=0.0001,
    image_loss_weight=1,
    feature_loss_weight=1e-2,
)
model = VisionTransformerConv(**PARAMS)

In [None]:
from torchinfo import summary

# Per-layer report for an input of size (8, 10, 128, 128)
summary(
    model,
    input_size=(8, 10, 128, 128),
    col_names=["input_size", "output_size", "num_params"],
)

# Basic CNN

In [24]:
from models.cnn import BasicCNN

# Every entry is passed to the constructor as a keyword argument
PARAMS = dict(lr=0.001)
model = BasicCNN(**PARAMS)

In [None]:
from torchinfo import summary

# Per-layer report for an input of size (8, 10, 128, 128)
summary(
    model,
    input_size=(8, 10, 128, 128),
    col_names=["input_size", "output_size", "num_params"],
)

# PyTorch Lightning Training

In [None]:
from utils import LogImagesCallback, KerasProgressBar, ColabSaveCallback
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint

# Run settings; "comment" doubles as the TensorBoard version name
PARAMS.update(
    n_epochs=50,
    comment="Small - little test",
    dataset_params=DATASET_PARAMS,
    dataloader_params=DATALOADER_PARAMS,
)

# train_batch/valid_batch must have been prepared by an earlier cell
callbacks = [
    LogImagesCallback(train_batch, valid_batch, n=5, n_epochs=5),
    KerasProgressBar(),
]
if is_using_colab():
    # On Colab the callback also receives a zip command for the Experiments
    # folder and a Drive destination
    dst_path = "/content/drive/MyDrive/Master Thesis"
    colab_cb = ColabSaveCallback("Exp.zip", dst_path, 60*60, ["zip -r Exp.zip Experiments"])
    callbacks.append(colab_cb)

# Checkpoint selected on the minimum "val_loss", plus the last epoch
checkpoint_callback = ModelCheckpoint(monitor="val_loss", mode="min", save_last=True)
checkpoint_callback.CHECKPOINT_NAME_LAST = "{epoch}-last"
callbacks.append(checkpoint_callback)

logger = pl.loggers.TensorBoardLogger(experiments_dir, version=PARAMS["comment"])
# Log at every step when an epoch has fewer than 50 batches
log_every = 1 if len(train_dataloader) < 50 else 50

# Store the parameters as a JSON code block in TensorBoard's text tab
params_json = json.dumps(PARAMS, indent=2).replace("\n","  \n")
params_markdown = "```json\n" + params_json + "\n```"
logger.experiment.add_text("params", params_markdown)

trainer = pl.Trainer(
    max_epochs=PARAMS["n_epochs"],
    callbacks=callbacks,
    accelerator="gpu",
    profiler=None,
    logger=logger,
    log_every_n_steps=log_every,
)
trainer.fit(model, train_dataloader, valid_dataloader)

# Evaluation

In [22]:
from models.transformer import VisionTransformerConv

# Lightning checkpoint to evaluate; the folder above "checkpoints" names the run
checkpoint_path = r"E:\Cartelle Personali\Fabrizio\Universita\Magistrale\Tesi\05 - Experiments\lightning_logs\Large, Long - 1 il, 1e-2 fl, bn relu, maxpool\checkpoints\epoch=243-step=15372.ckpt"
model = VisionTransformerConv.load_from_checkpoint(
    checkpoint_path,
    feature_loss_weight=None,
    image_loss_weight=None,
    map_location="cuda",
)

In [None]:
from utils import LogImagesCallback

def fig_to_numpy(fig):
    """Draw ``fig`` on its canvas and return the canvas RGBA buffer as an array."""
    fig.canvas.draw()
    return np.asarray(fig.canvas.buffer_rgba())

def create_figure(batch, model):
    """Render the model predictions for ``batch`` as one wide image.

    ``batch`` is unpacked as ``(x, y)``. The first element of ``model(x)`` is
    taken as the predictions; for every sample a plot of prediction and target
    is created through ``LogImagesCallback._create_plot``, converted to an
    array and closed. The arrays are concatenated horizontally.
    """
    plotter = LogImagesCallback(train_batch, valid_batch)
    inputs, targets = batch
    predictions = model(inputs)[0]

    rendered = []
    for idx in range(len(inputs)):
        sample_fig = plotter._create_plot(predictions[idx], targets[idx])
        rendered.append(fig_to_numpy(sample_fig))
        # Release the figure as soon as it has been rasterized
        plt.close(sample_fig)
    return np.hstack(rendered)

# The run name is the folder that contains "checkpoints". Both "\" and "/" are
# accepted as separators, so the lookup does not depend on the OS the notebook
# runs on
path_split = checkpoint_path.replace("\\", "/").split("/")
RUN_NAME = path_split[path_split.index("checkpoints") - 1]

# Render both grids first: create_figure opens and closes its own figures
train_fig = create_figure(train_batch, model)
valid_fig = create_figure(valid_batch, model)

# One explicit figure per split, so titles and images cannot land on a figure
# other than the intended one; each grid is also saved as a PNG
for split_name, grid in (("Train", train_fig), ("Valid", valid_fig)):
    fig, ax = plt.subplots(figsize=(20, 5))
    ax.axis("off")
    ax.set_title(f"{RUN_NAME} | {split_name}")
    ax.imshow(grid)
    plt.imsave(f"{RUN_NAME}_{split_name.lower()}.png", grid)

In [None]:
# Save video results on train and test set
from media_utils import save_predicted_video

# The validation dataloader is the one written to "test_prediction.mp4"
for loader, video_name in (
    (train_dataloader, "train_prediction.mp4"),
    (valid_dataloader, "test_prediction.mp4"),
):
    save_predicted_video(model, loader, video_name)

In [None]:
from dataset_utils import save_events_frames_view

# Same options for both splits; the validation dataset goes to "test_inspect.mp4"
for video_name, split_dataset in (
    ("train_inspect.mp4", train_dataset),
    ("test_inspect.mp4", valid_dataset),
):
    save_events_frames_view(video_name, split_dataset, model=model, fps=15, denorm=True)

In [None]:
from media_utils import plot_square, predict_n_images

# Number of images requested from predict_n_images for each dataset
N_EVAL_IMAGES = 16

# Train predictions first, validation predictions second
for split_dataset in (train_dataset, valid_dataset):
    results = predict_n_images(split_dataset, N_EVAL_IMAGES, model)
    plot_square(results, size=3)