In [None]:
# Torch
import torch
import torchvision
from torch.backends import cudnn
from torch.nn import functional as F
from torch.utils.data import sampler
from torch.utils.data import DataLoader
from torch.utils.data.sampler import RandomSampler
# Enable cuDNN's benchmark mode: per the PyTorch docs cuDNN then times several
# convolution algorithms and reuses the fastest one.
# NOTE(review): this mainly pays off when input shapes stay constant — confirm for this dataset.
cudnn.benchmark = True
# Data Handling
import numpy as np
import pandas as pd
# System
import os
import pprint
import itertools
# Custom
import models
import datasets
from helpers import utils

# Model Training Framework

This Notebook works for all model configurations detailed in the experiment settings below

**TODO:** The experiment settings below could eventually be removed from the notebook; keeping them in a hard-coded dictionary is not the best way to define them at the moment.


In [None]:
# ---------------------------------------------------------------------------
# File system settings
# ---------------------------------------------------------------------------
savedir_base = "/home/jovyan/work/runs/"
datadir = "/home/jovyan/work/DENMARK/256x256"


# ---------------------------------------------------------------------------
# Experiment settings
# ---------------------------------------------------------------------------
def _denmark_experiment(model_name, model_base, train_size='all', val_size='all', optimizer='adam'):
    """Build one experiment configuration for the 'denmark' dataset.

    Every call returns freshly created (nested) dictionaries, and the keys are
    inserted in a fixed order so that all configurations share the same layout.

    Args:
        model_name: value stored under ``model['name']``.
        model_base: value stored under ``model['base']``.
        train_size: value stored under ``dataset_size['train']``.
        val_size: value stored under ``dataset_size['val']``.
        optimizer: value stored under ``optimizer``.

    Returns:
        dict: the experiment configuration.
    """
    return {
        "dataset": {'name': 'denmark', 'transform': 'rgb_normalize'},
        "model": {'name': model_name, 'base': model_base},
        "batch_size": 1,
        "max_epoch": 5,
        "dataset_size": {'train': train_size, 'val': val_size},
        "optimizer": optimizer,
        "lr": 1e-5,
    }


EXP_GROUPS = {
    'denmark': _denmark_experiment('lcfcn', "fcn8_vgg16"),
    'denmark_debug': _denmark_experiment('lcfcn', "fcn8_vgg16", train_size=10, val_size=5),
    'denmark_cob': _denmark_experiment('cob', "fcn8_vgg16"),
    'denmark_sup': _denmark_experiment('supervised', "unet", optimizer='sgd'),
}

# ---------------------------------------------------------------------------
# Select experiment
# ---------------------------------------------------------------------------
exp_dict = EXP_GROUPS['denmark_sup']
pprint.pprint(exp_dict)

## Saving Location

Create new folder for the selected experiment and save the experiment dict

In [None]:
# When True (the default, matching the previous behaviour) any earlier run with
# the same experiment ID is backed up and cleared before this run starts.
# Set to False to keep the existing run directory; otherwise the resume logic
# further down, which looks for score_list.pkl inside savedir, never finds a
# previous run after a "Run All".
# NOTE(review): assumes utils.deleteExperiment removes the contents of savedir — confirm.
reset_experiment = True

exp_id = utils.hashDict(exp_dict)  # experiment ID generated by hashing the experiment dict
savedir = os.path.join(savedir_base, exp_id)

if reset_experiment:
    # Backup and overwrite a previous experiment with the same ID
    utils.deleteExperiment(savedir, backup_flag = True)
    print("Cleared previous experiment...")

# Make sure the run directory exists and store the configuration next to the results.
os.makedirs(savedir, exist_ok=True)
utils.saveJSON(os.path.join(savedir, "exp_dict.json"), exp_dict)
print("Experiment saved in %s" % savedir)

## Data

Introduce datasets and dataloaders

In [None]:
# Arguments that are identical for the training and the validation split.
dataset_kwargs = dict(dataset = exp_dict["dataset"]["name"],
                      datadir = datadir,
                      transform = exp_dict['dataset']['transform'],
                      dataset_size = exp_dict['dataset_size'])

train_set = datasets.getDataset(split = "train", **dataset_kwargs)
val_set = datasets.getDataset(split = "val", **dataset_kwargs)

# Training: samples are drawn in random order with the configured batch size,
# and an incomplete final batch is dropped.
train_sampler = RandomSampler(train_set)
train_loader = DataLoader(train_set,
                          batch_size = exp_dict["batch_size"],
                          sampler = train_sampler,
                          num_workers = 1,
                          drop_last = True)

# Validation: samples are visited one at a time in dataset order.
val_sampler = sampler.SequentialSampler(val_set)
val_loader = DataLoader(val_set,
                        batch_size = 1,
                        sampler = val_sampler,
                        num_workers = 1)

In [None]:
# Per-channel mean and standard deviation of the training images.
#
# The statistics are accumulated over ALL pixels seen (sum and sum of squares)
# rather than by averaging per-batch values: the mean of per-batch standard
# deviations ignores the variation between batches and therefore is not the
# standard deviation of the dataset.
#
# NOTE(review): train_set is created with exp_dict['dataset']['transform'], so
# these are statistics of the already transformed images — confirm that this
# is what is wanted.
channel_sum = 0.0
channel_sq_sum = 0.0
pixel_count = 0

for batch in train_loader:
    # assumed shape (batch_size, channels, height, width); float64 avoids
    # precision loss while summing many values
    images = batch['images'].numpy().astype(np.float64)

    # reduce over batch, height and width -> shape (channels,)
    channel_sum = channel_sum + images.sum(axis=(0, 2, 3))
    channel_sq_sum = channel_sq_sum + np.square(images).sum(axis=(0, 2, 3))
    pixel_count += images.shape[0] * images.shape[2] * images.shape[3]

if pixel_count == 0:
    raise ValueError("train_loader yielded no images; cannot compute channel statistics")

means = channel_sum / pixel_count
# Var[x] = E[x^2] - E[x]^2; clip tiny negative values caused by rounding.
stds = np.sqrt(np.maximum(channel_sq_sum / pixel_count - np.square(means), 0.0))
print(f"Means: {means} \n SDs: {stds}")

## Model

Load Model and underlying base model

In [None]:
# The number of output classes is taken from the training set.
n_classes = train_set.n_classes

# Build the model described by the experiment dict, then move it to the GPU.
model = models.getModel(exp_dict, n_classes)
model = model.cuda()

## Experiment Run Management 

Resume experiment if a previous score_list exists or start a new one from epoch 0 if not

In [None]:
# Checkpoint files of this experiment inside its run directory.
score_list_path = os.path.join(savedir, "score_list.pkl")
model_path = os.path.join(savedir, "model.pth")

if not os.path.exists(score_list_path):
    # No score history on disk: start a fresh run.
    score_list, s_epoch = [], 0
    print(f"Beginning new experiment from epoch {s_epoch}")
else:
    # A score history exists: restore the model weights and continue with the
    # epoch following the last recorded one.
    model.loadStateDict(utils.loadTorch(model_path))
    score_list = utils.loadPKL(score_list_path)
    s_epoch = score_list[-1]['epoch'] + 1
    print(f"Resuming previous experiment fom epoch {s_epoch}")

## Main Epoch Loop

Each epoch consists of training, validation, and updating the statistics, followed by saving both the most recent and the best model together with their validation statistics.

In [None]:
# Main loop: one iteration per epoch, starting at s_epoch (0 for a new run,
# the epoch after the last recorded one when resuming).
for e in range(s_epoch, exp_dict['max_epoch']):
    # Train the model for one epoch.
    # NOTE(review): the model is passed explicitly here although it is also the
    # receiver of the call, unlike valOnLoader below — confirm against the
    # trainOnLoader signature.
    train_dict = model.trainOnLoader(model, train_loader)
    print("Training done...")

    # Validate and visualize the model.
    val_dict = model.valOnLoader(val_loader, savedir_images=os.path.join(savedir, "images"), n_images=30)
    print("Validation done..")

    # Merge validation and training results into one record for this epoch;
    # training values are applied last and win if a key appears in both.
    score_dict = {}
    score_dict.update(val_dict)
    score_dict.update(train_dict)
    score_dict["epoch"] = len(score_list)
    score_list.append(score_dict)

    # Report the most recent entries of the score history.
    score_df = pd.DataFrame(score_list)
    print("\n", score_df.tail(), "\n")

    # Save the latest model and the score history.
    utils.saveTorch(model_path, model.getStateDict())
    utils.savePKL(score_list_path, score_list)
    print("Checkpoint Saved: %s" % savedir)

    # Save the best checkpoint: always after epoch 0, afterwards only when the
    # validation score beats every earlier epoch (missing scores count as 0).
    # The column check avoids a KeyError when no epoch reported a "val_score".
    # NOTE(review): assumes a higher "val_score" is better — confirm.
    current_score = score_dict.get("val_score", 0)
    if "val_score" in score_df.columns:
        previous_best = score_df["val_score"].iloc[:-1].fillna(0).max()
    else:
        previous_best = 0
    if e == 0 or current_score > previous_best:
        utils.savePKL(os.path.join(savedir, "score_list_best.pkl"), score_list)
        utils.saveTorch(os.path.join(savedir, "model_best.pth"), model.getStateDict())
        print("Saved Best: %s" % savedir)

    # e counts absolute epochs, so the total is max_epoch itself (subtracting
    # s_epoch produced messages such as "Epoch 4 of 2" on resumed runs).
    print(f"Epoch {e+1} of {exp_dict['max_epoch']} completed.")

print("Experiment completed!")

In [1]:
import torch

# File names of the two saved tensors, read from the current working directory.
probs_file, shape_file = 'probs.pt', 'shape.pt'

probs = torch.load(probs_file)
shape = torch.load(shape_file)

In [2]:
# Show the dimensions of the loaded probs tensor.
probs.size()

torch.Size([1, 1, 256, 256])

In [9]:
import numpy as np

# Copy the loaded tensor to host memory and list the distinct values it contains.
shape_values = shape.cpu().numpy()
np.unique(shape_values)

array([0, 1])

In [None]:
# Standalone GPU check: runs one forward and one backward pass of a single
# 3x3 convolution on random data, independent of the datasets and models above.
# NOTE(review): this looks like a cuDNN repro/debugging snippet rather than part
# of the training flow — confirm whether it should stay in the notebook. It also
# rebinds the name `data`, which the statistics cell uses as its loop variable.
import torch
# Backend flags: allow TF32 for matmul and cuDNN, enable cuDNN benchmark mode,
# and do not force deterministic algorithms.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.allow_tf32 = True
# Random float input of size [1, 128, 128, 128] on the GPU; gradients are tracked.
data = torch.randn([1, 128, 128, 128], dtype=torch.float, device='cuda', requires_grad=True)
# 128 -> 128 channel convolution, 3x3 kernel, padding 1, stride 1, no dilation, one group.
net = torch.nn.Conv2d(128, 128, kernel_size=[3, 3], padding=[1, 1], stride=[1, 1], dilation=[1, 1], groups=1)
net = net.cuda().float()
out = net(data)
# Backpropagate a random gradient with the same shape as the output.
out.backward(torch.randn_like(out))
# Block until all queued CUDA work has finished.
torch.cuda.synchronize()