In [1]:
# Torch
import torch
import torchvision
from torch.backends import cudnn
from torch.nn import functional as F
from torch.utils.data import sampler
from torch.utils.data import DataLoader
from torch.utils.data.sampler import RandomSampler
cudnn.benchmark = True
# Data Handling
import numpy as np
import pandas as pd
# System
import os
import time
import pprint
import itertools
# Custom
import models
import datasets
from helpers import utils

# LCFCN Model

Training notebook for a model that locates objects using point supervision.

Based on https://github.com/ElementAI/LCFCN

Experiment Settings below could eventually be removed


In [3]:
####################### EXPERIMENT SETTINGS ################################
def _make_exp_group(dataset_name, model_name, model_base, batch_sizes,
                    max_epoch, n_train, n_val, optimizer):
    """Build the hyper-parameter grid for one experiment group.

    List-valued entries are the axes of the grid; ``utils.cartesian`` (applied
    below) turns each grid into a list of concrete experiment dicts.

    The key order is the same as in the previous hand-written dicts so that
    the expansion order (and therefore ``exp_list[0]``) is unchanged.

    Args:
        dataset_name: value stored under ``dataset['name']``.
        model_name: value stored under ``model['name']``.
        model_base: value stored under ``model['base']``.
        batch_sizes: list of batch sizes to sweep over.
        max_epoch: number of epochs to run.
        n_train: training-set size (an int, or ``'all'``).
        n_val: validation-set size (an int, or ``'all'``).
        optimizer: optimizer name.

    Returns:
        dict describing the (not yet expanded) experiment grid.
    """
    return {"dataset": {'name': dataset_name,
                        'transform': 'rgb_normalize'},
            "model": {'name': model_name,
                      'base': model_base},
            "batch_size": list(batch_sizes),
            "max_epoch": [max_epoch],
            'dataset_size': [{'train': n_train,
                              'val': n_val}],
            'optimizer': [optimizer],
            'lr': [1e-5]}


EXP_GROUPS = {}
EXP_GROUPS['trancos'] = _make_exp_group(
    dataset_name='trancos', model_name='lcfcn', model_base="fcn8_vgg16",
    batch_sizes=[1, 5, 10], max_epoch=100,
    n_train='all', n_val='all', optimizer='adam')

EXP_GROUPS['trancos_debug'] = _make_exp_group(
    dataset_name='trancos', model_name='lcfcn', model_base="fcn8_vgg16",
    batch_sizes=[1, 5, 10], max_epoch=5,
    n_train=5, n_val=5, optimizer='adam')

EXP_GROUPS['denmark_debug'] = _make_exp_group(
    dataset_name='denmark', model_name='lcfcn', model_base="fcn8_vgg16",
    batch_sizes=[1, 5, 10], max_epoch=5,
    n_train=800, n_val=400, optimizer='adam')

# NOTE(review): 'sdg' looks like a typo for 'sgd'. It is kept verbatim because
# the code that consumes this string is not part of this notebook -- confirm.
EXP_GROUPS['trancos_debug_wtp'] = _make_exp_group(
    dataset_name='trancos', model_name='wtp', model_base="wtp_vgg16",
    batch_sizes=[1, 20], max_epoch=5,
    n_train=5, n_val=5, optimizer='sdg')

# Expand every grid into its list of concrete experiment dicts.
EXP_GROUPS = {k: utils.cartesian(v) for k, v in EXP_GROUPS.items()}

# Collect the experiments of the selected groups; only the first one is run.
exp_group_list = ["denmark_debug"]
exp_list = []
for exp_group_name in exp_group_list:
    exp_list += EXP_GROUPS[exp_group_name]
exp_dict = exp_list[0]

########################## FILE SYSTEM SETTINGS ###########################

# The defaults are the original hard-coded locations; set the environment
# variables to run the notebook on another machine without editing this cell.
savedir_base = os.environ.get("LCFCN_SAVEDIR_BASE", "/home/jovyan/work/saves/LCFCN")
datadir = os.environ.get("LCFCN_DATADIR", "/home/jovyan/work/data/DENMARK")

############################### PRINTS ####################################

pprint.pprint(exp_dict)

{'batch_size': 1,
 'dataset': {'name': 'denmark', 'transform': 'rgb_normalize'},
 'dataset_size': {'train': 800, 'val': 400},
 'lr': 1e-05,
 'max_epoch': 5,
 'model': {'base': 'fcn8_vgg16', 'name': 'lcfcn'},
 'optimizer': 'adam'}


## Saving Location

Create new folder for the selected experiment and save the experiment dict

In [4]:
# The experiment ID is a hash of the experiment dict, so identical settings
# always map to the same save directory.
exp_id = utils.hashDict(exp_dict)
savedir = os.path.join(savedir_base, exp_id)

# True reproduces the previous behaviour: the existing experiment directory is
# handed to utils.deleteExperiment (with backup_flag=True) before every run.
# Set to False to keep earlier results so that the resume logic further down,
# which looks for score_list.pkl inside savedir, can actually pick them up.
RESET_EXPERIMENT = True

if RESET_EXPERIMENT:
    # Back up and overwrite a previous experiment with the same ID.
    utils.deleteExperiment(savedir, backup_flag = True)
    print("Cleared previous experiment...")

os.makedirs(savedir, exist_ok=True)
utils.saveJSON(os.path.join(savedir, "exp_dict.json"), exp_dict)
print("Experiment saved in %s" % savedir)

Cleared previous experiment...
Experiment saved in /home/jovyan/work/jannis/saves/LCFCN/730774860cf026c8606e5fd35fe421d1


## Data

Introduce datasets and dataloaders

In [5]:
# Arguments shared by the training and validation dataset lookups.
dataset_kwargs = dict(dataset_dict=exp_dict["dataset"],
                      datadir=datadir,
                      exp_dict=exp_dict,
                      dataset_size=exp_dict['dataset_size'])
train_set = datasets.getDataset(split="train", **dataset_kwargs)
val_set = datasets.getDataset(split="val", **dataset_kwargs)

# One training "epoch" draws this many samples, with replacement.
# NOTE(review): the epoch length is tied to the validation-set size rather
# than to len(train_set) -- confirm this is intentional.
samples_per_epoch = 2 * len(val_set)
train_sampler = RandomSampler(train_set,
                              replacement=True,
                              num_samples=samples_per_epoch)
train_loader = DataLoader(train_set,
                          batch_size=exp_dict["batch_size"],
                          sampler=train_sampler,
                          num_workers=2,
                          drop_last=True)

# Validation walks the set once, in order, one image per batch.
val_sampler = sampler.SequentialSampler(val_set)
val_loader = DataLoader(val_set,
                        batch_size=1,
                        sampler=val_sampler,
                        num_workers=2)

## Model

Load Model and underlying base model

In [6]:
# Build the model selected by the experiment's "model" entry, then move it to
# the GPU (a CUDA device is required from here on).
model = models.getModel(model_dict=exp_dict['model'],
                        exp_dict=exp_dict,
                        train_set=train_set)
model = model.cuda()

# Disabled: optimizer setup is not performed in this notebook.
# model.opt = optimizers.get_optim(exp_dict['opt'], model)

## Experiment Run Management 

Resume experiment if a previous score_list exists or start a new one from epoch 0 if not

In [7]:
# Locations of the most recent checkpoint and of the per-epoch score history.
model_path = os.path.join(savedir, "model.pth")
score_list_path = os.path.join(savedir, "score_list.pkl")

if os.path.exists(score_list_path):  # resume
    # Restore the weights first, then the history that tells us where to continue.
    model.loadStateDict(utils.loadTorch(model_path))
    score_list = utils.loadPKL(score_list_path)
    # An empty history carries no 'epoch' entry to continue from; start at 0
    # instead of failing on score_list[-1].
    s_epoch = score_list[-1]['epoch'] + 1 if score_list else 0
    print(f"Resuming previous experiment from epoch {s_epoch}")
else:  # restart
    score_list = []
    s_epoch = 0
    print(f"Beginning new experiment from epoch {s_epoch}")

Beginning new experiment from epoch 0


## Main Epoch Loop

Each epoch consists of training, validation, updating the statistics, and saving both the most recent and the best model together with their validation statistics.

In [None]:
# Run the remaining epochs. `e` is the absolute epoch index, so a resumed run
# continues counting from s_epoch instead of restarting at 0.
for e in range(s_epoch, exp_dict['max_epoch']):
    # Metrics collected for this epoch (validation and training).
    score_dict = {}
    # Train the model
    train_dict = model.trainOnLoader(model, train_loader)
    print("Training done...")
    # Validate and Visualize the model
    val_dict = model.valOnLoader(val_loader, savedir_images=os.path.join(savedir, "images"), n_images=3)
    # model.visOnLoader(vis_loader, savedir=os.path.join(savedir, "images"))
    print("Validation done..")

    # Update score_dict and add to score_list
    score_dict.update(val_dict)
    score_dict.update(train_dict)  # on key clashes the training values win
    score_dict["epoch"] = len(score_list)
    score_list.append(score_dict)

    # Report score_list
    score_df = pd.DataFrame(score_list)
    print("\n", score_df.tail(), "\n")

    # Save Model and score_list
    utils.saveTorch(model_path, model.getStateDict())
    utils.savePKL(score_list_path, score_list)
    print("Checkpoint Saved: %s" % savedir)

    # Save best Checkpoint: the first epoch always counts as best; afterwards
    # the current "val_score" must exceed every earlier one (missing -> 0).
    # NOTE(review): this assumes a higher "val_score" is better -- confirm,
    # since the validation progress bar reports an MAE.
    if e == 0 or (score_dict.get("val_score", 0) > score_df["val_score"][:-1].fillna(0).max()):
        utils.savePKL(os.path.join(savedir, "score_list_best.pkl"), score_list)
        utils.saveTorch(os.path.join(savedir, "model_best.pth"), model.getStateDict())
        print("Saved Best: %s" % savedir)
    # e + 1 is an absolute epoch count, so the total must be max_epoch itself;
    # subtracting s_epoch printed e.g. "Epoch 4 of 2" after a resume.
    print(f"Epoch {e+1} of {exp_dict['max_epoch']} completed.")

print("Experiment completed!")

Training. Loss: 1.6711: 100%|██████████| 800/800 [02:01<00:00,  6.59it/s]
  0%|          | 0/400 [00:00<?, ?it/s]
  0%|          | 0/400 [00:00<?, ?it/s][A

Training done...


Validating. MAE: 3.0000:   0%|          | 1/400 [00:00<04:12,  1.58it/s]
Validating. MAE: 2.0000:   1%|          | 3/400 [00:00<01:47,  3.71it/s]
Validating. MAE: 2.6000:   1%|▏         | 5/400 [00:01<01:21,  4.85it/s]
Validating. MAE: 2.0000:   2%|▏         | 7/400 [00:01<01:10,  5.54it/s]
Validating. MAE: 2.1111:   2%|▏         | 9/400 [00:01<01:05,  5.98it/s]
Validating. MAE: 1.8182:   3%|▎         | 11/400 [00:02<01:01,  6.29it/s]
Validating. MAE: 1.6154:   3%|▎         | 13/400 [00:02<00:59,  6.48it/s]
Validating. MAE: 1.5333:   4%|▍         | 15/400 [00:02<00:58,  6.61it/s]
Validating. MAE: 1.4706:   4%|▍         | 17/400 [00:02<00:57,  6.71it/s]
Validating. MAE: 1.4737:   5%|▍         | 19/400 [00:03<00:56,  6.77it/s]
Validating. MAE: 1.3810:   5%|▌         | 21/400 [00:03<00:58,  6.50it/s]
Validating. MAE: 1.3043:   6%|▌         | 23/400 [00:03<00:57,  6.60it/s]
Validating. MAE: 1.2800:   6%|▋         | 25/400 [00:04<00:56,  6.69it/s]
Validating. MAE: 1.2963:   7%|▋         | 2