In [None]:
# Show the NVIDIA GPU(s) available to this runtime before doing any work.
!nvidia-smi

# Init

In [None]:
# Shallow-clone the repository whose folder is later appended to sys.path
# so that its helper modules can be imported.
# NOTE(review): no commit or tag is pinned, so a rerun may pick up different
# helper code — consider pinning a revision for reproducibility.
!git clone --depth=1 https://github.com/AdeelH/potsdam-batch-exp.git

In [44]:
# --- Run configuration ------------------------------------------------------
BATCH_SIZE = 16     # used for both the training and the validation loader
PRETRAINED = True   # passed to create_body(pretrained=...)
LAST_CROSS = True   # passed to DynamicUnet(last_cross=...)

# --- Components of the experiment name --------------------------------------
MODEL_ARCH = 'unet'
MODEL_BASE = 'resnet18'
CHANNEL_VARIATION = 'e'
# Tag describing the model variant; extend this string to add further tags.
MODEL_VARIATION = 'lc_' + str(LAST_CROSS)

In [45]:
# Assemble the experiment identifier from the configuration values; a "p"
# appended to the backbone name marks a pretrained encoder.
EXPERIMENT_NAME = '_'.join([
    'ss',
    f'{CHANNEL_VARIATION}',
    f'{MODEL_ARCH}',
    f'{MODEL_BASE}{"p" if PRETRAINED else ""}',
    f'{MODEL_VARIATION}',
    'bsz',
    f'{BATCH_SIZE}',
])
print(EXPERIMENT_NAME)

# S3 location under which the outputs of this experiment are stored.
S3_BUCKET = 'raster-vision-ahassan'
S3_ROOT = f'potsdam/experiments/output/{EXPERIMENT_NAME}'

ss_e_unet_resnet18p_lc_True_bsz_16


In [14]:
# Training length and learning-rate range: the scheduler anneals the learning
# rate from LR_START towards LR_END over EPOCHS epochs.
EPOCHS = 4
LR_START = 1e-1
LR_END = 1e-4

# SGD hyper-parameters.
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4

# A checkpoint is saved whenever `epoch % CHECKPOINT_INTERVAL == 0`. The
# interval is clamped to EPOCHS: with an interval of 5 and only 4 epochs that
# condition is never true and no periodic checkpoint would be written at all.
CHECKPOINT_INTERVAL = min(5, EPOCHS)
# The per-batch visualisation callback runs on every BATCH_CB_INTERVAL-th batch.
BATCH_CB_INTERVAL = 10

In [23]:
import os
from pathlib import Path
import glob
from datetime import datetime

import numpy as np
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
import torch.utils
import torchvision as tv
from torchvision import transforms as tf

from fastai.vision import *
from fastai.metrics import error_rate

import matplotlib.pyplot as plt
%reload_ext autoreload
%autoreload 2
# %matplotlib inline

In [None]:
import sys
sys.path.append('potsdam-batch-exp/')

from utils_ import *
from io_ import *
from data_ import *
from transforms import *
from models import *
from training import *
from visualizations import *
from monitoring import *

In [None]:
# Input channels handed to the transform factory.
# `ch_E` is not defined in this notebook; presumably it comes from the
# star-imported helper modules — TODO confirm.
CHANNELS = [ch_E]
# Chip size and stride passed to the Potsdam dataset constructor.
CHIP_SIZE = 400
STRIDE = 200
# Passed to tfs_potsdam(downsampling=...).
DOWNSAMPLING = 2

# Fraction of the dataset indices used for training; the remainder is used
# for validation.
TRAIN_SPLIT = 0.85

In [None]:
# Names of the segmentation classes.
# NOTE(review): the order presumably has to match the label encoding of the
# dataset — confirm against the data helpers.
CLASS_NAMES = ['building', 'tree', 'low-vegetation', 'clutter', 'car', 'pavement']
# Number of classes; used as `n_classes` when the U-Net is built.
NCLASSES = len(CLASS_NAMES)

In [None]:
# I/O handler used below for loading the data and for saving models, logs and
# figures. It is rooted locally at a folder named after the experiment and is
# given the S3 bucket and prefix configured above.
io_handler = S3IoHandler(local_root=EXPERIMENT_NAME, s3_bucket=S3_BUCKET, s3_root=S3_ROOT)

# Data

In [None]:
# Load the Potsdam data object through the I/O handler.
# NOTE(review): the helper name suggests the file is unpickled; unpickling can
# execute arbitrary code, so this must only ever point at a trusted source.
potsdam_dict = io_handler.load_pickled_file('potsdam/data/potsdam.pkl', 'data/potsdam.pkl')

## Prepare datasets

In [None]:
# Build the four transforms used by the datasets below: one for training
# samples, one for validation samples, and separate x / y transforms.
train_transform, val_transform, x_transform, y_transform = tfs_potsdam(channels=CHANNELS, downsampling=DOWNSAMPLING)

In [None]:
# Three views over the same data with identical chip size and stride; they
# differ only in the transforms applied:
#   original_ds - validation transform only, no x / y transforms
#   train_ds    - training transform plus x / y transforms
#   val_ds      - validation transform plus x / y transforms
original_ds = Potsdam(potsdam_dict, chip_size=CHIP_SIZE, stride=STRIDE, tf=val_transform)
train_ds    = Potsdam(potsdam_dict, chip_size=CHIP_SIZE, stride=STRIDE, tf=train_transform, x_tf=x_transform, y_tf=y_transform)
val_ds      = Potsdam(potsdam_dict, chip_size=CHIP_SIZE, stride=STRIDE, tf=val_transform  , x_tf=x_transform, y_tf=y_transform)

### Train/val split

In [None]:
# Split sizes: the training part is TRAIN_SPLIT of the dataset, rounded down;
# validation gets whatever is left.
train_split_size = int(len(train_ds) * TRAIN_SPLIT)
val_split_size = len(train_ds) - train_split_size

print('train_split_size', train_split_size)
print('val_split_size', val_split_size)

# All dataset indices in their natural order (not shuffled); the samplers
# slice this array into a leading and a trailing part.
inds = np.arange(train_split_size + val_split_size)

### Samplers

In [None]:
# The leading `train_split_size` indices are sampled for training, the
# remaining ones for validation.
train_sampler = torch.utils.data.SubsetRandomSampler(inds[:train_split_size])
val_sampler = torch.utils.data.SubsetRandomSampler(inds[train_split_size:])

# Sanity check: no index may be shared between the two splits.
assert set(train_sampler.indices).isdisjoint(val_sampler.indices)

# Model

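Use fastai's `DynamicUnet` to build a U-Net on top of a torchvision ResNet-18 encoder whose first convolution is replaced so that it accepts a single input channel.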

In [None]:
def _base_model(pretrained=False, in_channels=1):
    """Build a torchvision ResNet-18 whose first convolution takes `in_channels` channels.

    Args:
        pretrained: Forwarded to `tv.models.resnet18`.
        in_channels: Number of input channels of the replacement first
            convolution. Defaults to 1, the value that used to be hard-coded.

    Returns:
        The ResNet-18 module with `conv1` replaced by a new 7x7, stride-2,
        bias-free `nn.Conv2d` producing 64 feature maps.

    Note:
        `conv1` is replaced by a newly constructed layer, so the first layer
        does not carry pretrained weights even when `pretrained=True`.
    """
    m = tv.models.resnet18(pretrained=pretrained)
    # Assign through the public attribute instead of poking at the private
    # `_modules` dict; the submodule keeps its name and position.
    m.conv1 = nn.Conv2d(in_channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    return m

In [None]:
# Build the encoder body from `_base_model`, wrap it in fastai's DynamicUnet
# with one output per class, and move the network to the GPU.
# NOTE(review): `.cuda()` is unconditional, so this cell requires a CUDA device.
body = create_body(_base_model, pretrained=PRETRAINED)
model = models.unet.DynamicUnet(body, n_classes=NCLASSES, last_cross=LAST_CROSS).cuda()

In [None]:
# Display the module structure of the assembled network.
model

# Train

## Training monitoring callbacks

In [None]:

def get_epoch_monitor(io_handler, model_name='model', chkpt_interval=1, acc_tol=0.005, viz_root='visualizations/per_epoch'):
    """Create the per-epoch callback that checkpoints, logs and plots training progress.

    Args:
        io_handler: Object providing `to_local_path`, `save_model`, `save_log`,
            `save_log_str` and `save_img`.
        model_name: File name used for the best model under `best_model/`.
        chkpt_interval: A checkpoint is saved every `chkpt_interval` epochs;
            must be positive.
        acc_tol: Minimum improvement in validation accuracy over the previous
            best for the model to count as a new best; must be below 1.
        viz_root: Folder (relative to the handler's root) for per-epoch figures.

    Returns:
        A function `_monitor(model, logs)`. `logs` must map 'epoch', 'val_acc',
        'best_acc', 'lr', 'train_loss', 'val_loss' and 'train_acc' to lists;
        the callback appends one entry to `logs['best_acc']` per call.

    Raises:
        AssertionError: If `chkpt_interval` or `acc_tol` is out of range.
    """
    assert chkpt_interval > 0, 'chkpt_interval must be positive'
    assert acc_tol < 1, 'acc_tol must be below 1'

    filter_path = f'{viz_root}/rgb_conv'
    os.makedirs(io_handler.to_local_path(filter_path), exist_ok=True)

    def _monitor(model, logs):
        epoch = len(logs['epoch'])  # 1-indexed: number of epochs logged so far
        val_acc = logs['val_acc'][-1]
        last_best_acc = logs['best_acc'][-1] if epoch > 1 else -1

        # Record the best accuracy *before* anything is saved, so that every
        # snapshot of `logs` written below already contains this epoch's
        # 'best_acc' entry and all per-epoch lists have the same length.
        is_new_best = val_acc >= (last_best_acc + acc_tol)
        logs['best_acc'].append(val_acc if is_new_best else last_best_acc)

        if epoch % chkpt_interval == 0:
            io_handler.save_model(model, f'checkpoints/epoch_{epoch}', info=logs)

        if is_new_best:
            io_handler.save_model(model, f'best_model/{model_name}', info=logs)

        log_str = logs_to_str(logs)
        print(log_str)

        io_handler.save_log('logs.pkl', logs)
        io_handler.save_log_str('logs.txt', log_str)

        # Filters of `model[0][0]`; presumably the first convolution of the
        # encoder for the network built in this notebook — TODO confirm.
        fig = viz_conv_layer_filters(model[0][0].weight.data, scale_each=False, padding=1)
        # Format the frame name separately so a '%' in `viz_root` cannot be
        # misread as a format specifier.
        frame_name = 'epoch_%04d' % epoch
        io_handler.save_img(fig, f'{filter_path}/{frame_name}')
        plt.close(fig)

        fig = plot_lr(logs['epoch'], logs['lr'])
        io_handler.save_img(fig, 'visualizations/lr')
        plt.close(fig)

        fig = plot_losses(logs['epoch'], logs['train_loss'], logs['val_loss'])
        io_handler.save_img(fig, 'visualizations/loss')
        plt.close(fig)

        fig = plot_accs(logs['epoch'], logs['train_acc'], logs['val_acc'])
        io_handler.save_img(fig, 'visualizations/accuracy')
        plt.close(fig)

        # One figure per class statistic, saved under the statistic's name.
        stat_figs = plot_class_stats(logs)
        for stat, fig in stat_figs:
            io_handler.save_img(fig, f'visualizations/{stat}')
            plt.close(fig)

    return _monitor

def get_batch_monitor(io_handler, viz_root='visualizations/per_batch', interval=4):
    """Create the per-batch callback that saves filter and gradient visualisations.

    Args:
        io_handler: Object providing `to_local_path` and `save_img`.
        viz_root: Folder (relative to the handler's root) for per-batch figures.
        interval: The callback only acts on batches whose index is a multiple
            of `interval`; must be positive.

    Returns:
        A function `_monitor(model, epoch, batch_idx, batch, labels)` that, on
        every `interval`-th batch, saves a figure of the weights of
        `model[0][0]` and, when a gradient is available, a figure of that
        gradient. `batch` and `labels` are accepted but not used.

    Raises:
        AssertionError: If `interval` is not positive.
    """
    # Fail at construction time rather than with a ZeroDivisionError on the
    # first batch; mirrors the argument check in get_epoch_monitor.
    assert interval > 0, 'interval must be positive'

    filter_path = f'{viz_root}/rgb_conv'
    grad_path = f'{viz_root}/rgb_conv_grad'
    os.makedirs(io_handler.to_local_path(filter_path), exist_ok=True)
    os.makedirs(io_handler.to_local_path(grad_path), exist_ok=True)

    def _monitor(model, epoch, batch_idx, batch, labels):
        if batch_idx % interval != 0:
            return

        # Presumably the first convolution of the encoder for the network
        # built in this notebook — TODO confirm.
        fs = model[0][0].weight

        # Zero-padded so that the frames sort correctly by file name. Formatted
        # separately so a '%' in `viz_root` cannot be misread as a specifier.
        frame_name = 'epoch_%04d_batch_%05d' % (epoch, batch_idx)

        fig = viz_conv_layer_filters(fs.data, scale_each=False, padding=1)
        io_handler.save_img(fig, f'{filter_path}/{frame_name}')
        plt.close(fig)

        # No gradient is available if backward has not run yet or the layer is
        # frozen; skip the gradient frame instead of raising AttributeError.
        if fs.grad is None:
            return

        fig = viz_conv_layer_filters(fs.grad.data, scale_each=False, padding=1)
        io_handler.save_img(fig, f'{grad_path}/{frame_name}')
        plt.close(fig)

    return _monitor


In [None]:
# Per-epoch callback: the experiment name is used as the best-model file name
# and CHECKPOINT_INTERVAL as the checkpoint period.
epoch_callback = get_epoch_monitor(io_handler, EXPERIMENT_NAME, chkpt_interval=CHECKPOINT_INTERVAL)
# Per-batch callback: acts on every BATCH_CB_INTERVAL-th batch.
batch_callback = get_batch_monitor(io_handler, interval=BATCH_CB_INTERVAL)

In [None]:
# Batch sizes read when the data loaders are constructed; training and
# validation use the same value.
train_params = {
    'batch_size': BATCH_SIZE,
    'val_batch_size': BATCH_SIZE,
}

In [None]:
# Data loaders over the two datasets; each draws its indices from the
# corresponding subset sampler and uses the batch size stored in train_params.
train_dl = torch.utils.data.DataLoader(train_ds, sampler=train_sampler, batch_size=train_params['batch_size']    , pin_memory=False)
val_dl   = torch.utils.data.DataLoader(val_ds  , sampler=val_sampler  , batch_size=train_params['val_batch_size'], pin_memory=False)

In [None]:
# Schedule parameters handed to train_seg.
# NOTE(review): this rebinds `train_params`, so the 'batch_size' and
# 'val_batch_size' entries used to build the data loaders are not part of the
# dict passed to train_seg — confirm that this is intended.
train_params = {
    'epochs': EPOCHS,
    'learning_rate': LR_START,
    'learning_rate_min': LR_END,
}

# SGD with momentum and weight decay, starting at the initial learning rate.
optimizer = optim.SGD(
    model.parameters(),
    lr=train_params['learning_rate'],
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)
# Cosine annealing over the configured number of epochs, down to the minimum
# learning rate.
sched = optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=train_params['epochs'],
    eta_min=train_params['learning_rate_min'],
)

# Interactive plotting off: the callbacks save their figures and close them.
plt.ioff()
train_seg(
    model, train_dl, val_dl, optimizer, sched, train_params,
    epoch_callback=epoch_callback,
    batch_callback=batch_callback,
)

In [None]:
# Install ffmpeg, which the next cell uses to turn the saved frames into videos.
!apt update -y
!apt install ffmpeg -y

In [None]:
# Render the per-batch filter frames and gradient frames into one H.264 video
# each (15 fps). The frames are matched with a glob pattern; their names embed
# zero-padded epoch and batch numbers. `-y` overwrites an existing out.mp4.
!ffmpeg -framerate 15 -pattern_type glob -i "$EXPERIMENT_NAME/visualizations/per_batch/rgb_conv/*.png" -c:v libx264 \
    -pix_fmt yuv420p "$EXPERIMENT_NAME/visualizations/per_batch/rgb_conv/out.mp4" -y;
!ffmpeg -framerate 15 -pattern_type glob -i "$EXPERIMENT_NAME/visualizations/per_batch/rgb_conv_grad/*.png" -c:v libx264 \
    -pix_fmt yuv420p "$EXPERIMENT_NAME/visualizations/per_batch/rgb_conv_grad/out.mp4" -y;