# Refactor of baseline model

In this notebook the code defined in the [notebook 03_TrainBaselineModel.ipynb](../1_build_end2end_prototype/03_TrainBaselineModel.ipynb) is refactored into functions. These functions are placed in the script 04_RefactorBaseline.py to easily run W&B sweeps for hyperparameter tuning.

In [2]:
import wandb
import pandas as pd
from fastai.vision.all import *
from fastai.callback.wandb import WandbCallback
import torchvision.models as tvmodels

import params
# Helper functions - for example metrics we will track during our experiments
from utils import get_predictions, create_iou_table, MIOU, BackgroundIOU, \
                  RoadIOU, TrafficLightIOU, TrafficSignIOU, PersonIOU, VehicleIOU, BicycleIOU

In [4]:
# Baseline hyperparameters. They are written as a dict and unpacked into a SimpleNamespace,
# which exposes every entry as an attribute (e.g. train_config.lr) without a custom class.

train_config = SimpleNamespace(**{
    "framework": "fastai",
    "img_size": (180, 320),   # forwarded to get_data(), which uses it for Resize
    "batch_size": 8,
    "augment": True,          # use data augmentation
    "epochs": 10,
    "lr": 2e-3,
    "arch": "resnet18",       # attribute name looked up on torchvision.models in train()
    "pretrained": True,       # whether to use pretrained encoder
    "seed": 42,
    "log_preds": True,        # when True, train() logs a prediction table after fitting
})

In [5]:
def download_data():
    """Download the processed (train/valid/test split) dataset artifact from W&B.

    The data is requested through ``wandb.use_artifact`` so that W&B can track the
    data lineage of the models trained on it. In this notebook it is called from
    ``train()`` after ``wandb.init()``.

    Returns:
        Path: local directory the ``latest`` version of the artifact was downloaded to.
    """
    artifact_name = f'{params.PROCESSED_DATA_AT}:latest'
    artifact = wandb.use_artifact(artifact_name)
    # download() hands back the local location of the artifact files; wrap it in a Path
    local_dir = artifact.download()
    return Path(local_dir)

In [6]:
def label_func(fname):
    """Map an image path to the path of its segmentation mask.

    The mask lives in a ``labels`` directory that is a sibling of the directory
    containing the image, and is named ``<image stem>_mask.png``.

    Args:
        fname: ``pathlib.Path`` of the image file.

    Returns:
        ``pathlib.Path`` of the corresponding mask file.
    """
    dataset_root = fname.parent.parent
    mask_name = f"{fname.stem}_mask.png"
    return dataset_root / "labels" / mask_name

In [7]:
def get_df(processed_dataset_dir, is_test=False):
    """Load ``data_split.csv`` and attach image / mask paths for the requested stage.

    Args:
        processed_dataset_dir: ``Path`` of the downloaded dataset; must contain
            ``data_split.csv`` (with ``Stage`` and ``File_Name`` columns).
        is_test: if True keep only the rows whose ``Stage`` is ``'test'``;
            otherwise drop those rows and keep everything else.

    Returns:
        pandas.DataFrame with a fresh 0..n-1 index and two added columns,
        ``image_fname`` and ``label_fname``. When ``is_test`` is False it also
        carries a boolean ``is_valid`` column marking the ``'valid'`` rows.
    """
    split_df = pd.read_csv(processed_dataset_dir / 'data_split.csv')

    if is_test:
        df = split_df[split_df.Stage == 'test'].reset_index(drop=True)
    else:
        # Everything except the held-out test rows is used for training / validation
        df = split_df[split_df.Stage != 'test'].reset_index(drop=True)
        # is_valid tells the trainer which rows belong to the validation split
        df['is_valid'] = df.Stage == 'valid'

    # Image paths are built once and reused to derive the matching mask paths
    image_paths = [processed_dataset_dir/f'images/{name}' for name in df.File_Name.values]
    df["image_fname"] = image_paths
    df["label_fname"] = [label_func(image_path) for image_path in image_paths]

    return df

In [8]:
# fastAI dataloader
def get_data(df, bs=4, img_size=(180, 320), augment=True):
    """Create the fastai dataloaders for semantic segmentation from a split dataframe.

    Args:
        df: dataframe with ``image_fname`` and ``label_fname`` columns (as built by
            ``get_df``) plus whatever column ``ColSplitter()`` reads by default
            (presumably ``is_valid`` - confirm against the fastai docs).
        bs: batch size passed to ``DataBlock.dataloaders``.
        img_size: size handed to the ``Resize`` item transform.
        augment: if True use ``aug_transforms()`` as batch transforms, otherwise none.

    Returns:
        The dataloaders produced by ``DataBlock.dataloaders(df, bs=bs)``.
    """
    # Inputs are images, targets are masks whose codes are the BDD class names
    segmentation_blocks = (ImageBlock, MaskBlock(codes=params.BDD_CLASSES))
    read_image = ColReader("image_fname")
    read_mask = ColReader("label_fname")
    splitter = ColSplitter()
    resize = Resize(img_size)

    if augment:
        batch_tfms = aug_transforms()
    else:
        batch_tfms = None

    datablock = DataBlock(
        blocks=segmentation_blocks,
        get_x=read_image,
        get_y=read_mask,
        splitter=splitter,
        item_tfms=resize,
        batch_tfms=batch_tfms,
    )
    return datablock.dataloaders(df, bs=bs)

In [9]:
def log_predictions(learn):
    """Build a table of model predictions and log it to W&B under ``pred_table``.

    Args:
        learn: the trained learner, forwarded to the ``get_predictions`` helper.
    """
    inputs, raw_outputs, preds = get_predictions(learn)
    iou_table = create_iou_table(inputs, raw_outputs, preds, params.BDD_CLASSES)
    payload = {"pred_table": iou_table}
    wandb.log(payload)

In [10]:
def log_final_metrics(learn):
    """Run validation once more and store the results in the W&B run summary.

    The first score is stored as ``final_loss``; every following score is stored as
    ``final_<metric name>``, using the names of ``learn.metrics`` in order.

    Args:
        learn: learner exposing ``validate()`` and ``metrics``.
    """
    scores = learn.validate()

    metric_names = ['final_loss']
    metric_names.extend(f'final_{metric.name}' for metric in learn.metrics)

    # Pair each score with its name by position before touching the summary
    final_results = {}
    for position in range(len(scores)):
        final_results[metric_names[position]] = scores[position]

    for key, value in final_results.items():
        wandb.summary[key] = value

In [11]:
def train(config):
    """Train the U-Net segmentation baseline and track the run in W&B.

    Steps: seed everything, open a W&B run with ``config`` as its hyperparameters,
    download the dataset artifact, build the dataloaders, fit a ``unet_learner``
    with the one-cycle policy, optionally log a prediction table, and write the
    final validation metrics to the run summary.

    The W&B run is always closed: with ``wandb.finish()`` on success, and with
    ``wandb.finish(exit_code=1)`` if anything raises (including a notebook
    interrupt), after which the original exception is re-raised. This avoids
    leaving a dangling run behind when a cell fails or is stopped.

    Args:
        config: object holding the hyperparameters. ``config.seed`` is read directly;
            ``batch_size``, ``img_size``, ``augment``, ``arch``, ``pretrained``,
            ``epochs``, ``lr`` and ``log_preds`` are read back from ``wandb.config``
            after the run is initialised.
    """
    # Set seed for reproducibility.
    set_seed(config.seed, reproducible=True)

    # - pass config into the W&B run to control training hyperparameters
    # - project=params.WANDB_PROJECT makes this run part of the same project as the
    #   runs of the previous notebooks
    wandb.init(project=params.WANDB_PROJECT, entity=params.ENTITY, job_type="training", config=config)

    try:
        # From here on read hyperparameters from wandb.config instead of the argument
        config = wandb.config

        processed_dataset_dir = download_data()
        df = get_df(processed_dataset_dir)

        dls = get_data(df, bs=config.batch_size, img_size=config.img_size, augment=config.augment)

        # Intersection-over-union metrics: mean across all classes (MIOU) and one IOU per class.
        metrics = [MIOU(), BackgroundIOU(), RoadIOU(), TrafficLightIOU(),
                   TrafficSignIOU(), PersonIOU(), VehicleIOU(), BicycleIOU()]

        # The model is a U-Net built on a torchvision backbone.
        # - the backbone constructor is looked up in torchvision.models by name (config.arch)
        learn = unet_learner(dls, arch=getattr(tvmodels, config.arch), pretrained=config.pretrained, metrics=metrics)

        callbacks = [
            SaveModelCallback(monitor='miou'),              # Save model with best miou metric
            WandbCallback(log_preds=False, log_model=True)  # Predictions are logged manually below; the model is logged to W&B
        ]

        # Train model
        learn.fit_one_cycle(config.epochs, config.lr, cbs=callbacks)

        if config.log_preds:
            log_predictions(learn)

        log_final_metrics(learn)
    except BaseException:
        # BaseException on purpose: interrupting the cell (KeyboardInterrupt) must also
        # close the run. Mark it as failed and let the original error propagate.
        wandb.finish(exit_code=1)
        raise
    else:
        wandb.finish()

# Run training

In [12]:
# Launch a single training run using the hyperparameters defined in train_config above.
train(train_config)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33md-oliver-cort[0m ([33mdoc93[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Downloading large artifact bdd_simple_1k_split:latest, 846.07MB. 4010 files... 
[34m[1mwandb[0m:   4010 of 4010 files downloaded.  
Done. 0:0:21.5


epoch,train_loss,valid_loss,miou,background_iou,road_iou,traffic_light_iou,traffic_sign_iou,person_iou,vehicle_iou,bicycle_iou,time
0,0.484952,0.340188,0.307488,0.861481,0.750193,0.0,0.0,0.0,0.540744,0.0,00:11
1,0.420646,0.391662,0.269773,0.854439,0.744341,0.0,0.0,0.0,0.289631,0.0,00:10
2,0.33331,0.292086,0.335508,0.893526,0.795668,0.0,0.0,0.0,0.659361,0.0,00:10
3,0.298229,0.271899,0.344468,0.901476,0.810399,0.0,0.0,0.0,0.699401,0.0,00:10
4,0.27773,0.265701,0.346587,0.903434,0.816732,0.0,0.0,0.0,0.705945,0.0,00:10
5,0.251262,0.261658,0.353291,0.911652,0.837527,0.0,0.0,0.0,0.723855,0.0,00:10
6,0.220542,0.240589,0.354488,0.914565,0.834692,0.007771,0.0,0.0,0.724386,0.0,00:10
7,0.205341,0.243942,0.367228,0.915367,0.844541,0.073065,0.001875,0.0,0.73575,0.0,00:10
8,0.188729,0.239334,0.372175,0.919285,0.848172,0.098092,0.000335,0.0,0.739343,0.0,00:10
9,0.1786,0.23537,0.374038,0.921072,0.848464,0.095306,0.000469,0.0,0.752953,0.0,00:10


Better model found at epoch 0 with miou value: 0.30748828709499254.
Better model found at epoch 2 with miou value: 0.3355078448346512.
Better model found at epoch 3 with miou value: 0.34446802911357516.
Better model found at epoch 4 with miou value: 0.3465872394815637.
Better model found at epoch 5 with miou value: 0.3532905633075679.
Better model found at epoch 6 with miou value: 0.3544876233900053.
Better model found at epoch 7 with miou value: 0.367228229181832.
Better model found at epoch 8 with miou value: 0.37217533426711136.
Better model found at epoch 9 with miou value: 0.3740379537930716.




0,1
background_iou,▂▁▅▆▆▇▇▇██
bicycle_iou,▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
eps_0,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_2,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr_0,▁▂▂▃▄▅▆▇███████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁▁
lr_1,▁▂▂▃▄▅▆▇███████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁▁
lr_2,▁▂▂▃▄▅▆▇███████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁▁
miou,▄▁▅▆▆▇▇███

0,1
background_iou,0.92107
bicycle_iou,0.0
epoch,10.0
eps_0,1e-05
eps_1,1e-05
eps_2,1e-05
final_background_iou,0.92107
final_bicycle_iou,0.0
final_loss,0.23537
final_miou,0.37404
