# Train model

If PyTorch is slow to load in a conda environment, see https://github.com/pytorch/pytorch/issues/537 for a fix.

## Tensorboard loading

In [1]:
# Optional (disabled): start TensorBoard in the background, reading logs from
# /tmp/log and serving on 0.0.0.0:6006. Uncomment to enable.
# get_ipython().system_raw('tensorboard --logdir /tmp/log --host 0.0.0.0 --port 6006 &')

In [2]:
# Optional (disabled): run `lt` for port 6006 in the background, appending its
# stdout and stderr to url.txt.
# NOTE(review): `lt` is presumably the localtunnel CLI — confirm before relying on it.
# get_ipython().system_raw('lt --port 6006 >> url.txt 2>&1 &')

In [3]:
# Optional (disabled): print the contents of url.txt, the file the `lt` command
# in the previous cell appends its output to.
# !cat url.txt

In [4]:
from processing_utils import image_manipulations as i_manips
from processing_utils import runtime_logic
from processing_utils import analysis_utils
from processing_utils import load_data
from model.retinanet import model as retina_model

import argparse
import os
import pickle
from copy import deepcopy

import yaml
import torch
import numpy as np
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torchvision import datasets, models
from torchvision.transforms import Compose, Normalize, ToTensor, ColorJitter, FiveCrop, CenterCrop, Lambda, RandomCrop, Resize

In [5]:
# Report whether PyTorch can see a CUDA device. Later cells move the loss and
# the model to the GPU only when this is True.
print(f'Cuda available: {torch.cuda.is_available()}')

Cuda available: True


## Hyperparameters

In [6]:
# --- Reproducibility ---------------------------------------------------------
seed = 0
# Seed every generator this notebook can draw from. Seeding only the CUDA
# generator leaves the CPU torch generator (used for shuffling in DataLoader and
# for any weights initialised before the model is moved to the GPU) and NumPy
# unseeded, so two runs with the same `seed` would still differ.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)  # silently ignored when CUDA is unavailable
np.random.seed(seed)

# --- Run identity and input data ---------------------------------------------
run_name = "object_detection_test"
data_root = os.path.join(os.getcwd(), "dummy_data/00007")
train_dir = os.path.join(data_root, "train/")
val_dir = os.path.join(data_root, "val/")

train_file = os.path.join(train_dir, "annotations.csv")
val_file = os.path.join(val_dir, "annotations.csv")
class_file = os.path.join(data_root, "classmap.csv")

# --- Output / checkpoint locations -------------------------------------------
# Cache file for the per-band normalisation statistics of this run.
normalize_params_file = f'outputs/norm_params/{run_name}_norm_params.yaml'
previous_model_state = None # Pickled model state dictionary path

# Can be set to None if TensorboardX isn't installed
tensorboard_outpath = None # f'outputs/tensorboard/{run_name}_log' # Set to None for temp

# --- Optimisation schedule ---------------------------------------------------
n_epochs = 25
input_size = 224
init_l_rate = 1e-4
lr_decay = 0.1                 # factor applied to the learning rate at each decay step
lr_decay_epoch = [5, 10, 15]   # epochs at which the learning rate is decayed
w_decay = 1e-4

batch_size = 2
num_channels = 1

# class_weights = torch.Tensor([1, 1])

# --- Loss --------------------------------------------------------------------
criterion = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    # Module.cuda() returns the module itself; rebinding keeps this consistent
    # with how the model is moved to the GPU later in the notebook.
    criterion = criterion.cuda()

# --- Reporting / saving ------------------------------------------------------
report_results_per_n_batches = {'train':5, 'val':1}
save_interval = 9999

shutdown_after = False

In [7]:
# Build the detector without pretrained weights.
# NOTE(review): the first positional argument (2) is presumably the number of
# classes — confirm against model.retinanet.model.resnet18.
model = retina_model.resnet18(2, pretrained=False)

# Leftover from a plain classification setup, kept for reference only:
# num_ftrs = model.fc.in_features
# model.fc = nn.Linear(num_ftrs, num_classes)
# model.avgpool = nn.AdaptiveAvgPool2d(1)

# Move both the network and the loss to the GPU when one is available.
if torch.cuda.is_available():
    model, criterion = model.cuda(), criterion.cuda()

In [8]:
# Optional (disabled): restore weights from a previous run. Only enable this
# when `previous_model_state` has been set to a saved state-dict path; it is
# None by default in the hyperparameter cell.
# state_dict = torch.load(previous_model_state)
# model.load_state_dict(state_dict)
# if torch.cuda.is_available():
#     model = model.cuda()

## Data Loading

### Get normalize parameters

In [9]:
# Get normalize parameters: reuse the cached statistics for this run when the
# YAML file exists, otherwise compute them from the training images and cache
# them for next time.
all_imgs = i_manips.get_images(train_dir)

if os.path.isfile(normalize_params_file):
    # The context manager closes the handle; a bare open() left it open for the
    # lifetime of the kernel.
    with open(normalize_params_file, 'r') as stream:
        # An explicit Loader is required: yaml.load(stream) without one raises
        # TypeError on PyYAML >= 6.
        # NOTE(review): yaml.Loader can construct arbitrary Python objects. That
        # is acceptable here only because this file is written by this notebook;
        # switch to yaml.safe_load if it only ever holds plain lists/floats.
        norm_params = yaml.load(stream, Loader=yaml.Loader)
else:
    norm_params = i_manips.get_normalize_params(all_imgs, num_bands=num_channels)
    analysis_utils.write_normalize_values(norm_params, normalize_params_file)

# Per-band statistics used by the Normalize transforms below.
means = norm_params["means"]
sdevs = norm_params["sdevs"]

  


In [10]:
# Both pipelines crop to the `input_size` hyperparameter instead of a second,
# hard-coded 224, so changing the input resolution only needs one edit.
# Normalisation uses the statistics of the first band only.

# Training: light colour jitter (brightness 0.05) before cropping.
train_transforms = Compose([
    ColorJitter(0.05),
    CenterCrop(input_size),
    ToTensor(),
    Normalize([means[0]],
              [sdevs[0]])
])

# Validation: same crop and normalisation, no augmentation.
val_transforms = Compose([
    CenterCrop(input_size),
    ToTensor(),
    Normalize([means[0]],
              [sdevs[0]])
])

In [11]:
## Load training and validation data
# Datasets: each reads its images from a directory plus an annotation CSV and
# the shared class map, and applies the matching transform pipeline.
train_data = load_data.CSVDataset(train_dir, train_file, class_file, train_transforms)
val_data = load_data.CSVDataset(val_dir, val_file, class_file, val_transforms)

# Loaders: identical settings for both splits (shuffled, 4 worker processes).
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
val_loader = DataLoader(
    val_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

In [12]:
# %pdb
# Smoke-test the training loader: pull every batch so that any loading error
# surfaces here, but only print the first one. Printing every batch floods the
# notebook with tensor dumps.
num_batches = 0
for iter_num, data in enumerate(train_loader):
    if iter_num == 0:
        print(data)
    num_batches += 1
print(f'Iterated over {num_batches} training batches')

[tensor([[[[-5.1660, -5.1650, -5.1673,  ..., -5.1668, -5.1679, -5.1695],
          [-5.1689, -5.1671, -5.1663,  ..., -5.1668, -5.1684, -5.1700],
          [-5.1700, -5.1684, -5.1681,  ..., -5.1689, -5.1684, -5.1665],
          ...,
          [-5.1729, -5.1740, -5.1700,  ..., -5.1658, -5.1660, -5.1652],
          [-5.1713, -5.1747, -5.1729,  ..., -5.1650, -5.1626, -5.1642],
          [-5.1718, -5.1692, -5.1700,  ..., -5.1665, -5.1658, -5.1652]],

         [[-5.1660, -5.1650, -5.1673,  ..., -5.1668, -5.1679, -5.1695],
          [-5.1689, -5.1671, -5.1663,  ..., -5.1668, -5.1684, -5.1700],
          [-5.1700, -5.1684, -5.1681,  ..., -5.1689, -5.1684, -5.1665],
          ...,
          [-5.1729, -5.1740, -5.1700,  ..., -5.1658, -5.1660, -5.1652],
          [-5.1713, -5.1747, -5.1729,  ..., -5.1650, -5.1626, -5.1642],
          [-5.1718, -5.1692, -5.1700,  ..., -5.1665, -5.1658, -5.1652]],

         [[-5.1660, -5.1650, -5.1673,  ..., -5.1668, -5.1679, -5.1695],
          [-5.1689, -5.1671, 

### Instantiate runtime

In [13]:
# RMSprop over all model parameters, with the initial learning rate and weight
# decay taken from the hyperparameter cell.
optimizer = optim.RMSprop(
    model.parameters(),
    lr=init_l_rate,
    weight_decay=w_decay,
)

# Training runtime wrapping the model, the normalisation statistics and the
# training loader. Validation is currently switched off (val_loader not passed).
# NOTE(review): the meaning of the empty-string second argument is not visible
# from this notebook — confirm against runtime_logic.ObjectDetection.
train_analysis = runtime_logic.ObjectDetection(
    model,
    '',
    means,
    sdevs,
    train_loader=train_loader,
    # val_loader=val_loader,
)

In [14]:
# Attach the bookkeeping helpers to the runtime before training starts.
# The loss tracker is created first because the next line uses
# `train_analysis.loss_tracker`.
# NOTE(review): 'outputs' is presumably the root directory for tracker output — confirm.
train_analysis.instantiate_loss_tracker('outputs')
train_analysis.loss_tracker.setup_output_storage(run_name)
# `tensorboard_outpath` is None in the hyperparameter cell (see the comment there).
train_analysis.instantiate_visualiser(tensorboard_outpath)

## Instantiate trainer

In [15]:
# Settings bundle handed to the training call; every value comes from the
# hyperparameter cell or the optimizer built above.
arguments = dict(
    # Model components
    run_name=run_name,
    optimizer=optimizer,
    criterion=criterion,

    # Hyperparameters
    n_epochs=n_epochs,
    batch_size=batch_size,
    lr_decay=lr_decay,
    lr_decay_epoch=lr_decay_epoch,
    # lr_decay_patience=lr_decay_patience,
    # class_weights=class_weights,

    # Saving & information retrieval
    report_interval=report_results_per_n_batches,
    save_interval=save_interval,

    shutdown=shutdown_after,
)

## Perform training

In [16]:
# Toggle IPython's automatic post-mortem debugger. The recorded output shows
# this run switched it ON, so an uncaught exception in the training cell opens pdb.
%pdb

Automatic pdb calling has been turned ON


In [17]:
# Run training with the settings bundled in `arguments`. The recorded output
# shows three "lr decayed by 0.1" messages, matching the three entries in
# `lr_decay_epoch`.
train_analysis.train(arguments)




lr decayed by 0.1


lr decayed by 0.1


lr decayed by 0.1

