In [1]:
import os
import sys
# Make the local EmbedSeg checkout importable. The location can be overridden
# with the EMBEDSEG_ROOT environment variable so the notebook also runs on
# machines other than the original author's; the original path stays the default.
sys.path.append(os.environ.get('EMBEDSEG_ROOT', '/home/james/Documents/VS/EmbedSegScrolls'))
import numpy as np
import os
import torch
from EmbedSeg.train import begin_training
from EmbedSeg.utils.create_dicts import create_dataset_dict, create_model_dict, create_loss_dict, create_configs
from matplotlib.colors import ListedColormap
import json

### Specify the path to `train`, `val` crops and the type of `center` embedding which we would like to train the network for:

The train-val images, masks and center-images will be accessed from the path specified by `data_dir` and `project_name`.
<a id='center'></a>

In [2]:
# Root folder holding the crops, the project sub-folder inside it, and the
# kind of instance centre the network is trained to point at.
data_dir = 'crops'
project_name = 'Vesuvius'
# Other accepted values: 'centroid', 'medoid'
center = 'approximate-medoid'

print(f"Project Name chosen as : {project_name}. \nTrain-Val images-masks-center-images will be accessed from : {data_dir}")

Project Name chosen as : Vesuvius. 
Train-Val images-masks-center-images will be accessed from : crops


In [3]:
# Fail early on an unsupported `center` value instead of deep inside training.
# The hint appended to the AssertionError lists every value the check accepts.
try:
    assert center in {'medoid', 'centroid', 'approximate-medoid'}
    print("Spatial Embedding Location chosen as : {}".format(center))
except AssertionError as e:
    # The trailing 42 is kept from the original args tuple; its meaning is not
    # evident from this notebook.
    e.args += ('Please specify center as one of : {"medoid", "centroid", "approximate-medoid"}', 42)
    raise

Spatial Embedding Location chosen as : approximate-medoid


### Obtain properties of the dataset 

Here, we read the `data_properties.json` file prepared in the `01-data` notebook previously.

In [4]:
# Load the dataset properties prepared by the `01-data` notebook.
# Later cells need foreground_weight, n_z/n_y/n_x and the pixel sizes, so a
# missing file is reported here rather than surfacing as a NameError further down.
if not os.path.isfile('data_properties.json'):
    raise FileNotFoundError(
        "'data_properties.json' was not found in the current working directory; "
        "run the `01-data` notebook first to generate it."
    )

with open('data_properties.json') as json_file:
    data = json.load(json_file)

data_type = data['data_type']
foreground_weight = float(data['foreground_weight'])
n_z = int(data['n_z'])
n_y = int(data['n_y'])
n_x = int(data['n_x'])
# Only the z and x pixel sizes are read; they feed the anisotropy factor later.
pixel_size_z_microns = float(data['pixel_size_z_microns'])
pixel_size_x_microns = float(data['pixel_size_x_microns'])

### Specify training dataset-related parameters

Some hints: 
* The `train_size` attribute indicates the number of image-mask paired examples which the network would see in one complete epoch. Ideally this should be the number of `train` image crops. 

In the cell after this one, a `train_dataset_dict` dictionary is generated from the parameters specified here!

In [5]:
# One epoch covers every entry found in the train images folder.
train_images_dir = os.path.join(data_dir, project_name, 'train', 'images')
train_size = len(os.listdir(train_images_dir))
print(f"Train size : {train_size}")
train_batch_size = 1

Train size : 13


### Create the `train_dataset_dict` dictionary  

In [6]:
# Bundle the training-set options chosen above into a single dictionary.
# NOTE(review): virtual_batch_multiplier=8 presumably accumulates gradients over
# 8 batches of size 1 — confirm against EmbedSeg's training loop.
train_dataset_dict = create_dataset_dict(
    data_dir=data_dir,
    project_name=project_name,
    center=center,
    size=train_size,
    batch_size=train_batch_size,
    type='train',
    name='3d',
    workers=8,
    virtual_batch_multiplier=8,
)
print(f"Train Dataset Dict : {train_dataset_dict}")

`train_dataset_dict` dictionary successfully created                 with: 
 -- train images accessed from crops/Vesuvius/train/images, 
 -- number of images per epoch equal to 13, 
 -- batch size set at 1, 
Train Dataset Dict : {'name': '3d', 'kwargs': {'center': 'center-approximate-medoid', 'data_dir': 'crops/Vesuvius', 'type': 'train', 'size': 13, 'transform': Compose(
    RandomRotationsAndFlips_3d(degrees=[-90.0, 90.0], interpolation=nearest, expand=False, fill=0)
    <EmbedSeg.utils.transforms.ToTensorFromNumpy object at 0x7296805a2690>
), 'one_hot': False}, 'batch_size': 1, 'virtual_batch_multiplier': 8, 'workers': 8}


### Specify validation dataset-related parameters

Some hints:
* The size attribute indicates the number of image-mask paired examples which the network would see in one complete epoch. Here, it is recommended to set `val_size` equal to the total number of validation image crops.

In the cell after this one, a `val_dataset_dict` dictionary is generated from the parameters specified here!

In [7]:
# Validate on every entry found in the val images folder, one crop per batch.
val_images_dir = os.path.join(data_dir, project_name, 'val', 'images')
val_size = len(os.listdir(val_images_dir))
val_batch_size = 1

### Create the `val_dataset_dict` dictionary

In [8]:
# Same construction as the training dictionary, but for the `val` split and
# with a virtual batch multiplier of 1.
val_dataset_dict = create_dataset_dict(
    data_dir=data_dir,
    project_name=project_name,
    center=center,
    size=val_size,
    batch_size=val_batch_size,
    type='val',
    name='3d',
    workers=8,
    virtual_batch_multiplier=1,
)

`val_dataset_dict` dictionary successfully created                 with: 
 -- val images accessed from crops/Vesuvius/val/images, 
 -- number of images per epoch equal to 13, 
 -- batch size set at 1, 


### Specify model-related parameters

Some hints:
* Set the `input_channels` attribute equal to the number of channels in the input images. 
* Set the `num_classes = [6, 1]` for `3d` training and `num_classes = [4, 1]` for `2d` training
<br>(here, 6 implies the offsets and bandwidths in x, y and z dimensions and 1 implies the `seediness` value per pixel)

In the cell after this one, a `model_dict` dictionary is generated from the parameters specified here!

In [9]:
# Number of channels in the input images.
input_channels = 1
# 3D setting: 6 outputs for the offsets and bandwidths in x, y and z, plus
# 1 output for the per-pixel seediness value.
num_classes = [6, 1] 

### Create the `model_dict` dictionary

In [10]:
# Describe the 3D network: its input channel count and per-branch output sizes.
model_dict = create_model_dict(
    input_channels=input_channels,
    num_classes=num_classes,
    name='3d',
)

`model_dict` dictionary successfully created                 with: 
 -- num of classes equal to 1, 
 -- input channels                 equal to [6, 1], 
 -- name equal to branched_erfnet_3d


### Create the `loss_dict` dictionary

In [11]:
# Build the loss configuration. `foreground_weight` is the value read from
# data_properties.json earlier in the notebook.
# NOTE(review): n_sigma = 3 presumably means one bandwidth per spatial axis in 3D — confirm.
loss_dict = create_loss_dict(n_sigma = 3, foreground_weight = foreground_weight)

`loss_dict` dictionary successfully created                 with: 
 -- foreground weight equal to 1.112, 
 -- w_inst                 equal to 1, 
 -- w_var                 equal to 10, 
 -- w_seed equal to 1


### Specify additional parameters 

Some hints:
* The `n_epochs` attribute determines how long the training should proceed. In general, for reasonable results, you should train for at least 50 epochs.
* The `save_dir` attribute identifies the location where the checkpoints and loss curve details are saved. 
* If one wishes to **resume training** from a previous checkpoint, they could point `resume_path` attribute appropriately. For example, one could set `resume_path = './experiment/Mouse-Organoid-Cells-CBG-demo/checkpoint.pth'` to resume training from the last checkpoint.


In [12]:
# NOTE: training crashes after 200 epochs for a reason not yet identified.
n_epochs = 200
# Checkpoints and loss-curve details are written under experiment/<project_name>-demo.
save_dir = os.path.join('experiment', project_name + '-demo')
# Leave as None to start from scratch; point at a saved checkpoint to resume, e.g.
# "/home/james/Documents/VS/EmbedSegScrolls/examples/3d/VesuviusScrolls/experiment/Vesuvius-demo/180_checkpoint.pth"
resume_path = None

In the cell after this one, a `configs` dictionary is generated from the parameters specified here!
<a id='resume'></a>

### Create the  `configs` dictionary 

In [13]:
# Collect the run-level settings: schedule length, checkpoint locations, crop
# dimensions and the z-to-x pixel-size ratio.
# NOTE(review): save_checkpoint_frequency=20 presumably keeps an extra numbered
# checkpoint every 20 epochs — confirm against EmbedSeg.
configs = create_configs(
    n_epochs=n_epochs,
    resume_path=resume_path,
    save_dir=save_dir,
    n_z=n_z,
    n_y=n_y,
    n_x=n_x,
    anisotropy_factor=pixel_size_z_microns / pixel_size_x_microns,
    train_lr=5e-3,  # learning rate used for the first epoch
    save_checkpoint_frequency=20,
)

`configs` dictionary successfully created with: 
 -- n_epochs equal to 500, 
 -- save_dir equal to experiment/Vesuvius-demo, 
 -- n_z equal to 256, 
 -- n_y equal to 256, 
 -- n_x equal to 256, 


In [14]:
# Sanity-check the GPU setup before launching training.
# `torch` is already imported in the notebook's first cell, so it is not re-imported here.
print(torch.cuda.is_available())  # Checks if CUDA is available on your system
print(torch.version.cuda)         # Shows the CUDA version PyTorch was built with


True
12.1


### Begin training!

Executing the next cell would begin the training. 

In [15]:
# Launch training with the dataset, model, loss and config dictionaries built
# in the cells above. Each epoch logs the learning rate, the train and val loss,
# and the val IoU.
begin_training(train_dataset_dict, val_dataset_dict, model_dict, loss_dict, configs)

3-D `train` dataloader created! Accessing data from crops/Vesuvius/train/
Number of images in `train` directory is 13
Number of instances in `train` directory is 13
Number of center images in `train` directory is 13
*************************
train_dataset: torch.float16
3-D `val` dataloader created! Accessing data from crops/Vesuvius/val/
Number of images in `val` directory is 13
Number of instances in `val` directory is 13
Number of center images in `val` directory is 13
*************************
Creating Branched Erfnet 3D with [6, 1] outputs
initialize last layer with size:  torch.Size([16, 6, 2, 2, 2])
Created spatial emb loss function with:                     n_sigma: 3, foreground_weight: 1.1116606927043988
*************************
Created logger with keys:  ('train', 'val', 'iou')
Starting epoch 0
learning rate: 0.005


100%|██████████| 13/13 [00:07<00:00,  1.73it/s]
100%|██████████| 13/13 [00:03<00:00,  4.12it/s]


===> train loss: 2.30
===> val loss: 2.29, val iou: 0.00
=> saving checkpoint
Starting epoch 1
learning rate: 0.004977494364660345


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.30
===> val loss: 2.29, val iou: 0.00
=> saving checkpoint
Starting epoch 2
learning rate: 0.004954977417064171


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.29
===> val loss: 2.29, val iou: 0.00
=> saving checkpoint
Starting epoch 3
learning rate: 0.004932449094349201


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.24it/s]


===> train loss: 2.29
===> val loss: 2.28, val iou: 0.00
=> saving checkpoint
Starting epoch 4
learning rate: 0.004909909332982876


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.28
===> val loss: 2.28, val iou: 0.00
=> saving checkpoint
Starting epoch 5
learning rate: 0.004887358068751748


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.28
===> val loss: 2.28, val iou: 0.00
=> saving checkpoint
Starting epoch 6
learning rate: 0.004864795236750653


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.15it/s]


===> train loss: 2.28
===> val loss: 2.28, val iou: 0.00
=> saving checkpoint
Starting epoch 7
learning rate: 0.004842220771371654


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.31it/s]


===> train loss: 2.28
===> val loss: 2.28, val iou: 0.00
=> saving checkpoint
Starting epoch 8
learning rate: 0.0048196346062927544


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.28
===> val loss: 2.28, val iou: 0.00
=> saving checkpoint
Starting epoch 9
learning rate: 0.00479703667446636


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.24it/s]


===> train loss: 2.27
===> val loss: 2.28, val iou: 0.00
=> saving checkpoint
Starting epoch 10
learning rate: 0.004774426908107499


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.11it/s]


===> train loss: 2.27
===> val loss: 2.28, val iou: 0.00
=> saving checkpoint
Starting epoch 11
learning rate: 0.0047518052386817935


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.21it/s]


===> train loss: 2.27
===> val loss: 2.27, val iou: 0.00
=> saving checkpoint
Starting epoch 12
learning rate: 0.004729171596893161


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.12it/s]


===> train loss: 2.27
===> val loss: 2.27, val iou: 0.00
=> saving checkpoint
Starting epoch 13
learning rate: 0.0047065259126712455


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.16it/s]


===> train loss: 2.27
===> val loss: 2.27, val iou: 0.00
=> saving checkpoint
Starting epoch 14
learning rate: 0.004683868115158587


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.16it/s]


===> train loss: 2.27
===> val loss: 2.27, val iou: 0.00
=> saving checkpoint
Starting epoch 15
learning rate: 0.004661198132697498


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.24it/s]


===> train loss: 2.26
===> val loss: 2.27, val iou: 0.00
=> saving checkpoint
Starting epoch 16
learning rate: 0.004638515892816641


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.13it/s]


===> train loss: 2.26
===> val loss: 2.26, val iou: 0.00
=> saving checkpoint
Starting epoch 17
learning rate: 0.004615821322217328


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.14it/s]


===> train loss: 2.26
===> val loss: 2.26, val iou: 0.00
=> saving checkpoint
Starting epoch 18
learning rate: 0.0045931143467594976


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.20it/s]


===> train loss: 2.26
===> val loss: 2.26, val iou: 0.00
=> saving checkpoint
Starting epoch 19
learning rate: 0.004570394891447373


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.32it/s]


===> train loss: 2.26
===> val loss: 2.26, val iou: 0.00
=> saving checkpoint
Starting epoch 20
learning rate: 0.004547662880414811


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.27it/s]


===> train loss: 2.26
===> val loss: 2.26, val iou: 0.00
=> saving checkpoint
Starting epoch 21
learning rate: 0.004524918236910306


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.29it/s]


===> train loss: 2.26
===> val loss: 2.25, val iou: 0.00
=> saving checkpoint
Starting epoch 22
learning rate: 0.004502160883281645


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.26
===> val loss: 2.25, val iou: 0.00
=> saving checkpoint
Starting epoch 23
learning rate: 0.004479390740960227


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.11it/s]


===> train loss: 2.25
===> val loss: 2.25, val iou: 0.00
=> saving checkpoint
Starting epoch 24
learning rate: 0.004456607730445


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.21it/s]


===> train loss: 2.25
===> val loss: 2.25, val iou: 0.00
=> saving checkpoint
Starting epoch 25
learning rate: 0.004433811771286037


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.09it/s]


===> train loss: 2.25
===> val loss: 2.25, val iou: 0.00
=> saving checkpoint
Starting epoch 26
learning rate: 0.004411002782067719


100%|██████████| 13/13 [00:05<00:00,  2.25it/s]
100%|██████████| 13/13 [00:03<00:00,  4.31it/s]


===> train loss: 2.25
===> val loss: 2.25, val iou: 0.00
=> saving checkpoint
Starting epoch 27
learning rate: 0.00438818068039153


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.06it/s]


===> train loss: 2.25
===> val loss: 2.25, val iou: 0.00
=> saving checkpoint
Starting epoch 28
learning rate: 0.00436534538285843


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.14it/s]


===> train loss: 2.25
===> val loss: 2.25, val iou: 0.00
=> saving checkpoint
Starting epoch 29
learning rate: 0.004342496805050826


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.14it/s]


===> train loss: 2.25
===> val loss: 2.25, val iou: 0.00
=> saving checkpoint
Starting epoch 30
learning rate: 0.0043196348615140955


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.28it/s]


===> train loss: 2.25
===> val loss: 2.25, val iou: 0.00
=> saving checkpoint
Starting epoch 31
learning rate: 0.004296759465737673


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.12it/s]


===> train loss: 2.25
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 32
learning rate: 0.004273870530135671


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.16it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 33
learning rate: 0.004250967966027037


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.19it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 34
learning rate: 0.00422805168361521


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.29it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 35
learning rate: 0.004205121591967288


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.10it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 36
learning rate: 0.004182177598992669


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.13it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 37
learning rate: 0.004159219611421163


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.15it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 38
learning rate: 0.004136247534780547


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.26it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 39
learning rate: 0.004113261273373556


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.14it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 40
learning rate: 0.004090260730254292


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.16it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 41
learning rate: 0.004067245807204016


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.27it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 42
learning rate: 0.004044216404706331


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.26it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 43
learning rate: 0.004021172421921706


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.28it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 44
learning rate: 0.003998113756661347


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.30it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 45
learning rate: 0.00397504030536037


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.12it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 46
learning rate: 0.003951951963050278


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.18it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 47
learning rate: 0.003928848623330685


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.24
===> val loss: 2.24, val iou: 0.00
=> saving checkpoint
Starting epoch 48
learning rate: 0.003905730178340304


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.27it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 49
learning rate: 0.0038825965187271345


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 50
learning rate: 0.0038594475336178524


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.18it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 51
learning rate: 0.0038362831105863537


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.11it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 52
learning rate: 0.003813103135621442


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.05it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 53
learning rate: 0.003789907493093607


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 54
learning rate: 0.003766696065720893


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.27it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 55
learning rate: 0.0037434687345337947


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.26it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 56
learning rate: 0.0037202253788391638


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.28it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 57
learning rate: 0.003696965876183094


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.14it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 58
learning rate: 0.0036736901023127285


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.15it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 59
learning rate: 0.003650397931136982


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.15it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 60
learning rate: 0.0036270892346860996


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 61
learning rate: 0.003603763883070051


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.29it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 62
learning rate: 0.0035804217444356783


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.20it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 63
learning rate: 0.003557062684922589


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.13it/s]


===> train loss: 2.23
===> val loss: 2.23, val iou: 0.00
=> saving checkpoint
Starting epoch 64
learning rate: 0.003533686568617708


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.11it/s]


===> train loss: 2.23
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 65
learning rate: 0.00351029325750848


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 66
learning rate: 0.0034868826114346314


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.10it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 67
learning rate: 0.0034634544880384707


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.18it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 68
learning rate: 0.003440008742713641


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.20it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 69
learning rate: 0.0034165452285523008


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 70
learning rate: 0.0033930637962906254


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.05it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 71
learning rate: 0.0033695642942526095


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.06it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 72
learning rate: 0.0033460465682920745


100%|██████████| 13/13 [00:05<00:00,  2.25it/s]
100%|██████████| 13/13 [00:03<00:00,  4.26it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 73
learning rate: 0.0033225104617328177


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.08it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 74
learning rate: 0.0032989558153068282


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.21it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 75
learning rate: 0.003275382467090493


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.20it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.00
=> saving checkpoint
Starting epoch 76
learning rate: 0.0032517902524387098


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.28it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.01
=> saving checkpoint
Starting epoch 77
learning rate: 0.003228179003916818


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.17it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.01
=> saving checkpoint
Starting epoch 78
learning rate: 0.003204548551230261


100%|██████████| 13/13 [00:05<00:00,  2.33it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.21
===> val loss: 2.22, val iou: 0.01
=> saving checkpoint
Starting epoch 79
learning rate: 0.003180898721151886


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.24it/s]


===> train loss: 2.22
===> val loss: 2.22, val iou: 0.01
=> saving checkpoint
Starting epoch 80
learning rate: 0.0031572293374467764


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.17it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 81
learning rate: 0.0031335402207945113


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.14it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 82
learning rate: 0.0031098311887087574


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.12it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 83
learning rate: 0.003086102055454047


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.31it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 84
learning rate: 0.0030623526319596625


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.19it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 85
learning rate: 0.003038582725730446


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.10it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 86
learning rate: 0.0030147921407544602


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.13it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 87
learning rate: 0.0029909806774072955


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.32it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 88
learning rate: 0.00296714813235293


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.19it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 89
learning rate: 0.002943294298440934


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 90
learning rate: 0.0029194189645999013


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.28it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 91
learning rate: 0.0028955219157268818


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.29it/s]


===> train loss: 2.21
===> val loss: 2.21, val iou: 0.01
=> saving checkpoint
Starting epoch 92
learning rate: 0.0028716029325726706


100%|██████████| 13/13 [00:05<00:00,  2.25it/s]
100%|██████████| 13/13 [00:03<00:00,  4.28it/s]


===> train loss: 2.21
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 93
learning rate: 0.0028476617916227283


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.18it/s]


===> train loss: 2.20
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 94
learning rate: 0.002823698264973549


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.11it/s]


===> train loss: 2.20
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 95
learning rate: 0.0027997121202042266


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.17it/s]


===> train loss: 2.20
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 96
learning rate: 0.0027757031202430183


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.24it/s]


===> train loss: 2.20
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 97
learning rate: 0.002751671023228639


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.15it/s]


===> train loss: 2.20
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 98
learning rate: 0.002727615582366029


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.20
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 99
learning rate: 0.0027035365457763246


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.17it/s]


===> train loss: 2.20
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 100
learning rate: 0.002679433656340733


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.20
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 101
learning rate: 0.0026553066515380024


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.20
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 102
learning rate: 0.0026311552632751595


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.29it/s]


===> train loss: 2.20
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 103
learning rate: 0.002606979217711156


100%|██████████| 13/13 [00:05<00:00,  2.33it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.20
===> val loss: 2.20, val iou: 0.01
=> saving checkpoint
Starting epoch 104
learning rate: 0.002582778235073065


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.16it/s]


===> train loss: 2.20
===> val loss: 2.19, val iou: 0.01
=> saving checkpoint
Starting epoch 105
learning rate: 0.002558552029464411


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.18it/s]


===> train loss: 2.19
===> val loss: 2.19, val iou: 0.01
=> saving checkpoint
Starting epoch 106
learning rate: 0.0025343003086652368


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.19
===> val loss: 2.19, val iou: 0.01
=> saving checkpoint
Starting epoch 107
learning rate: 0.0025100227739234303


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.19
===> val loss: 2.19, val iou: 0.01
=> saving checkpoint
Starting epoch 108
learning rate: 0.002485719119736858


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.20it/s]


===> train loss: 2.19
===> val loss: 2.19, val iou: 0.01
=> saving checkpoint
Starting epoch 109
learning rate: 0.002461389033625775


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.27it/s]


===> train loss: 2.19
===> val loss: 2.19, val iou: 0.01
=> saving checkpoint
Starting epoch 110
learning rate: 0.002437032195894977


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.26it/s]


===> train loss: 2.19
===> val loss: 2.19, val iou: 0.01
=> saving checkpoint
Starting epoch 111
learning rate: 0.002412648279385112


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.24it/s]


===> train loss: 2.19
===> val loss: 2.19, val iou: 0.01
=> saving checkpoint
Starting epoch 112
learning rate: 0.002388236949212524


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.21it/s]


===> train loss: 2.19
===> val loss: 2.19, val iou: 0.01
=> saving checkpoint
Starting epoch 113
learning rate: 0.002363797862496971


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.20it/s]


===> train loss: 2.19
===> val loss: 2.19, val iou: 0.01
=> saving checkpoint
Starting epoch 114
learning rate: 0.0023393306680765


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.19
===> val loss: 2.19, val iou: 0.01
=> saving checkpoint
Starting epoch 115
learning rate: 0.002314835006208722


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.19
===> val loss: 2.19, val iou: 0.02
=> saving checkpoint
Starting epoch 116
learning rate: 0.0022903105082576663


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.29it/s]


===> train loss: 2.19
===> val loss: 2.18, val iou: 0.02
=> saving checkpoint
Starting epoch 117
learning rate: 0.002265756796365339


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.21it/s]


===> train loss: 2.19
===> val loss: 2.18, val iou: 0.02
=> saving checkpoint
Starting epoch 118
learning rate: 0.0022411734831070474


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.13it/s]


===> train loss: 2.18
===> val loss: 2.18, val iou: 0.01
=> saving checkpoint
Starting epoch 119
learning rate: 0.002216560171129477


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.27it/s]


===> train loss: 2.18
===> val loss: 2.18, val iou: 0.02
=> saving checkpoint
Starting epoch 120
learning rate: 0.0021919164527704348


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.21it/s]


===> train loss: 2.18
===> val loss: 2.18, val iou: 0.02
=> saving checkpoint
Starting epoch 121
learning rate: 0.002167241909659091


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.13it/s]


===> train loss: 2.18
===> val loss: 2.18, val iou: 0.02
=> saving checkpoint
Starting epoch 122
learning rate: 0.0021425361122954585


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.11it/s]


===> train loss: 2.18
===> val loss: 2.18, val iou: 0.02
=> saving checkpoint
Starting epoch 123
learning rate: 0.0021177986196077485


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.18
===> val loss: 2.18, val iou: 0.02
=> saving checkpoint
Starting epoch 124
learning rate: 0.0020930289784861405


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.13it/s]


===> train loss: 2.18
===> val loss: 2.18, val iou: 0.02
=> saving checkpoint
Starting epoch 125
learning rate: 0.002068226723291381


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.16it/s]


===> train loss: 2.18
===> val loss: 2.18, val iou: 0.02
=> saving checkpoint
Starting epoch 126
learning rate: 0.0020433913753364944


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.18
===> val loss: 2.18, val iou: 0.02
=> saving checkpoint
Starting epoch 127
learning rate: 0.0020185224423397573


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.11it/s]


===> train loss: 2.18
===> val loss: 2.18, val iou: 0.02
=> saving checkpoint
Starting epoch 128
learning rate: 0.001993619417846922


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 129
learning rate: 0.001968681780620511


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.14it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 130
learning rate: 0.0019437089939938173


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.13it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 131
learning rate: 0.001918700505187031


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.18it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 132
learning rate: 0.0018936557445826967


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 133
learning rate: 0.0018685741249574433


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.14it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 134
learning rate: 0.0018434550406666598


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 135
learning rate: 0.001818297866778471


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.17it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 136
learning rate: 0.0017931019581530385


100%|██████████| 13/13 [00:05<00:00,  2.33it/s]
100%|██████████| 13/13 [00:03<00:00,  4.27it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 137
learning rate: 0.0017678666484628196


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.12it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 138
learning rate: 0.0017425912491490017


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.24it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 139
learning rate: 0.0017172750483088594


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.07it/s]


===> train loss: 2.17
===> val loss: 2.17, val iou: 0.02
=> saving checkpoint
Starting epoch 140
learning rate: 0.0016919173095082494


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.16it/s]


===> train loss: 2.16
===> val loss: 2.16, val iou: 0.02
=> saving checkpoint
Starting epoch 141
learning rate: 0.0016665172705128705


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.24it/s]


===> train loss: 2.17
===> val loss: 2.16, val iou: 0.02
=> saving checkpoint
Starting epoch 142
learning rate: 0.0016410741419312688


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.26it/s]


===> train loss: 2.16
===> val loss: 2.16, val iou: 0.02
=> saving checkpoint
Starting epoch 143
learning rate: 0.0016155871057618057


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.26it/s]


===> train loss: 2.16
===> val loss: 2.16, val iou: 0.02
=> saving checkpoint
Starting epoch 144
learning rate: 0.0015900553138349974


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.10it/s]


===> train loss: 2.16
===> val loss: 2.16, val iou: 0.02
=> saving checkpoint
Starting epoch 145
learning rate: 0.0015644778861416784


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.16
===> val loss: 2.16, val iou: 0.02
=> saving checkpoint
Starting epoch 146
learning rate: 0.0015388539090363923


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.15it/s]


===> train loss: 2.16
===> val loss: 2.16, val iou: 0.02
=> saving checkpoint
Starting epoch 147
learning rate: 0.001513182433304212


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.16
===> val loss: 2.16, val iou: 0.02
=> saving checkpoint
Starting epoch 148
learning rate: 0.0014874624720778262


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.20it/s]


===> train loss: 2.16
===> val loss: 2.16, val iou: 0.02
=> saving checkpoint
Starting epoch 149
learning rate: 0.0014616929985901933


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.09it/s]


===> train loss: 2.16
===> val loss: 2.16, val iou: 0.03
=> saving checkpoint
Starting epoch 150
learning rate: 0.0014358729437462937


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.07it/s]


===> train loss: 2.16
===> val loss: 2.16, val iou: 0.03
=> saving checkpoint
Starting epoch 151
learning rate: 0.001410001193495505


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.19it/s]


===> train loss: 2.16
===> val loss: 2.16, val iou: 0.03
=> saving checkpoint
Starting epoch 152
learning rate: 0.0013840765859838176


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.26it/s]


===> train loss: 2.16
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 153
learning rate: 0.0013580979084624686


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.28it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 154
learning rate: 0.0013320638939265195


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 155
learning rate: 0.0013059732174533927


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.19it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 156
learning rate: 0.0012798244922073102


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 157
learning rate: 0.001253616265070845


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.15it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 158
learning rate: 0.0012273470118592878


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.29it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 159
learning rate: 0.001201015132067082


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.27it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 160
learning rate: 0.0011746189430880188


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.24it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 161
learning rate: 0.0011481566738419757


100%|██████████| 13/13 [00:05<00:00,  2.33it/s]
100%|██████████| 13/13 [00:03<00:00,  4.26it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 162
learning rate: 0.0011216264577304455


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.07it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 163
learning rate: 0.001095026324830597


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.11it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 164
learning rate: 0.0010683541932226925


100%|██████████| 13/13 [00:05<00:00,  2.25it/s]
100%|██████████| 13/13 [00:03<00:00,  4.19it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 165
learning rate: 0.0010416078593278445


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 166
learning rate: 0.00101478498711157


100%|██████████| 13/13 [00:05<00:00,  2.33it/s]
100%|██████████| 13/13 [00:03<00:00,  4.31it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 167
learning rate: 0.000987883095982645


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 168
learning rate: 0.0009608995471851452


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.21it/s]


===> train loss: 2.15
===> val loss: 2.15, val iou: 0.03
=> saving checkpoint
Starting epoch 169
learning rate: 0.0009338315284429349


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.31it/s]


===> train loss: 2.15
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 170
learning rate: 0.0009066760365683728


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 171
learning rate: 0.000879429857688226


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.12it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 172
learning rate: 0.0008520895446665837


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 173
learning rate: 0.0008246513912127221


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.21it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 174
learning rate: 0.0007971114020458232


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.17it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 175
learning rate: 0.0007694652583405727


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.13it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 176
learning rate: 0.0007417082774876436


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.11it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 177
learning rate: 0.0007138353659566017


100%|██████████| 13/13 [00:05<00:00,  2.25it/s]
100%|██████████| 13/13 [00:03<00:00,  4.28it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 178
learning rate: 0.0006858409637258909


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.21it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 179
learning rate: 0.0006577189783170008


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.30it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 180
learning rate: 0.0006294627058970835


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 181
learning rate: 0.0006010647361370384


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.08it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 182
learning rate: 0.0005725168364427263


100%|██████████| 13/13 [00:05<00:00,  2.32it/s]
100%|██████████| 13/13 [00:03<00:00,  4.21it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 183
learning rate: 0.0005438098096832071


100%|██████████| 13/13 [00:05<00:00,  2.33it/s]
100%|██████████| 13/13 [00:03<00:00,  4.26it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 184
learning rate: 0.0005149333174180893


100%|██████████| 13/13 [00:05<00:00,  2.33it/s]
100%|██████████| 13/13 [00:03<00:00,  4.10it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 185
learning rate: 0.0004858756575557835


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.27it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 186
learning rate: 0.0004566234808391386


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.28it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 187
learning rate: 0.0004271614236989052


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.27it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 188
learning rate: 0.0003974716243773811


100%|██████████| 13/13 [00:05<00:00,  2.29it/s]
100%|██████████| 13/13 [00:03<00:00,  4.25it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 189
learning rate: 0.0003675330722208635


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.22it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 190
learning rate: 0.0003373207119183911


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.13it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 191
learning rate: 0.000306804176077273


100%|██████████| 13/13 [00:05<00:00,  2.26it/s]
100%|██████████| 13/13 [00:03<00:00,  4.26it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 192
learning rate: 0.0002759459322922432


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.08it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 193
learning rate: 0.0002446984645671695


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.21it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 194
learning rate: 0.00021299976955943534


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.29it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 195
learning rate: 0.00018076569369899074


100%|██████████| 13/13 [00:05<00:00,  2.31it/s]
100%|██████████| 13/13 [00:03<00:00,  4.13it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 196
learning rate: 0.0001478757636628315


100%|██████████| 13/13 [00:05<00:00,  2.30it/s]
100%|██████████| 13/13 [00:03<00:00,  4.08it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 197
learning rate: 0.00011414375027007427


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.15it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 198
learning rate: 7.924465962305572e-05


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.23it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 199
learning rate: 4.2466161615856206e-05


100%|██████████| 13/13 [00:05<00:00,  2.28it/s]
100%|██████████| 13/13 [00:03<00:00,  4.19it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 200
learning rate: 0.0


100%|██████████| 13/13 [00:05<00:00,  2.27it/s]
100%|██████████| 13/13 [00:03<00:00,  4.17it/s]


===> train loss: 2.14
===> val loss: 2.14, val iou: 0.03
=> saving checkpoint
Starting epoch 201
learning rate: (-4.038771972680238e-05+1.312276562517239e-05j)


 54%|█████▍    | 7/13 [00:03<00:03,  1.88it/s]


RuntimeError: value cannot be converted to type float without overflow

<div class="alert alert-block alert-warning"> 
  Common causes for errors during training may include: <br>
    1. Not having <b>center images</b> for <b>both</b> the train and val directories  <br>
    2. <b>Mismatch</b> between the type of center-images saved in <b>01-data.ipynb</b> and the type of center chosen in this notebook (see the <b><a href="#center"> center</a></b> parameter in the second code cell of this notebook)   <br>
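    3. In case of resuming training from a previous checkpoint, please ensure that the model weights are read from the correct directory, using the <b><a href="#resume"> resume_path</a></b> parameter. Additionally, please ensure that the <b>save_dir</b> parameter for saving the model weights points to a relevant directory.  <br>
    4. Training for more epochs than the learning-rate schedule covers. In the log above, the learning rate decays to 0.0 at epoch 200 and becomes a complex number at epoch 201, after which training stops with <b>RuntimeError: value cannot be converted to type float without overflow</b>. Do not train past the epoch at which the learning rate reaches zero (epoch 200 in this run), or adjust the schedule accordingly. 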
</div>