In [1]:
import import_ipynb
from config import (
    device, num_classes, num_epochs, out_dir,
    visualize_transformed_images, num_workers,
)
from model import create_model
from custom_utils import Averager, SaveBestModel, save_model, save_loss_plot
from tqdm.auto import tqdm
from datasets import (
    create_train_dataset, create_valid_dataset, 
    create_train_loader, create_valid_loader
)
import torch
import matplotlib.pyplot as plt
import time

importing Jupyter notebook from config.ipynb
importing Jupyter notebook from model.ipynb
importing Jupyter notebook from custom_utils.ipynb
importing Jupyter notebook from datasets.ipynb


In [2]:
# Select matplotlib's 'ggplot' style sheet for figures created from here on.
plt.style.use('ggplot')

In [3]:
def train(train_data_loader, model):
    """Run one optimisation pass over ``train_data_loader``.

    For every batch the images and the tensors inside each target dict are
    moved to ``device``, the model is called as ``model(images, targets)``
    and the values of the returned loss mapping are summed into one loss
    that is back-propagated and applied with the module-level ``optimizer``.

    Relies on module-level state: ``optimizer``, ``device``,
    ``train_loss_hist`` (receives every batch loss via ``send``),
    ``train_loss_list`` (every batch loss is appended) and ``train_itr``
    (incremented once per batch).

    Args:
        train_data_loader: Sized iterable yielding ``(images, targets)``
            pairs; each target is a dict whose values support ``.to(device)``.
        model: Callable returning a mapping of loss tensors when given
            ``(images, targets)``.

    Returns:
        The module-level ``train_loss_list`` after this pass.
    """
    global train_itr, train_loss_list

    print('Training')

    # tqdm wrapper so the running loss can be shown next to the bar
    progress = tqdm(train_data_loader, total=len(train_data_loader))

    for batch_images, batch_targets in progress:
        optimizer.zero_grad()

        device_images = [img.to(device) for img in batch_images]
        device_targets = [
            {key: tensor.to(device) for key, tensor in target.items()}
            for target in batch_targets
        ]

        component_losses = model(device_images, device_targets)
        total_loss = sum(component_losses.values())
        batch_loss = total_loss.item()

        # record the scalar loss before stepping the optimizer
        train_loss_list.append(batch_loss)
        train_loss_hist.send(batch_loss)

        total_loss.backward()
        optimizer.step()
        train_itr += 1

        # refresh the loss value displayed beside the progress bar
        progress.set_description(desc=f"Loss: {batch_loss:.4f}")

    return train_loss_list

In [4]:
def validate(valid_data_loader, model):
    """Compute the loss over ``valid_data_loader`` without updating weights.

    Each batch is moved to ``device`` and passed to the model as
    ``model(images, targets)`` inside ``torch.no_grad()``; the values of the
    returned loss mapping are summed into a single scalar. This function
    does not change the model's train/eval mode.

    Relies on module-level state: ``device``, ``val_loss_hist`` (receives
    every batch loss via ``send``), ``val_loss_list`` (every batch loss is
    appended) and ``val_itr`` (incremented once per batch).

    Args:
        valid_data_loader: Sized iterable yielding ``(images, targets)``
            pairs; each target is a dict whose values support ``.to(device)``.
        model: Callable returning a mapping of loss tensors when given
            ``(images, targets)``.

    Returns:
        The module-level ``val_loss_list`` after this pass.
    """
    global val_itr, val_loss_list

    print('Validating')

    progress = tqdm(valid_data_loader, total=len(valid_data_loader))

    for batch_images, batch_targets in progress:
        device_images = [img.to(device) for img in batch_images]
        device_targets = [
            {key: tensor.to(device) for key, tensor in target.items()}
            for target in batch_targets
        ]

        # forward pass only; no gradients are tracked
        with torch.no_grad():
            component_losses = model(device_images, device_targets)

        total_loss = sum(component_losses.values())
        batch_loss = total_loss.item()

        val_loss_list.append(batch_loss)
        val_loss_hist.send(batch_loss)
        val_itr += 1

        # update the loss shown beside the progress bar
        progress.set_description(desc=f"Loss: {batch_loss:.4f}")

    return val_loss_list

In [5]:
if __name__ == '__main__':
    # Entry point: build data loaders, model and optimizer, then run the
    # train/validate loop for `num_epochs` epochs, saving checkpoints and the
    # loss plot after every epoch.
    train_dataset = create_train_dataset()
    valid_dataset = create_valid_dataset()
    train_loader = create_train_loader(train_dataset, num_workers)
    valid_loader = create_valid_loader(valid_dataset, num_workers)
    print(f"Number of training samples: {len(train_dataset)}")
    print(f"Number of validation samples: {len(valid_dataset)}\n")
    # Create the model and move it to the configured device.
    model = create_model(num_classes=num_classes)
    model = model.to(device)
    # Collect only the parameters that require gradients.
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)
    # Per-epoch loss accumulators; they are reset at the start of every epoch.
    train_loss_hist = Averager()
    val_loss_hist = Averager()
    # Global batch counters that train() / validate() increment.
    train_itr = 1
    val_itr = 1
    # Per-batch losses across all epochs, kept for plotting.
    train_loss_list = []
    val_loss_list = []
    MODEL_NAME = 'model'
    # NOTE(review): the preview of transformed images is hard-disabled here;
    # the `visualize_transformed_images` config flag is intentionally bypassed.
#     if visualize_transformed_images:
    if False:
        from custom_utils import show_tranformed_image
        show_tranformed_image(train_loader)
    # Helper that is called with the validation loss after every epoch.
    save_best_model = SaveBestModel()
    # Main training loop.
    for epoch in range(num_epochs):
        print(f"\nEPOCH {epoch+1} of {num_epochs}")
        train_loss_hist.reset()
        val_loss_hist.reset()
        # start timer
        start = time.time()
        train_loss = train(train_loader, model)
        val_loss = validate(valid_loader, model)
        print(f"Epoch #{epoch+1} train loss: {train_loss_hist.value:.3f}")   
        print(f"Epoch #{epoch+1} validation loss: {val_loss_hist.value:.3f}")   
        end = time.time()
        # Report the 1-based epoch number, matching the other messages above
        # (previously this line printed the 0-based loop index).
        print(f"Took {((end - start) / 60):.3f} minutes for epoch {epoch+1}")
        save_best_model(
            val_loss_hist.value, epoch, model, optimizer
        )
        save_model(epoch, model, optimizer)
        save_loss_plot(out_dir, train_loss, val_loss)

        # Pause for five seconds before starting the next epoch.
        time.sleep(5)

Number of training samples: 13299
Number of validation samples: 3302



  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "



EPOCH 1 of 10
Training


  0%|          | 0/1663 [00:00<?, ?it/s]

Validating


  0%|          | 0/413 [00:00<?, ?it/s]

Epoch #1 train loss: 0.452
Epoch #1 validation loss: 0.321
Took 40.707 minutes for epoch 0

Best validation loss: 0.3205595593944589

Saving best model for epoch: 1

SAVING PLOTS COMPLETE...

EPOCH 2 of 10
Training


  0%|          | 0/1663 [00:00<?, ?it/s]

Validating


  0%|          | 0/413 [00:00<?, ?it/s]

Epoch #2 train loss: 0.344
Epoch #2 validation loss: 0.288
Took 32.516 minutes for epoch 1

Best validation loss: 0.2880043705641213

Saving best model for epoch: 2

SAVING PLOTS COMPLETE...

EPOCH 3 of 10
Training


  0%|          | 0/1663 [00:00<?, ?it/s]

Validating


  0%|          | 0/413 [00:00<?, ?it/s]

Epoch #3 train loss: 0.323
Epoch #3 validation loss: 0.278
Took 43.531 minutes for epoch 2

Best validation loss: 0.27814974065770826

Saving best model for epoch: 3

SAVING PLOTS COMPLETE...

EPOCH 4 of 10
Training


  0%|          | 0/1663 [00:00<?, ?it/s]

Validating


  0%|          | 0/413 [00:00<?, ?it/s]

Epoch #4 train loss: 0.311
Epoch #4 validation loss: 0.273
Took 37.312 minutes for epoch 3

Best validation loss: 0.27294119378917153

Saving best model for epoch: 4

SAVING PLOTS COMPLETE...

EPOCH 5 of 10
Training


  0%|          | 0/1663 [00:00<?, ?it/s]

Validating


  0%|          | 0/413 [00:00<?, ?it/s]

Epoch #5 train loss: 0.303
Epoch #5 validation loss: 0.270
Took 47.502 minutes for epoch 4

Best validation loss: 0.26975517627979306

Saving best model for epoch: 5

SAVING PLOTS COMPLETE...

EPOCH 6 of 10
Training


  0%|          | 0/1663 [00:00<?, ?it/s]

Validating


  0%|          | 0/413 [00:00<?, ?it/s]

Epoch #6 train loss: 0.298
Epoch #6 validation loss: 0.269
Took 44.448 minutes for epoch 5

Best validation loss: 0.2688012185707219

Saving best model for epoch: 6

SAVING PLOTS COMPLETE...

EPOCH 7 of 10
Training


  0%|          | 0/1663 [00:00<?, ?it/s]

Validating


  0%|          | 0/413 [00:00<?, ?it/s]

Epoch #7 train loss: 0.293
Epoch #7 validation loss: 0.268
Took 36.389 minutes for epoch 6

Best validation loss: 0.267581511496343

Saving best model for epoch: 7

SAVING PLOTS COMPLETE...

EPOCH 8 of 10
Training


  0%|          | 0/1663 [00:00<?, ?it/s]

Validating


  0%|          | 0/413 [00:00<?, ?it/s]

Epoch #8 train loss: 0.290
Epoch #8 validation loss: 0.267
Took 34.695 minutes for epoch 7

Best validation loss: 0.2668947107530679

Saving best model for epoch: 8

SAVING PLOTS COMPLETE...

EPOCH 9 of 10
Training


  0%|          | 0/1663 [00:00<?, ?it/s]

Validating


  0%|          | 0/413 [00:00<?, ?it/s]

Epoch #9 train loss: 0.286
Epoch #9 validation loss: 0.264
Took 31.313 minutes for epoch 8

Best validation loss: 0.2643428235777065

Saving best model for epoch: 9

SAVING PLOTS COMPLETE...

EPOCH 10 of 10
Training


  0%|          | 0/1663 [00:00<?, ?it/s]

Validating


  0%|          | 0/413 [00:00<?, ?it/s]

Epoch #10 train loss: 0.283
Epoch #10 validation loss: 0.266
Took 29.746 minutes for epoch 9
SAVING PLOTS COMPLETE...
