In [1]:
from model import YOLOv3, ScalePrediction
from library import *
from config import learning_rate, checkpoint_file, ANCHORS, device, batch_size, s
from utils import load_checkpoint, convert_cells_to_bboxes, plot_image, save_checkpoint
from dataset import Dataset
from augment import test_transform
from loss import YOLOLoss
from metrics import nms
import multiprocessing
from augment import train_transform
from train import training_loop

In [2]:
# --- Run configuration ------------------------------------------------------
# Flags consulted when loading / saving checkpoints.
load_model, save_model = True, True
epochs = 1
batch_size = 8
# Checkpoint that is loaded into the 20-class source model below.
checkpoint_file = "save model/checkpoint.pth.tar"

# --- Source network (20 classes) with its optimizer, loss and scaler --------
model = YOLOv3(num_classes=20).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = YOLOLoss()
scaler = torch.cuda.amp.GradScaler()

if __name__ == "__main__":
    multiprocessing.freeze_support()

    # Optionally restore the source model / optimizer from the checkpoint.
    if load_model:
        load_checkpoint(checkpoint_file, model, optimizer, learning_rate)

    # Target network: same architecture, but built for 4 classes.
    num_classes = 4
    new_model = YOLOv3(num_classes=num_classes).to(device)

    # Keep a handle on the source weights, then move the source model off the
    # GPU and ask PyTorch to release cached GPU memory.
    # NOTE(review): the state dict is captured *before* the move to CPU, so its
    # tensors may still reference GPU storage - confirm this is intended.
    state_dict_source = model.state_dict()
    model = model.to("cpu")
    torch.cuda.empty_cache()


==> Loading checkpoint


In [3]:
# Begin with the 4-class model's own (freshly initialised) state dict.
state_dict_target = new_model.state_dict()

# Replace the first 358 entries (positions 0-357 in state-dict order) with the
# values from the source model; every later entry keeps new_model's value.
# NOTE(review): 358 presumably marks where the class-dependent layers begin -
# confirm against the YOLOv3 layer layout.
for layer_name in list(state_dict_target)[:358]:
    state_dict_target[layer_name] = state_dict_source[layer_name]

In [4]:
# Load the merged state dict (transferred source weights + new_model's own
# remaining weights) into the 4-class model.
new_model.load_state_dict(state_dict_target)    
# Confirmation message; the text is Vietnamese for "Success!".
print("Thanh cong!")

Thanh cong!


In [5]:
# Freeze the first ten entries of new_model.layers: with requires_grad turned
# off, their parameters get no gradients and are not updated by training.
for frozen_param in new_model.layers[:10].parameters():
    frozen_param.requires_grad = False

In [2]:
# Checkpoint flags read by the cells below.
load_model, save_model = True, True

# Fresh 4-class network together with its own Adam optimizer.
new_model = YOLOv3(num_classes=4).to(device)
new_optimizer = optim.Adam(new_model.parameters(), lr=learning_rate)

# Loss object and gradient scaler handed to the training loop.
loss_fn = YOLOLoss()
scaler = torch.cuda.amp.GradScaler()

Dùng loading checkpoint khi muốn thêm dữ liệu

In [3]:
# Checkpoint of the 4-class model; this is the same path the training cell
# writes to when save_model is set.
checkpoint_file = "save model/checkpoint_custom_final.pth.tar"
# Resume from that checkpoint when requested.
# NOTE(review): load_checkpoint presumably restores both the model and the
# optimizer state and applies learning_rate - confirm in utils.load_checkpoint.
if load_model:
    load_checkpoint(checkpoint_file, new_model, new_optimizer, learning_rate)  

==> Loading checkpoint


In [4]:
def _sum_scale_losses(loss_fn, outputs, targets, scaled_anchors, **loss_kwargs):
    """Add up ``loss_fn`` over the three prediction scales (indices 0, 1, 2).

    Extra keyword arguments (e.g. ``mode="box"``) are forwarded to ``loss_fn``
    unchanged; without any, ``loss_fn`` is called with its default behaviour.
    """
    total = loss_fn(outputs[0], targets[0], scaled_anchors[0], **loss_kwargs)
    for scale in (1, 2):
        total = total + loss_fn(
            outputs[scale], targets[scale], scaled_anchors[scale], **loss_kwargs
        )
    return total


def training_loop(loader, model, optimizer, loss_fn, scaler, scaled_anchors):
    """Run one pass over ``loader``, updating ``model`` and logging the losses.

    For every batch the total loss (summed over the three scales) is
    back-propagated through ``scaler`` and one optimizer step is taken.  The
    "box", "object", "no object" and "class" components are evaluated as well,
    purely for logging.

    Args:
        loader: iterable yielding ``(x, y)`` batches, where ``y`` holds one
            target per scale at indices 0, 1 and 2.
        model: network called as ``model(x)``; its output is indexed 0..2.
        optimizer: optimizer stepped once per batch via ``scaler.step``.
        loss_fn: callable ``loss_fn(prediction, target, anchors, mode=...)``.
        scaler: gradient scaler providing ``scale``, ``step`` and ``update``.
        scaled_anchors: anchors indexed by scale (0..2).

    Returns:
        Tuple ``(losses, all_box_loss, all_object_loss, all_no_object_loss,
        all_class_loss)`` with one entry per batch.  ``losses`` contains Python
        floats; the component lists contain NumPy values.
    """
    # Progress bar wrapping the loader
    progress_bar = tqdm(loader, leave=True)

    # Per-batch histories
    losses = []
    all_box_loss = []
    all_object_loss = []
    all_no_object_loss = []
    all_class_loss = []

    # (mode forwarded to loss_fn, history list receiving the per-batch value)
    component_logs = (
        ("box", all_box_loss),
        ("object", all_object_loss),
        ("no object", all_no_object_loss),
        ("class", all_class_loss),
    )

    for x, y in progress_bar:
        x = x.to(device)
        targets = (y[0].to(device), y[1].to(device), y[2].to(device))

        with torch.cuda.amp.autocast():
            # Forward pass and the loss that is actually optimised
            outputs = model(x)
            loss = _sum_scale_losses(loss_fn, outputs, targets, scaled_anchors)

            # The component losses are only logged, never back-propagated, so
            # evaluate them without recording an autograd graph.
            with torch.no_grad():
                for mode, history in component_logs:
                    component = _sum_scale_losses(
                        loss_fn, outputs, targets, scaled_anchors, mode=mode
                    )
                    history.append(component.detach().cpu().numpy())

        # Record the total loss of this batch
        losses.append(loss.item())

        # Reset gradients, back-propagate the scaled loss, step, update scaler
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Show the running mean of the total loss on the progress bar
        mean_loss = sum(losses) / len(losses)
        progress_bar.set_postfix(loss=mean_loss)

    return losses, all_box_loss, all_object_loss, all_no_object_loss, all_class_loss

In [16]:
def _extend_history(path, new_values):
    """Return the history stored at ``path`` with ``new_values`` appended.

    When no file exists at ``path`` yet (first run), the history starts with
    ``new_values`` instead of raising.
    """
    try:
        previous = np.load(path)
    except FileNotFoundError:
        return np.asarray(new_values)
    return np.concatenate((previous, new_values), axis=0)


# Number of passes over the training data
epochs = 20

# Alternative dataset configuration, kept for reference:
# train_dataset = Dataset(
#     csv_file="more data/train_3.csv",
#     image_dir="more data/dataset_resized_2/dataset_resized_2/Img",
#     label_dir="more data/dataset_resized_2/dataset_resized_2/Label",
#     anchors=ANCHORS,
#     transform=train_transform,
#     num_classes = 4
# )

# Training dataset
train_dataset = Dataset(
    csv_file="custom data/train.csv",
    image_dir="custom data/dataset_resized/dataset_resized/Img",
    label_dir="custom data/dataset_resized/dataset_resized/Label",
    anchors=ANCHORS,
    transform=train_transform,
    num_classes=4,
)

# Training data loader
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=4,
    num_workers=2,
    shuffle=True,
    pin_memory=True,
)

# Scale the anchors: each entry of `s` is broadcast to a (3, 2) block and
# multiplied element-wise with the matching row of ANCHORS.
scaled_anchors = (
    torch.tensor(ANCHORS)
    * torch.tensor(s).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
).to(device)

# Train, append this epoch's per-batch losses to the histories on disk, and
# save a checkpoint after every epoch.
for epoch in range(1, epochs + 1):
    print("Epoch:", epoch)
    (
        losses,
        all_box_loss,
        all_object_loss,
        all_no_object_loss,
        all_class_loss,
    ) = training_loop(
        train_loader, new_model, new_optimizer, loss_fn, scaler, scaled_anchors
    )

    # Read every stored history before writing any of them back.  The class
    # loss history is read from its own file, "all_class_loss.npy".
    losses = _extend_history("all_loss.npy", losses)
    all_box_loss = _extend_history("all_box_loss.npy", all_box_loss)
    all_object_loss = _extend_history("all_object_loss.npy", all_object_loss)
    all_no_object_loss = _extend_history("all_no_object_loss.npy", all_no_object_loss)
    all_class_loss = _extend_history("all_class_loss.npy", all_class_loss)

    for history_path, history in (
        ("all_loss.npy", losses),
        ("all_box_loss.npy", all_box_loss),
        ("all_object_loss.npy", all_object_loss),
        ("all_no_object_loss.npy", all_no_object_loss),
        ("all_class_loss.npy", all_class_loss),
    ):
        print(history.shape)
        np.save(history_path, history)

    # Saving the model
    if save_model:
        save_checkpoint(
            new_model,
            new_optimizer,
            filename="save model/checkpoint_custom_final.pth.tar",
        )

Epoch: 1


100%|██████████| 36/36 [00:15<00:00,  2.39it/s, loss=0.368]


(4102,)
(4102,)
(4102,)
(4102,)
(4102,)
==> Saving checkpoint
Epoch: 2


100%|██████████| 36/36 [00:14<00:00,  2.50it/s, loss=0.375]


(4138,)
(4138,)
(4138,)
(4138,)
(4138,)
==> Saving checkpoint
Epoch: 3


100%|██████████| 36/36 [00:15<00:00,  2.40it/s, loss=0.354]


(4174,)
(4174,)
(4174,)
(4174,)
(4174,)
==> Saving checkpoint
Epoch: 4


100%|██████████| 36/36 [00:16<00:00,  2.14it/s, loss=0.332]


(4210,)
(4210,)
(4210,)
(4210,)
(4210,)
==> Saving checkpoint
Epoch: 5


100%|██████████| 36/36 [00:14<00:00,  2.42it/s, loss=0.366]


(4246,)
(4246,)
(4246,)
(4246,)
(4246,)
==> Saving checkpoint
Epoch: 6


100%|██████████| 36/36 [00:15<00:00,  2.38it/s, loss=0.332]


(4282,)
(4282,)
(4282,)
(4282,)
(4282,)
==> Saving checkpoint
Epoch: 7


100%|██████████| 36/36 [00:14<00:00,  2.48it/s, loss=0.337]


(4318,)
(4318,)
(4318,)
(4318,)
(4318,)
==> Saving checkpoint
Epoch: 8


100%|██████████| 36/36 [00:16<00:00,  2.24it/s, loss=0.333]


(4354,)
(4354,)
(4354,)
(4354,)
(4354,)
==> Saving checkpoint
Epoch: 9


100%|██████████| 36/36 [00:14<00:00,  2.42it/s, loss=0.377]


(4390,)
(4390,)
(4390,)
(4390,)
(4390,)
==> Saving checkpoint
Epoch: 10


100%|██████████| 36/36 [00:14<00:00,  2.44it/s, loss=0.357]


(4426,)
(4426,)
(4426,)
(4426,)
(4426,)
==> Saving checkpoint
Epoch: 11


100%|██████████| 36/36 [00:14<00:00,  2.41it/s, loss=0.322]


(4462,)
(4462,)
(4462,)
(4462,)
(4462,)
==> Saving checkpoint
Epoch: 12


100%|██████████| 36/36 [00:14<00:00,  2.47it/s, loss=0.35] 


(4498,)
(4498,)
(4498,)
(4498,)
(4498,)
==> Saving checkpoint
Epoch: 13


100%|██████████| 36/36 [00:14<00:00,  2.42it/s, loss=0.334]


(4534,)
(4534,)
(4534,)
(4534,)
(4534,)
==> Saving checkpoint
Epoch: 14


100%|██████████| 36/36 [00:14<00:00,  2.48it/s, loss=0.326]


(4570,)
(4570,)
(4570,)
(4570,)
(4570,)
==> Saving checkpoint
Epoch: 15


100%|██████████| 36/36 [00:14<00:00,  2.42it/s, loss=0.309]


(4606,)
(4606,)
(4606,)
(4606,)
(4606,)
==> Saving checkpoint
Epoch: 16


100%|██████████| 36/36 [00:14<00:00,  2.48it/s, loss=0.311]


(4642,)
(4642,)
(4642,)
(4642,)
(4642,)
==> Saving checkpoint
Epoch: 17


100%|██████████| 36/36 [00:14<00:00,  2.46it/s, loss=0.29] 


(4678,)
(4678,)
(4678,)
(4678,)
(4678,)
==> Saving checkpoint
Epoch: 18


100%|██████████| 36/36 [00:14<00:00,  2.43it/s, loss=0.306]


(4714,)
(4714,)
(4714,)
(4714,)
(4714,)
==> Saving checkpoint
Epoch: 19


100%|██████████| 36/36 [00:15<00:00,  2.37it/s, loss=0.288]


(4750,)
(4750,)
(4750,)
(4750,)
(4750,)
==> Saving checkpoint
Epoch: 20


100%|██████████| 36/36 [00:14<00:00,  2.44it/s, loss=0.276]


(4786,)
(4786,)
(4786,)
(4786,)
(4786,)
==> Saving checkpoint


In [17]:
# After training, ask PyTorch to release the GPU memory it is caching but not
# currently using.
torch.cuda.empty_cache()