In [None]:
# 1. Install necessary libraries
!pip install torch torchvision numpy Pillow

# 2. Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image
import os
import numpy as np



Unzip Dataset

In [None]:
# Unzip the file. The -d . extracts contents directly into the current directory (/content/)
!unzip -q dataset.zip -d .

Install and Import Libraries

In [None]:
# Install required libraries
!pip install torch torchvision numpy Pillow

# Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image
import os
import numpy as np
import time




Create utils.py (Data & IoU Logic)

In [None]:
%%writefile utils.py
import torch
import numpy as np
import os
from PIL import Image

# --- UTILITY: Intersection over Union (IoU) ---
def intersection_over_union(box1, box2, box_format="midpoint"):
    """Calculate the Intersection over Union (IoU) of two sets of boxes.

    Args:
        box1: Tensor whose last dimension holds the four box values in
            positions 0..3 (any extra trailing values are ignored).
        box2: Tensor laid out like ``box1``; the leading dimensions must be
            broadcastable against those of ``box1``.
        box_format: ``"midpoint"`` for ``[x_c, y_c, w, h]`` boxes or
            ``"corners"`` for ``[x1, y1, x2, y2]`` boxes.

    Returns:
        Tensor of IoU values with a trailing dimension of size 1.

    Raises:
        ValueError: If ``box_format`` is not one of the supported formats.
    """
    if box_format == "midpoint":
        # Convert [x_c, y_c, w, h] to [x1, y1, x2, y2]
        box1_x1 = box1[..., 0:1] - box1[..., 2:3] / 2
        box1_y1 = box1[..., 1:2] - box1[..., 3:4] / 2
        box1_x2 = box1[..., 0:1] + box1[..., 2:3] / 2
        box1_y2 = box1[..., 1:2] + box1[..., 3:4] / 2

        box2_x1 = box2[..., 0:1] - box2[..., 2:3] / 2
        box2_y1 = box2[..., 1:2] - box2[..., 3:4] / 2
        box2_x2 = box2[..., 0:1] + box2[..., 2:3] / 2
        box2_y2 = box2[..., 1:2] + box2[..., 3:4] / 2
    elif box_format == "corners":
        # Already [x1, y1, x2, y2]; keep the trailing dim so shapes match the midpoint path
        box1_x1, box1_y1 = box1[..., 0:1], box1[..., 1:2]
        box1_x2, box1_y2 = box1[..., 2:3], box1[..., 3:4]

        box2_x1, box2_y1 = box2[..., 0:1], box2[..., 1:2]
        box2_x2, box2_y2 = box2[..., 2:3], box2[..., 3:4]
    else:
        # Previously an unknown format fell through and failed later with an
        # UnboundLocalError; fail early with an actionable message instead.
        raise ValueError(
            f"Unsupported box_format {box_format!r}; expected 'midpoint' or 'corners'."
        )

    # Find coordinates of intersection area
    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # clamp(0) makes non-overlapping boxes contribute zero area instead of a negative one
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    # Union area
    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
    union = box1_area + box2_area - intersection + 1e-6  # Add epsilon for stability

    return intersection / union

# --- CUSTOM DATASET CLASS (Converts YOLO labels to Grid Tensor) ---
class CustomYOLODataset(torch.utils.data.Dataset):
    """Dataset that pairs images with YOLO-format labels encoded as a grid tensor.

    Expects ``data_path`` to contain an ``images`` folder with ``.jpg`` files and
    a ``labels`` folder with one ``<image stem>.txt`` file per image. Each label
    line is ``class_id x_c y_c w h``. An image without a label file yields an
    all-zero target.

    Each sample is ``(image, target)`` where ``target`` has shape
    ``(S, S, C + 5 * B)`` and every cell is laid out as
    ``[x_cell, y_cell, w, h, confidence, class one-hot ...]``.

    Args:
        data_path: Root folder holding the ``images`` and ``labels`` sub-folders.
        S: Grid size (the target has S x S cells).
        B: Boxes per cell; only the first box slot of a cell is populated.
        C: Number of classes.
        transform: Optional callable applied to the RGB PIL image.

    Raises:
        FileNotFoundError: If the ``images`` folder does not exist.
    """

    def __init__(self, data_path, S=7, B=1, C=2, transform=None):
        self.image_dir = os.path.join(data_path, 'images')
        self.label_dir = os.path.join(data_path, 'labels')
        self.transform = transform

        # os.listdir order is arbitrary; sort so that an index always maps to
        # the same sample across runs and machines.
        self.image_files = sorted(
            f for f in os.listdir(self.image_dir) if f.lower().endswith('.jpg')
        )

        self.S = S  # Grid Size
        self.B = B  # Boxes per cell
        self.C = C  # Classes (e.g., person, car)

    def __len__(self):
        """Return the number of images found in the images folder."""
        return len(self.image_files)

    def _read_boxes(self, label_path):
        """Parse a label file into ``[class_id, x_c, y_c, w, h]`` rows (empty if the file is missing)."""
        boxes = []
        if not os.path.exists(label_path):
            return boxes

        with open(label_path, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                if not fields:
                    # Blank/trailing lines are common in label files; skip them
                    continue
                if len(fields) != 5:
                    raise ValueError(
                        f"{label_path}:{line_no}: expected 5 values "
                        f"(class x_c y_c w h), got {len(fields)}"
                    )
                # Class x_c y_c w h are normalized to 0-1
                class_id, x_c, y_c, w, h = map(float, fields)
                boxes.append([int(class_id), x_c, y_c, w, h])
        return boxes

    def _encode_boxes(self, boxes):
        """Convert normalized boxes into the ``(S, S, C + 5 * B)`` grid target tensor."""
        target_tensor = torch.zeros((self.S, self.S, self.C + 5 * self.B))

        for class_id, x_c, y_c, w, h in boxes:
            class_id = int(class_id)
            if not 0 <= class_id < self.C:
                # A negative id would silently overwrite box/confidence slots
                # through negative indexing, so reject it explicitly.
                raise ValueError(
                    f"class_id {class_id} is outside the valid range [0, {self.C - 1}]"
                )

            # i, j are the grid cell coordinates (row, column), clamped into the
            # grid so centres at exactly 1.0 (or slightly out of range) stay valid.
            i = min(max(int(self.S * y_c), 0), self.S - 1)
            j = min(max(int(self.S * x_c), 0), self.S - 1)

            # x_cell, y_cell are coords relative to the cell (0 to 1)
            x_cell = self.S * x_c - j
            y_cell = self.S * y_c - i

            # One object per cell: the first box that lands in a cell wins
            if target_tensor[i, j, 4] == 0:
                target_tensor[i, j, 4] = 1.0  # Set confidence
                target_tensor[i, j, 0:4] = torch.tensor([x_cell, y_cell, w, h])
                target_tensor[i, j, 5 + class_id] = 1.0  # One-hot class encoding

        return target_tensor

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target_tensor)``."""
        img_name = self.image_files[idx]
        # splitext swaps only the real extension (and works for '.JPG' too)
        label_name = os.path.splitext(img_name)[0] + '.txt'

        img_path = os.path.join(self.image_dir, img_name)
        label_path = os.path.join(self.label_dir, label_name)

        # convert() returns a fully loaded copy, so the file handle can be closed right away
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        boxes = self._read_boxes(label_path)

        if self.transform:
            image = self.transform(image)

        return image, self._encode_boxes(boxes)

Writing utils.py


Create model.py (Architecture)

In [None]:
%%writefile model.py
import torch
import torch.nn as nn

class BasicYOLODetector(nn.Module):
    """Small YOLO-style detector: a CNN backbone followed by a fully connected head.

    The backbone downsamples the input 16x (one stride-2 conv and three 2x2
    max-pools) and then adaptively pools the result to an exact ``S x S`` map,
    so the head's input width (``256 * S * S``) is valid for any input
    resolution large enough to survive the downsampling.

    Args:
        in_channels: Number of channels in the input image.
        S: Grid size of the prediction.
        B: Boxes predicted per grid cell.
        C: Number of classes.
    """

    def __init__(self, in_channels=3, S=7, B=1, C=2):
        super(BasicYOLODetector, self).__init__()
        self.S = S
        self.B = B
        self.C = C

        # --- Simplified CNN Backbone ---
        self.backbone = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            # The layers above only reduce the resolution 16x (e.g. 448 -> 28),
            # which is not S. Pool to exactly (S, S) so the Linear layer below
            # always receives 256 * S * S features.
            nn.AdaptiveAvgPool2d((self.S, self.S)),  # Output feature map: (256, S, S)
        )

        # --- Detection Head ---
        # Predicts (S*S) * (C + 5*B) outputs
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * self.S * self.S, 4096),
            nn.Dropout(0.5),
            nn.LeakyReLU(0.1),
            nn.Linear(4096, self.S * self.S * (self.C + 5 * self.B))
        )

    def forward(self, x):
        """Return predictions of shape ``(Batch, S, S, C + 5*B)`` for an image batch ``x``."""
        x = self.backbone(x)
        x = self.head(x)

        # Reshape to final grid format: (Batch, S, S, C + 5*B)
        return x.reshape(-1, self.S, self.S, self.C + 5 * self.B)

Writing model.py


Create train.py (Loss & Training Loop)


In [None]:
%%writefile train.py
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from time import time as t

# Import modules created above
from model import BasicYOLODetector
from utils import CustomYOLODataset, intersection_over_union

# --- HYPERPARAMETERS ---
# Train on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
LEARNING_RATE = 1e-4  # Passed to Adam as `lr`
BATCH_SIZE = 8  # Samples per DataLoader batch
WEIGHT_DECAY = 5e-4  # Passed to Adam as `weight_decay`
EPOCHS = 50  # Number of full passes over the training loader
NUM_CLASSES = 2  # Adjust if you have more classes (e.g., 3 for person, car, dog)
GRID_SIZE_S = 7  # Passed as S to the dataset, the model and the loss
BOXES_PER_CELL_B = 1  # Passed as B to the dataset, the model and the loss
IMG_SIZE = 448  # Every image is resized to IMG_SIZE x IMG_SIZE before batching
# --- DATA PATH ---
# Folder expected to contain the 'images' and 'labels' subfolders.
TRAIN_DATA_PATH = "./train" # Points to the unzipped folder in /content/


# --- LOSS FUNCTION ---
class YoloLoss(nn.Module):
    """Sum-of-squares YOLO loss for a single box per grid cell.

    Both inputs are indexed on their last dimension as
    ``[x, y, w, h, confidence, class scores ...]`` and are expected to share the
    shape ``(batch, S, S, C + 5 * B)``. The loss is the sum of four terms,
    divided by the number of samples in the batch:

    * box coordinates (weight ``lambda_coord``), with width/height compared in
      square-root space, for cells that contain an object;
    * confidence for cells that contain an object;
    * confidence for cells without an object (weight ``lambda_noobj``);
    * class scores for cells that contain an object.

    Args:
        S: Grid size (stored; not used by the computation itself).
        B: Boxes per cell (stored; only the first box slot is scored).
        C: Number of classes, i.e. the width of the class slice.
    """

    def __init__(self, S=7, B=1, C=2):
        super(YoloLoss, self).__init__()
        self.mse = nn.MSELoss(reduction="sum")
        self.S = S
        self.B = B
        self.C = C
        self.lambda_coord = 5.0
        self.lambda_noobj = 0.5

    def forward(self, predictions, target):
        """Return the scalar loss for ``predictions`` against ``target``."""
        # Follow the predictions' device instead of a module-level constant so
        # the loss works wherever the model happens to live.
        target = target.to(predictions.device)

        # 1 where a cell contains an object, 0 elsewhere; kept as (..., 1) for broadcasting
        object_mask = target[..., 4:5]
        no_object_mask = 1 - object_mask

        # 1. BOX COORDINATE LOSS (Weighted)
        masked_pred_box = object_mask * predictions[..., 0:4]
        masked_target_box = object_mask * target[..., 0:4]

        # Use square root for width and height. The epsilon sits outside abs()
        # so the argument of sqrt can never be exactly zero (infinite gradient),
        # and sign() keeps the direction of negative raw predictions.
        pred_wh = torch.sign(masked_pred_box[..., 2:4]) * torch.sqrt(
            torch.abs(masked_pred_box[..., 2:4]) + 1e-6
        )
        target_wh = torch.sqrt(masked_target_box[..., 2:4])

        # Build new tensors rather than writing into slices in place
        box_predictions = torch.cat([masked_pred_box[..., 0:2], pred_wh], dim=-1)
        box_targets = torch.cat([masked_target_box[..., 0:2], target_wh], dim=-1)
        box_loss = self.mse(box_predictions, box_targets)

        # 2. CONFIDENCE LOSS (NO object, Weighted)
        no_object_loss = self.mse(
            no_object_mask * predictions[..., 4:5],
            no_object_mask * target[..., 4:5],
        )

        # 3. CONFIDENCE LOSS (WITH object)
        object_loss = self.mse(
            object_mask * predictions[..., 4:5],
            object_mask * target[..., 4:5],
        )

        # 4. CLASSIFICATION LOSS
        class_loss = self.mse(
            object_mask * predictions[..., 5:5 + self.C],
            object_mask * target[..., 5:5 + self.C],
        )

        # --- TOTAL LOSS --- (Normalized by the actual batch size, so a smaller
        # final batch is weighted per sample exactly like a full one)
        batch_size = max(predictions.shape[0], 1)
        total_loss = (
            self.lambda_coord * box_loss
            + object_loss
            + self.lambda_noobj * no_object_loss
            + class_loss
        ) / batch_size

        return total_loss

# --- MAIN TRAINING FUNCTION ---

def train_fn(train_loader, model, optimizer, loss_fn):
    """Run one training epoch and return the mean loss per batch.

    Args:
        train_loader: Iterable yielding ``(image, target)`` batches.
        model: Module to train; it is switched to training mode.
        optimizer: Optimizer that updates ``model``'s parameters.
        loss_fn: Callable ``loss_fn(predictions, target)`` returning a scalar tensor.

    Returns:
        The average of the per-batch loss values as a Python float.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    model.train()

    # Send batches to wherever the model lives rather than relying on a global constant
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    running_loss = 0.0
    num_batches = 0

    for image, target in train_loader:
        image = image.to(device)
        target = target.to(device)

        predictions = model(image)
        loss = loss_fn(predictions, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        # Previously this surfaced as an opaque ZeroDivisionError
        raise ValueError("train_loader yielded no batches; nothing to train on.")

    return running_loss / num_batches


if __name__ == "__main__":
    # Script entry point: build the dataset and loader, train for EPOCHS epochs,
    # then save the model weights to disk.

    # Resize every image to a fixed square and convert it to a float tensor
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
    ])

    try:
        train_dataset = CustomYOLODataset(
            data_path=TRAIN_DATA_PATH,
            S=GRID_SIZE_S,
            B=BOXES_PER_CELL_B,
            C=NUM_CLASSES,
            transform=transform
        )
    except FileNotFoundError:
        print("\n--- ERROR: Dataset not found ---")
        print(f"Please check if the TRAIN_DATA_PATH: {TRAIN_DATA_PATH} is correct and contains 'images' and 'labels' subfolders.")
        # Exit with a non-zero status so the caller can tell the run failed;
        # SystemExit also avoids relying on the interactive-only exit() helper.
        raise SystemExit(1)

    if len(train_dataset) == 0:
        # A shuffling DataLoader rejects an empty dataset with a less helpful error
        print("\n--- ERROR: Dataset is empty ---")
        print(f"No images were found in the 'images' subfolder of TRAIN_DATA_PATH: {TRAIN_DATA_PATH}.")
        raise SystemExit(1)

    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=2,
        # Pinned host memory only helps host-to-GPU copies; skip it on CPU runs
        pin_memory=(DEVICE == "cuda")
    )

    model = BasicYOLODetector(S=GRID_SIZE_S, B=BOXES_PER_CELL_B, C=NUM_CLASSES).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss(S=GRID_SIZE_S, B=BOXES_PER_CELL_B, C=NUM_CLASSES)

    print(f"Starting training on {DEVICE} for {EPOCHS} epochs...")
    start_time = t()

    for epoch in range(EPOCHS):
        epoch_start_time = t()
        avg_loss = train_fn(train_loader, model, optimizer, loss_fn)
        epoch_time = t() - epoch_start_time
        print(f"--- Epoch {epoch + 1}/{EPOCHS} --- Loss: {avg_loss:.4f} (Time: {epoch_time:.2f}s)")

    total_training_time = t() - start_time
    # --- FINAL STEP: SAVE THE TRAINED MODEL WEIGHTS ---
    MODEL_SAVE_PATH = "basic_detector_model.pth"
    torch.save(model.state_dict(), MODEL_SAVE_PATH)
    print(f"\nTraining finished! Total time: {total_training_time:.2f}s")
    print(f"Model weights saved to: /content/{MODEL_SAVE_PATH}")

Writing train.py


Training

In [None]:
!python train.py

Starting training on cuda for 50 epochs...
Traceback (most recent call last):
  File "/content/train.py", line 141, in <module>
    avg_loss = train_fn(train_loader, model, optimizer, loss_fn)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/content/train.py", line 90, in train_fn
    predictions = model(image)
                  ^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1784, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/content/model.py", line 36, in forward
    x = self.head(x)
        ^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
     

Fixing Error: Match the Head's Input Size to the Backbone Output

In [None]:
%%writefile model.py
import torch
import torch.nn as nn

class BasicYOLODetector(nn.Module):
    """Small YOLO-style detector: a CNN backbone followed by a fully connected head.

    The head's first Linear layer is sized from the backbone's real output for
    an ``img_size x img_size`` input, so the model must be fed images of the
    resolution it was built for.

    Args:
        in_channels: Number of channels in the input image.
        S: Grid size of the prediction.
        B: Boxes predicted per grid cell.
        C: Number of classes.
        img_size: Side length of the square input images. The default of 448
            reproduces the previous fixed 256 * 28 * 28 head input.
    """

    def __init__(self, in_channels=3, S=7, B=1, C=2, img_size=448):
        super(BasicYOLODetector, self).__init__()
        self.S = S
        self.B = B
        self.C = C
        self.img_size = img_size

        # --- Simplified CNN Backbone ---
        # Spatial sizes in the comments are for the default 448x448 input.
        self.backbone = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # Output size: 112x112
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # Output size: 56x56
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # Output size: 28x28 (Final Feature Map)
        )

        # --- Detection Head ---

        # Measure the flattened backbone output with a dummy forward pass
        # instead of hard-coding 256 * 28 * 28, which is only right for 448px input.
        with torch.no_grad():
            probe = torch.zeros(1, in_channels, img_size, img_size)
            final_feature_size = self.backbone(probe).numel()
        output_size = self.S * self.S * (self.C + 5 * self.B)

        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(final_feature_size, 4096),
            nn.Dropout(0.5),
            nn.LeakyReLU(0.1),
            nn.Linear(4096, output_size)
        )

    def forward(self, x):
        """Return predictions of shape ``(Batch, S, S, C + 5*B)`` for an image batch ``x``."""
        x = self.backbone(x)
        x = self.head(x)

        return x.reshape(-1, self.S, self.S, self.C + 5 * self.B)

Overwriting model.py


Rerun

In [None]:
!python train.py

Starting training on cuda for 50 epochs...
Traceback (most recent call last):
  File "/content/train.py", line 141, in <module>
    avg_loss = train_fn(train_loader, model, optimizer, loss_fn)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/content/train.py", line 95, in train_fn
    optimizer.step()
  File "/usr/local/lib/python3.12/dist-packages/torch/optim/optimizer.py", line 516, in wrapper
    out = func(*args, **kwargs)
          ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/optim/optimizer.py", line 81, in _use_grad
    ret = func(*args, **kwargs)
          ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/optim/adam.py", line 247, in step
    adam(
  File "/usr/local/lib/python3.12/dist-packages/torch/optim/optimizer.py", line 149, in maybe_fallback
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/optim/adam.py", line 949, in a

Reducing Batch Size and Image Size

In [None]:
%%writefile train.py
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from time import time as t

# Import modules created above
from model import BasicYOLODetector
from utils import CustomYOLODataset, intersection_over_union

# --- HYPERPARAMETERS ---
# Train on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
LEARNING_RATE = 1e-4  # Passed to Adam as `lr`
BATCH_SIZE = 4  # Samples per DataLoader batch
WEIGHT_DECAY = 5e-4  # Passed to Adam as `weight_decay`
EPOCHS = 50  # Number of full passes over the training loader
NUM_CLASSES = 2  # Adjust if you have more classes (e.g., 3 for person, car, dog)
GRID_SIZE_S = 7  # Passed as S to the dataset, the model and the loss
BOXES_PER_CELL_B = 1  # Passed as B to the dataset, the model and the loss
IMG_SIZE = 224  # Every image is resized to IMG_SIZE x IMG_SIZE before batching
# --- DATA PATH ---
# Folder expected to contain the 'images' and 'labels' subfolders.
TRAIN_DATA_PATH = "./train" # Points to the unzipped folder in /content/


# --- LOSS FUNCTION ---
class YoloLoss(nn.Module):
    """Sum-of-squares YOLO loss for a single box per grid cell.

    Both inputs are indexed on their last dimension as
    ``[x, y, w, h, confidence, class scores ...]`` and are expected to share the
    shape ``(batch, S, S, C + 5 * B)``. The loss is the sum of four terms,
    divided by the number of samples in the batch:

    * box coordinates (weight ``lambda_coord``), with width/height compared in
      square-root space, for cells that contain an object;
    * confidence for cells that contain an object;
    * confidence for cells without an object (weight ``lambda_noobj``);
    * class scores for cells that contain an object.

    Args:
        S: Grid size (stored; not used by the computation itself).
        B: Boxes per cell (stored; only the first box slot is scored).
        C: Number of classes, i.e. the width of the class slice.
    """

    def __init__(self, S=7, B=1, C=2):
        super(YoloLoss, self).__init__()
        self.mse = nn.MSELoss(reduction="sum")
        self.S = S
        self.B = B
        self.C = C
        self.lambda_coord = 5.0
        self.lambda_noobj = 0.5

    def forward(self, predictions, target):
        """Return the scalar loss for ``predictions`` against ``target``."""
        # Follow the predictions' device instead of a module-level constant so
        # the loss works wherever the model happens to live.
        target = target.to(predictions.device)

        # 1 where a cell contains an object, 0 elsewhere; kept as (..., 1) for broadcasting
        object_mask = target[..., 4:5]
        no_object_mask = 1 - object_mask

        # 1. BOX COORDINATE LOSS (Weighted)
        masked_pred_box = object_mask * predictions[..., 0:4]
        masked_target_box = object_mask * target[..., 0:4]

        # Use square root for width and height. The epsilon sits outside abs()
        # so the argument of sqrt can never be exactly zero (infinite gradient),
        # and sign() keeps the direction of negative raw predictions.
        pred_wh = torch.sign(masked_pred_box[..., 2:4]) * torch.sqrt(
            torch.abs(masked_pred_box[..., 2:4]) + 1e-6
        )
        target_wh = torch.sqrt(masked_target_box[..., 2:4])

        # Build new tensors rather than writing into slices in place
        box_predictions = torch.cat([masked_pred_box[..., 0:2], pred_wh], dim=-1)
        box_targets = torch.cat([masked_target_box[..., 0:2], target_wh], dim=-1)
        box_loss = self.mse(box_predictions, box_targets)

        # 2. CONFIDENCE LOSS (NO object, Weighted)
        no_object_loss = self.mse(
            no_object_mask * predictions[..., 4:5],
            no_object_mask * target[..., 4:5],
        )

        # 3. CONFIDENCE LOSS (WITH object)
        object_loss = self.mse(
            object_mask * predictions[..., 4:5],
            object_mask * target[..., 4:5],
        )

        # 4. CLASSIFICATION LOSS
        class_loss = self.mse(
            object_mask * predictions[..., 5:5 + self.C],
            object_mask * target[..., 5:5 + self.C],
        )

        # --- TOTAL LOSS --- (Normalized by the actual batch size, so a smaller
        # final batch is weighted per sample exactly like a full one)
        batch_size = max(predictions.shape[0], 1)
        total_loss = (
            self.lambda_coord * box_loss
            + object_loss
            + self.lambda_noobj * no_object_loss
            + class_loss
        ) / batch_size

        return total_loss

# --- MAIN TRAINING FUNCTION ---

def train_fn(train_loader, model, optimizer, loss_fn):
    """Run one training epoch and return the mean loss per batch.

    Args:
        train_loader: Iterable yielding ``(image, target)`` batches.
        model: Module to train; it is switched to training mode.
        optimizer: Optimizer that updates ``model``'s parameters.
        loss_fn: Callable ``loss_fn(predictions, target)`` returning a scalar tensor.

    Returns:
        The average of the per-batch loss values as a Python float.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    model.train()

    # Send batches to wherever the model lives rather than relying on a global constant
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    running_loss = 0.0
    num_batches = 0

    for image, target in train_loader:
        image = image.to(device)
        target = target.to(device)

        predictions = model(image)
        loss = loss_fn(predictions, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        # Previously this surfaced as an opaque ZeroDivisionError
        raise ValueError("train_loader yielded no batches; nothing to train on.")

    return running_loss / num_batches


if __name__ == "__main__":
    # Script entry point: build the dataset and loader, train for EPOCHS epochs,
    # then save the model weights to disk.

    # Resize every image to a fixed square and convert it to a float tensor
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
    ])

    try:
        train_dataset = CustomYOLODataset(
            data_path=TRAIN_DATA_PATH,
            S=GRID_SIZE_S,
            B=BOXES_PER_CELL_B,
            C=NUM_CLASSES,
            transform=transform
        )
    except FileNotFoundError:
        print("\n--- ERROR: Dataset not found ---")
        print(f"Please check if the TRAIN_DATA_PATH: {TRAIN_DATA_PATH} is correct and contains 'images' and 'labels' subfolders.")
        # Exit with a non-zero status so the caller can tell the run failed;
        # SystemExit also avoids relying on the interactive-only exit() helper.
        raise SystemExit(1)

    if len(train_dataset) == 0:
        # A shuffling DataLoader rejects an empty dataset with a less helpful error
        print("\n--- ERROR: Dataset is empty ---")
        print(f"No images were found in the 'images' subfolder of TRAIN_DATA_PATH: {TRAIN_DATA_PATH}.")
        raise SystemExit(1)

    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=2,
        # Pinned host memory only helps host-to-GPU copies; skip it on CPU runs
        pin_memory=(DEVICE == "cuda")
    )

    model = BasicYOLODetector(S=GRID_SIZE_S, B=BOXES_PER_CELL_B, C=NUM_CLASSES).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss(S=GRID_SIZE_S, B=BOXES_PER_CELL_B, C=NUM_CLASSES)

    print(f"Starting training on {DEVICE} for {EPOCHS} epochs...")
    start_time = t()

    for epoch in range(EPOCHS):
        epoch_start_time = t()
        avg_loss = train_fn(train_loader, model, optimizer, loss_fn)
        epoch_time = t() - epoch_start_time
        print(f"--- Epoch {epoch + 1}/{EPOCHS} --- Loss: {avg_loss:.4f} (Time: {epoch_time:.2f}s)")

    total_training_time = t() - start_time
    # --- FINAL STEP: SAVE THE TRAINED MODEL WEIGHTS ---
    MODEL_SAVE_PATH = "basic_detector_model.pth"
    torch.save(model.state_dict(), MODEL_SAVE_PATH)
    print(f"\nTraining finished! Total time: {total_training_time:.2f}s")
    print(f"Model weights saved to: /content/{MODEL_SAVE_PATH}")

Overwriting train.py


In [None]:
!python train.py

Starting training on cuda for 50 epochs...
Traceback (most recent call last):
  File "/content/train.py", line 141, in <module>
    avg_loss = train_fn(train_loader, model, optimizer, loss_fn)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/content/train.py", line 90, in train_fn
    predictions = model(image)
                  ^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1784, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/content/model.py", line 40, in forward
    x = self.head(x)
        ^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
     

Fixing dimension mismatch error

In [None]:
%%writefile model.py
import torch
import torch.nn as nn

class BasicYOLODetector(nn.Module):
    """Small YOLO-style detector: a CNN backbone followed by a fully connected head.

    The head's first Linear layer is sized from the backbone's real output for
    an ``img_size x img_size`` input. Inputs that produce a different number of
    backbone features are rejected with a descriptive error.

    Args:
        in_channels: Number of channels in the input image.
        S: Grid size of the prediction.
        B: Boxes predicted per grid cell.
        C: Number of classes.
        img_size: Side length of the square input images. The default of 224
            reproduces the previous fixed 256 * 14 * 14 head input, so layer
            shapes are unchanged for existing callers.
    """

    def __init__(self, in_channels=3, S=7, B=1, C=2, img_size=224):
        super(BasicYOLODetector, self).__init__()
        self.S = S
        self.B = B
        self.C = C
        self.img_size = img_size

        # --- Simplified CNN Backbone (UNCHANGED LAYERS) ---
        self.backbone = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )

        # --- Detection Head ---
        # The input size must match the actual output of the backbone: 256 * H * W.
        # Measure it with a dummy forward pass rather than hard-coding 14 * 14,
        # which is only correct for 224x224 images.
        with torch.no_grad():
            dummy_input = torch.zeros(1, in_channels, img_size, img_size)
            self.feature_size = self.backbone(dummy_input).flatten(1).shape[1]
        output_size = self.S * self.S * (self.C + 5 * self.B)

        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.feature_size, 4096),
            nn.Dropout(0.5),
            nn.LeakyReLU(0.1),
            nn.Linear(4096, output_size)
        )

    def forward(self, x):
        """Return predictions of shape ``(Batch, S, S, C + 5*B)`` for an image batch ``x``.

        Raises:
            ValueError: If ``x`` does not produce the number of backbone
                features the head was built for.
        """
        features = self.backbone(x)

        # Fail with an actionable message instead of a matmul shape error
        got = features.flatten(1).shape[1]
        if got != self.feature_size:
            raise ValueError(
                f"Backbone produced {got} features per sample but the head expects "
                f"{self.feature_size}; build the model with img_size matching the "
                f"input resolution (currently img_size={self.img_size})."
            )

        out = self.head(features)
        return out.reshape(-1, self.S, self.S, self.C + 5 * self.B)

Overwriting model.py


Start Training

In [None]:
!python train.py

Starting training on cuda for 50 epochs...
--- Epoch 1/50 --- Loss: 9.4470 (Time: 22.99s)
--- Epoch 2/50 --- Loss: 7.1794 (Time: 22.52s)
--- Epoch 3/50 --- Loss: 5.9684 (Time: 22.69s)
--- Epoch 4/50 --- Loss: 4.9764 (Time: 22.23s)
--- Epoch 5/50 --- Loss: 4.1942 (Time: 22.28s)
--- Epoch 6/50 --- Loss: 3.6067 (Time: 22.29s)
--- Epoch 7/50 --- Loss: 3.1940 (Time: 22.29s)
--- Epoch 8/50 --- Loss: 3.4673 (Time: 22.38s)
--- Epoch 9/50 --- Loss: 3.0225 (Time: 22.27s)
--- Epoch 10/50 --- Loss: 2.4428 (Time: 22.25s)
--- Epoch 11/50 --- Loss: 2.3059 (Time: 22.27s)
--- Epoch 12/50 --- Loss: 2.1518 (Time: 22.27s)
--- Epoch 13/50 --- Loss: 1.9649 (Time: 22.17s)
--- Epoch 14/50 --- Loss: 1.8396 (Time: 22.31s)
--- Epoch 15/50 --- Loss: 1.8856 (Time: 22.36s)
--- Epoch 16/50 --- Loss: 1.7175 (Time: 22.30s)
--- Epoch 17/50 --- Loss: 1.6137 (Time: 22.12s)
--- Epoch 18/50 --- Loss: 1.5093 (Time: 22.32s)
--- Epoch 19/50 --- Loss: 1.4675 (Time: 22.27s)
--- Epoch 20/50 --- Loss: 1.3952 (Time: 22.29s)
--- Ep

Downloading the model

In [None]:
# Colab-only helper: hand the saved weights file to google.colab's download
# utility so it can be fetched from the notebook VM.
from google.colab import files
files.download('/content/basic_detector_model.pth')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>