### Setup

In [1]:
# The notebook is intended to be run inside a tensorflow docker container
# To run without container, run: %pip install torch torchvision
# Additional packages
# %pip install matplotlib numpy pillow opencv-python-headless torchvision torch>=1.0 tensorflow tensorboard terminaltables tqdm
%pip install matplotlib
%pip install numpy
%pip install pillow
%pip install opencv-python-headless
# %pip install torchvision
# %pip install torch>=1.0
# %pip install tensorflow
# %pip install tensorboard
%pip install terminaltables
%pip install tqdm

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting terminaltables
  Downloading terminaltables-3.1.10-py2.py3-none-any.whl (15 kB)
Installing collected packages: terminaltables
Successfully installed terminaltables-3.1.10
Note: you may need to restart the kernel to use updated packages.
Collecting tqdm
  Downloading tqdm-4.66.1-py3-none-any.whl (78 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.3/78.3 KB 2.1 MB/s eta 0:00:00
Installing collected packages: tqdm
Successfully installed tqdm-4.66.1
Note: you may need to restart the kernel to use updated packages.


### Variables


In [4]:
# Notebook-wide constants
IMG_SIZE = 250  # Edge length in pixels; images are always square
RGB_MAX = 255.0  # Maximum value of an RGB channel
BASE_DIR = '/app/project/'  # Project directory as mounted inside the container

In [5]:
import os
# Filesystem layout for weights and data, all rooted at BASE_DIR
weights_dir = os.path.join(BASE_DIR, 'weights')
# One weights location per RICO variant, keyed by the variant name
rico_datasets = {
    variant: os.path.join(weights_dir, variant)
    for variant in ("rico", "rico2k", "rico5box", "rico10k", "ricotext")
}
base_data_dir = os.path.join(BASE_DIR, 'tmp', 'data')
base_train_dir, base_test_dir = (
    os.path.join(base_data_dir, split) for split in ('train', 'test')
)
# File suffix used for each kind of sample file
extensions = dict(image='.jpg', label='.txt')
# Widget type -> human-readable class name and numeric class index
classes = {
    widget: {"name": name, "index": index}
    for index, (widget, name) in enumerate([
        ("lv_btn", "button"),
        ("lv_checkbox", "checkbox"),
        ("lv_label", "label"),
        ("lv_slider", "slider"),
        ("lv_switch", "switch"),
    ])
}
# Parallel lists in class-index order
class_names = [entry["name"] for entry in classes.values()]
widget_names = list(classes)

### Functions

### Data import

In [6]:
import torch
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [7]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image

class YoloDataset(Dataset):
    """Dataset pairing images with YOLO-format label files.

    Each image in ``image_folder`` is matched to the label file in ``label_folder``
    that has the same file name without extension (e.g. ``foo.jpg`` <-> ``foo.txt``).
    A label file holds one box per line as five whitespace-separated numbers:
    ``class_id x_center y_center width height``. Coordinates are assumed to be
    normalized by the image width/height, i.e. to lie in ``[0, 1]``.

    Args:
        image_folder: Directory containing the image files.
        label_folder: Directory containing the label files.
        S: Grid sizes, one per detection scale.
        B: Number of anchor slots per grid cell.
        C: Number of classes.
        transform: Optional callable applied to the loaded RGB PIL image.

    Each sample is ``(image, targets)`` where ``targets`` is a list with one
    tensor of shape ``((C + 5) * B, s, s)`` per scale ``s`` in ``S``. Inside a
    cell, anchor slot ``a`` occupies channels ``a * (C + 5)`` up to
    ``(a + 1) * (C + 5)``, laid out as
    ``[x_center, y_center, width, height, confidence, one-hot class (C values)]``.
    """

    def __init__(self, image_folder, label_folder, S=(13, 26, 52), B=3, C=80, transform=None):
        # Sorted so the sample order does not depend on the filesystem's listing order.
        self.image_files = sorted(os.path.join(image_folder, x) for x in os.listdir(image_folder))
        self.label_files = sorted(os.path.join(label_folder, x) for x in os.listdir(label_folder))
        # File name without extension -> label path, used to pair images with labels.
        self._label_by_stem = {
            os.path.splitext(os.path.basename(path))[0]: path for path in self.label_files
        }
        self.image_size = 416  # YOLOv3 uses 416x416 images
        self.S = list(S)  # List of scales (copied so the caller's sequence is never shared)
        self.B = B  # Number of bounding boxes
        self.C = C  # Number of classes
        self.transform = transform

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, index):
        """Return ``(image, targets)`` for the image at ``index``.

        Raises:
            FileNotFoundError: If no label file matches the image's file name.
            ValueError: If a label line is malformed or its class id is outside ``[0, C)``.
        """
        image_file = self.image_files[index]
        image = self._load_image(image_file)
        if self.transform:
            image = self.transform(image)

        boxes = self._load_boxes(self._label_path_for(image_file))
        return image, self._encode_targets(boxes)

    def _load_image(self, image_file):
        """Open an image file and return it as an RGB PIL image."""
        # The context manager releases the file handle; convert() returns an independent copy.
        with Image.open(image_file) as raw_image:
            return raw_image.convert('RGB')

    def _label_path_for(self, image_file):
        """Return the label file whose name (without extension) matches the image's."""
        stem = os.path.splitext(os.path.basename(image_file))[0]
        try:
            return self._label_by_stem[stem]
        except KeyError:
            raise FileNotFoundError(f"No label file found for image '{image_file}'") from None

    def _load_boxes(self, label_file):
        """Parse a label file into ``[class_id, x_center, y_center, width, height, confidence]`` rows."""
        boxes = []
        with open(label_file) as f:
            for line_number, line in enumerate(f, start=1):  # Each non-blank line is a box
                fields = line.split()
                if not fields:
                    continue  # Tolerate blank lines, e.g. a trailing newline at end of file
                if len(fields) != 5:
                    raise ValueError(
                        f"{label_file}:{line_number}: expected 5 values per box, got {len(fields)}"
                    )
                class_id, x_center, y_center, width, height = (float(x) for x in fields)
                # Ground-truth boxes always carry confidence 1.
                boxes.append([int(class_id), x_center, y_center, width, height, 1])
        return boxes

    def _encode_targets(self, boxes):
        """Write the boxes into freshly zeroed target tensors, one tensor per scale."""
        channels_per_anchor = self.C + 5
        targets = [torch.zeros(channels_per_anchor * self.B, s, s) for s in self.S]

        for class_id, x_center, y_center, width, height, confidence in boxes:
            class_id = int(class_id)  # Tensor indices must be integers
            if not 0 <= class_id < self.C:
                raise ValueError(f"class id {class_id} is outside the valid range [0, {self.C})")

            for scale_idx, s in enumerate(self.S):
                # Grid cell containing the box centre. Clamped because a centre of exactly
                # 1.0 would otherwise map to index s, one past the last cell.
                row = min(max(int(s * y_center), 0), s - 1)
                col = min(max(int(s * x_center), 0), s - 1)
                # Simple placeholder anchor assignment: the scale index selects the anchor
                # slot (wrapped to stay within B). This is not IoU-based anchor matching.
                base = (scale_idx % self.B) * channels_per_anchor

                target = targets[scale_idx]
                target[base:base + 5, row, col] = torch.tensor(
                    [x_center, y_center, width, height, confidence], dtype=target.dtype
                )
                # Reset the class scores of this slot, then mark the box's class.
                target[base + 5:base + channels_per_anchor, row, col] = 0
                target[base + 5 + class_id, row, col] = 1

        return targets

# Usage
from torchvision import transforms  # Needed for the preprocessing pipeline below

image_dir = os.path.join(base_train_dir, 'images')
label_dir = os.path.join(base_train_dir, 'labels')
# The DataLoader's default collate function can only batch tensors, arrays and numbers,
# not PIL images, and every image in a batch must have the same shape. Resize to the
# 416x416 input size used by the dataset/model and convert to a tensor.
train_transform = transforms.Compose([
    transforms.Resize((416, 416)),
    transforms.ToTensor(),
])
dataset = YoloDataset(image_dir, label_dir, transform=train_transform)
loader = DataLoader(dataset, batch_size=16, shuffle=True)

FileNotFoundError: [Errno 2] No such file or directory: '/app/project/tmp/data/train/images'

- `S` is a list of the sizes of the feature maps at different scales.
- `B` is the number of anchor boxes per grid cell; each cell of a label tensor holds `(C + 5) * B` values.
- `C` is the number of classes in the dataset.
- The label tensors are initialized as zero tensors for each feature map scale.
- For each object in the image, the correct cell in each scale's feature map is located, and the bounding box and class label are placed in the corresponding position in the label tensor.
- The bounding boxes are assumed to be normalized, with coordinates as fractions of the image dimensions. You may need to adjust this if your labels are in a different format.

In [None]:
from torchvision.transforms import transforms
from models import ModelYOLOv3  # This should match the class name of the YOLOv3 model in your models.py
from loss import YoloLoss  # Project-local YOLOv3 loss implementation

# Training settings, defined up front so this cell runs on a fresh kernel
num_classes = 80  # Replace with the number of classes in your dataset
num_epochs = 50

# Initialize the YOLOv3 model
model = ModelYOLOv3(num_classes).to(device)  # Make sure to pass the correct number of classes to the model

# Load pre-trained weights (if available)
# NOTE: torch.load unpickles the file, which can execute arbitrary code. Only load
# checkpoints from a source you trust.
model.load_state_dict(torch.load(rico_datasets["rico"], map_location=device))

# Training images and labels live under the notebook's training data directory
image_folder = os.path.join(base_train_dir, 'images')
label_folder = os.path.join(base_train_dir, 'labels')

# Define your transformations
transform = transforms.Compose([
    transforms.Resize((416, 416)),  # Resize to input size
    transforms.ToTensor(),  # Convert to tensor
])

# Instantiate your custom dataset; C is passed explicitly so the label tensors
# always agree with the number of classes the model was built with
dataset = YoloDataset(image_folder, label_folder, C=num_classes, transform=transform)
loader = DataLoader(dataset, batch_size=16, shuffle=True)

# Loss used by the training loop.
# NOTE(review): assumes YoloLoss() is callable as loss_fn(outputs, targets), where targets
# is the list of per-scale label tensors produced by YoloDataset - confirm against loss.py.
# A YOLOv3 loss typically combines objectness, class prediction and bounding box
# regression terms, computed for each of the three output scales and summed.
compute_loss = YoloLoss()

# Training loop
model.train()  # Set the model to training mode
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # Define your optimizer
for epoch in range(num_epochs):
    for imgs, targets in loader:
        # Move the image batch and every per-scale target tensor to the appropriate device
        imgs = imgs.to(device)
        targets = [target.to(device) for target in targets]
        optimizer.zero_grad()
        output = model(imgs)  # Forward pass
        loss = compute_loss(output, targets)
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights
        # epoch is 0-based; report it 1-based so the final epoch reads N/N
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

In [None]:
# Loss function and optimizer used by the training cell
from loss import YoloLoss  # This should be the path to your YOLOv3 loss function implementation

# Adam over all model parameters with a fixed learning rate
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)
# Loss callable applied to (outputs, labels) during training
criterion = YoloLoss()

In [None]:
# Training
num_epochs = 50
for epoch in range(num_epochs):
    for images, labels in loader:
        images = images.to(device)
        # YoloDataset yields one label tensor per detection scale, so a batch of labels
        # arrives as a list of tensors; a list has no .to(), so move each tensor itself.
        labels = [label.to(device) for label in labels]

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # epoch is 0-based; report it 1-based so the final epoch reads N/N
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")
