In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import numpy as np
from torch.utils.data import Dataset, DataLoader
import os
import math
from tqdm import tqdm
from PIL import Image, ImageDraw
from sklearn.metrics import f1_score, precision_score, recall_score

def create_conv_block(in_channels, out_channels, kernel_size, stride, padding):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.LeakyReLU(0.1),
    )

class YOLO(nn.Module):
    def __init__(self, grid_size):
        super(YOLO, self).__init__()
        self.grid_size = grid_size
        
        self.conv1 = create_conv_block(3, 16, 3, 1, 1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = create_conv_block(16, 32, 3, 1, 1)
        self.conv3 = create_conv_block(32, 64, 3, 1, 1)
        self.conv4 = create_conv_block(64, 128, 3, 1, 1)
        self.conv5 = create_conv_block(128, 256, 3, 1, 1)
        self.fc1 = nn.Linear(256 * (640 // 32) * (640 // 32), 1024)
        self.fc2 = nn.Linear(1024, grid_size * grid_size * (5 + 1))

    def forward(self, x):
        x = self.pool(self.conv1(x))
        x = self.pool(self.conv2(x))
        x = self.pool(self.conv3(x))
        x = self.pool(self.conv4(x))
        x = self.pool(self.conv5(x))
        x = x.view(-1, 256 * (640 // 32) * (640 // 32))
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

class CustomYOLODataset(Dataset):
    def __init__(self, image_folder, label_folder, grid_size, transform=None):
        self.image_folder = image_folder
        self.label_folder = label_folder
        self.transform = transform
        self.image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]
        self.grid_size=grid_size

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_folder, self.image_files[idx])
        label_path = os.path.join(self.label_folder, self.image_files[idx].replace('.jpg', '.txt'))

        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)

        #double check this transformation
        label = torch.zeros((grid_size, grid_size, 6))
        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                for line in f.readlines():
                    class_id, x, y, w, h = map(float, line.split())
                    grid_x = math.floor(x * self.grid_size)
                    grid_y = math.floor(y * self.grid_size)
                    x_offset = (x * self.grid_size) - grid_x
                    y_offset = (y * self.grid_size) - grid_y
                    label[grid_y, grid_x, :] = torch.tensor([x_offset, y_offset, w, h, 1, class_id])
        else:
            raise Exception(f'{label_path} does not exist')

        return image, label.view(-1)

transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor(),
])

grid_size = 7
num_epochs = 20
batch_size = 4

train_folder = '/Users/devinma/Desktop/Stanford/CS230/DroneSegment.v1i.yolov5pytorch/train'
train_folder = '/Users/devinma/Desktop/Stanford/CS230/DroneSegment.v1i.yolov5pytorch/test'
output_folder = '/Users/devinma/Desktop/Stanford/CS230/DroneSegment.v1i.yolov5pytorch/yolo_v1_output_images'
weight_output_folder = '/Users/devinma/Desktop/Stanford/CS230/DroneSegment.v1i.yolov5pytorch/yolo_v1_weights'

dataset = CustomYOLODataset(image_folder=train_folder+'/images', label_folder=train_folder+'/labels', grid_size=grid_size, transform=transform)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
yolo = YOLO(grid_size=grid_size)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
yolo = yolo.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(yolo.parameters(), lr=0.001)

# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0

    for batch_idx, data in enumerate(tqdm(train_loader, desc=f"Epoch [{epoch + 1}/{num_epochs}]", unit="batch")):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = yolo(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch + 1}], Loss: {running_loss / len(train_loader)}')

print('Finished Training')
torch.save(yolo.state_dict(), weight_output_folder + '/yolo_model.pth')
print('Model saved')

print('Starting Testing')
yolo.eval()
test_dataset = CustomYOLODataset(image_folder=train_folder+'/images', label_folder=train_folder+'/labels', transform=transform)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
os.makedirs(output_folder, exist_ok=True)



Epoch [1/5]: 100%|██████████| 707/707 [14:38<00:00,  1.24s/batch]


Epoch [1], Loss: 0.0714


Epoch [2/5]: 100%|██████████| 707/707 [14:29<00:00,  1.23s/batch]


Epoch [2], Loss: 0.0283


Epoch [3/5]: 100%|██████████| 707/707 [14:34<00:00,  1.24s/batch]


Epoch [3], Loss: 0.0225


Epoch [4/5]: 100%|██████████| 707/707 [14:29<00:00,  1.23s/batch]


Epoch [4], Loss: 0.0190


Epoch [5/5]: 100%|██████████| 707/707 [14:33<00:00,  1.24s/batch]


Epoch [5], Loss: 0.0183


Epoch [6/5]: 100%|██████████| 707/707 [14:31<00:00,  1.23s/batch]


Epoch [6], Loss: 0.0156


Epoch [7/5]: 100%|██████████| 707/707 [14:31<00:00,  1.23s/batch]


Epoch [7], Loss: 0.0145


Epoch [8/5]: 100%|██████████| 707/707 [14:36<00:00,  1.24s/batch]


Epoch [8], Loss: 0.0129


Epoch [9/5]: 100%|██████████| 707/707 [14:35<00:00,  1.24s/batch]


Epoch [9], Loss: 0.0126


Epoch [10/5]: 100%|██████████| 707/707 [14:29<00:00,  1.23s/batch]


Epoch [10], Loss: 0.0115


Epoch [11/5]: 100%|██████████| 707/707 [14:24<00:00,  1.22s/batch]


Epoch [11], Loss: 0.0110


Epoch [12/5]: 100%|██████████| 707/707 [14:23<00:00,  1.22s/batch]


Epoch [12], Loss: 0.0103


Epoch [13/5]: 100%|██████████| 707/707 [14:23<00:00,  1.22s/batch]


Epoch [13], Loss: 0.0102


Epoch [14/5]: 100%|██████████| 707/707 [14:38<00:00,  1.24s/batch]


Epoch [14], Loss: 0.0098


Epoch [15/5]: 100%|██████████| 707/707 [14:21<00:00,  1.22s/batch]


Epoch [15], Loss: 0.0096


Epoch [16/5]: 100%|██████████| 707/707 [14:23<00:00,  1.22s/batch]


Epoch [16], Loss: 0.0092


Epoch [17/5]: 100%|██████████| 707/707 [14:26<00:00,  1.23s/batch]


Epoch [17], Loss: 0.0100


Epoch [18/5]: 100%|██████████| 707/707 [14:21<00:00,  1.22s/batch]


Epoch [18], Loss: 0.0093


Epoch [19/5]: 100%|██████████| 707/707 [14:22<00:00,  1.22s/batch]


Epoch [19], Loss: 0.0088


Epoch [20/5]: 100%|██████████| 707/707 [14:22<00:00,  1.22s/batch]


Epoch [20], Loss: 0.0085
Finished Training
Model saved as yolo_model.pth
Starting Testing


In [26]:
y_true = []
y_pred = []

with torch.no_grad():
    for data in test_loader:
        inputs, labels, img_path = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = yolo(inputs)
        predictions = torch.sigmoid(outputs).view(grid_size, grid_size, 6).cpu().numpy()
        y_true.extend(labels.cpu().numpy())
        y_pred.extend((predictions[..., 0] > 0.5).astype(int))

        # Draw boxes on the test images, double check this
        original_image = Image.open(img_path[0]).convert('RGB')
        draw = ImageDraw.Draw(original_image)

        for row in range(grid_size):
            for col in range(grid_size):
                if predictions[row, col, 4] > 0.5 and predictions[row, col, 5] < 0.5:  # If confidence score is high and classified as 0
                    x_offset, y_offset, w, h = predictions[row, col, 0:4]
                    x_center = (col + x_offset) / grid_size * 640
                    y_center = (row + y_offset) / grid_size * 640
                    box_w = w * 640
                    box_h = h * 640
                    x1 = int(x_center - box_w / 2)
                    y1 = int(y_center - box_h / 2)
                    x2 = int(x_center + box_w / 2)
                    y2 = int(y_center + box_h / 2)
                    draw.rectangle([x1, y1, x2, y2], outline='red', width=3)
        output_path = os.path.join(output_folder, os.path.basename(img_path[0]))
        original_image.save(output_path)

In [None]:
#fix bugs to get precision, recall, f1
y_true = np.concatenate(y_true).flatten()
y_pred = np.concatenate(y_pred).flatten()
y_true_binary = (y_true > 0.5).astype(int)
y_pred_binary = (y_pred > 0.5).astype(int)
f1 = f1_score(y_true_binary, y_pred_binary)
precision = precision_score(y_true_binary, y_pred_binary)
recall = recall_score(y_true_binary, y_pred_binary)

print(f'F1 Score: {f1:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')

ValueError: zero-dimensional arrays cannot be concatenated