In [1]:
import json

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.transforms.functional as FT
from torch.utils.data import DataLoader
from tqdm import tqdm

from classes import ret_classes
from Datasets import ImageNet,VOCDataset
from loss import loss
from models import darkNet,YOLOv1
from utils import ConvBlock,GlobalAvgPool2d,Compose

Model Architecture

(filter size, channels in, no. of filters, stride, padding), where `M` is a 2x2 max-pool layer with stride 2 and a padding of `s` means "same" padding

(DarkNet architecture):

(7,3,64,2,3)


M


(3,64,192,1,s)


M


(1,192,128,1,s)


(3,128,256,1,s)


(1,256,256,1,s)


(3,256,512,1,s)


M


[

(1,512,256,1,s)

(3,256,512,1,s)

repeat 4 times]


(1,512,512,1,s)


(3,512,1024,1,s)


M


[

(1,1024,512,1,s)

(3,512,1024,1,s)

repeat 2 times]

------

(The rest of the YOLO architecture):

(3,1024,1024,1,s)


(3,1024,1024,2,1)


(3,1024,1024,1,s)


(3,1024,1024,1,s)

In [2]:
# --- Reproducibility ---
seed = 100
torch.manual_seed(seed)

# --- Optimisation settings for the DarkNet classification pre-training ---
lr = 1e-3
weight_decay = 5e-4   #0.0005
batch = 8
epochs = 5
loss_fn = nn.CrossEntropyLoss(reduction="sum")  # summed (not averaged) over the batch

# --- Device: use the MPS backend when available, otherwise the CPU ---
# (replace the condition with False to force the CPU)
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

In [3]:
# Backbone network, moved to the selected device.
DN = darkNet()
DN = DN.to(device)

# Training and validation splits, each built with its own class list from ret_classes().
train = ImageNet(classes=ret_classes())
val = ImageNet(classes=ret_classes(), train=False)

# Adam over every backbone parameter, with the configured learning rate and weight decay.
optimizer = optim.Adam(
    DN.parameters(),
    lr=lr,
    weight_decay=weight_decay,
)

In [4]:
# Both loaders reshuffle on every pass and use the same batch size.
train_loader = DataLoader(dataset=train, batch_size=batch, shuffle=True)
test_loader = DataLoader(dataset=val, batch_size=batch, shuffle=True)

# Metric histories filled in by the training cell (and optionally restored from
# performance.json by the loading cell).
tr_acc_info, loss_info, val_acc_info, cost_info = [], [], [], []

# Best validation accuracy seen so far; decides when DN_best.pth is overwritten.
max_val_acc = 0

In [None]:
# Resume from the best checkpoint written by the training cell.
print("=> Loading checkpoint")
# map_location remaps the stored tensors onto the current device, so a checkpoint
# saved on MPS can still be restored on a CPU-only machine.
# NOTE: torch.load unpickles the file -- only load checkpoints you produced yourself.
checkpoint = torch.load("DN_best.pth", map_location=device)
DN.load_state_dict(checkpoint["state_dict"])
optimizer.load_state_dict(checkpoint["optimizer"])
# Restore the best accuracy so a worse epoch cannot overwrite DN_best.pth.
max_val_acc = checkpoint["accuracy"]

In [None]:
# Restore the metric histories saved by an earlier session.
print("=> Loading performance")
# The context manager closes the file even if parsing fails.
with open('performance.json') as file:
    performance = json.load(file)
tr_acc_info = performance["training accuracy"]
loss_info = performance["loss"]
val_acc_info = performance["validation accuracy"]
cost_info = performance["cost"]

In [None]:
# Number of validation batches scored after each epoch
# (625 batches of 8 images = 5000 images with the batch size configured above).
val_batches = 625

# One epoch per execution of this cell; re-run the cell to keep training.
for epoch in range(1):

    # Loop-invariant work hoisted out of the batch loops.
    DN = DN.to(device)
    on_mps = torch.device(device).type == "mps"

    loop = tqdm(train_loader)
    total_loss = 0
    tr_crct = 0
    tr_seen = 0

    for x,y in loop:
        x, y = x.to(device), y.to(device)
        out = DN(x)

        # Named `batch_loss` (not `loss`) so the `loss` class imported from loss.py
        # is not overwritten; later cells still call `loss()`.
        batch_loss = loss_fn(out, y)
        batch_loss_value = batch_loss.item()

        _,y_hat = torch.max(out,dim=1)
        tr_crct += (y_hat == y).sum().item()
        tr_seen += y.shape[0]
        total_loss += batch_loss_value

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        loop.set_postfix(loss=batch_loss_value)

        # One single-element list per batch (same layout as before, so an existing
        # performance.json stays compatible).
        loss_info.append([batch_loss_value])

        # Only meaningful (and only safe to call) when running on the MPS backend.
        if on_mps:
            torch.mps.empty_cache()

    # loss_fn sums over the batch, so dividing by the number of samples actually
    # seen gives the per-sample cost. max() guards against an empty loader.
    tr_seen = max(tr_seen, 1)
    print(f"Cost of epoch was {total_loss/tr_seen}")
    print(f"Accuracy of epoch was {tr_crct/tr_seen}")

    cost_info.append(total_loss/tr_seen)
    tr_acc_info.append(tr_crct/tr_seen)


    DN.eval()
    # Only a subset of the validation set is scored per epoch. The loader is
    # iterated once (rather than rebuilt for every batch), so the shuffled
    # batches are drawn without replacement.
    val_crct = 0
    val_seen = 0
    with torch.no_grad():  # no autograd graph is needed for evaluation
        for i, (x_val, y_val) in enumerate(test_loader):
            if i >= val_batches:
                break
            x_val,y_val = x_val.to(device),y_val.to(device)

            out_val = DN(x_val)
            _,y_val_hat = torch.max(out_val,dim=1)
            #top 1 accuracy
            val_crct += (y_val_hat == y_val).sum().item()
            val_seen += y_val.shape[0]
            #top 5 accuracy (not implemented)

            if on_mps:
                torch.mps.empty_cache()
    DN.train()

    val_acc = val_crct/max(val_seen, 1)
    print(f"Accuracy of epoch on {val_seen} test samples was {val_acc}")
    val_acc_info.append(val_acc)

    checkpoint = {
                "state_dict": DN.state_dict(),
                "optimizer": optimizer.state_dict(),
                "accuracy": val_acc
           }
    if val_acc > max_val_acc :
        max_val_acc = val_acc
        print("=> Saving checkpoint (BEST)")
        torch.save(checkpoint, "DN_best.pth")

    print("=> Saving checkpoint (LAST)")
    torch.save(checkpoint, "DN_last.pth")
#3 epochs

In [None]:
# Bundle the metric histories under the keys used by performance.json.
performance = {}
performance["training accuracy"] = tr_acc_info
performance["loss"] = loss_info
performance["validation accuracy"] = val_acc_info
performance["cost"] = cost_info

In [None]:
# Persist the metric histories; the context manager flushes and closes the file
# even if serialisation raises.
with open("performance.json", "w") as save_file:
    json.dump(performance, save_file, indent = 6)

In [None]:
import matplotlib.pyplot as plt

# Draw the recorded loss history on an explicit figure/axes pair.
fig, ax = plt.subplots()
ax.plot(loss_info)
plt.show()

In [None]:
# Fixtures for the smoke tests below: a loss instance, a random image batch and a
# random target tensor (drawn in this order so the random stream is unchanged).
l = loss()
p = torch.randn(2, 3, 448, 448)
t = torch.randn(2, 7, 7, 25)

In [None]:
# Smoke test: forward pass and loss with pretrained_backbone=True, on the CPU.
m2 = YOLOv1(pretrained_backbone=True)
m2 = m2.to('cpu')
p_2 = m2(p)
l(p_2, t)

In [None]:
# Smoke test: forward pass and loss with the default constructor arguments, on the CPU.
m1 = YOLOv1()
m1 = m1.to('cpu')
p_1 = m1(p)
l(p_1, t)

In [None]:
# Scratch check of the "append a new inner list, then extend it" pattern.
x = [[]]
x[-1] += [3]
x[-1] += [5]
x

TRAINING PART

Hyperparameters

In [None]:
seed = 100
torch.manual_seed(seed)

# Planned scheduling: 0.001->0.01 for 75 epochs, 0.001 for 30, 0.0001 for 30
learning_rate = 0.0001
#device = "mps" if torch.backends.mps.is_available() else "cpu"
# Wrapped in torch.device so `device` has the same type as in the pre-training cell.
device = torch.device("cpu")
batch = 16
weight_decay = 0       #0.0005
epochs = 30
PIN_MEMORY = False
IMG_DIR = "data/images"
LABEL_DIR = "data/labels"
# `transforms` is torchvision.transforms (imported in the notebook's import cell);
# `Compose` is the project's own wrapper from utils.
transform = Compose([transforms.Resize((448, 448)), transforms.ToTensor()])


In [None]:
def train_fn(train_loader, model, optimizer, loss_fn): #train
    """Run one optimisation pass over ``train_loader`` and report the mean batch loss.

    Args:
        train_loader: iterable yielding ``(x, y)`` batches.
        model: callable applied to ``x``; its parameters are updated by ``optimizer``.
        optimizer: optimizer stepped once per batch.
        loss_fn: callable ``loss_fn(out, y)`` returning a scalar tensor.

    Returns:
        The mean of the per-batch loss values as a float, or ``None`` when the
        loader yielded no batches.

    Note:
        Batches are moved to the notebook-level ``device`` variable.
    """
    loop = tqdm(train_loader, leave=True)
    batch_losses = []

    for x, y in loop:
        x, y = x.to(device), y.to(device)
        out = model(x)
        batch_loss = loss_fn(out, y)
        batch_losses.append(batch_loss.item())

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Show the latest batch loss on the progress bar.
        loop.set_postfix(loss=batch_losses[-1])

    # An empty loader would otherwise raise ZeroDivisionError below.
    if not batch_losses:
        print("No batches were processed; mean loss is undefined")
        return None

    mean_loss = sum(batch_losses)/len(batch_losses)
    print(f"Mean loss was {mean_loss}")
    return mean_loss

In [None]:
model = YOLOv1(pretrained_backbone=True).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
# Implements the note "0.0001 and drop after 20 epochs": keep the initial learning
# rate for 20 epochs, then scale it once.
# NOTE(review): the 10x drop (gamma=0.1) is an assumption -- confirm the intended factor.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20], gamma=0.1)
loss_fn = loss()

train = VOCDataset("data/100examples.csv",img_path=IMG_DIR,label_path=LABEL_DIR,transform=transform)

test = VOCDataset("data/test.csv", img_path=IMG_DIR,label_path=LABEL_DIR, transform=transform)

train_loader = DataLoader(
    dataset=train,
    batch_size=batch,
    pin_memory=PIN_MEMORY,
    shuffle=True,
    drop_last=False,
)

test_loader = DataLoader(
    dataset=test,
    batch_size=batch,
    pin_memory=PIN_MEMORY,
    shuffle=True,
    drop_last=False,
)

for epoch in range(epochs):

    # Planned (not yet enabled): evaluate mAP on the training set and checkpoint
    # once it is high enough.
    #pred_boxes, target_boxes = get_bboxes(train_loader, model, iou_threshold=0.5, threshold=0.4)

    #mean_avg_prec = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5)

    #if mean_avg_prec > 0.9:
    #    checkpoint = {
    #        "state_dict": model.state_dict(),
    #        "optimizer": optimizer.state_dict(),
    #    }
    #    save_checkpoint(checkpoint, filename=LOAD_MODEL_FILE)
    #    import time
    #    time.sleep(10)

    train_fn(train_loader, model, optimizer, loss_fn)
    # Advance the schedule once per epoch, after that epoch's optimizer steps.
    scheduler.step()

# 0.0001 and drop after 20 epochs

*- scheduler
*- transform
*- nms
*- plot output
*- mAP

In [None]:
def _iou_midpoint(box_a, box_b):
    """Intersection-over-union of two ``[x, y, w, h]`` boxes (centre point plus size)."""
    a_x1, a_x2 = box_a[0] - box_a[2] / 2, box_a[0] + box_a[2] / 2
    a_y1, a_y2 = box_a[1] - box_a[3] / 2, box_a[1] + box_a[3] / 2
    b_x1, b_x2 = box_b[0] - box_b[2] / 2, box_b[0] + box_b[2] / 2
    b_y1, b_y2 = box_b[1] - box_b[3] / 2, box_b[1] + box_b[3] / 2

    # Clamp at zero so disjoint boxes contribute no overlap.
    overlap_w = max(0.0, min(a_x2, b_x2) - max(a_x1, b_x1))
    overlap_h = max(0.0, min(a_y2, b_y2) - max(a_y1, b_y1))
    intersection = overlap_w * overlap_h

    union = abs(box_a[2] * box_a[3]) + abs(box_b[2] * box_b[3]) - intersection
    # Small epsilon avoids dividing by zero for degenerate (zero-area) boxes.
    return intersection / (union + 1e-6)


def nonn_max_suppression(prediction,iou_threshold,prob_threshold,num_classes=20): #Utils
    """Filter one example's grid prediction by confidence, then apply per-class NMS.

    Args:
        prediction: tensor for a single example, indexed ``[row][column][channel]``.
            Each cell holds ``num_classes`` class scores followed by two boxes of
            ``[po, x, y, w, h]`` (30 channels for the default 20 classes).
        iou_threshold: a lower-confidence box of the same class is dropped when
            its IoU with an already kept box is at or above this value.
        prob_threshold: a cell's box is kept only if its confidence exceeds this.
        num_classes: number of leading class-score channels per cell.

    Returns:
        List of ``[class, po, x, y, w, h]`` lists, ordered by decreasing confidence.

    NOTE(review): the coordinates are compared exactly as stored in the tensor.
    If x/y are relative to the grid cell, they presumably need converting to
    image coordinates before IoU is meaningful across cells -- confirm.
    """
    first_box = num_classes          # channel index of the first box's confidence
    second_box = num_classes + 5     # channel index of the second box's confidence

    # Keep the more confident of the two boxes in each cell, if it clears the threshold.
    bboxes = []
    for row in prediction:
        for grid in row:
            class_ = int(torch.argmax(grid[:num_classes]))
            if grid[first_box] > grid[second_box]:
                best = grid[first_box:second_box]
            else:
                best = grid[second_box:second_box + 5]
            if best[0] > prob_threshold:
                bboxes.append([class_] + best.tolist())

    bboxes = sorted(bboxes, key=lambda box: box[1], reverse=True)
    bboxes_after_nms = []

    # Greedy NMS: take the most confident remaining box and discard every
    # same-class box that overlaps it too much.
    while bboxes:
        chosen_box = bboxes.pop(0)
        bboxes = [
            box for box in bboxes
            if box[0] != chosen_box[0]
            or _iou_midpoint(chosen_box[2:], box[2:]) < iou_threshold
        ]
        bboxes_after_nms.append(chosen_box)

    return bboxes_after_nms


def non_max_suppression(prediction, iou_threshold, threshold, num_classes=20):
    """Correctly spelled entry point taking the confidence threshold as ``threshold``."""
    return nonn_max_suppression(prediction, iou_threshold, threshold, num_classes)

In [None]:
def parse_labels(labels, num_classes=20): #Utils
    """Convert a batch of grid labels into per-example lists of boxes.

    Args:
        labels: tensor indexed ``[example][row][column][channel]``.
            NOTE(review): each cell is assumed to hold ``num_classes`` class
            scores followed by ``[po, x, y, w, h]`` (25 channels by default),
            mirroring the prediction layout -- confirm against the dataset.
        num_classes: number of leading class-score channels per cell.

    Returns:
        One list per example, containing one ``[class, po, x, y, w, h]`` list for
        every grid cell. Empty cells are not filtered out here; callers filter
        on ``po``.
    """
    #[[class,po,x,y,w,h],","]
    labels_parsed = []
    for label in labels:
        boxes = []
        for row in label:
            for grid in row:
                class_ = int(torch.argmax(grid[:num_classes]))
                boxes.append([class_] + grid[num_classes:num_classes + 5].tolist())
        labels_parsed.append(boxes)
    return labels_parsed

In [None]:
def get_bboxes(loader,model,iou_threshold,threshold,pred_format="cells",device="mps"): #Utils
    """Collect post-NMS predicted boxes and ground-truth boxes for every example in ``loader``.

    Args:
        loader: iterable yielding ``(x, labels)`` batches.
        model: network applied to ``x``; evaluated in eval mode without gradients.
        iou_threshold: forwarded to ``non_max_suppression``.
        threshold: confidence threshold, forwarded to ``non_max_suppression`` and
            also used to keep ground-truth boxes whose ``box[1]`` exceeds it.
        pred_format: currently unused; kept so existing calls stay valid.
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(all_pred_boxes, all_true_boxes)``; every entry is the running example
        index followed by the box values.

    NOTE(review): ``non_max_suppression`` and ``parse_labels`` must be in scope
    when this runs (defined in an earlier cell or imported).
    """
    all_pred_boxes = []
    all_true_boxes = []

    # Eval mode turns off dropout/batch-norm updates during the forward passes.
    # The caller's mode is remembered so it can be restored afterwards instead
    # of unconditionally switching the model to train mode.
    was_training = model.training
    model.eval()
    train_idx = 0

    try:
        for batch_idx, (x, labels) in enumerate(loader):
            x = x.to(device)
            labels = labels.to(device)

            with torch.no_grad():
                predictions = model(x)

            batch_size = x.shape[0]
            true_bboxes = parse_labels(labels)

            for idx in range(batch_size):
                nms_boxes = non_max_suppression(
                    predictions[idx],
                    iou_threshold=iou_threshold,
                    threshold=threshold
                )

                #if batch_idx == 0 and idx == 0:
                #    plot_image(x[idx].permute(1,2,0).to("cpu"), nms_boxes)
                #    print(nms_boxes)

                for nms_box in nms_boxes:
                    all_pred_boxes.append([train_idx] + nms_box)

                for box in true_bboxes[idx]:
                    if box[1] > threshold:
                        all_true_boxes.append([train_idx] + box)

                train_idx += 1
    finally:
        # Restore the original mode even if a batch raised.
        model.train(was_training)

    return all_pred_boxes, all_true_boxes

In [None]:
class YOLO(nn.Module):
    """Convolutional feature extractor followed by a two-layer fully connected head.

    The head flattens the feature map, expects ``grids*grids*1024`` features and
    emits ``grids*grids*(classes + boxes*5)`` values per example.
    """

    def __init__(self,grids=7,boxes=2,classes=20):
        super().__init__()
        self.maxpool = nn.MaxPool2d(kernel_size=2,stride=2)
        self.dropout = nn.Dropout(p=0.5)
        self.darkNet = self.dark_net()
        self.fullyConnected = self.fully_connected(grids,boxes,classes)

    def forward(self,x):
        """Apply the convolutional stack, then the fully connected head."""
        features = self.darkNet(x)
        return self.fullyConnected(features)

    def dark_net(self):
        """Build the convolutional stack from a compact layer specification.

        Tuples are positional ``ConvBlock`` arguments; ``POOL`` marks a position
        where the shared ``self.maxpool`` module is inserted.
        """
        POOL = "M"
        bottleneck_512 = [(1,512,256), (3,256,512)] * 4
        bottleneck_1024 = [(1,1024,512), (3,512,1024)] * 2

        spec = (
            [(7,3,64,2,3), POOL]
            + [(3,64,192), POOL]
            + [(1,192,128), (3,128,256), (1,256,256), (3,256,512), POOL]
            + bottleneck_512
            + [(1,512,512), (3,512,1024), POOL]
            + bottleneck_1024
            # end of the part used for pre-training; detection-specific layers follow
            + [(3,1024,1024), (3,1024,1024,2,1), (3,1024,1024), (3,1024,1024)]
        )

        layers = []
        for entry in spec:
            if entry == POOL:
                layers.append(self.maxpool)
            else:
                layers.append(ConvBlock(*entry))

        return nn.Sequential(*layers)

    def fully_connected(self,grids,boxes,classes):
        """Build the head: flatten -> linear -> LeakyReLU -> dropout -> linear."""
        in_features = grids*grids*1024
        out_features = grids*grids*(classes+ boxes*5)
        head = [
            nn.Flatten(),
            nn.Linear(in_features, 1024),
            nn.LeakyReLU(0.1),
            nn.Dropout(p=0.5),
            nn.Linear(1024, out_features),
        ]
        return nn.Sequential(*head)

#done

In [None]:
# Visual sanity check: draw the post-NMS boxes for up to 8 images of the first
# training batch.
# NOTE(review): `non_max_suppression` and `plot_image` must be in scope when this runs.
was_training = model.training
model.eval()  # inference only: disable dropout while drawing predictions
with torch.no_grad():
    for x, y in train_loader: # Here
        x = x.to(device)
        predictions = model(x)
        # Never index past the end of a batch that holds fewer than 8 images.
        for idx in range(min(8, x.shape[0])):
            #bboxes = cellboxes_to_boxes(model(x))
            # Kept separate from `predictions` so later images still index the
            # model output rather than the previous image's NMS result.
            bboxes = non_max_suppression(predictions[idx], iou_threshold=0.5, threshold=0.4)
            plot_image(x[idx].permute(1,2,0).to("cpu"), bboxes)

        # Only the first batch is inspected; `break` ends the loop without
        # raising SystemExit inside the notebook kernel.
        break
model.train(was_training)