In [None]:
import time
import numpy as np
import torch
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from collections import Counter
import torch.nn as nn
from collections import Counter
import os
import pandas as pd
from PIL import Image
import torch.optim as optim
from tqdm import tqdm
import sys
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [None]:
def intersection_over_union(boxes_preds , boxes_real , box_format = "midpoint"):
    """Compute the intersection-over-union (IoU) between two sets of boxes.

    The four box coordinates are read from the last dimension of each input;
    all leading dimensions are kept and combined element-wise.

    Args:
        boxes_preds: tensor whose last dimension is ``(x, y, w, h)`` when
            ``box_format`` is ``"midpoint"`` or ``(x1, y1, x2, y2)`` when it
            is ``"edges"``.
        boxes_real: tensor in the same layout as ``boxes_preds``.
        box_format: ``"midpoint"`` (centre point plus width/height) or
            ``"edges"`` (two opposite corners).

    Returns:
        Tensor of IoU values whose last dimension has size 1.

    Raises:
        ValueError: if ``box_format`` is neither ``"midpoint"`` nor ``"edges"``.
    """
    # Coordinate convention: the top-left corner of the image is (0, 0) and
    # the bottom-right corner is (1, 1).
    if box_format == "midpoint":
        box1_x1 = boxes_preds[... , 0:1] - (boxes_preds[... , 2:3] / 2)
        box1_y1 = boxes_preds[... , 1:2] - (boxes_preds[... , 3:4] / 2)
        box1_x2 = boxes_preds[... , 0:1] + (boxes_preds[... , 2:3] / 2)
        box1_y2 = boxes_preds[... , 1:2] + (boxes_preds[... , 3:4] / 2)

        box2_x1 = boxes_real[... , 0:1] - (boxes_real[... , 2:3] / 2)
        box2_y1 = boxes_real[... , 1:2] - (boxes_real[... , 3:4] / 2)
        box2_x2 = boxes_real[... , 0:1] + (boxes_real[... , 2:3] / 2)
        box2_y2 = boxes_real[... , 1:2] + (boxes_real[... , 3:4] / 2)
    elif box_format == "edges":
        box1_x1 = boxes_preds[... , 0:1]
        box1_y1 = boxes_preds[... , 1:2]
        box1_x2 = boxes_preds[... , 2:3]
        box1_y2 = boxes_preds[... , 3:4]

        box2_x1 = boxes_real[... , 0:1]
        box2_y1 = boxes_real[... , 1:2]
        box2_x2 = boxes_real[... , 2:3]
        box2_y2 = boxes_real[... , 3:4]
    else:
        # Previously an unknown format fell through and failed later with an
        # unrelated "referenced before assignment" error.
        raise ValueError(
            f"Unsupported box_format {box_format!r}; expected 'midpoint' or 'edges'"
        )

    # Corners of the overlap rectangle.
    x1 = torch.max(box1_x1 , box2_x1)
    y1 = torch.max(box1_y1 , box2_y1)
    x2 = torch.min(box1_x2 , box2_x2)
    y2 = torch.min(box1_y2 , box2_y2)

    # clamp(0) turns a negative extent (boxes that do not overlap) into 0.
    intersection_area = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
    # The small constant avoids a division by zero for two empty boxes.
    union_area = box1_area + box2_area - intersection_area + 1e-6

    return intersection_area / union_area

In [None]:
def non_max_supression (bboxes , iou_thresh , prob_thresh , box_format = "midpoint"):
    """Greedy per-class non-maximum suppression.

    Args:
        bboxes: list of boxes, each ``[class, probability, c0, c1, c2, c3]``
            where the four coordinates are interpreted according to
            ``box_format`` by ``intersection_over_union``.
        iou_thresh: a lower-scoring box of the same class is dropped when its
            IoU with an already kept box is not below this value.
        prob_thresh: boxes whose probability is not above this value are
            discarded before suppression starts.
        box_format: forwarded to ``intersection_over_union``.

    Returns:
        The kept boxes, ordered by decreasing probability.
    """
    assert type(bboxes) == list

    # Highest-confidence candidates first; sorted() is stable, so ties keep
    # their input order.
    candidates = sorted(
        (candidate for candidate in bboxes if candidate[1] > prob_thresh),
        key = lambda candidate: candidate[1],
        reverse = True,
    )
    kept = []

    while candidates:
        winner = candidates.pop(0)
        survivors = []
        for other in candidates:
            # Boxes of a different class never compete with the winner.
            if other[0] != winner[0]:
                survivors.append(other)
                continue
            overlap = intersection_over_union(
                torch.tensor(winner[2:]) , torch.tensor(other[2:]) , box_format = box_format
            )
            if overlap < iou_thresh:
                survivors.append(other)
        candidates = survivors
        kept.append(winner)

    return kept

In [None]:
def performance (pred_boxes , true_boxes , iou_thresh = 0.5 , box_format = "midpoint" , num_classes = 20):
    """Compute the mean average precision (mAP) of a set of detections.

    Args:
        pred_boxes: list of ``[image_index, class, probability, c0, c1, c2, c3]``.
        true_boxes: list of ground-truth boxes in the same layout.
        iou_thresh: a detection counts as a true positive when its best IoU
            with a not-yet-matched ground-truth box of the same image and
            class is above this value.
        box_format: forwarded to ``intersection_over_union``.
        num_classes: class ids ``0 .. num_classes - 1`` are evaluated.

    Returns:
        The mean of the per-class average precisions, taken over the classes
        that have at least one ground-truth box. ``0.0`` when no class has any.
    """
    average_precisions = []
    saver_from_div_0 = 1e-6

    for c in range(num_classes):
        detections = [detection for detection in pred_boxes if detection[1] == c]
        ground_truths = [truth for truth in true_boxes if truth[1] == c]

        total_true_bboxes = len(ground_truths)
        # A class without any ground-truth box does not take part in the mean.
        if total_true_bboxes == 0:
            continue

        # One flag per ground-truth box, grouped by image, recording whether
        # that box has already been claimed by a detection.
        amount_bboxes = {
            image_idx: torch.zeros(count)
            for image_idx , count in Counter(t[0] for t in ground_truths).items()
        }

        detections.sort(key = lambda x: x[2] , reverse = True)
        true_pos  = torch.zeros(len(detections))
        false_pos = torch.zeros(len(detections))

        for detection_idx , detection in enumerate(detections):
            ground_truth_img = [bbox for bbox in ground_truths if bbox[0] == detection[0]]
            best_iou = 0
            best_iou_idx = -1

            for idx_bbox , bbox in enumerate(ground_truth_img):
                new_iou = intersection_over_union(
                    torch.tensor(detection[3:]) , torch.tensor(bbox[3:]) , box_format = box_format
                )
                if new_iou > best_iou:
                    best_iou = new_iou
                    best_iou_idx = idx_bbox

            # best_iou stays 0 when the image has no ground truth for this
            # class, so the lookup below is only reached for a valid index.
            if best_iou > iou_thresh and amount_bboxes[detection[0]][best_iou_idx] == 0:
                amount_bboxes[detection[0]][best_iou_idx] = 1
                true_pos[detection_idx] = 1
            else:
                # Either too little overlap or the box was already matched.
                false_pos[detection_idx] = 1

        # All detections of this class are labelled now: build the
        # precision/recall curve ONCE per class. (This used to run inside the
        # detection loop, appending one partial AP per detection.)
        tp_cumsum = torch.cumsum(true_pos , dim = 0)
        fp_cumsum = torch.cumsum(false_pos , dim = 0)

        recalls = tp_cumsum / (total_true_bboxes + saver_from_div_0)
        percisions = torch.divide(tp_cumsum , (tp_cumsum + fp_cumsum + saver_from_div_0))

        # Anchor the curve at recall 0 / precision 1.
        recalls = torch.cat((torch.tensor([0.0]) , recalls))
        percisions = torch.cat((torch.tensor([1.0]) , percisions))

        # torch.trapz(y, x): area under the precision(recall) curve.
        average_precisions.append(torch.trapz(percisions , recalls))

    # Mean over the evaluated classes; the epsilon keeps an empty list safe.
    MAP = sum(average_precisions) / (len(average_precisions) + saver_from_div_0)
    return MAP

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

def plot_image(image, boxes, labels=None):
    """Draw bounding boxes, and optionally their labels, on top of an image.

    Args:
        image: anything ``np.array`` converts to a ``(height, width, channels)``
            array.
        boxes: iterable of boxes whose first two entries are skipped and whose
            remaining four entries are ``(x_mid, y_mid, width, height)`` as
            fractions of the image size.
        labels: ``None``/empty for no text, a single string used for every box,
            or a sequence with one label per box. Boxes beyond the end of the
            sequence are drawn without text.
    """
    im = np.array(image)
    height, width, _ = im.shape

    fig, ax = plt.subplots(1)
    ax.imshow(im)

    for i, box in enumerate(boxes):
        box = box[2:]
        assert len(box) == 4, "Got more values than in x, y, w, h, in a box!"
        # Rectangle wants the upper-left corner, the box stores its midpoint.
        upper_left_x = box[0] - box[2] / 2
        upper_left_y = box[1] - box[3] / 2
        rect = patches.Rectangle(
            (upper_left_x * width, upper_left_y * height),
            box[2] * width,
            box[3] * height,
            linewidth=1,
            edgecolor="r",
            facecolor="none",
        )
        ax.add_patch(rect)

        # Pick the label that belongs to THIS box; previously the repr of the
        # whole ``labels`` object was written next to every box.
        if not labels:
            box_label = None
        elif isinstance(labels, str):
            box_label = labels
        elif i < len(labels):
            box_label = labels[i]
        else:
            box_label = None

        if box_label is not None:
            ax.text(
                upper_left_x * width,
                upper_left_y * height,
                f"{box_label}",
                color="r",
                backgroundcolor="white",
                fontsize=8,
            )

    plt.show()

In [None]:
def get_bboxes(
    loader,
    model,
    iou_threshold,
    threshold,
    pred_format="cells",
    box_format="midpoint",
    device="cpu",
):
    """Collect predicted and ground-truth boxes for every image of a loader.

    The model is switched to eval mode while predicting and back to train
    mode before returning.

    Args:
        loader: iterable yielding ``(images, labels)`` batches.
        model: network called on each image batch.
        iou_threshold: IoU threshold handed to ``non_max_supression``.
        threshold: probability threshold used both for NMS on predictions and
            for keeping ground-truth boxes.
        pred_format: accepted for compatibility; not used by this function.
        box_format: forwarded to ``non_max_supression``.
        device: device the batches are moved to.

    Returns:
        ``(all_pred_boxes, all_true_boxes)``; every box is prefixed with a
        running image index that keeps counting across batches.
    """
    predicted = []
    actual = []

    model.eval()
    image_counter = 0

    for images, targets in loader:
        images = images.to(device)
        targets = targets.to(device)

        with torch.no_grad():
            raw_output = model(images)

        true_per_image = cellboxes_to_boxes(targets)
        pred_per_image = cellboxes_to_boxes(raw_output)

        for sample in range(images.shape[0]):
            kept = non_max_supression(
                pred_per_image[sample],
                iou_thresh=iou_threshold,
                prob_thresh=threshold,
                box_format=box_format,
            )
            predicted += [[image_counter] + kept_box for kept_box in kept]

            # Label cells without an object carry probability 0 and are
            # filtered out by the threshold.
            actual += [
                [image_counter] + true_box
                for true_box in true_per_image[sample]
                if true_box[1] > threshold
            ]

            image_counter += 1

    model.train()
    return predicted, actual

In [None]:
def convert_cellboxes(predictions, S=7 , b=2 , c=20 , device = "cpu"):
    """Convert cell-relative YOLO output into image-relative boxes.

    Each grid cell is laid out as ``c`` class scores followed by ``b`` groups
    of ``(confidence, x, y, w, h)``. For every cell the box with the highest
    confidence is kept and its coordinates are rescaled from cell units to
    fractions of the whole image.

    Args:
        predictions: tensor that can be reshaped to
            ``(batch, S, S, c + 5 * b)``.
        S: grid size (cells per side).
        b: number of boxes stored per cell.
        c: number of classes.
        device: device the tensor is moved to before processing.

    Returns:
        Tensor of shape ``(batch, S, S, 6)`` holding
        ``(predicted_class, best_confidence, x, y, w, h)`` per cell.
    """
    predictions = predictions.to(device)
    batch_size = predictions.shape[0]  # number of images in the batch
    predictions = predictions.reshape(batch_size, S, S, c + 5 * b)

    # Stack the b candidate boxes of every cell along a new leading axis so
    # the layout no longer depends on the hard-coded offsets 20/21:25/26:30.
    confidences = torch.stack(
        [predictions[..., c + 5 * k] for k in range(b)], dim=0
    )  # (b, batch, S, S)
    candidate_boxes = torch.stack(
        [predictions[..., c + 5 * k + 1 : c + 5 * k + 5] for k in range(b)], dim=0
    )  # (b, batch, S, S, 4)

    # Index of the most confident box per cell (first one wins on ties).
    best_box = confidences.argmax(0)
    gather_index = best_box.unsqueeze(0).unsqueeze(-1).expand(1, batch_size, S, S, 4)
    best_boxes = candidate_boxes.gather(0, gather_index).squeeze(0)

    # cell_indices[n, row, col, 0] == col; the permuted view yields the row.
    cell_indices = (
        torch.arange(S, device=predictions.device)
        .repeat(batch_size, S, 1)
        .unsqueeze(-1)
    )
    x = 1 / S * (best_boxes[..., :1] + cell_indices)
    y = 1 / S * (best_boxes[..., 1:2] + cell_indices.permute(0, 2, 1, 3))
    w_h = 1 / S * best_boxes[..., 2:4]
    converted_bboxes = torch.cat((x, y, w_h), dim=-1)

    # Cast the class index so every part of the concatenation shares a dtype.
    predicted_class = (
        predictions[..., :c].argmax(-1).unsqueeze(-1).to(predictions.dtype)
    )
    best_confidence = confidences.max(dim=0).values.unsqueeze(-1)

    return torch.cat((predicted_class, best_confidence, converted_bboxes), dim=-1)

def cellboxes_to_boxes(out, S=7):
    """Turn network output (or a label tensor) into plain Python box lists.

    Args:
        out: tensor accepted by ``convert_cellboxes``; its first dimension is
            the batch size.
        S: grid size. It is forwarded to ``convert_cellboxes`` so grids other
            than 7x7 are decoded consistently.

    Returns:
        A list with one entry per example; each entry is a list of ``S * S``
        boxes, each a list of Python floats as produced by
        ``convert_cellboxes``, with the class index truncated to a whole
        number.
    """
    converted_pred = convert_cellboxes(out, S=S).reshape(out.shape[0], S * S, -1)
    # Round-trip through long() so the class index is a whole number.
    converted_pred[..., 0] = converted_pred[..., 0].long()
    # tolist() yields the same nested Python floats as calling .item() on
    # every element, without the per-element loop.
    return converted_pred.tolist()

In [None]:
def save_checkpoint(state, filename):
    """Announce the save on stdout, then write ``state`` to ``filename`` via ``torch.save``."""
    print("=> Saving checkpoint")
    torch.save(obj=state, f=filename)
    
def load_checkpoint(checkpoint, model, optimizer):
    """Restore ``model`` and ``optimizer`` from a checkpoint mapping.

    ``checkpoint["state_dict"]`` is loaded into the model first, then
    ``checkpoint["optimizer"]`` into the optimizer.
    """
    print("=> Loading checkpoint")
    for target, key in ((model, "state_dict"), (optimizer, "optimizer")):
        target.load_state_dict(checkpoint[key])

In [None]:
# Layer-by-layer description of the convolutional backbone.
#   tuple -> one conv block: (kernel size, number of filters, stride, padding)
#   "m"   -> 2x2 max pooling with stride 2
#   list  -> [conv block A, conv block B, number of times the A+B pair repeats]
architecture_config = [
    (7, 64, 2, 3),
    "m",
    (3, 192, 1, 1),
    "m",
    (1, 128, 1, 0),
    (3, 256, 1, 1),
    (1, 256, 1, 0),
    (3, 512, 1, 1),
    "m",
    # this pair of conv blocks is repeated 4 times
    [(1, 256, 1, 0), (3, 512, 1, 1), 4],
    (1, 512, 1, 0),
    (3, 1024, 1, 1),
    "m",
    # this pair of conv blocks is repeated 2 times
    [(1, 512, 1, 0), (3, 1024, 1, 1), 2],
    (3, 1024, 1, 1),
    (3, 1024, 2, 1),
    (3, 1024, 1, 1),
    (3, 1024, 1, 1),
    # 24 conv layers in total; the final feature map has 1024 channels
]

In [None]:
class cnn_block(nn.Module):
    """Convolution -> batch normalisation -> LeakyReLU(0.1) building block.

    Extra keyword arguments (kernel size, stride, padding, ...) are passed
    straight to ``nn.Conv2d``. The convolution has no bias term.
    """

    def __init__(self , in_channels , out_channels , **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels , out_channels , bias = False , **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)
        # 0.1 is the slope applied to negative activations.
        self.leakyrelu = nn.LeakyReLU(0.1)

    def forward (self , x):
        """Apply the three layers in order and return the activated tensor."""
        return self.leakyrelu(self.batchnorm(self.conv(x)))

In [None]:
class yolov1(nn.Module):
    """YOLOv1-style detector: convolutional backbone plus a fully connected head.

    The backbone is built from the module-level ``architecture_config``.
    ``split_size``, ``num_boxes`` and ``num_classes`` must be supplied as
    keyword arguments; they size the head.
    """

    def __init__ (self , in_channels = 3 , **kwargs):
        super().__init__()
        self.architecture = architecture_config
        self.in_channels = in_channels
        # convolutional layers described by the architecture list
        self.darknet = self._create_conv_layers(self.architecture)
        # fully connected head
        self.fcs = self._create_fcs(**kwargs)

    def forward(self , x):
        """Run the backbone, flatten everything but the batch axis, apply the head."""
        features = torch.flatten(self.darknet(x) , start_dim = 1)
        return self.fcs(features)

    # private helper
    def _create_conv_layers (self  , architecture):
        """Translate the architecture description into an ``nn.Sequential``."""

        def conv(channels_in , spec):
            # spec is (kernel size, number of filters, stride, padding)
            return cnn_block(
                channels_in , out_channels = spec[1] , kernel_size = spec[0] ,
                stride = spec[2] , padding = spec[3],
            )

        modules = []
        channels = self.in_channels

        for entry in architecture:
            kind = type(entry)
            if kind == tuple:
                modules.append(conv(channels , entry))
                channels = entry[1]
            elif kind == str:
                modules.append(nn.MaxPool2d(kernel_size = (2, 2) , stride = (2, 2)))
            elif kind == list:
                first , second , repeats = entry[0] , entry[1] , entry[2]
                for _ in range(repeats):
                    modules.append(conv(channels , first))
                    modules.append(conv(first[1] , second))
                    channels = second[1]

        return nn.Sequential(*modules)

    def _create_fcs (self , split_size , num_boxes , num_classes):
        """Build the head mapping the flattened features to the prediction vector."""
        s , b , c = split_size , num_boxes , num_classes
        hidden_units = 496  # reduced from the paper's 4096 because of limited compute
        return nn.Sequential(
            # flattens the s*s*1024 feature map (a no-op after forward()'s flatten)
            nn.Flatten(),
            nn.Linear(1024 * s * s , hidden_units),
            # dropout probability 0.0, i.e. currently disabled
            nn.Dropout(0.0),
            nn.LeakyReLU(0.1),
            # output stays 1-D per sample; callers reshape it to (s, s, c + 5*b)
            nn.Linear(hidden_units , s * s * (c + (b * 5))),
        )
        
        
        
def test(s = 7 , b = 2 , c = 20):
    """Smoke test: push a random 2-image batch through a fresh model and print the output shape."""
    net = yolov1(split_size = s , num_boxes = b , num_classes = c)
    dummy_batch = torch.randn((2, 3, 448, 448))
    # calling the module runs its forward()
    output = net(dummy_batch)
    print(output.shape)

test()

In [None]:
class yolo_loss(nn.Module):
    """Sum-of-squares YOLOv1 loss.

    Per grid cell the prediction is laid out as ``c`` class scores followed by
    ``(confidence, x, y, w, h)`` groups; the target uses the same layout with
    only the first group filled in. Of the first two predicted boxes, the one
    with the higher IoU against the target box is the one scored. Predicted
    boxes beyond the second are not scored.

    Args:
        s: grid size (cells per side).
        b: number of boxes per cell in the prediction layout.
        c: number of classes.
    """

    def __init__(self , s = 7 , b = 2 , c = 20):
        super(yolo_loss , self).__init__()
        self.mse = nn.MSELoss(reduction = "sum")
        self.s = s
        self.b = b
        self.c = c
        # weights of the no-object confidence term and of the box term
        self.lambda_noobj = 0.5
        self.lambda_coord = 5

    def forward(self , predictions , targets):
        """Return the scalar loss.

        Args:
            predictions: tensor reshapeable to ``(N, s, s, c + 5 * b)``.
            targets: tensor of shape ``(N, s, s, c + 5 * b)``.
        """
        c = self.c
        # Offsets derived from the class count instead of the literal 20.
        conf1 = slice(c , c + 1)
        box1 = slice(c + 1 , c + 5)
        conf2 = slice(c + 5 , c + 6)
        box2 = slice(c + 6 , c + 10)

        predictions = predictions.reshape(-1 , self.s , self.s , c + self.b * 5)

        iou_b1 = intersection_over_union(predictions[... , box1] , targets[... , box1])
        iou_b2 = intersection_over_union(predictions[... , box2] , targets[... , box1])
        ious = torch.cat([iou_b1.unsqueeze(0) , iou_b2.unsqueeze(0)] , dim = 0)
        # best_box is 0 or 1: which predicted box overlaps the target most.
        _ , best_box = torch.max(ious , dim = 0)

        # 1 where the cell contains an object, 0 elsewhere; shape (N, s, s, 1).
        exists_box = targets[... , conf1]

        # ---- box coordinate loss (cells that contain an object) ----
        boxes_predictions = exists_box * (
            best_box * predictions[... , box2] + (1 - best_box) * predictions[... , box1]
        )
        boxes_targets = exists_box * targets[... , box1]

        # Width/height are compared through their square roots. The sign/abs
        # pair keeps negative predictions valid and the epsilon keeps the
        # gradient of sqrt finite at 0. Tensors are rebuilt with cat rather
        # than written in place.
        pred_wh = torch.sign(boxes_predictions[... , 2:4]) * torch.sqrt(
            torch.abs(boxes_predictions[... , 2:4] + 1e-6)
        )
        boxes_predictions = torch.cat((boxes_predictions[... , :2] , pred_wh) , dim = -1)
        boxes_targets = torch.cat(
            (boxes_targets[... , :2] , torch.sqrt(boxes_targets[... , 2:4])) , dim = -1
        )

        boxes_losses = self.mse(
            torch.flatten(boxes_predictions , end_dim = -2) ,
            torch.flatten(boxes_targets , end_dim = -2)
        )

        # ---- confidence loss for cells that contain an object ----
        pred_confidence = (
            best_box * predictions[... , conf2] + (1 - best_box) * predictions[... , conf1]
        )
        boxes_obj_loss = self.mse(
            torch.flatten(exists_box * pred_confidence) ,
            torch.flatten(exists_box * targets[... , conf1])
        )

        # ---- confidence loss for cells without an object (both boxes) ----
        no_box = 1 - exists_box
        noobj_target = torch.flatten(no_box * targets[... , conf1] , start_dim = 1)
        boxes_noobj_loss = self.mse(
            torch.flatten(no_box * predictions[... , conf1] , start_dim = 1) ,
            noobj_target
        ) + self.mse(
            torch.flatten(no_box * predictions[... , conf2] , start_dim = 1) ,
            noobj_target
        )

        # ---- class loss (cells that contain an object) ----
        class_loss = self.mse(
            torch.flatten(exists_box * predictions[... , :c] , end_dim = -2) ,
            torch.flatten(exists_box * targets[... , :c] , end_dim = -2)
        )

        total_loss = (
            self.lambda_coord * boxes_losses
            + boxes_obj_loss
            + self.lambda_noobj * boxes_noobj_loss
            + class_loss
        )
        return total_loss

In [None]:
class pascal_dataset (torch.utils.data.Dataset):
    """Dataset of images with YOLO-style text labels.

    The CSV file lists one sample per row: column 0 is the image file name
    (relative to ``img_dir``) and column 1 the label file name (relative to
    ``label_dir``). Each label line is ``class x y width height`` with the
    coordinates given as fractions of the image size.

    Args:
        csv_file: path of the CSV index.
        img_dir: directory containing the images.
        label_dir: directory containing the label files.
        s: grid size (cells per side).
        b: number of boxes per cell in the label layout.
        c: number of classes.
        transform: optional callable ``(image, boxes) -> (image, boxes)``.
    """

    def __init__(self , csv_file , img_dir , label_dir , s = 7 ,b = 2, c = 20 ,transform = None):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.s = s
        self.b = b
        self.c = c
        self.transform = transform

    def __len__(self):
        """Number of samples listed in the CSV index."""
        return len(self.annotations)

    def __getitem__ (self , index):
        """Return ``(image, label_matrix)`` for sample ``index``.

        ``label_matrix`` has shape ``(s, s, c + 5 * b)``. For the cell that
        contains a box centre it holds a one-hot class vector, an objectness
        flag at index ``c`` and the cell-relative box at ``c + 1 : c + 5``.
        Only the first box falling into a cell is stored.
        """
        label_path = os.path.join(self.label_dir , self.annotations.iloc[index , 1])
        boxes = []
        with open(label_path) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # tolerate blank lines (e.g. a trailing newline)
                    continue
                # float() also accepts class ids written as "14.0", which
                # int() on the raw string would reject.
                class_label , x , y , width , height = (float(value) for value in fields)
                boxes.append([class_label , x , y , width , height])

        img_path = os.path.join(self.img_dir , self.annotations.iloc[index , 0])
        image = Image.open(img_path)

        boxes = torch.tensor(boxes)
        if self.transform:
            # Keep the boxes returned by the transform; they used to be
            # assigned to an unused name and silently dropped.
            image , boxes = self.transform(image , boxes)
            boxes = torch.as_tensor(boxes)

        label_matrix = torch.zeros((self.s , self.s , self.c + self.b * 5))
        for box in boxes:
            class_label , x , y , width , height = box.tolist()
            class_label = int(class_label)
            # Row/column of the cell containing the box centre. A coordinate
            # of exactly 1.0 would index one past the grid, so clamp it.
            i = min(int(self.s * y) , self.s - 1)
            j = min(int(self.s * x) , self.s - 1)
            # Centre relative to that cell; size expressed in cell units.
            x_cell , y_cell = (self.s * x) - j , (self.s * y) - i
            width_cell , height_cell = width * self.s , height * self.s

            if label_matrix[i , j , self.c] == 0:
                label_matrix[i , j , self.c] = 1
                label_matrix[i , j , self.c + 1 : self.c + 5] = torch.tensor(
                    [x_cell , y_cell , width_cell , height_cell]
                )
                label_matrix[i , j , class_label] = 1

        return image , label_matrix


In [None]:
# ---- Training configuration (module-level settings read by the functions below) ----
seed = 123
torch.manual_seed(seed)  # seed torch's random number generator
learning_rate = 2e-5
batch_size = 12
# weight decay (L2 regularisation) is switched off for simplicity
weight_decay = 0
epochs = 600
# number of DataLoader worker processes; 0 loads the data in the main process
# (the original note suggests 2 once multi-worker loading works)
num_workers = 0
# passed to DataLoader(pin_memory=...)
pin_memory = True
# when True, main() restores model and optimizer from model_dir before training
load_model = False
device = "cpu"
model_dir = "D:\\هنا حيث الروعة كلها\\machine learning\\archive (6)\\yolo_model.pth.tar"  # checkpoint file written by main() and read back when load_model is True

# NOTE(review): absolute, machine-specific paths; they must be adapted before
# running on another machine.
img_dir = "D:\\هنا حيث الروعة كلها\\machine learning\\archive (6)\\images"
labels_dir = "D:\\هنا حيث الروعة كلها\\machine learning\\archive (6)\\labels"

class compose(object):
    """Chain image transforms while carrying the bounding boxes along untouched."""

    def __init__ (self , transforms):
        self.transforms = transforms

    def __call__ (self , img , bboxes):
        """Apply every transform to ``img`` in order; return ``(img, bboxes)``."""
        for step in self.transforms:
            # only the image is transformed, the boxes are passed through as-is
            img = step(img)
        return img , bboxes
        

# Resize every image to 448x448 and convert it to a tensor; compose hands the boxes back unchanged.
transform = compose([transforms.Resize((448, 448)), transforms.ToTensor()])

def train_fn (train_loader , model , optimizer , loss_fn):
    """Run one pass over ``train_loader`` and print the mean batch loss.

    Each batch is moved to the module-level ``device``, pushed through the
    model, and followed by one optimizer step. Progress is shown with tqdm.
    """
    progress = tqdm(train_loader , leave = True)
    batch_losses = []

    for inputs , labels in progress:
        inputs = inputs.to(device)
        labels = labels.to(device)

        loss = loss_fn(model(inputs) , labels)
        batch_losses.append(loss.item())

        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()        # back-propagate to get dL/dw
        optimizer.step()       # update the parameters

        progress.set_postfix(loss = loss.item())

    print (f"Mean loss is {sum(batch_losses) / (len(batch_losses)+ 1e-6)}")
        
def main ():
    """Train the detector, checkpointing whenever the training-set mAP improves.

    Every epoch first measures mAP on the training loader, saves a checkpoint
    when that mAP is above 0.9 and better than any seen so far, and stops
    early once it exceeds 0.99 after at least 200 epochs. One training pass
    follows each evaluation.
    """
    model = yolov1 (split_size = 7 , num_boxes = 2 , num_classes = 20).to(device)
    optimizer = optim.Adam(
        model.parameters() , lr = learning_rate , weight_decay = weight_decay
    )
    loss_fn = yolo_loss()

    if load_model:
        load_checkpoint(torch.load(model_dir), model, optimizer)

    train_dataset = pascal_dataset(
        "D:\\هنا حيث الروعة كلها\\machine learning\\archive (6)\\100examples.csv",
        transform = transform,
        img_dir = img_dir,
        label_dir = labels_dir,
    )
    test_dataset = pascal_dataset(
        "D:\\هنا حيث الروعة كلها\\machine learning\\archive (6)\\test.csv",
        transform = transform,
        img_dir = img_dir,
        label_dir = labels_dir,
    )

    # options shared by both loaders
    loader_options = dict(
        batch_size = batch_size,
        num_workers = num_workers,
        pin_memory = pin_memory,
        shuffle = True,
    )
    train_loader = DataLoader(dataset = train_dataset , drop_last = False , **loader_options)
    # built but not used by the loop below
    test_loader = DataLoader(dataset = test_dataset , drop_last = True , **loader_options)

    def write_checkpoint():
        # Persist model and optimizer state, then pause for 10 seconds.
        save_checkpoint(
            {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            },
            filename=model_dir,
        )
        time.sleep(10)

    max_map = -1
    for epoch in range (epochs):
        pred_boxes , target_boxes = get_bboxes(
            train_loader , model , iou_threshold = 0.5 , threshold = 0.4
        )
        mean_avg_percision = performance(
            pred_boxes , target_boxes , iou_thresh = 0.5 , box_format = "midpoint"
        )
        print(f"mean average percision: {mean_avg_percision}")
        print("epoch: " , epoch)

        if mean_avg_percision > 0.9:
            # good enough and trained long enough: save and stop
            if mean_avg_percision > 0.99 and epoch >= 200:
                write_checkpoint()
                break

            # otherwise keep the best model seen so far
            if mean_avg_percision > max_map:
                max_map = mean_avg_percision
                write_checkpoint()

        train_fn (train_loader , model , optimizer , loss_fn)

if __name__ == "__main__":
    main()

In [None]:
# ---- Evaluation configuration ----
# NOTE(review): these settings repeat the values of the training cell; keep
# both in sync or define them once.
seed = 123
torch.manual_seed(seed)  # seed torch's random number generator
learning_rate = 2e-5
batch_size = 12
# weight decay (L2 regularisation) is switched off for simplicity
weight_decay = 0
epochs = 600
# number of DataLoader worker processes; 0 loads the data in the main process
# (the original note suggests 2 once multi-worker loading works)
num_workers = 0
# passed to DataLoader(pin_memory=...)
pin_memory = True
load_model = False
device = "cpu"
model_dir = "D:\\هنا حيث الروعة كلها\\machine learning\\archive (6)\\yolo_model.pth.tar"  # checkpoint file that the evaluation code below loads

# NOTE(review): absolute, machine-specific paths; they must be adapted before
# running on another machine.
img_dir = "D:\\هنا حيث الروعة كلها\\machine learning\\archive (6)\\images"
labels_dir = "D:\\هنا حيث الروعة كلها\\machine learning\\archive (6)\\labels"

class compose(object):
    """Chain image transforms while carrying the bounding boxes along untouched."""

    def __init__ (self , transforms):
        self.transforms = transforms

    def __call__ (self , img , bboxes):
        """Apply every transform to ``img`` in order; return ``(img, bboxes)``."""
        for step in self.transforms:
            # only the image is transformed, the boxes are passed through as-is
            img = step(img)
        return img , bboxes
        

# Resize every image to 448x448 and convert it to a tensor; compose hands the boxes back unchanged.
transform = compose([transforms.Resize((448, 448)), transforms.ToTensor()])


# Class names in class-id order, as used by the plotting code.
_VOC_CLASS_NAMES = (
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "dining table",
    "dog",
    "horse",
    "motorbike",
    "person",
    "potted plant",
    "sheep",
    "sofa",
    "train",
    "TV monitor",
)


def _voc_class_name(class_id):
    """Map a (possibly float) class id to its name, or "Unknown" if it is not a valid id."""
    if class_id == int(class_id) and 0 <= int(class_id) < len(_VOC_CLASS_NAMES):
        return _VOC_CLASS_NAMES[int(class_id)]
    return "Unknown"


def test_yolo (model_dir  , test_data , s=7 , b=2 ,c=20 ):
    """Load a trained checkpoint and plot its detections on the test images.

    For every batch of the test loader the first (up to) 8 images are shown
    with the boxes that survive non-maximum suppression, each labelled with
    its own class name.

    Args:
        model_dir: path of the checkpoint file to load.
        test_data: path of the CSV index describing the test set.
        s: grid size used to build the model and decode its output.
        b: number of boxes per cell used to build the model.
        c: number of classes used to build the model.
    """
    model = yolov1 (split_size = s , num_boxes = b , num_classes = c).to(device)
    # load_checkpoint requires an optimizer to restore into, so one is built
    # even though no training happens here.
    optimizer = optim.Adam(
        model.parameters() , lr = learning_rate , weight_decay = weight_decay
    )
    load_checkpoint(torch.load(model_dir), model, optimizer)
    test_dataset = pascal_dataset(
        test_data,
        transform = transform,
        img_dir = img_dir,
        label_dir = labels_dir,
    )
    test_loader = DataLoader(
        dataset = test_dataset,
        batch_size = batch_size,
        num_workers = num_workers,
        pin_memory = pin_memory,
        shuffle = True ,
        drop_last = True,
    )

    # Inference mode: batch norm uses its running statistics, matching how
    # get_bboxes evaluates the model during training.
    model.eval()
    images_per_batch = 8

    for x , y in test_loader:
        x = x.to(device)
        # One forward pass and one decode per batch (previously repeated for
        # every plotted image); no gradients are needed for plotting.
        with torch.no_grad():
            batch_boxes = cellboxes_to_boxes(model(x) , S = s)

        for idx in range(min(images_per_batch , x.shape[0])):
            kept = non_max_supression(
                batch_boxes[idx], iou_thresh=0.5, prob_thresh=0.4, box_format="midpoint"
            )
            # One label per surviving box; an empty list when nothing was
            # detected (this used to raise IndexError).
            pred = [_voc_class_name(box[0]) for box in kept]

            # plotting needs (height, width, channels) instead of
            # (channels, height, width)
            plot_image(x[idx].permute(1,2,0).to("cpu"), kept , pred)

test_yolo(model_dir , "D:\\هنا حيث الروعة كلها\\machine learning\\archive (6)\\test.csv")

In [None]:
# ---- Compute the loss of the saved model on the test set ----
model = yolov1 (split_size = 7 , num_boxes = 2 , num_classes = 20).to(device)
# load_checkpoint requires an optimizer to restore into
optimizer = optim.Adam(
    model.parameters() , lr = learning_rate , weight_decay = weight_decay
)
load_checkpoint(torch.load(model_dir), model, optimizer)
test_dataset = pascal_dataset(
    "D:\\هنا حيث الروعة كلها\\machine learning\\archive (6)\\test.csv",
    transform = transform,
    img_dir = img_dir,
    label_dir = labels_dir,
)
test_loader = DataLoader(
    dataset = test_dataset,
    batch_size = batch_size,
    num_workers = num_workers,
    pin_memory = pin_memory,
    shuffle = True ,
    drop_last = True,
)

# Build the loss once instead of once per batch.
loss_fn = yolo_loss()
# Stays None when the loader yields no batch, so the print below cannot fail
# with a NameError.
loss = None

# Evaluate in inference mode so batch norm uses its running statistics.
model.eval()
with torch.no_grad():
    for x , y in test_loader:
        # Inputs and labels must live on the same device as the model output.
        x , y = x.to(device) , y.to(device)
        y_eval = model(x)
        loss = loss_fn(y_eval , y)

# Loss of the last evaluated batch.
print(loss)