# Training Custom YOLO

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CocoDetection
from torchvision.models.detection import yolov3_tiny

In [None]:
# Build the detector without pretrained weights.
# NOTE(review): confirm that torchvision.models.detection really exports
# `yolov3_tiny`; the import at the top of the notebook fails otherwise.
model = yolov3_tiny(pretrained=False)

# Restore parameters from a local checkpoint file.
# NOTE(review): torch.load reads PyTorch-serialized objects; the `.weights`
# extension suggests a Darknet file -- confirm the file format.
model.load_state_dict(torch.load('yolov3-tiny.weights'))

num_classes = 80  
# Replace entry 6 of `model.classifier` with a new linear layer that has
# `num_classes` outputs.
# NOTE(review): presumes the model has an indexable `classifier` whose entry 6
# takes 1024 input features -- confirm against the model definition.
model.classifier[6] = nn.Linear(1024, num_classes)

# Images are only converted to tensors; no resizing or normalisation is applied.
transform = transforms.Compose([
    transforms.ToTensor(),
])
# NOTE(review): placeholder paths; `transform` is applied to the images only.
train_dataset = CocoDetection(root='/path/to/coco/', annFile='/path/to/coco/annotations/instances_train2014.json', transform=transform)
# NOTE(review): the default collate function is used with batch_size=32 --
# confirm it can batch the images/annotations this dataset returns.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


# Plain SGD with momentum over every model parameter.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# NOTE(review): CrossEntropyLoss is a classification loss -- confirm it matches
# the model outputs and the dataset targets used below.
criterion = nn.CrossEntropyLoss()


epochs = 10  
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(epochs):
    for images, targets in train_loader:
        # NOTE(review): `.to(device)` requires both objects to be tensors --
        # confirm this holds for the targets produced by the loader.
        images, targets = images.to(device), targets.to(device)

        # Standard step: clear gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

    # Reports the loss of the last batch of the epoch (not an epoch average).
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

# Persist only the parameters (state dict), not the whole model object.
torch.save(model.state_dict(), 'yolov3_tiny_custom.pth')


# Extracting Weights and Bias

In [None]:
import tensorflow as tf
import numpy as np

In [None]:
# Load the Keras model stored at `keras_model_path`.
keras_model_path = 'yolov3-tiny.h5'
loaded_model = tf.keras.models.load_model(keras_model_path)

# `get_weights()` output is enumerated below; each entry is flattened, so it
# is expected to be array-like.
weights = loaded_model.get_weights()

# Write one text file per weight entry (`layer_<i>_weights.txt`), one value
# per line, in flattened order.
# NOTE(review): the index `i` counts weight entries, which need not coincide
# with layer numbers -- confirm the naming is what the consumer expects.
for i, layer_weights in enumerate(weights):
    filename = f'layer_{i}_weights.txt'
    with open(filename, 'w') as file:
        for weight in layer_weights.flatten():
            file.write(f'{weight}\n')

# Test Image and Kernel

In [None]:
# Hand-written 5x5x3 integer test image (5 rows, 5 columns, 3 channels).
# NOTE(review): `[40,42,42]` breaks the otherwise consecutive numbering
# (41 expected?) -- confirm whether this is intentional.
three_d_im = np.array([[[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15]],
                      [[16,17,18],[19,20,21],[22,23,24],[25,26,27],[28,29,30]],
                      [[-31,-32,-33],[-34,-35,-36],[37,38,39],[40,42,42],[43,44,45]],
                      [[-46,-47,-48],[-49,-50,-51],[52,53,54],[55,56,57],[58,59,60]],
                      [[-61,-62,-63],[-64,-65,-66],[67,68,69],[70,71,72],[73,74,75]] 
                      ])
# 3x3x3 integer test kernel; the same 3x3 pattern is repeated along axis 0.
three_d_kernel = np.array([
                          [[1, 2, 1],[2, 1, 2],[1, 2, 1]],
                          [[1, 2, 1],[2, 1, 2],[1, 2, 1]],
                          [[1, 2, 1],[2, 1, 2],[1, 2, 1]]
                          ])
# Expected output: (5, 5, 3) and (3, 3, 3).
print(three_d_im.shape)
print(three_d_kernel.shape)

# Check array dimensions

In [None]:
# Non-square 5x4x3 integer test image (5 rows, 4 columns, 3 channels).
# NOTE(review): `[40,42,42]` breaks the otherwise consecutive numbering
# (41 expected?) -- confirm whether this is intentional.
three_d_i = np.array([[[1,2,3],[4,5,6],[7,8,9],[10,11,12]],
                      [[16,17,18],[19,20,21],[22,23,24],[25,26,27]],
                      [[-31,-32,-33],[-34,-35,-36],[37,38,39],[40,42,42]],
                      [[-46,-47,-48],[-49,-50,-51],[52,53,54],[55,56,57]],
                      [[-61,-62,-63],[-64,-65,-66],[67,68,69],[70,71,72]] 
                      ])
# Expected output: (5, 4, 3).
print(three_d_i.shape)
# Axis order used throughout the notebook: (height, width, depth).
height, width, depth  = three_d_i.shape
print(height)
# Row 1, column 2, channel 2 -> 24.
print(three_d_i[1,2,2])

# Convolution

In [None]:
def three_d_conv_one_filter(img, kernel):
    """Slide one 3-D kernel over a 3-D image without padding (stride 1).

    The kernel is applied as a cross-correlation (it is not flipped) along all
    three axes, so every output axis has length ``image - kernel + 1``.

    Args:
        img: array indexed as ``[row, column, depth]``.
        kernel: array indexed as ``[row, column, depth]``.

    Returns:
        Array of shape ``(H - kh + 1, W - kw + 1, D - kd + 1)`` with the dtype
        of ``img``; each sum is cast to that dtype when it is stored.
    """
    img_shape = img.shape
    k_rows, k_cols, k_planes = kernel.shape[0], kernel.shape[1], kernel.shape[2]

    out_rows = (img_shape[0] - k_rows) + 1
    out_cols = (img_shape[1] - k_cols) + 1
    out_planes = (img_shape[2] - k_planes) + 1

    result = np.zeros((out_rows, out_cols, out_planes), dtype=img.dtype)

    # Kernel taps in the accumulation order: depth, then row, then column.
    taps = [
        (kp, kr, kc)
        for kp in range(k_planes)
        for kr in range(k_rows)
        for kc in range(k_cols)
    ]
    # Output positions, depth-major.
    positions = (
        (p, r, c)
        for p in range(out_planes)
        for r in range(out_rows)
        for c in range(out_cols)
    )

    for p, r, c in positions:
        acc = 0
        for kp, kr, kc in taps:
            acc += img[r + kr, c + kc, p + kp] * kernel[kr, kc, kp]
        result[r, c, p] = acc

    return result

In [None]:
# Convolve the 5x4x3 sample image with the 3x3x3 kernel; the valid output
# shape is (5-3+1, 4-3+1, 3-3+1) = (3, 2, 1).
conv_result = three_d_conv_one_filter(three_d_i,three_d_kernel)
print(conv_result.shape,"\n\n", conv_result)

In [None]:
def three_d_conv(img, kernel):
    """Apply a bank of filters to a 3-D image (valid padding, stride 1).

    Each filter spans the full depth of the image, so it only slides along
    the two spatial axes and produces one output channel. The kernel is
    applied as a cross-correlation (it is not flipped).

    Args:
        img: array of shape ``(height, width, depth)``.
        kernel: array of shape ``(k_height, k_width, depth, filters)``.

    Returns:
        Array of shape ``(height - k_height + 1, width - k_width + 1,
        filters)`` with the dtype of ``img``; results are cast to that dtype
        when stored.

    Raises:
        ValueError: if ``img`` is not 3-D, ``kernel`` is not 4-D, the image
            depth differs from the kernel depth, or the kernel is larger than
            the image.
    """
    _, _, img_depth = img.shape
    k_height, k_width, k_depth, k_filters = kernel.shape

    # A depth mismatch used to be accepted silently: a deeper image kept only
    # the result of the last depth offset, a shallower one returned zeros.
    if img_depth != k_depth:
        raise ValueError(
            f"image depth ({img_depth}) must equal kernel depth ({k_depth})"
        )

    n_i_height = (img.shape[0] - k_height) + 1
    n_i_width = (img.shape[1] - k_width) + 1

    new_img = np.zeros((n_i_height, n_i_width, k_filters), dtype=img.dtype)

    for h in range(n_i_height):
        for w in range(n_i_width):
            window = img[h:h + k_height, w:w + k_width, :]
            # Contract the window against every filter in one step.
            new_img[h, w, :] = np.tensordot(window, kernel, axes=3)

    return new_img

# Batch Normalization

In [None]:
def batchNorm(img, gamma, beta, mean, variance):
    """Apply inference-time batch normalisation per channel.

    Computes ``gamma * (x - mean) / sqrt(variance + epsilon) + beta`` for
    every element, using the parameters of the element's channel (last axis)
    and ``epsilon = 0.001``.

    Args:
        img: array of shape ``(height, width, depth)``.
        gamma: per-channel scale, at least ``depth`` entries.
        beta: per-channel offset, at least ``depth`` entries.
        mean: per-channel mean, at least ``depth`` entries.
        variance: per-channel variance, at least ``depth`` entries.

    Returns:
        Floating-point array with the same shape as ``img``. The result is
        never truncated to an integer dtype, even for integer images.

    Raises:
        ValueError: if ``img`` is not 3-D or a parameter vector has fewer
            than ``depth`` entries.
    """
    epsilon = 0.001
    img = np.asarray(img)
    _, _, depth = img.shape

    params = []
    for name, values in (("gamma", gamma), ("beta", beta),
                         ("mean", mean), ("variance", variance)):
        values = np.asarray(values)
        if values.ndim != 1 or values.shape[0] < depth:
            raise ValueError(
                f"{name} must be a 1-D sequence with at least {depth} entries"
            )
        # Only the first `depth` entries are used, as before.
        params.append(values[:depth])
    gamma, beta, mean, variance = params

    # Broadcasting over the last axis keeps each output element at the same
    # (row, column, channel) position as its input element.
    x_norm = (img - mean) / np.sqrt(variance + epsilon)
    return gamma * x_norm + beta

In [None]:
# Small 2x2 image with 2 channels (integer literals) plus one
# gamma/beta/mean/variance value per channel.
image = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
gamma = np.array([0.5, 1.5])
beta = np.array([-0.5, 0.5])
mean = np.array([2.0, 4.0])
variance = np.array([1.5, 2.5])

# Normalise the sample and show the result together with its shape.
batchNorm_res = batchNorm(image,gamma,beta,mean,variance)
print(batchNorm_res.shape,"\n",batchNorm_res)

# LeakyReLU

In [None]:
def Leaky_ReLU(img, negative_slope=0.01):
    """Apply the leaky ReLU activation element-wise.

    Positive values pass through unchanged; every other value is multiplied
    by ``negative_slope`` and therefore keeps its sign. The earlier version
    multiplied by ``-0.01`` (flipping the sign) and stored results in an
    integer array (truncating them).

    Args:
        img: numeric array.
        negative_slope: multiplier for non-positive inputs. Defaults to
            0.01, the magnitude previously hard-coded.
            NOTE(review): confirm the slope the target network expects.

    Returns:
        Floating-point array with the same shape as ``img``.
    """
    img = np.asarray(img)
    return np.where(img > 0, img, negative_slope * img)

In [None]:
# Pass the convolution output through the activation and display it.
act_result = Leaky_ReLU(conv_result)
print(act_result.shape,"\n\n", act_result)

# MaxPool

In [None]:
def maxpool_three_d_image(img, stride, pool_size=2):
    """Max-pool every channel of a 3-D image.

    The output has ``height // stride`` rows and ``width // stride`` columns.
    Output cell ``(i, j)`` is the maximum of the window whose top-left corner
    is ``(i * stride, j * stride)``. A window that would run past the image
    border is clipped to it, so every output cell is always filled.

    The earlier version raised IndexError for some stride/size combinations
    (e.g. stride 3 on a 5x5 image) and left the last row/column at zero for
    stride 1.

    Args:
        img: array of shape ``(height, width, depth)``.
        stride: step between windows, a positive integer.
        pool_size: window edge length. Defaults to 2, the size previously
            hard-coded.

    Returns:
        Array of shape ``(height // stride, width // stride, depth)`` with
        the dtype of ``img``.

    Raises:
        ValueError: if ``stride`` or ``pool_size`` is smaller than 1, or
            ``img`` is not 3-D.
    """
    if stride < 1:
        raise ValueError("stride must be a positive integer")
    if pool_size < 1:
        raise ValueError("pool_size must be a positive integer")

    height, width, depth = img.shape
    pooled_height = height // stride
    pooled_width = width // stride

    output_image = np.zeros((pooled_height, pooled_width, depth), dtype=img.dtype)

    for out_h in range(pooled_height):
        top = out_h * stride
        for out_w in range(pooled_width):
            left = out_w * stride
            # Slicing clips the window at the border; it is never empty
            # because `top < height` and `left < width`.
            window = img[top:top + pool_size, left:left + pool_size, :]
            output_image[out_h, out_w, :] = window.max(axis=(0, 1))

    return output_image

In [None]:
# 5x5 single-channel integer image used by the pooling and concatenation
# demos below.
test_image = np.array([
    [[1], [2], [3], [4], [17]],
    [[5], [6], [7], [8], [18]],
    [[9], [10], [11], [12],[19]],
    [[13], [14], [15], [16],[20]],
    [[1], [100], [0], [18],[21]]
], dtype=int)

In [None]:
# Pool the 5x5x1 sample with stride 2; the result has 5 // 2 = 2 rows and
# columns.
print(test_image.shape)
maxpool_res = maxpool_three_d_image(test_image,2)
print(maxpool_res.shape,"\n\n",maxpool_res)

# Up sampling 

In [None]:
def upsample_three_d_image(img, stride):
    """Enlarge a 3-D array by repeating each element along the first two axes.

    Every element of ``img`` becomes a ``stride`` x ``stride`` block in the
    output; the last axis is left unchanged.

    Args:
        img: array with exactly three axes.
        stride: integer repetition factor applied to axis 0 and axis 1.

    Returns:
        Array of shape ``(n0 * stride, n1 * stride, n2)`` with the dtype of
        ``img``.
    """
    # Unpacking enforces the three-axis requirement.
    _axis0, _axis1, _axis2 = img.shape

    stretched = np.repeat(img, stride, axis=0)
    return np.repeat(stretched, stride, axis=1)

In [None]:
# Double the first two axes of the activation output and display the result.
upsample_result = upsample_three_d_image(act_result, 2)
print(upsample_result.shape,"\n\n", upsample_result)

# Concatenation

In [None]:
def Concatenation(conv1, conv2):
    """Stack two 3-D feature maps along the last (channel) axis.

    Channels of ``conv1`` come first, followed by the channels of ``conv2``.
    Both inputs must agree on their first two axes. Previously a smaller
    ``conv2`` silently left zero-filled gaps, a larger one failed with an
    IndexError, and ``conv2`` values were cast to ``conv1``'s dtype.

    Args:
        conv1: array with three axes.
        conv2: array with three axes whose first two match ``conv1``.

    Returns:
        Array of shape ``(n0, n1, depth1 + depth2)`` whose dtype is the NumPy
        promotion of both input dtypes.

    Raises:
        ValueError: if either input is not 3-D or the first two axes differ.
    """
    rows1, cols1, _ = conv1.shape
    rows2, cols2, _ = conv2.shape

    if (rows1, cols1) != (rows2, cols2):
        raise ValueError(
            "cannot concatenate feature maps with different spatial sizes: "
            f"{(rows1, cols1)} vs {(rows2, cols2)}"
        )

    return np.concatenate((conv1, conv2), axis=2)

In [None]:
# Concatenate the 5x5x1 sample with itself along the channel axis; the
# result has two identical channels, shape (5, 5, 2).
concat_res = Concatenation(test_image,test_image)
print(concat_res.shape,"\n",concat_res)

# YOLO

In [None]:
def yolo(img, anchors, yolo_size, objectness_threshold, input_size=(416, 416)):
    """Decode one detection-head output into absolute bounding boxes.

    The channel axis of ``img`` is split evenly between the boxes predicted
    per cell (half of the anchors). Within each box's channel group the first
    five values are read as ``[confidence, x, y, width, height]``.
    NOTE(review): confirm this field order against the network that produces
    ``img``; the raw confidence value is compared with the threshold as-is.

    The earlier version advanced only two channels per box, so the five
    fields of consecutive boxes overlapped.

    Args:
        img: array of shape ``(grid_height, grid_width, depth)``.
        anchors: array of shape ``(num_anchors, 2)``; column 0 scales the
            predicted width, column 1 the predicted height.
        yolo_size: ``1`` selects the first half of ``anchors``; any other
            value selects the second half.
        objectness_threshold: boxes whose confidence is not above this value
            are dropped.
        input_size: ``(width, height)`` of the network input used to scale
            grid coordinates. Defaults to the previously hard-coded 416x416.

    Returns:
        List of ``[confidence, x, y, width, height, 1]`` entries. ``x``/``y``
        are the grid cell offset plus the sigmoid of the prediction, scaled
        by the cell size; the trailing ``1`` is the "not checked" state flag
        consumed by ``nms``.

    Raises:
        ValueError: if ``img`` is not 3-D, ``anchors`` is not 2-D, or a box's
            channel group holds fewer than five values.
    """
    height, width, depth = img.shape
    num_anchors, _ = anchors.shape
    boxes_per_cell = num_anchors // 2

    detected_boxes = []
    if boxes_per_cell == 0:
        return detected_boxes

    if yolo_size == 1:
        yolo_anchors = anchors[:boxes_per_cell]
    else:
        yolo_anchors = anchors[boxes_per_cell:2 * boxes_per_cell]

    channels_per_box = depth // boxes_per_cell
    if channels_per_box < 5:
        raise ValueError(
            f"depth {depth} leaves {channels_per_box} channels per box; "
            "at least 5 are required"
        )

    img_width, img_height = input_size
    cell_width = img_width / width
    cell_height = img_height / height

    for h in range(height):
        for w in range(width):
            for b in range(boxes_per_cell):
                base = b * channels_per_box
                confidence = img[h, w, base]
                if confidence <= objectness_threshold:
                    continue

                pred_x = img[h, w, base + 1]
                pred_y = img[h, w, base + 2]
                pred_width = img[h, w, base + 3]
                pred_height = img[h, w, base + 4]

                sigmoid_x = 1 / (1 + np.exp(-pred_x))
                sigmoid_y = 1 / (1 + np.exp(-pred_y))

                x_absolute = (w + sigmoid_x) * cell_width
                y_absolute = (h + sigmoid_y) * cell_height
                width_absolute = np.exp(pred_width) * yolo_anchors[b, 0]
                height_absolute = np.exp(pred_height) * yolo_anchors[b, 1]

                detected_boxes.append(
                    [confidence, x_absolute, y_absolute,
                     width_absolute, height_absolute, 1]
                )

    return detected_boxes

# NMS

In [None]:
def _center_box_iou(box_a, box_b):
    """Return the IoU of two ``(x_center, y_center, width, height)`` boxes."""
    xa, ya, wa, ha = box_a
    xb, yb, wb, hb = box_b

    # Overlap extent along each axis, clamped at zero for disjoint boxes.
    overlap_w = min(xa + wa / 2, xb + wb / 2) - max(xa - wa / 2, xb - wb / 2)
    overlap_h = min(ya + ha / 2, yb + hb / 2) - max(ya - ha / 2, yb - hb / 2)
    area_intersection = max(overlap_w, 0) * max(overlap_h, 0)

    area_union = wa * ha + wb * hb - area_intersection
    # The small constant guards against division by zero for empty boxes.
    return area_intersection / (area_union + 1e-6)


def nms(detected_boxes, iou_threshold):
    """Greedy non-maximum suppression over decoded boxes.

    Boxes are visited in descending confidence order. Each box that is still
    unchecked is kept and suppresses every remaining unchecked box whose IoU
    with it exceeds ``iou_threshold``.

    Fixes over the earlier version: the sort no longer overwrites rows, the
    geometry is read from columns 1-4 instead of 0-3, and the intersection
    area is computed from overlap extents rather than edge coordinates.

    Args:
        detected_boxes: sequence of ``[confidence, x, y, width, height,
            state]`` rows, as produced by ``yolo``. ``x``/``y`` are treated
            as box centres (NOTE(review): confirm this convention). Only rows
            with state ``1`` (not checked) take part.
        iou_threshold: overlap above which the lower-confidence box is
            dropped.

    Returns:
        List of kept rows (NumPy arrays) in descending confidence order, each
        with its state set to ``2`` (checked and added). Empty input yields
        an empty list.
    """
    boxes = np.array(detected_boxes, dtype=float)
    if boxes.size == 0:
        return []

    # Stable sort so equal-confidence boxes keep their input order.
    boxes = boxes[np.argsort(-boxes[:, 0], kind="stable")]

    # State flags: 0 -> deleted, 1 -> not checked, 2 -> checked and added.
    for i in range(len(boxes)):
        if boxes[i, 5] != 1:
            continue
        boxes[i, 5] = 2
        for j in range(i + 1, len(boxes)):
            if boxes[j, 5] != 1:
                continue
            if _center_box_iou(boxes[i, 1:5], boxes[j, 1:5]) > iou_threshold:
                boxes[j, 5] = 0

    return [box for box in boxes if box[5] == 2]

In [None]:
# Random stand-in for a detection-head output: 13x13 grid, 255 channels.
image = np.random.rand(13, 13, 255).astype(np.float32)
# Twelve numbers regrouped into six 2-value anchor rows.
anchors = np.array([10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319]).reshape(-1, 2)

In [None]:
# Decode the random grid with yolo_size=1 and an objectness threshold of 0.5.
yolo_res = yolo(image, anchors, 1, 0.5)
yolo_res

In [None]:
# Suppress overlapping boxes (IoU threshold 0.7) and compare the number of
# boxes before and after.
nms_res = nms(yolo_res,0.7)
print(len(yolo_res),len(nms_res))
nms_res

# Helper methods

# Generate random 3d vector

In [1]:
import numpy as np

# 3D vector for image

In [2]:
def generate_random_3d_vector(height, width, depth):
    """Return a ``(height, width, depth)`` array of uniform samples.

    Values are drawn with ``np.random.uniform`` using lower bound 0 and upper
    bound 10, from NumPy's global random state.
    """
    shape = (height, width, depth)
    return np.random.uniform(low=0, high=10, size=shape)

In [3]:
# Dimensions of the random test image: 32x32 with 3 channels.
height = 32
width  = 32
depth  = 3

random_vector_32_32_3 = generate_random_3d_vector(height, width, depth)


# Only the shape is reported, not the values.
print(f"Random 3D Vector (shape: {random_vector_32_32_3.shape}):")

Random 3D Vector (shape: (32, 32, 3)):


# 4D vectors for kernels

In [None]:
def generate_random_4d_vector(height, width, depth, filters):
    """Return a ``(height, width, depth, filters)`` array of uniform samples.

    Values are drawn with ``np.random.uniform`` using lower bound 0 and upper
    bound 10, from NumPy's global random state.
    """
    shape = (height, width, depth, filters)
    return np.random.uniform(low=0, high=10, size=shape)

In [None]:
# Kernel bank dimensions: 3x3 window, 3 input channels, 16 filters.
krnl_height  = 3
krnl_width   = 3
krnl_depth   = 3
krnl_filters = 16
# The variable keeps its historical "3d" name because other cells refer to
# it, although the array has four axes.
random_3d_vector_3_3_3_16 = generate_random_4d_vector(krnl_height, krnl_width, krnl_depth, krnl_filters)


# The label now says 4D to match the array being reported.
print(f"Random 4D Vector (shape: {random_3d_vector_3_3_3_16.shape}):")

# Store the vector in a file 

# 3D image file

In [4]:
def write_vector_to_file(filename, vector):
    """Write a 3-D array to ``filename`` as a nested parenthesised literal.

    Layout: the whole array is wrapped in ``( ... )``; each slice along
    axis 0 is a parenthesised group on its own line (continuation lines are
    indented by four spaces); each innermost vector is written as
    ``(a, b, c)`` with elements formatted by ``str``.

    Args:
        filename: path of the file to create or overwrite.
        vector: array with three axes.
    """
    with open(filename, 'w') as file:
        groups = []
        for plane in range(vector.shape[0]):
            cells = [
                "(" + ", ".join(map(str, vector[plane, col])) + ")"
                for col in range(vector.shape[1])
            ]
            groups.append("(" + ", ".join(cells) + ")")
        # Groups are separated by a comma, a newline and a 4-space indent.
        file.write("(" + ",\n    ".join(groups) + ")")

In [5]:
# Dump the random 32x32x3 image to a text file.
filename = "random_vector_32_32_3.vhdl"
write_vector_to_file(filename, random_vector_32_32_3)

# 4D image file

In [None]:
def write_vector_to_file(filename, vector):
    """Write a 4-D array to ``filename`` as a nested parenthesised literal.

    This definition reuses the name of the 3-D writer defined earlier in the
    notebook and replaces it once this cell has run.

    Layout: the whole array is wrapped in ``( ... )``; each slice along
    axis 0 is a parenthesised group (groups after the first start on a new
    line indented by four spaces); inside it, each slice along axis 1 is a
    parenthesised group separated by a comma and a bare newline; each
    innermost vector is written as ``(a, b, c)`` with elements formatted by
    ``str``.

    Args:
        filename: path of the file to create or overwrite.
        vector: array with four axes.
    """
    with open(filename, 'w') as file:
        outer_groups = []
        for i in range(vector.shape[0]):
            inner_groups = []
            for j in range(vector.shape[1]):
                cells = [
                    "(" + ", ".join(map(str, vector[i, j, k])) + ")"
                    for k in range(vector.shape[2])
                ]
                inner_groups.append("(" + ", ".join(cells) + ")")
            outer_groups.append("(" + ",\n".join(inner_groups) + ")")
        file.write("(" + ",\n    ".join(outer_groups) + ")")

In [None]:
# Dump the random 3x3x3x16 kernel bank to a text file.
filename = "random_vector_3_3_3_16.vhdl"
write_vector_to_file(filename, random_3d_vector_3_3_3_16)

# Perform padding on an image

In [None]:
def pad_3d_image(image, padding):
    """Pad a 3-D array symmetrically with zeros.

    Args:
        image: array with three axes.
        padding: three values ``(pad_axis0, pad_axis1, pad_axis2)``; each is
            the number of zero entries added before and after the
            corresponding axis.

    Returns:
        New array produced by ``np.pad`` in ``'constant'`` mode.
    """
    along_rows, along_cols, along_depth = padding
    pad_widths = tuple(
        (amount, amount) for amount in (along_rows, along_cols, along_depth)
    )
    return np.pad(image, pad_widths, mode='constant')

# Demo: pad a random 2x2x3 array by one zero row on each side of axis 0 only.
image_shape = (2, 2, 3)
image = np.random.random(image_shape)

padding = (1, 0, 0)
padded_image = pad_3d_image(image, padding)

# The padded shape is (4, 2, 3).
print("Original Image Shape:", image.shape, "\n", image)
print("Padded Image Shape:", padded_image.shape, "\n", padded_image)


In [None]:
# NOTE(review): neither name is assigned in the visible part of this notebook
# (the arrays created above are `random_vector_32_32_3` and
# `random_3d_vector_3_3_3_16`) -- confirm they are defined elsewhere.
print(random_vector_416_416_3.shape)
print(random_3d_vector_3_3_16.shape)

In [None]:
# Run the multi-filter convolution on the random image and kernel bank.
# NOTE(review): `random_vector_416_416_3` is not assigned in the visible part
# of this notebook -- confirm it is defined elsewhere.
res = three_d_conv(random_vector_416_416_3, random_3d_vector_3_3_3_16)

In [None]:
res_batch = 