In [1]:
import os
from __future__ import division

import torch 
import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable
import numpy as np
import torch.optim as optim
torch.autograd.set_detect_anomaly(True)
cfgfile = "C:/Users/HX/Desktop/yolov4.cfg"
model_file_path = "Model/model.pt"
TL_model_file_path = "Model/TL_model.pt"

def read_cfg_file(cfgfile):
    file = open(cfgfile, 'r')
    lines = file.read().split('\n')

    layer_type = []
    layer_details = []
    current_layer_details = {}
    for line in lines:
        #print(line)
        if line == '':
            continue
        elif line[0] == '#':
            continue
        else:
            if (line[0] == '['):
                layer_type.append(line[1 : -1])
                if current_layer_details != {}:
                    layer_details.append(current_layer_details)
                    current_layer_details = {}
            else:
                current_layer_details.update([(line.split("=")[0].rstrip(), line.split("=")[1].lstrip())])
    layer_details.append(current_layer_details)
    return layer_type, layer_details

 

In [2]:
class Mish(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x = x * torch.tanh(F.softplus(x))
        return x

class Conv_Layer_box(nn.Module):
    def __init__(self, in_channel, out_channel, kernel_size, stride, activation_func, batch_normalization):
        super().__init__()
        padding = (int((kernel_size - 1)/2), int((kernel_size - 1)/2))
        #TBC: linear
        dict_activation_func = {"ReLU": nn.ReLU(inplace=False),
                                "linear": nn.ReLU(inplace=False),
                                "leaky": nn.LeakyReLU(0.1, inplace=False),
                                "mish": Mish()
                               }
        
        if batch_normalization == True:
            bias = False
        else:
            bias = True
        self.conv_box = nn.ModuleList()
        self.conv_box.append(nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, bias = bias))
        if batch_normalization == True:
            self.conv_box.append(nn.BatchNorm2d(out_channel))
        self.conv_box.append(dict_activation_func[activation_func])
        
    def forward(self, x):
        for layer in self.conv_box:
            x = layer(x)
        return x
    
class Maxpool_pad_Layer_box(nn.Module):
    def __init__(self, maxpool_size):
        super().__init__()
        self.maxpool_size = maxpool_size
        #why there are 2 padding??????????????
        self.pad_1 = int((self.maxpool_size - 1) / 2)
        self.pad_2 = self.pad_1
    def forward(self, x):
        x = F.pad(x, (self.pad_1, self.pad_2, self.pad_1, self.pad_2), mode='replicate')
        x = F.max_pool2d(x, self.maxpool_size, stride=1)
        return x
    
class Upsample_layer(nn.Module):
    def __init__(self, stride):
        super().__init__()
        self.stride = stride
        
    def forward(self, x):
        batch, channel, height, width = x.data.size()
        x = x.view(batch, channel, height, 1, width, 1).expand(batch, channel, height, self.stride, width, self.stride).clone()
        x = x.contiguous().view(batch, channel, height * self.stride, width * self.stride).clone()
        return x
    

        
        
class shortcut(nn.Module):
    def __init__(self):
        super().__init__()
        
class route(nn.Module):
    def __init__(self):
        super().__init__()

In [3]:
def cross_length(a_1, a_2, b_1, b_2):
    if a_1 <=b_1 and a_2 >= b_1:
        return (min(a_2, b_2) - b_1)
    elif a_1 <=b_1 and a_2 <= b_1:
        return 0
    else:
        return cross_length(b_1, b_2, a_1, a_2)

def IoU(x_GT, y_GT, h_GT, w_GT, x_PD, y_PD, h_PD, w_PD):
    area_of_I = cross_length(x_GT, x_GT + h_GT, x_PD, x_PD + h_PD) * cross_length(y_GT, y_GT + h_GT, y_PD, y_PD + h_PD)
    area_of_U = h_GT * w_GT + h_PD * w_PD - area_of_I
    return area_of_I / area_of_U



def axis_conversion(x_centre, y_centre, h, w):
    return (x_centre - h / 2, y_centre - w / 2, h, w)

In [4]:
25/(64)
axis_conversion(6, 6, 3,3)
#IoU(11,11,8,8,12,12,5,5)

(4.5, 4.5, 3, 3)

In [5]:
Yolo_input = np.array([1.0 for i in range(255 * 76 * 76 * 2)]).reshape(2, 255, 76, 76)
Yolo_input = torch.from_numpy(Yolo_input)
#input[:,:,0,0] = 2
#input[:,:,0,0]
anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
mask = [0, 1, 2]
classes = 80
input_image_size = 608

In [6]:
#add_on_Matrix_x = torch.from_numpy(np.array([[i for j in range(19)] for i in range(19)]))
#add_on_Matrix_y = [[i for i in range(19)] for j in range(19)]
#add_on_Matrix_x

In [7]:
class Yolo(nn.Module):
    def __init__(self, anchors, mask, classes, input_image_size):
        super().__init__()
        self.anchors = anchors
        self.mask = mask
        self.classes = classes
        self.number_of_mask = len(mask)
        self.input_image_size = input_image_size
        #self.Sigmoid_layer = nn.Sigmoid()

    def forward(self, x):
        
        mask = self.mask
        anchors = self.anchors
        classes = self.classes
        number_of_mask = self.number_of_mask
        input_image_size = self.input_image_size
        grid_size = int(input_image_size / x.size(2))
        #print(grid_size)
        #t_x = torch.from_numpy(np.array([0.0 for i in range(number_of_mask * x.size(0) * x.size(2) * x.size(3))]).reshape(number_of_mask * x.size(0), 1, x.size(2), x.size(3)))
        t_x = [None for i in range(number_of_mask)]
        t_y = [None for i in range(number_of_mask)]
        t_w = [None for i in range(number_of_mask)]
        t_h = [None for i in range(number_of_mask)]
        objective_p = [None for i in range(number_of_mask)]
        class_p = [None for i in range(number_of_mask)]
        
        #c_x = [i for i in range(x.size(2))]
        #c_y = [i for i in range(x.size(2))]
        
        add_on_Matrix_x = torch.from_numpy(np.array([[i for j in range(x.size(2))] for i in range(x.size(2))])).cuda()
        add_on_Matrix_y = torch.from_numpy(np.array([[i for i in range(x.size(2))] for j in range(x.size(2))])).cuda()
        
        b_x = [None for i in range(number_of_mask)]
        b_y = [None for i in range(number_of_mask)]
        b_w = [None for i in range(number_of_mask)]
        b_h = [None for i in range(number_of_mask)]
        
        
        anchor_shape_1 = int(len(anchors) / 2)
        anchors = np.array(anchors).reshape(anchor_shape_1, 2)
        
        #print(anchors)
        
        p_w = [None for i in range(number_of_mask)]
        p_h = [None for i in range(number_of_mask)]
        
        
        for i in range(number_of_mask):
            start_point = i * (5 + classes)
            end_point = (i + 1) * (5 + classes)
            p_w[i], p_h[i] = anchors[mask[i]]
            #print(p_w)
            #print(p_h)
            
            t_x[i] = x[:, (start_point + 0) : (start_point + 1), :, :].clone()
            
            t_y[i] = x[:, (start_point + 1) : (start_point + 2), :, :].clone()
            t_w[i] = x[:, (start_point + 2) : (start_point + 3), :, :].clone()
            t_h[i] = x[:, (start_point + 3) : (start_point + 4), :, :].clone()
            objective_p[i] = x[:, (start_point + 4) : (start_point + 5), :, :].clone()
            class_p[i] = x[:, (start_point + 5) : end_point, :, :].clone()
            #print(type(t_x[i]))
            
            #print(F.sigmoid(t_x[i].clone()))
            b_x[i] = torch.sigmoid(t_x[i].clone()) + add_on_Matrix_x
            #print(b_x[i])
            b_y[i] = torch.sigmoid(t_y[i].clone()) + add_on_Matrix_y
            
            #b_x[i][0, 0, :, :] = b_x[i][0, 0, :, :].clone()
            #b_y[i][0, 0, :, :] = b_y[i][0, 0, :, :].clone()
            
            """
            print(t_x[i].size())
            for m in range(x.size(2)):
                for n in range(x.size(2)):
                    b_x[i][:, :, c_x[m], c_y[n]] = c_x[m] + b_x[i][:, :, c_x[m], c_y[n]].clone()
                    b_y[i][:, :, c_x[m], c_y[n]] = c_y[n] + b_y[i][:, :, c_x[m], c_y[n]].clone()
            """
            #need to think whether need to use below 2 lines
            b_x[i] = grid_size * b_x[i].clone()
            b_y[i] = grid_size * b_y[i].clone()
            b_w[i] = p_w[i] * torch.exp(t_w[i].clone())
            b_h[i] = p_h[i] * torch.exp(t_h[i].clone())
            
            objective_p[i] = torch.sigmoid(objective_p[i].clone())
            class_p[i] = torch.sigmoid(class_p[i].clone())
            #torch.reshape(t_x[i])
        
        b_x = torch.stack(b_x).clone()
        b_y = torch.stack(b_y).clone()
        b_w = torch.stack(b_w).clone()
        b_h = torch.stack(b_h).clone()
        objective_p = torch.stack(objective_p).clone()
        class_p = torch.stack(class_p).clone()
        combined_yolo_output = torch.cat((b_x, b_y, b_w, b_h, objective_p, class_p), 2)
        #return b_x, b_y, b_w, b_h, objective_p, class_p
        #return combined_yolo_output
        
        
        #b_x = torch.stack(b_x).clone()
        return combined_yolo_output


"""

#Yolo_run_layer = Yolo(anchors, mask, classes, input_image_size)

#b_x, b_y, b_w, b_h, objective_p, class_p = Yolo_run_layer(Yolo_input)
#combined_yolo_output = torch.cat((b_x, b_y, b_w, b_h, objective_p, class_p), 2)

Yolo_input = np.array([1.0 for i in range(255 * 76 * 76 * 2)]).reshape(2, 255, 76, 76)
Yolo_input = torch.from_numpy(Yolo_input)
target = np.array([0 for i in range(3 * 76 * 76 * 85)])
input_tensor = Yolo_input
output_tensor = torch.Tensor(target)

learning_rate = 0.08
epoch_size = 5
steps_for_printing_out_loss = 1

YOLO_Module_WIP = Yolo(anchors, mask, classes, input_image_size)
YOLO_Module_WIP.cuda()
#Model_WIP.to(device)
loss_functioin = nn.MSELoss()
optimizer = optim.SGD(YOLO_Module_WIP.parameters(), lr = learning_rate)

input = input_tensor.cuda()
target = output_tensor.cuda()

def training_model():
    for i in range(1, epoch_size + 1):
        optimizer.zero_grad()
        output = YOLO_Module_WIP(input.cuda())
        print(output.size())
        #b_x, b_y, b_w, b_h, objective_p, class_p = YOLO_v4_Module_WIP(input.cuda())
        #output = b_x
        loss = loss_functioin(output, target.reshape(output.size(0), output.size(1), output.size(2), output.size(3), output.size(4)))
        loss.backward()
        optimizer.step()
        if i % (steps_for_printing_out_loss) == 0:
            print('Loss (epoch: ' + str(i) + '): ' + str(loss.cpu().detach().numpy()))
    torch.save({'state_dict': YOLO_v4_Module_WIP.state_dict(),'optimizer': optimizer.state_dict()}, model_file_path)

training_model()
"""

"\n\n#Yolo_run_layer = Yolo(anchors, mask, classes, input_image_size)\n\n#b_x, b_y, b_w, b_h, objective_p, class_p = Yolo_run_layer(Yolo_input)\n#combined_yolo_output = torch.cat((b_x, b_y, b_w, b_h, objective_p, class_p), 2)\n\nYolo_input = np.array([1.0 for i in range(255 * 76 * 76 * 2)]).reshape(2, 255, 76, 76)\nYolo_input = torch.from_numpy(Yolo_input)\ntarget = np.array([0 for i in range(3 * 76 * 76 * 85)])\ninput_tensor = Yolo_input\noutput_tensor = torch.Tensor(target)\n\nlearning_rate = 0.08\nepoch_size = 5\nsteps_for_printing_out_loss = 1\n\nYOLO_Module_WIP = Yolo(anchors, mask, classes, input_image_size)\nYOLO_Module_WIP.cuda()\n#Model_WIP.to(device)\nloss_functioin = nn.MSELoss()\noptimizer = optim.SGD(YOLO_Module_WIP.parameters(), lr = learning_rate)\n\ninput = input_tensor.cuda()\ntarget = output_tensor.cuda()\n\ndef training_model():\n    for i in range(1, epoch_size + 1):\n        optimizer.zero_grad()\n        output = YOLO_Module_WIP(input.cuda())\n        print(output

In [8]:
class Yolo_TL(nn.Module):
    def __init__(self, anchors, mask, classes, input_image_size):
        super().__init__()
        self.anchors = anchors
        self.mask = mask
        self.classes = classes
        self.number_of_mask = len(mask)
        self.input_image_size = input_image_size
        #self.Sigmoid_layer = nn.Sigmoid()

    def forward(self, x):
        
        mask = self.mask
        anchors = self.anchors
        classes = self.classes
        number_of_mask = self.number_of_mask
        input_image_size = self.input_image_size
        grid_size = int(input_image_size / x.size(2))
        #print(grid_size)
        #t_x = torch.from_numpy(np.array([0.0 for i in range(number_of_mask * x.size(0) * x.size(2) * x.size(3))]).reshape(number_of_mask * x.size(0), 1, x.size(2), x.size(3)))
        t_x = [None for i in range(number_of_mask)]
        t_y = [None for i in range(number_of_mask)]
        t_w = [None for i in range(number_of_mask)]
        t_h = [None for i in range(number_of_mask)]
        objective_p = [None for i in range(number_of_mask)]
        class_p = [None for i in range(number_of_mask)]
        
        #c_x = [i for i in range(x.size(2))]
        #c_y = [i for i in range(x.size(2))]
        
        #add_on_Matrix_x = torch.from_numpy(np.array([[i for j in range(x.size(2))] for i in range(x.size(2))])).cuda()
        #add_on_Matrix_y = torch.from_numpy(np.array([[i for i in range(x.size(2))] for j in range(x.size(2))])).cuda()
        
        b_x = [None for i in range(number_of_mask)]
        b_y = [None for i in range(number_of_mask)]
        b_w = [None for i in range(number_of_mask)]
        b_h = [None for i in range(number_of_mask)]
        
        
        anchor_shape_1 = int(len(anchors) / 2)
        anchors = np.array(anchors).reshape(anchor_shape_1, 2)
        
        #print(anchors)
        
        #p_w = [None for i in range(number_of_mask)]
        #p_h = [None for i in range(number_of_mask)]
        
        
        for i in range(number_of_mask):
            start_point = i * (5 + classes)
            end_point = (i + 1) * (5 + classes)
            #p_w[i], p_h[i] = anchors[mask[i]]
            #print(p_w)
            #print(p_h)
            
            t_x[i] = x[:, (start_point + 0) : (start_point + 1), :, :].clone()
            
            t_y[i] = x[:, (start_point + 1) : (start_point + 2), :, :].clone()
            t_w[i] = x[:, (start_point + 2) : (start_point + 3), :, :].clone()
            t_h[i] = x[:, (start_point + 3) : (start_point + 4), :, :].clone()
            objective_p[i] = x[:, (start_point + 4) : (start_point + 5), :, :].clone()
            class_p[i] = x[:, (start_point + 5) : end_point, :, :].clone()

            b_x[i] = torch.sigmoid(t_x[i].clone())
            b_y[i] = torch.sigmoid(t_y[i].clone())

            b_w[i] = torch.exp(t_w[i].clone())
            b_h[i] = torch.exp(t_h[i].clone())
            
            objective_p[i] = torch.sigmoid(objective_p[i].clone())
            class_p[i] = torch.sigmoid(class_p[i].clone())
            #torch.reshape(t_x[i])
        
        b_x = torch.stack(b_x).clone()
        b_y = torch.stack(b_y).clone()
        b_w = torch.stack(b_w).clone()
        b_h = torch.stack(b_h).clone()
        objective_p = torch.stack(objective_p).clone()
        class_p = torch.stack(class_p).clone()
        combined_yolo_output = torch.cat((b_x, b_y, b_w, b_h, objective_p, class_p), 2)
        #return b_x, b_y, b_w, b_h, objective_p, class_p
        #return combined_yolo_output
        
        
        #b_x = torch.stack(b_x).clone()
        return combined_yolo_output

In [9]:
#combined_yolo_output

In [10]:
layer_type, layer_details = read_cfg_file(cfgfile)

net_layer = layer_details[0]

layer_type = layer_type[1:]
layer_details = layer_details[1:]

print(len(layer_type))
print(len(layer_details))
print(layer_details[1])

162
162
{'batch_normalize': '1', 'filters': '64', 'size': '3', 'stride': '2', 'pad': '1', 'activation': 'mish'}


In [11]:
"""
anchor = 3
#yolo_layer = 3
output = (19 ** 2) * (1 + 4 + 16)
anchor * output
"""

'\nanchor = 3\n#yolo_layer = 3\noutput = (19 ** 2) * (1 + 4 + 16)\nanchor * output\n'

In [12]:
#build module for entire YOLO
class YOLO_v4_model(nn.Module):
    def __init__(self, layer_details, layer_type):
        super(YOLO_v4_model, self).__init__()
        self.all_layers = nn.ModuleList()
        all_layers = self.all_layers
        self.layer_details = layer_details
        self.layer_type = layer_type

        for i in range(len(layer_type)):
            if layer_type[i] == 'convolutional':
                #print(layer_details[i])
                #print(i)
                try:
                    if int(layer_details[i]['batch_normalize']) == 1:
                        batch_normalize = True
                    else:
                        batch_normalize = False
                except:
                    batch_normalize = False
                if i == 0:
                    in_channel = 3
                else:
                    in_channel = None
                    if layer_type[i - 1] == 'convolutional':
                        skip_step = [0]
                    elif layer_type[i - 1] == 'shortcut':
                        skip_step = [int(layer_details[i - 1]['from'])]
                    elif layer_type[i - 1] == 'route':
                        skip_step = layer_details[i - 1]['layers'].split(",")
                        
                    """
                    if skip_step > 0:
                        in_channel = int(layer_details[skip_step]['filters'])
                    else:
                        in_channel = int(layer_details[i - 1 + skip_step]['filters'])
                    """
                    for SS in skip_step:
                        SS = int(SS)
                        if SS > 0:
                            if in_channel == None:
                                in_channel = int(layer_details[SS]['filters'])
                            else:
                                in_channel += int(layer_details[SS]['filters'])
                        else:
                            if in_channel == None:
                                in_channel = int(layer_details[i - 1 + SS]['filters'])
                            else:
                                in_channel += int(layer_details[i - 1 + SS]['filters'])
                        
                out_channel = int(layer_details[i]['filters'])
                kernel_size = int(layer_details[i]['size'])
                stride = int(layer_details[i]['stride'])
                pad = int(layer_details[i]['pad'])
                activation_func = layer_details[i]['activation']
                layer = Conv_Layer_box(in_channel, out_channel, kernel_size, stride, activation_func, batch_normalize)
                #print(layer)
            elif layer_type[i] == 'maxpool':
                layer_details[i].update([('filters', layer_details[i - 1]['filters'])])
                maxpool_size = int(layer_details[i]['size'])
                #print(maxpool_size)
                layer = Maxpool_pad_Layer_box(maxpool_size)
                #print(layer)
            elif layer_type[i] == 'upsample':
                layer_details[i].update([('filters', layer_details[i - 1]['filters'])])
                stride = int(layer_details[i]['stride'])
                layer = Upsample_layer(stride)
            elif layer_type[i] == 'yolo':
                #print("yolo")
                anchors = [int(x) for x in layer_details[i]['anchors'].split(",")]
                
                mask = [int(x) for x in layer_details[i]['mask'].split(",")]
                classes = int(layer_details[i]['classes'])
                #input image size = 608 for now
                layer = Yolo(anchors, mask, classes, input_image_size)
                #print(anchors)
                #print(classes)
                #print(input_image_size)
                #continue
            elif layer_type[i] == 'shortcut':
                skip_step = int(layer_details[i]['from'])
                layer_details[i].update([('filters', layer_details[i + skip_step]['filters'])])
                layer = shortcut()
            elif layer_type[i] == 'route':
                try:
                    skip_step = int(layer_details[i]['layers'].split(",")[0])
                except:
                    skip_step = int(layer_details[i]['layers'])
                #print(skip_step)
                if skip_step > 0:
                    layer_details[i].update([('filters', layer_details[skip_step]['filters'])])
                else:
                    layer_details[i].update([('filters', layer_details[i + skip_step]['filters'])])
                layer = route()
            elif layer_type[i] == 'net':
                #print("net")
                continue
            else:
                continue

            all_layers.append(layer)
        global all_layerrr
        all_layerrr = all_layers

    def forward(self, x):
        all_layers = self.all_layers
        layers_output = [None for i in range(len(layer_type))]
        for i in range(len(layer_type)):
            #print(i)
            if i == 0:
                layers_output[i] = all_layers[i](x)
                continue
                
            elif layer_type[i] == 'yolo':
                layers_output[i] = all_layers[i](layers_output[i - 1])
                continue
            elif layer_type[i] == 'convolutional' or layer_type[i] == 'maxpool' or layer_type[i] == 'upsample' or layer_type[i] == 'yolo':
                layers_output[i] = all_layers[i](layers_output[i - 1])
                """
                try:
                    print("i: " + str(i) + str(layers_output[i].size()))
                except:
                    print("go")
                """
                continue
            elif layer_type[i] == 'shortcut':
                skip_step = [int(layer_details[i]['from'])]
            elif layer_type[i] == 'route':
                skip_step = layer_details[i]['layers'].split(",")
            for SS in skip_step:
                SS = int(SS)
                #print("SS" + str(i) + str(SS))
                #print(skip_step)
                
                #print(SS)
                #print(layers_output[i])
                #print(layers_output[i - 1 + SS])
                
                if SS > 0:
                    if layers_output[i] == None:
                        #print(layers_output[SS].size())
                        layers_output[i] = layers_output[SS]
                    else:
                        #print(layers_output[SS].size())
                        layers_output[i] = torch.cat((layers_output[i], layers_output[SS]), 1)
                else:
                    #print(i + SS)
                    if layers_output[i] == None:
                        #print(layers_output[i + SS].size())
                        layers_output[i] = layers_output[i + SS]
                    else:
                        
                        #print(layers_output[i + SS].size())
                        layers_output[i] = torch.cat((layers_output[i], layers_output[i + SS]), 1)
            """
            try:
                print("i: " + str(i) + str(layers_output[i].size()))
            except:
                print("go")
            """
        #print(layers_output[138].size())
        return layers_output[137], layers_output[148], layers_output[159]
    



In [13]:

input = np.array([1 for i in range(608 * 608 * 3 * 1)]).reshape(1, 3, 608, 608)
#target = np.array([0 for i in range(7 * 7 * 30)])
target = np.array([0 for i in range(3 * 19 * 19 * 85)])

input_tensor = torch.Tensor(input)
output_tensor = torch.Tensor(target)

#x = np.array([1 for i in range(608 * 608 * 3)]).reshape(1, 3, 608, 608)
#x = torch.tensor(x)

learning_rate = 0.08
epoch_size = 2
steps_for_printing_out_loss = 1

YOLO_v4_Module_WIP = YOLO_v4_model(layer_details, layer_type)
YOLO_v4_Module_WIP.cuda()



#YOLO_v4_Module_WIP.load_state_dict(torch.load("D:/Installation/yolov4.pt"))

#YOLO_v4_Module_WIP.load_weights("‪D:/Installation/yolov4.weights")
#Model_WIP.to(device)
loss_function = nn.MSELoss()
optimizer = optim.SGD(YOLO_v4_Module_WIP.parameters(), lr = learning_rate)


"""

for name, param in YOLO_v4_Module_WIP.named_parameters():
    print('name: ', name)
    print(type(param))
    print('param.shape: ', param.shape)
    print('param.requires_grad: ', param.requires_grad)
    print('=====')
#transfer learning:


for name, param in model.named_parameters():
    if name in ['fc.weight', 'fc.bias']:
        param.requires_grad = True
    else:
        param.requires_grad = False
"""
#YOLO_v4_Module_WIP.load_state_dict(torch.load("C:/Users/HX/Desktop/model.pt")['state_dict'])
#YOLO_v4_Module_WIP.eval()
input = input_tensor.cuda()
target = output_tensor.cuda()

def training_model():
    for i in range(1, epoch_size + 1):
        optimizer.zero_grad()
        output = YOLO_v4_Module_WIP(input.cuda())
        #print(output.size())
        #b_x, b_y, b_w, b_h, objective_p, class_p = YOLO_v4_Module_WIP(input.cuda())
        #output = b_x
        #loss = loss_function(output, target.reshape(output.size(0), output.size(1), output.size(2), output.size(3)))
        global output_tensor
        output_tensor = output
        loss = loss_function(output, target.reshape(output.size(0), output.size(1), output.size(2), output.size(3), output.size(4)))
        loss.backward()
        optimizer.step()
        if i % (steps_for_printing_out_loss) == 0:
            print('Loss (epoch: ' + str(i) + '): ' + str(loss.cpu().detach().numpy()))
    #torch.save({'state_dict': YOLO_v4_Module_WIP.state_dict(),'optimizer': optimizer.state_dict()}, model_file_path)


#training_model()
#torch.save({'output': output_tensor}, 'Model/output.pt')
#YOLO_v4_Module_WIP.state_dict()

In [14]:
m = []

In [15]:
training_data_image_folder = "F:/FlyAI/UnderwaterDetection_roundA/train-A/image/"
batch_size = 1
import math
import glob
image_path_list = glob.glob(training_data_image_folder + "*.jpg")

batch_no = math.ceil(len(image_path_list) / batch_size)
print(batch_no)



def image_reader(image_path_list):
    from PIL import Image
    import numpy as np
    final_output_array = []
    for image_path in image_path_list:
        image = Image.open(image_path)
        image = image.resize((608, 608))
        image = np.array(image)
        image_array = np.array([0 for i in range(3 * 608 * 608)]).reshape(3, 608, 608)
        for i in range(3):
            image_array[i] = image[:, :, i]
        #print(image_array.shape)
        final_output_array.append(image_array)
    return np.array(final_output_array)


def running_YOLO_v4_model(input):

    output = YOLO_v4_Module_WIP(input_tensor)
    return output

#########convert original image into output from YOLOv4
"""
YOLO_v4_Module_WIP = YOLO_v4_model(layer_details, layer_type)
YOLO_v4_Module_WIP.cuda()
YOLO_v4_Module_WIP.load_state_dict(torch.load("C:/Users/HX/Desktop/model.pt")['state_dict'])
YOLO_v4_Module_WIP.eval()
    
for current_batch_no in range(3774, len(image_path_list) // batch_size):
    input = image_reader(image_path_list[batch_size * current_batch_no: batch_size * (current_batch_no + 1)])
    input_tensor = torch.Tensor(input).cuda()
    #print(input.shape)
    output_137, output_148, output_159 = running_YOLO_v4_model(input_tensor)
    print(output_137.shape)
    file_name = 'F:/FlyAI/TL_input_data/' + str(current_batch_no) + '.pt'
    torch.save({'output_137': output_137, 'output_148': output_148, 'output_159': output_159}, file_name)
    
"""
#torch.save({'output_137': output_137}, 'Model/output.pt')

6575


'\nYOLO_v4_Module_WIP = YOLO_v4_model(layer_details, layer_type)\nYOLO_v4_Module_WIP.cuda()\nYOLO_v4_Module_WIP.load_state_dict(torch.load("C:/Users/HX/Desktop/model.pt")[\'state_dict\'])\nYOLO_v4_Module_WIP.eval()\n    \nfor current_batch_no in range(3774, len(image_path_list) // batch_size):\n    input = image_reader(image_path_list[batch_size * current_batch_no: batch_size * (current_batch_no + 1)])\n    input_tensor = torch.Tensor(input).cuda()\n    #print(input.shape)\n    output_137, output_148, output_159 = running_YOLO_v4_model(input_tensor)\n    print(output_137.shape)\n    file_name = \'F:/FlyAI/TL_input_data/\' + str(current_batch_no) + \'.pt\'\n    torch.save({\'output_137\': output_137, \'output_148\': output_148, \'output_159\': output_159}, file_name)\n    \n'

In [22]:
#all_layerrr

class transfer_learning_model(nn.Module):
    def __init__(self):
        super().__init__()
        self.anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
        self.mask_a = [0, 1, 2]
        self.mask_b = [3, 4, 5]
        self.mask_c = [6, 7, 8]
        #self.TL_model_list = nn.ModuleList()
        #TL_model_list.append(Conv_Layer_box(in_channel = 256, out_channel = 255, kernel_size = 1, stride = 1, activation_func = 'linear', batch_normalize = False))
        self.Conv_Layer_76_a = Conv_Layer_box(in_channel = 256, out_channel = 255, kernel_size = 1, stride = 1, activation_func = 'linear', batch_normalization = False)
        self.Sigmoid_layer_76 = nn.Sigmoid()
        self.Conv_Layer_76_b = Conv_Layer_box(in_channel = 255, out_channel = 27, kernel_size = 1, stride = 1, activation_func = 'linear', batch_normalization = False)
        #self.Yolo_Layer_76 = Yolo(self.anchors, self.mask_a, classes = 4, input_image_size = 608)
        self.Yolo_Layer_76 = Yolo_TL(self.anchors, self.mask_a, classes = 4, input_image_size = 608)
        
        self.Conv_Layer_38_a = Conv_Layer_box(in_channel = 512, out_channel = 255, kernel_size = 1, stride = 1, activation_func = 'linear', batch_normalization = False)
        self.Sigmoid_layer_38 = nn.Sigmoid()
        self.Conv_Layer_38_b = Conv_Layer_box(in_channel = 255, out_channel = 27, kernel_size = 1, stride = 1, activation_func = 'linear', batch_normalization = False)
        #self.Yolo_Layer_38 = Yolo(self.anchors, self.mask_b, classes = 4, input_image_size = 608)
        self.Yolo_Layer_38 = Yolo_TL(self.anchors, self.mask_b, classes = 4, input_image_size = 608)
        
        self.Conv_Layer_19_a = Conv_Layer_box(in_channel = 1024, out_channel = 255, kernel_size = 1, stride = 1, activation_func = 'linear', batch_normalization = False)
        self.Sigmoid_layer_19 = nn.Sigmoid()
        self.Conv_Layer_19_b = Conv_Layer_box(in_channel = 255, out_channel = 27, kernel_size = 1, stride = 1, activation_func = 'linear', batch_normalization = False)
        #self.Yolo_Layer_19 = Yolo(self.anchors, self.mask_c, classes = 4, input_image_size = 608)
        self.Yolo_Layer_19 = Yolo_TL(self.anchors, self.mask_c, classes = 4, input_image_size = 608)
        
    def forward(self, layer_137_out, layer_148_out, layer_159_out):
        out_76_1 = self.Conv_Layer_76_a(layer_137_out)
        out_76_1 = self.Sigmoid_layer_76(out_76_1)
        out_76_1 = self.Conv_Layer_76_b(out_76_1)
        out_76_1 = self.Yolo_Layer_76(out_76_1)
        
        out_38_1 = self.Conv_Layer_38_a(layer_148_out)
        out_38_1 = self.Sigmoid_layer_38(out_38_1)
        out_38_1 = self.Conv_Layer_38_b(out_38_1)
        out_38_1 = self.Yolo_Layer_38(out_38_1)
        
        out_19_1 = self.Conv_Layer_19_a(layer_159_out)
        out_19_1 = self.Sigmoid_layer_19(out_19_1)
        out_19_1 = self.Conv_Layer_19_b(out_19_1)
        out_19_1 = self.Yolo_Layer_19(out_19_1)
        #another option: only use 1 conv layer b, and Yolo layer
        return out_76_1, out_38_1, out_19_1
        

In [17]:
import glob
from PIL import ImageTk, Image
import numpy as np
import xml.etree.ElementTree as ET
#import training data

round_A_data_folder = "F:/FlyAI/UnderwaterDetection_roundA/"
round_A_image_folder = round_A_data_folder + "train-A/image/"
round_A_box_folder = round_A_data_folder + "train-A/box/"

box_file_path_list = glob.glob(round_A_box_folder + "*.xml")
box_file_name_list = [x.split('\\')[1] for x in box_file_path_list]

image_file_name_list = [(x.split('.')[0] + '.jpg') for x in box_file_name_list]
image_file_path_list = [(round_A_image_folder + x) for x in image_file_name_list]


def read_image(image_file_path_list):
    image_array_list = []
    for image_file_path in image_file_path_list:
        image = Image.open(image_file_path)
        image_data = image.getdata()
        image_array = np.array(image_data)
        image_array_list.append(image_array)
    return image_array_list
#best IOU anchor
def cross_length(a_1, a_2, b_1, b_2):
    if a_1 <=b_1 and a_2 >= b_1:
        return (min(a_2, b_2) - b_1)
    elif a_1 <=b_1 and a_2 <= b_1:
        return 0
    else:
        return cross_length(b_1, b_2, a_1, a_2)
"""
def IoU(x_GT, y_GT, w_GT, h_GT, x_PD, y_PD, w_PD, h_PD):
    area_of_I = cross_length(x_GT, x_GT + w_GT, x_PD, x_PD + w_PD) * cross_length(y_GT, y_GT + h_GT, y_PD, y_PD + h_PD)
    area_of_U = h_GT * w_GT + h_PD * w_PD - area_of_I
    return area_of_I / area_of_U
"""
anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
anchor_shape_1 = int(len(anchors) / 2)
anchors = np.array(anchors).reshape(anchor_shape_1, 2)
def best_anchor(image_info):
    max_IoU = 0
    for m in range(9):
        x_GT = 0
        y_GT = 0
        w_GT = image_info[9]
        h_GT = image_info[10]
        x_PD = image_info[9] / 2 - anchors[m][0] / 2
        y_PD = image_info[10] / 2 - anchors[m][1] / 2
        w_PD = anchors[m][0]
        h_PD = anchors[m][1]
        
        current_IOU = IoU(x_GT, y_GT, w_GT, h_GT, x_PD, y_PD, w_PD, h_PD)
        #print(current_IOU)
        if current_IOU >= max_IoU:
            max_IoU = current_IOU
            selected_anchors = m
    
    if selected_anchors < 3:
        grid_size = 608 / 76
    elif selected_anchors < 6:
        grid_size = 608 / 38
    elif selected_anchors < 9:
        grid_size = 608 / 19
    
    grid_no_x = int((image_info[2] + image_info[4]) / 2 / grid_size)
    grid_no_y = int((image_info[3] + image_info[5]) / 2 / grid_size)
    
    w_expanded_time = image_info[9] / w_PD
    h_expanded_time = image_info[10] / h_PD
    
    position_x = (image_info[2] + image_info[4]) / 2 / grid_size - grid_no_x
    position_y = (image_info[3] + image_info[5]) / 2 / grid_size - grid_no_y
    
    #print(image_info)
    #print(grid_no_x)
    #print(grid_no_y)
    return selected_anchors, grid_no_x, grid_no_y, position_x, position_y, w_expanded_time, h_expanded_time

def read_xml_into_training_data(box_file_path_list):
    All_image = []
    for box_file_path in box_file_path_list:
        dict_object = {"holothurian": 0, "echinus": 1, "scallop": 2, "starfish": 3}
        tree = ET.parse(box_file_path)
        root = tree.getroot()
        """
        root.tag
        root.attrib
        
        for child in root:
            for sub_child in child:
                print(sub_child.tag)
        """
        
        object = [None for i in range(6)]
        All_object = []
        All_image_size = []
        image_size = [None, None]
        frame_name = []
        image_size_width = []
        image_size_height = []
        object_name = []
        object_type = []
        object_xmin = []
        object_ymin = []
        object_xmax = []
        object_ymax = []

        for name in root.findall("./frame"):
            frame_name.append(name.text)
        for name in root.findall("./size/width"):
            image_size_width.append(int(name.text))
        for name in root.findall("./size/height"):
            image_size_height.append(int(name.text))

        for name in root.findall("./object/name"):
            object_name.append(name.text)

        for name in root.findall("./object/bndbox/xmin"):
            object_xmin.append(int(name.text))
        for name in root.findall("./object/bndbox/ymin"):
            object_ymin.append(int(name.text))
        for name in root.findall("./object/bndbox/xmax"):
            object_xmax.append(int(name.text))
        for name in root.findall("./object/bndbox/ymax"):
            object_ymax.append(int(name.text))

        for i in range(len(object_name)):
            current_object = []
            current_object.append(object_name[i])
            current_object.append(dict_object[object_name[i]])
            current_object.append(object_xmin[i])
            current_object.append(object_ymin[i])
            current_object.append(object_xmax[i])
            current_object.append(object_ymax[i])
            current_object.append(frame_name[0])
            current_object.append(image_size_width[0])
            current_object.append(image_size_height[0])
            All_object.append(current_object)
        All_image.append(All_object)
    return All_image


training_input_data = read_image(image_file_path_list[0: 2])

All_image = read_xml_into_training_data(box_file_path_list[0:200])

yolo_size = 608

for i in range(len(All_image)):
    for j in range(len(All_image[i])):
        All_image[i][j][2] = All_image[i][j][2] / All_image[i][j][7] * yolo_size
        All_image[i][j][4] = All_image[i][j][4] / All_image[i][j][7] * yolo_size
        All_image[i][j][3] = All_image[i][j][3] / All_image[i][j][8] * yolo_size
        All_image[i][j][5] = All_image[i][j][5] / All_image[i][j][8] * yolo_size
        All_image[i][j].append(All_image[i][j][4] - All_image[i][j][2])
        All_image[i][j].append(All_image[i][j][5] - All_image[i][j][3])
        selected_anchors, grid_no_x, grid_no_y, position_x, position_y, w_expanded_time, h_expanded_time = best_anchor(All_image[i][j])
        All_image[i][j].append(selected_anchors)
        All_image[i][j].append(grid_no_x)
        All_image[i][j].append(grid_no_y)
        All_image[i][j].append(position_x)
        All_image[i][j].append(position_y)
        All_image[i][j].append(w_expanded_time)
        All_image[i][j].append(h_expanded_time)
        
All_image[0][0]

['holothurian',
 0,
 225.46666666666667,
 233.62962962962962,
 265.3666666666667,
 322.5777777777778,
 '000001',
 1920,
 1080,
 39.900000000000006,
 88.94814814814816,
 3,
 15,
 17,
 0.33854166666666785,
 0.38148148148147953,
 0.08692810457516341,
 0.22181583079338693]

In [None]:
2.3/2 -1

In [37]:
#torch.autograd.set_detect_anomaly(True)
#training loop for transfer layer
def TL_model_training():
    learning_rate = 0.002
    epoch_size = 100
    batch_size = 1
    steps_for_printing_out_loss = 1
    
    loss_function_MSE = nn.MSELoss(size_average=False)
    loss_function_BCE = nn.BCELoss(size_average=False)
    
    #loss_function_MSE = nn.MSELoss()
    #loss_function_BCE = nn.BCELoss()
    
    TL_model = transfer_learning_model().cuda()
    
    #TL_model.load_state_dict(torch.load('Model/TL_model_starting_point.pt')['state_dict'])
    optimizer = optim.SGD(TL_model.parameters(), lr = learning_rate)
    TL_model.eval()
    
    for i in range(1, epoch_size + 1):
        #loss = 0
        optimizer.zero_grad()
        total_loss = 0
        for image_pt_name in range(0, 1):
            
            file_name = 'F:/FlyAI/TL_input_data/' + str(image_pt_name) + '.pt'
            input_data = torch.load(file_name)
            layer_137_out = input_data['output_137'].cuda()
            layer_148_out = input_data['output_148'].cuda()
            layer_159_out = input_data['output_159'].cuda()
            #print(layer_137_out.shape)

            
            
            output_76, output_38, output_19 = TL_model(layer_137_out, layer_148_out, layer_159_out)
            output_file_name = 'F:/FlyAI/TL_output_data/' + str(image_pt_name) + '.pt'
            torch.save({'output_76': output_76, 'output_38': output_38, 'output_19': output_19}, output_file_name)
            
            target_76 = output_76.clone()
            target_38 = output_38.clone()
            target_19 = output_19.clone()
            #there is a possibility of more than one GT are mapped into same grid of a anchor, may need to check from training data?
            image_set = range(0, 1)
            target_76[:,:,4,:,:] = 0
            target_38[:,:,4,:,:] = 0
            target_19[:,:,4,:,:] = 0
            
            for image_no in image_set:
                image_no_current_batch = image_no % batch_size
                
                for item in All_image[image_pt_name]:
                    #print(item)
                    anchor_no = item[11] % 3
                    grid_x = item[12]
                    grid_y = item[13]

                    obj_class_no = item[1]
                    central_x = (item[2] + item[4]) / 2
                    central_y = (item[3] + item[5]) / 2
                    width_x = item[9]
                    height_y = item[10]
                    
                    position_x = item[14]
                    position_y = item[15]
                    w_expanded_time = item[16]
                    h_expanded_time = item[17]
                    
                    if item[11] < 3:
                        #target_76[anchor_no,image_no_current_batch,4,:,:] = 0
                        target_76[anchor_no,image_no_current_batch,0,grid_y,grid_x] = position_x
                        target_76[anchor_no,image_no_current_batch,1,grid_y,grid_x] = position_y
                        target_76[anchor_no,image_no_current_batch,2,grid_y,grid_x] = w_expanded_time
                        target_76[anchor_no,image_no_current_batch,3,grid_y,grid_x] = h_expanded_time
                        target_76[anchor_no,image_no_current_batch,4,grid_y,grid_x] = 1

                        target_76[anchor_no,image_no_current_batch,5:9,grid_y,grid_x] = 0
                        target_76[anchor_no,image_no_current_batch,5 + obj_class_no,grid_y,grid_x] = 1
                    elif item[11] < 6:
                        #target_38[anchor_no,image_no_current_batch,4,:,:] = 0
                        target_38[anchor_no,image_no_current_batch,0,grid_y,grid_x] = position_x
                        target_38[anchor_no,image_no_current_batch,1,grid_y,grid_x] = position_y
                        target_38[anchor_no,image_no_current_batch,2,grid_y,grid_x] = w_expanded_time
                        target_38[anchor_no,image_no_current_batch,3,grid_y,grid_x] = h_expanded_time
                        target_38[anchor_no,image_no_current_batch,4,grid_y,grid_x] = 1

                        target_38[anchor_no,image_no_current_batch,5:9,grid_y,grid_x] = 0
                        target_38[anchor_no,image_no_current_batch,5 + obj_class_no,grid_y,grid_x] = 1
                    elif item[11] < 9:
                        #target_19[anchor_no,image_no_current_batch,4,:,:] = 0
                        target_19[anchor_no,image_no_current_batch,0,grid_y,grid_x] = position_x
                        target_19[anchor_no,image_no_current_batch,1,grid_y,grid_x] = position_y
                        target_19[anchor_no,image_no_current_batch,2,grid_y,grid_x] = w_expanded_time
                        target_19[anchor_no,image_no_current_batch,3,grid_y,grid_x] = h_expanded_time
                        target_19[anchor_no,image_no_current_batch,4,grid_y,grid_x] = 1

                        target_19[anchor_no,image_no_current_batch,5:9,grid_y,grid_x] = 0
                        target_19[anchor_no,image_no_current_batch,5 + obj_class_no,grid_y,grid_x] = 1

            #global output_tensor
            #output_tensor = output
            #print(output_76.shape)
            #print(output_76)
            
            target_file_name = 'F:/FlyAI/TL_output_data/target_' + str(image_pt_name) + '.pt'
            torch.save({'target_76': target_76, 'target_38': target_38, 'target_19': target_19}, target_file_name)
            
            
            
            target_76 = Variable(target_76, requires_grad=False)
            target_38 = Variable(target_38, requires_grad=False)
            target_19 = Variable(target_19, requires_grad=False)
            
            #print(target_76[0])

            loss_w_h = loss_function_MSE(output_76[:, :, 2 : 4, :, :], target_76[:, :, 2 : 4, :, :])\
             + loss_function_MSE(output_38[:, :, 2 : 4, :, :], target_38[:, :, 2 : 4, :, :])\
             + loss_function_MSE(output_19[:, :, 2 : 4, :, :], target_19[:, :, 2 : 4, :, :])
            
            loss_w_h = loss_w_h / 2
            
            loss_class = loss_function_BCE(output_76[:, :, 5 : 9, :, :], target_76[:, :, 5 : 9, :, :])\
             + loss_function_BCE(output_38[:, :, 5 : 9, :, :], target_38[:, :, 5 : 9, :, :])\
             + loss_function_BCE(output_19[:, :, 5 : 9, :, :], target_19[:, :, 5 : 9, :, :])
            
            loss_obj_p = loss_function_BCE(output_76[:, :, 4, :, :], target_76[:, :, 4, :, :])\
             + loss_function_BCE(output_38[:, :, 4, :, :], target_38[:, :, 4, :, :])\
             + loss_function_BCE(output_19[:, :, 4, :, :], target_19[:, :, 4, :, :])
            
            loss_x_y = loss_function_BCE(output_76[:, :, 0 : 2, :, :], target_76[:, :, 0 : 2, :, :])\
             + loss_function_BCE(output_38[:, :, 0 : 2, :, :], target_38[:, :, 0 : 2, :, :])\
             + loss_function_BCE(output_19[:, :, 0 : 2, :, :], target_19[:, :, 0 : 2, :, :])
            
            loss = loss_w_h + loss_class + loss_obj_p + loss_x_y
            #loss = loss_function_MSE(output_76, target_76) + loss_function_MSE(output_38, target_38) + loss_function_MSE(output_19, target_19)
            print(loss_w_h)
            print(loss_class)
            print(loss_obj_p)
            print(loss_x_y)
            total_loss += loss
            loss.backward()
            
        #loss = loss_function(output_76, target_76)
        
        optimizer.step()
        if i % (steps_for_printing_out_loss) == 0:
            print('Loss (epoch: ' + str(i) + '): ' + str(total_loss.cpu().detach().numpy()))
    torch.save({'state_dict': TL_model.state_dict(),'optimizer': optimizer.state_dict()}, TL_model_file_path)

TL_model_training()

tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0861, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 1): 6.1981144
tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0827, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 2): 6.1947365
tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0819, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 3): 6.19388
tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0814, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 4): 6.193391

tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0796, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 34): 6.1915894
tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0796, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 35): 6.1915846
tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0796, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 36): 6.19158
tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0796, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 37): 6.19

tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0795, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 67): 6.1915097
tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0795, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 68): 6.191509
tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0795, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 69): 6.1915083
tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0795, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 70): 6.1

tensor(0.0006, device='cuda:0', grad_fn=<DivBackward0>)
tensor(2.0493, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0795, device='cuda:0', grad_fn=<AddBackward0>)
tensor(2.0621, device='cuda:0', grad_fn=<AddBackward0>)
Loss (epoch: 100): 6.191493


In [None]:
image_pt_name = 0
target_file_name = 'F:/FlyAI/TL_output_data/' + str(image_pt_name) + '.pt'

target_data = torch.load(target_file_name)
target_76 = target_data['output_76']
target_38 = target_data['output_38']
target_19 = target_data['output_19']
print(target_38[:,:,5,:,:])

In [None]:
image_pt_name = 0
output_file_name = 'F:/FlyAI/TL_output_data/' + str(image_pt_name) + '.pt'

output_data = torch.load(output_file_name)
output_76 = output_data['output_76']
output_38 = output_data['output_38']
output_19 = output_data['output_19']
print(output_38[:,:,5,:,:])

In [27]:
image_pt_name = 0
target_file_name = 'F:/FlyAI/TL_output_data/target_' + str(image_pt_name) + '.pt'

target_data = torch.load(target_file_name)
target_76 = target_data['target_76'].cuda()
target_38 = target_data['target_38'].cuda()
target_19 = target_data['target_19'].cuda()

target_76 = torch.transpose(target_76, 2, 4)
target_38 = torch.transpose(target_38, 2, 4)
target_19 = torch.transpose(target_19, 2, 4)

target_76 = target_76.reshape(3 * 76**2, 9)
target_38 = target_38.reshape(3 * 38**2, 9)
target_19 = target_19.reshape(3 * 19**2, 9)

from numpy import savetxt
savetxt('data_t.csv', target_76.cpu().detach().numpy(), delimiter=',')

In [26]:
image_pt_name = 0
output_file_name = 'F:/FlyAI/TL_output_data/' + str(image_pt_name) + '.pt'

output_data = torch.load(output_file_name)
output_76 = output_data['output_76'].cuda()
output_38 = output_data['output_38'].cuda()
output_19 = output_data['output_19'].cuda()

output_76 = torch.transpose(output_76, 2, 4)
output_38 = torch.transpose(output_38, 2, 4)
output_19 = torch.transpose(output_19, 2, 4)

output_76 = output_76.reshape(3 * 76**2, 9)
output_38 = output_38.reshape(3 * 38**2, 9)
output_19 = output_19.reshape(3 * 19**2, 9)

from numpy import savetxt
savetxt('data.csv', output_76.cpu().detach().numpy(), delimiter=',')
#output_76 = torch.cat((output_76[0], output_76[1], output_76[2]), 0)
#output_38 = torch.cat((output_38[0], output_38[1], output_38[2]), 0)
#output_19 = torch.cat((output_19[0], output_19[1], output_19[2]), 0)


sum(x**2 for x in (19, 38, 76))

output = torch.cat((output_76, output_38, output_19), 0)
print(output.shape)
obj_threshold = 0.53
updated_outcome = []
for item in output:
    if item[4] >= obj_threshold:
        updated_outcome.append(item)

torch.Size([22743, 9])


In [None]:
len(updated_outcome)

In [None]:
def IoU(x_GT, y_GT, w_GT, h_GT, x_PD, y_PD, w_PD, h_PD):
    area_of_I = cross_length(x_GT, x_GT + w_GT, x_PD, x_PD + w_PD) * cross_length(y_GT, y_GT + h_GT, y_PD, y_PD + h_PD)
    area_of_U = h_GT * w_GT + h_PD * w_PD - area_of_I
    return area_of_I / area_of_U

#len(updated_outcome)
#updated_outcome = updated_outcome[0:9]
#print(updated_outcome.shape)
def sort_column(elem):
    return elem[4]
updated_outcome.sort(key = sort_column, reverse=True)

#updated_outcome = updated_outcome[updated_outcome[:,0].argsort()]

#print(updated_outcome)


#sort by obj possibility
IoU_threshold = 0.5
final_list = []
#print(updated_outcome)
#updated_outcome[0][3] = 6

for origin_item in updated_outcome:
    #print(final_list)
    if final_list == []:
        final_list.append(origin_item)
        continue
    indicator = 1
    for new_item in final_list:
        #print(IoU(new_item[0] - new_item[2]/2, new_item[1] - new_item[3]/2, new_item[2], new_item[3], origin_item[0] - origin_item[2]/2, origin_item[1] - origin_item[3]/2, origin_item[2], origin_item[3]))
        if IoU(new_item[0] - new_item[2]/2, new_item[1] - new_item[3]/2, new_item[2], new_item[3], origin_item[0] - origin_item[2]/2, origin_item[1] - origin_item[3]/2, origin_item[2], origin_item[3]) < IoU_threshold:
            indicator *= 1
        else:
            indicator *= 0
            break
    if indicator == 1:
        final_list.append(origin_item)

final_list


In [None]:
len(final_list)

In [None]:
#loss function - Done

"""
- objective possibility: except GT grids are 1, all others are 0
- class: 0, 1 only for GT grids, rest should be ignore (equal to predicted result)
- x, y, w, h only for GT grids, rest should be ignore (equal to predicted result)


"""
#grab yolov4 weight into TL - Done
#convert training data into 60GB mid input data
##########NMS (possibility threshold, IOU threshold, max 80)

In [None]:
#block 1 layer's parameter



#convert result into FlyAI format
#change back to normal pic size

In [None]:
layer_details[160]

In [None]:
#all_layers
n = [76, 38, 19]
m = [608 / i for i in n for m in range(6)] 
print(m)


anchor = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
[anchor[i] / m[i] for i in range(len(m))]

In [None]:
class YOLO_v4_model(nn.Module):
    def __init__(self):
        super().__init__()
        self.YOLO_v4_layers = nn.ModuleList()
        
            self.YOLO_v4_layers.append(Conv_Layer_box(in_channel[i], out_channel[i], kernel_size= kernel_size[i], stride = stride[i], activation_func = activation_func[i], batch_normalization = batch_normalization[i]))
    

In [None]:
class abc():
    def __init__(self, qwe, out):
        print(qwe)
        
abc(4,5)

In [None]:
abc = "1,2,3"
m = abc.split(",")
m
abc = "1"
m = abc.split(",")
m
k = 4

HX = []

for r in k:
    print(r)
    print(k[r])

In [None]:
weightfile = "D:/Installation/yolov4.weights"
fp = open(weightfile, 'rb')
header = np.fromfile(fp, count=5, dtype=np.int32)
header = torch.from_numpy(header)
seen = header[3]
buf = np.fromfile(fp, dtype=np.float32)
fp.close()

start = 0
ind = -2
buf


In [None]:
HX_weight = YOLO_v4_Module_WIP.state_dict()
i = 0
HX = []

for kk in HX_weight:
    i += 1
    print(kk)
    print(HX_weight[kk].size())
    HX.append(HX_weight[kk].size())
print(i)

In [None]:
d = torch.load("D:/Installation/yolov4.pt")
#d

In [None]:
mm = d['model']
weight_bank = []
i = 0
yolo_v4_size = []
for kk in mm:
    i += 1
    print(kk)
    print(mm[kk].size())
    weight_bank.append(mm[kk])
    yolo_v4_size.append(mm[kk].size())
print(i)

In [None]:
len(weight_bank)
#update weight into model

i = 0
HX = []

for kk in YOLO_v4_Module_WIP.state_dict():
    print(kk)
    YOLO_v4_Module_WIP.state_dict()[kk] = weight_bank[i]
    i += 1
    #HX.append(HX_weight[kk].size())
print(i)

In [None]:
for i in range(len(yolo_v4_size)):
    #print(yolo_v4_size[i] == HX[i])
    if (yolo_v4_size[i] == HX[i]) == False:
        print(yolo_v4_size[i])
        print(HX[i])
        print(i)
    print(yolo_v4_size[i] == HX[i])



In [None]:
#YOLO_v4_Module_WIP.state_dict()
torch.save({'state_dict': YOLO_v4_Module_WIP.state_dict(),'optimizer': optimizer.state_dict()}, model_file_path)

In [None]:
#TL parameter:
d = torch.load("D:/Installation/yolov4.pt")
mm = d['model']
weight_bank = []
i = 0
yolo_v4_size = []
for kk in mm:
    i += 1
    print(kk)
    print(mm[kk].size())
    weight_bank.append(mm[kk])
    yolo_v4_size.append(mm[kk].size())
print(i)



In [None]:
#TL parameter:
d = torch.load(TL_model_file_path)
mm = d['state_dict']
weight_bank = []
i = 0
yolo_v4_size = []
for kk in mm:
    i += 1
    print(kk)
    print(mm[kk].size())
    weight_bank.append(mm[kk])
    yolo_v4_size.append(mm[kk].size())
print(i)




In [None]:
#len(weight_bank)
#update weight into model
TL_model_weight = torch.load(TL_model_file_path)
YOLO_v4_weight = torch.load("D:/Installation/yolov4.pt")['model']
i = 0
HX = []

TL_model_weight['state_dict']['Conv_Layer_76_a.conv_box.0.weight'] = YOLO_v4_weight['module_list.138.Conv2d.weight']
TL_model_weight['state_dict']['Conv_Layer_76_a.conv_box.0.bias'] = YOLO_v4_weight['module_list.138.Conv2d.bias']
TL_model_weight['state_dict']['Conv_Layer_38_a.conv_box.0.weight'] = YOLO_v4_weight['module_list.149.Conv2d.weight']
TL_model_weight['state_dict']['Conv_Layer_38_a.conv_box.0.bias'] = YOLO_v4_weight['module_list.149.Conv2d.bias']
TL_model_weight['state_dict']['Conv_Layer_19_a.conv_box.0.weight'] = YOLO_v4_weight['module_list.160.Conv2d.weight']
TL_model_weight['state_dict']['Conv_Layer_19_a.conv_box.0.bias'] = YOLO_v4_weight['module_list.160.Conv2d.bias']
torch.save(TL_model_weight, 'Model/TL_model_starting_point.pt')

In [None]:
mm = TL_model_weight['state_dict']
weight_bank = []
i = 0
yolo_v4_size = []
for kk in mm:
    i += 1
    print(kk)
    print(mm[kk].size())
    weight_bank.append(mm[kk])
    yolo_v4_size.append(mm[kk].size())
print(i)


In [None]:
for i in range(4):
    if i == 2:
        continue
    print(i)