In [None]:
import pandas as pd
import cv2 as cv
import numpy as np
import os, glob

In [None]:
import re
def sorted_alphanumeric(data):
    """Return the strings in ``data`` sorted in natural (human) order.

    Each string is split into alternating text and digit runs. Digit runs are
    compared as integers and text runs case-insensitively, so ``'2.jpg'``
    sorts before ``'10.jpg'``.

    Args:
        data: iterable of strings (e.g. file names).

    Returns:
        A new sorted list; ``data`` itself is not modified.
    """
    def natural_key(name):
        parts = []
        # The capturing group makes re.split keep the digit runs in the result.
        for chunk in re.split('([0-9]+)', name):
            parts.append(int(chunk) if chunk.isdigit() else chunk.lower())
        return parts

    return sorted(data, key=natural_key)

In [None]:
# Automatically label and generate the YOLO-format .txt file of each training
# image from the given Pascal VOC bounding-box CSV.
#
# The CSV fields are separated by ", " (comma + space). A separator longer than
# one character is interpreted as a regular expression and is only supported by
# the Python parser, so that engine is requested explicitly instead of relying
# on pandas' silent fallback (which emits a ParserWarning).
df = pd.read_csv('train_boundingboxes.csv', sep=', ', engine='python')

# Read every training image once to record its pixel size; the sizes are needed
# to normalise the box coordinates. Natural sort keeps 2.jpg before 10.jpg.
imglist = sorted_alphanumeric(os.listdir('Train'))
widthList = []
heightList = []
for img in imglist:
    image = cv.imread('Train/'+img)
    # cv.imread returns None (it does not raise) when a file cannot be decoded.
    # Fail with a clear message rather than an AttributeError on `.shape`.
    if image is None:
        raise ValueError('Could not read image: Train/' + img)
    height, width = image.shape[:2]
    widthList.append(width)
    heightList.append(height)
    
def findX(xmin,xmax,width):
    """Return the horizontal centre of a box as a fraction of the image width.

    Args:
        xmin: left edge of the box, in pixels.
        xmax: right edge of the box, in pixels.
        width: image width, in pixels.

    Returns:
        ``(xmin + xmax) / 2`` divided by ``width``.
    """
    edge_sum = xmin + xmax
    centre_px = edge_sum / 2.0
    return centre_px / width

def findY(ymin,ymax,height):
    """Return the vertical centre of a box as a fraction of the image height.

    Args:
        ymin: top edge of the box, in pixels.
        ymax: bottom edge of the box, in pixels.
        height: image height, in pixels.

    Returns:
        ``(ymin + ymax) / 2`` divided by ``height``.
    """
    edge_sum = ymin + ymax
    centre_px = edge_sum / 2.0
    return centre_px / height

def findW(xmin,xmax,width):
    """Return the box width as a fraction of the image width.

    The absolute difference is used, so the result is non-negative even if
    ``xmin`` and ``xmax`` are passed in swapped order.

    Args:
        xmin: left edge of the box, in pixels.
        xmax: right edge of the box, in pixels.
        width: image width, in pixels.
    """
    extent_px = abs(xmin - xmax)
    return extent_px / width

def findH(ymin,ymax,height):
    """Return the box height as a fraction of the image height.

    The absolute difference is used, so the result is non-negative even if
    ``ymin`` and ``ymax`` are passed in swapped order.

    Args:
        ymin: top edge of the box, in pixels.
        ymax: bottom edge of the box, in pixels.
        height: image height, in pixels.
    """
    extent_px = abs(ymin - ymax)
    return extent_px / height

# Attach the pixel size of each image to its bounding-box row.
# NOTE(review): this relies on the CSV having exactly one row per image, in the
# same order as the naturally sorted 'Train' listing -- pandas raises a
# ValueError here if the lengths differ.
df['pwidth'] = widthList
df['pheight'] = heightList

# Convert the Pascal VOC corners to YOLO's normalised centre/size values.
# The helpers only use arithmetic and abs(), so they are applied to whole
# columns at once instead of row by row through DataFrame.apply.
df['x_centre'] = findX(df['X_min'], df['X_max'], df['pwidth'])
df['y_centre'] = findY(df['Y_min'], df['Y_max'], df['pheight'])
df['width'] = findW(df['X_min'], df['X_max'], df['pwidth'])
df['height'] = findH(df['Y_min'], df['Y_max'], df['pheight'])
# Every box is given class id 0.
df['Class'] = 0

# Keep only the five YOLO fields, rounded to 6 decimals, as strings for writing.
dfresult = df[['Class','x_centre','y_centre','width','height']]
dfresult = dfresult.round(6)
dfresult = dfresult.astype(str)

# The label directory may not exist yet on a fresh checkout.
os.makedirs('data/obj', exist_ok=True)

# One label file per row, named after the row position (0.txt, 1.txt, ...).
# Each file holds a single line: "<class> <x_centre> <y_centre> <width> <height>".
for i, row in enumerate(dfresult.values):
    filename = 'data/obj/{}.txt'.format(i)
    row.tofile(filename, sep=" ", format="%s")
    
# Write the image list files darknet reads: one image path per line.
path = 'data/obj/'
imgList = sorted_alphanumeric(os.listdir('Train'))

# `with` guarantees the file is flushed and closed; the original left both
# handles open, so the lists could be incomplete when copied in a later cell.
with open('data/train.txt','w') as textFile:
    for img in imgList:
        # Strip any existing extension before appending '.jpg', so a listing
        # entry such as '0.jpg' yields 'data/obj/0.jpg' and not 'data/obj/0.jpg.jpg'.
        imgPath = path+os.path.splitext(img)[0]+'.jpg'+'\n'
        textFile.write(imgPath)

# NOTE(review): the test list has one entry per *training* image and assumes
# the test images are named 1000.jpg, 1001.jpg, ... -- confirm against the
# contents of the 'Test' directory.
path = 'data/Test/'
with open('data/test.txt','w') as testFile:
    for i, img in enumerate(imgList, start=1000):
        imgPath = path+str(i)+'.jpg'+'\n'
        testFile.write(imgPath)

In [None]:
# Stage everything darknet needs inside the sibling ../darknet checkout:
# the data/obj directory, the Test images, the network config, the
# train/test image lists, and the obj.data / obj.names dataset descriptors.
!cp -R data/obj ../darknet/data
!cp -R Test ../darknet/data
!cp yolov4-obj.cfg ../darknet/cfg
!cp data/train.txt ../darknet/data
!cp data/test.txt ../darknet/data
!cp data/obj.data ../darknet/data
!cp data/obj.names ../darknet/data

In [None]:
!cd darknet
!wget https://drive.google.com/open?id=1JKF-bdIklxOOVy-2Cr5qdvjgGpmGfcbp
# Wait till the download is done for the weights

In [None]:
!./darknet detector train data/obj.data cfg/yolov4-obj.cfg yolov4.conv.137 -dont_show -mjpeg_port 8040 -map > output.txt
#Start the training here, just incase something happens we can continue the training using the command below
# !./darknet detector train data/obj.data cfg/yolov4-obj.cfg backup/yolov4-obj_last.weights > output.txt

In [None]:
!./darknet detector test data/obj.data cfg/yolov4-obj.cfg backup/yolov4-obj_best.weights -thresh 0.4 -ext_output -dont_show -out result.json < data/test2.txt > result.txt
#Generate the json file for conversion, then we convert it csv as yolo will not convert to csv automatically
# https://www.convertcsv.com/json-to-csv.htm
!cp result.json ../kaggle
!cd ../kaggle

In [None]:
# Since the generated JSON file takes too long to parse, an external API at convertcsv.io
# is called to convert it to CSV instead.
# For this to work, replace [YOUR TOKEN] below with your own authentication token.
# The converted file is saved as convertcsv.csv.
!curl -X POST "https://www.convertcsv.io/api/v1/json2csv" -H "Authorization: Token [YOUR TOKEN]" -F "infile=@result.json" -o convertcsv.csv

In [None]:
# Alternative conversion: use this cell if you have YOLO-format .txt files, or the
# next cell to convert result.json with pandas instead.
# Only the first line of each .txt file in the working directory is read, and its
# fields 1-4 are scaled to pixels.
# Sorted so the row order of the CSV is the same on every run.
myFiles = sorted(glob.glob('*.txt'))

# All images are assumed to be 1024x1024 here -- TODO confirm for the dataset in use.
width=1024
height=1024
# NOTE(review): image_id is never changed, so every row is written with id 0 -- confirm this is intended.
image_id=0
final_df=[]
for item in myFiles:
    with open(item, 'rt') as fd:
        first_line = fd.readline()
    splited = first_line.split()

    try:
        bbox_temp = [
            float(splited[1])*width,
            float(splited[2])*height,
            float(splited[3])*width,
            float(splited[4])*height,
        ]
    except (IndexError, ValueError):
        # Too few fields, or a field that is not a number: skip this file, but
        # say which one. Any other error is a real problem and is not hidden.
        print("file is not in YOLO format!", item)
        continue
    final_df.append([image_id, width, height, bbox_temp])
df = pd.DataFrame(final_df,columns=['image_id', 'width', 'height','bbox'])
df.to_csv("convertcsv.csv",index=False)

In [None]:
# Use pandas to convert result.json to CSV and format it for the submission step.
import json

# Raw dump of the JSON as a CSV (csvfile.csv), kept as-is for inspection.
with open('result.json', encoding='utf-8') as inputfile:
    df = pd.read_json(inputfile)

df.to_csv('csvfile.csv', encoding='utf-8', index=False)

# Same encoding as the read above, so both reads behave alike on every platform.
with open('result.json', encoding='utf-8') as file:
    data = json.load(file)
    
def column(matrix, i):
    """Return element ``i`` of every row in ``matrix`` as a list."""
    return [row[i] for row in matrix]

df = pd.DataFrame(data)
# Flatten the first entry of each row's 'objects' list into columns such as
# 'class_id', 'confidence' and 'relative_coordinates.center_x'.
df = df.join(pd.json_normalize(pd.json_normalize(df['objects'])[0]))
# .copy() gives df2 its own data, so the column assignments below do not
# trigger pandas' SettingWithCopyWarning.
df2 = df[['frame_id','filename']].copy()
# Column names follow the 'objects/0/...' layout that the submission cell reads.
df2['objects/0/class_id'] = df['class_id']
df2['objects/0/relative_coordinates/center_x'] = df['relative_coordinates.center_x']
df2['objects/0/relative_coordinates/center_y'] = df['relative_coordinates.center_y']
df2['objects/0/relative_coordinates/width'] = df['relative_coordinates.width']
df2['objects/0/relative_coordinates/height'] = df['relative_coordinates.height']
df2['objects/0/relative_coordinates/confidence'] = df['confidence']
df2.to_csv('convertcsv.csv', encoding='utf-8', index=False)

In [None]:
#https://github.com/tensorturtle/rebox
from rebox import BBox
from rebox.formats import yolo,pascal
def yolo_to_pascal_voc(x_center, y_center, w, h,  image_w, image_h):
    """Convert a normalised YOLO box to Pascal VOC pixel corners.

    Args:
        x_center: box centre x, as a fraction of the image width.
        y_center: box centre y, as a fraction of the image height.
        w: box width, as a fraction of the image width.
        h: box height, as a fraction of the image height.
        image_w: image width, in pixels.
        image_h: image height, in pixels.

    Returns:
        ``[x1, y1, x2, y2]``: the top-left and bottom-right corners in pixels.
    """
    box_w = w * image_w
    box_h = h * image_h
    # Top-left corner = centre minus half the box size.
    left = ((2 * x_center * image_w) - box_w)/2
    top = ((2 * y_center * image_h) - box_h)/2
    return [left, top, left + box_w, top + box_h]

In [None]:
# Convert the YOLO detections in convertcsv.csv to Pascal VOC pixel boxes and
# write the submission file.

# Record the pixel size of every test image, in natural file-name order.
imglist = sorted_alphanumeric(os.listdir('Test'))
widthList = []
heightList = []
for img in imglist:
    image = cv.imread('Test/'+img)
    # cv.imread returns None (it does not raise) when a file cannot be decoded.
    if image is None:
        raise ValueError('Could not read image: Test/' + img)
    height, width = image.shape[:2]
    widthList.append(width)
    heightList.append(height)

cleanup = pd.read_csv('convertcsv.csv')

# Keep only the third path component of each file name, e.g.
# 'data/Test/1000.jpg' -> '1000.jpg'.
cleanedfilename = [name.split('/')[2] for name in cleanup['filename']]
cleanup['filename'] = cleanedfilename

x_min = []
y_min = []
x_max = []
y_max = []

# NOTE(review): row i of the CSV is paired with image i of the sorted 'Test'
# listing -- this assumes both are in the same order; confirm.
for i, (index, row) in enumerate(cleanup.iterrows()):
    # `not`, rather than `~`: pd.isnull on a scalar returns a plain bool, and
    # ~True / ~False are -2 / -1, which are both truthy, so the original test
    # could never reach the else branch.
    if not pd.isnull(row['objects/0/relative_coordinates/width']):
        yolob = BBox([row['objects/0/relative_coordinates/center_x'],row['objects/0/relative_coordinates/center_y'],row['objects/0/relative_coordinates/width'],row['objects/0/relative_coordinates/height']],yolo)
        pascalb = yolob.as_format(pascal, widthList[i], heightList[i])
        x_min.append(pascalb.x1)
        y_min.append(pascalb.y1)
        x_max.append(pascalb.x2)
        y_max.append(pascalb.y2)
    else:
        # No detection for this image: submit an all-zero box.
        x_min.append(0)
        y_min.append(0)
        x_max.append(0)
        y_max.append(0)

# The leading space in the column names is intentional and is kept as-is.
result = pd.DataFrame()
result[' X_min'] = x_min
result[' Y_min'] = y_min
result[' X_max'] = x_max
result[' Y_max'] = y_max
# Negative coordinates are set to 0.
result[result < 0] = 0
# Any remaining missing values become 0 so the integer cast below cannot fail.
result = result.replace(r'nan', np.nan, regex=True)
result = result.fillna(0)

# Cast to whole pixels. The original called .astype(int) on each column
# without assigning the result, so the cast never took effect.
result = result.astype(int)
result['ImageId'] = cleanup['filename']
result = result[['ImageId',' X_min',' Y_min',' X_max',' Y_max']]
result.to_csv('EE4211_group1.csv',index=False)