In [None]:
#Make Google Drive reachable from this Colab runtime
from google.colab import drive

drive_mount_point = '/content/drive' #folder under which the Drive contents appear
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
#Import Necessary Libraries
import pandas as pd #data frame operations
import os #initializing the paths

In [None]:
#Load the class-description table that pairs every label id with its readable class name
csv_folder = '/content/drive/My Drive/YOLOv4/Dataset_and_Annotations/csv_folder'
os.chdir(csv_folder) #csv files are opened by bare file name, so make this folder the working directory
classes_data = pd.read_csv('class-descriptions-boxable.csv', header=None) #no header row in the file, so the columns are named 0 and 1
classes_data.head() #preview the first rows

Unnamed: 0,0,1
0,/m/011k07,Tortoise
1,/m/011q46kg,Container
2,/m/012074,Magpie
3,/m/0120dh,Sea turtle
4,/m/01226z,Football


In [None]:
#Define required classes
classes=['Cat', 'Dog']

#Look up the label id (e.g. '/m/01yrx') of every required class name and collect the ids,
#in the same order as `classes`, inside a list called class_strings
class_strings = []
for class_name in classes:
  #Column 1 of classes_data holds the readable class name, column 0 the matching label id
  matching_ids = classes_data.loc[classes_data[1] == class_name, 0]
  #A misspelled or unknown class name matches no row; stop with a clear message
  #instead of an opaque "positional indexer is out-of-bounds" error
  if matching_ids.empty:
    raise ValueError("Class '{}' was not found in class-descriptions-boxable.csv".format(class_name))
  label_id = matching_ids.iloc[0] #if several rows match, the first one is used
  print(label_id)
  #Append the label id corresponding to the class name to the list
  class_strings.append(label_id)

print(class_strings)

/m/01yrx
/m/0bt9lr
['/m/01yrx', '/m/0bt9lr']


In [None]:
#Load the bounding-box annotations, reading only the columns that are needed:
#the image id, the label id and the four box edges
annotation_columns = ['ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax']
annotation_data = pd.read_csv('train-annotations-bbox.csv', usecols=annotation_columns)
annotation_data.head()

Unnamed: 0,ImageID,LabelName,XMin,XMax,YMin,YMax
0,000002b66c9c498e,/m/01g317,0.0125,0.195312,0.148438,0.5875
1,000002b66c9c498e,/m/01g317,0.025,0.276563,0.714063,0.948438
2,000002b66c9c498e,/m/01g317,0.151562,0.310937,0.198437,0.590625
3,000002b66c9c498e,/m/01g317,0.25625,0.429688,0.651563,0.925
4,000002b66c9c498e,/m/01g317,0.257812,0.346875,0.235938,0.385938


In [None]:
#Keep only the annotation rows whose LabelName is one of the required label ids in class_strings
is_required_class = annotation_data['LabelName'].isin(class_strings)
#Take an independent copy so that columns added later do not touch annotation_data
filtered_class_data = annotation_data.loc[is_required_class].copy()
filtered_class_data.head()

Unnamed: 0,ImageID,LabelName,XMin,XMax,YMin,YMax
1151,0000b9fcba019d36,/m/0bt9lr,0.165,0.90375,0.268333,0.998333
1242,0000cb13febe0138,/m/0bt9lr,0.0,0.651875,0.0,0.999062
1709,0000de486dc6c49f,/m/01yrx,0.019608,0.998366,0.02451,0.977124
3730,000256419f7c57d8,/m/0bt9lr,0.190625,0.30125,0.53,0.65
3731,000256419f7c57d8,/m/0bt9lr,0.45,0.5625,0.485833,0.598333


In [None]:
#Add the extra columns needed for the YOLO format, filled with empty-string placeholders
for new_column in ['classNumber', 'center x', 'center y', 'width', 'height']:
  filtered_class_data[new_column] = ''


In [None]:
#Give every label id a class number equal to its position in class_strings
#(with classes=['Cat', 'Dog'] that is 0 for Cat and 1 for Dog)
for class_number, label_id in enumerate(class_strings):
  is_this_class = filtered_class_data['LabelName'] == label_id
  filtered_class_data.loc[is_this_class, 'classNumber'] = class_number

#Box edges as given in the annotation file
x_min, x_max = filtered_class_data['XMin'], filtered_class_data['XMax']
y_min, y_max = filtered_class_data['YMin'], filtered_class_data['YMax']

#The box centre is the midpoint between opposite edges
filtered_class_data['center x'] = (x_max + x_min)/2
filtered_class_data['center y'] = (y_max + y_min)/2

#The box size is the distance between opposite edges
filtered_class_data['width'] = x_max - x_min
filtered_class_data['height'] = y_max - y_min

#Keep only the image id plus the five values that make up a YOLO label line
yolo_value_columns = ['ImageID', 'classNumber', 'center x', 'center y', 'width', 'height']
YOLO_values = filtered_class_data.loc[:, yolo_value_columns].copy()
YOLO_values.head()

Unnamed: 0,ImageID,classNumber,center x,center y,width,height
1151,0000b9fcba019d36,1,0.534375,0.633333,0.73875,0.73
1242,0000cb13febe0138,1,0.325937,0.499531,0.651875,0.999062
1709,0000de486dc6c49f,0,0.508987,0.500817,0.978758,0.952614
3730,000256419f7c57d8,1,0.245938,0.59,0.110625,0.12
3731,000256419f7c57d8,1,0.50625,0.542083,0.1125,0.1125


In [None]:
#Write one YOLO label file (<image name>.txt) beside every .jpg image found under image_path

image_path = '/content/drive/My Drive/YOLOv4/Dataset_and_Annotations/Dataset/train/Cat_Dog'
#Change the current directory to the one where we have Images (kept so the working directory ends up the same as before)
os.chdir(image_path)

image_ext = '.jpg'
#Columns of one YOLO label line, in the order they are written
label_columns = ['classNumber', 'center x', 'center y', 'width', 'height']

#Group the annotations by image once, instead of scanning the whole dataframe for every file;
#.indices maps each ImageID to the row positions of its boxes
rows_by_image = YOLO_values.groupby('ImageID').indices

#Go through all the image files in image_path and in all of its sub-directories
for current_dir, dirs, files in os.walk(image_path):
  for f in files:
    #Only files whose name ends with '.jpg' get a label file
    if not f.endswith(image_ext):
      continue

    #Strip the extension so the txt file can be generated with the same name as the image,
    #as per YOLO guidelines
    image_title = f[:-len(image_ext)]

    #Boxes belonging to this image; an image without any annotation gives an empty
    #dataframe and therefore an empty label file
    df = YOLO_values.iloc[rows_by_image.get(image_title, [])][label_columns]

    #Save beside the image itself (current_dir) rather than always in the top-level folder,
    #so images inside sub-directories get their own label file in the right place
    save_path = os.path.join(current_dir, image_title + '.txt')

    #Generate a text file containing required data in YOLO format
    df.to_csv(save_path, header=False, index=False, sep=' ')