## Configuration 


In [3]:
# Notebook-wide settings. All relative paths are written relative to the
# directory the notebook runs in.

# Directory scanned for images and label '.txt' files (used as
# 'training_data_dir' when the listing / .data / .names files are generated).
IMAGE_LABEL_DIR = '../data/processed_data/images_labelled/'

# Parent directory of the darknet checkout; the registered shell commands
# `cd` into this path with 'darknet' appended.
DARKNET_DOWNLOAD_DIR = '../Frameworks/'

## Darknet network definition and weights passed to the detect / test / demo commands
CONFIG_TO_USE_FILE_PATH = '../config/darknet_cfgs_convs_weights/downloaded/YOLOv3-608.cfg'
WEIGHTS_TO_LOAD_FILE_PATH = '../config/darknet_cfgs_convs_weights/downloaded/YOLOv3-608.weights'
# Value handed to darknet through the '-thresh' option.
THRESHOLD_TO_USE = 0.25
## Image used by the single-image detection commands
TEST_IMAGE_PATH = '../data/processed_data/images_labelled/172_0_2834.jpeg' 
# (alternative sample image: data/dog.jpg)


## Required Training Files 
# Output locations of the generated '.data' / '.names' files and of the
# train / test image listings.
DOT_DATA_FILE_PATH = '../data/processed_data/nuggets.data'
DOT_NAMES_FILE_PATH = '../data/processed_data/nuggets.names' 
TRAIN_IMAGES_PATH_FILE_PATH = '../data/processed_data/train.txt'
TEST_IMAGES_PATH_FILE_PATH = '../data/processed_data/test.txt'
# Written as the 'backup' entry of the '.data' file; created if it does not exist.
BACKUP_DIR = '../model_backup/'
# Passed as 'train_size' to train_test_split when the image paths are split.
TRAIN_SIZE = 0.8
# Convolutional weights file passed to `darknet detector train`.
CONVOLUTION_LAYER_FILE_PATH = '../config/darknet_cfgs_convs_weights/downloaded/darknet53.conv.74'

## TRAINING REQUIREMENT
# (the bare string below is a no-op note listing the settings training relies on)
'''
    - CONFIG_TO_USE_FILE_PATH
    - CONVOLUTION_LAYER_FILE_PATH
    - TRAIN_SIZE
    - DOT_DATA_FILE_PATH
    - 
    - 
'''
## TESTING REQUIREMENT
# (the bare string below is a no-op note listing the settings testing relies on)
'''
    - CONFIG_TO_USE_FILE_PATH
    - DOT_DATA_FILE_PATH
    - 
    - WEIGHTS_TO_LOAD_FILE_PATH
    - THRESHOLD_TO_USE

'''
# Confirmation that the cell ran and the settings above are defined.
print('Value Set')


Value Set


### Download Weights & Conv

In [None]:
# cmd.add('Download - yolo v3 weights', '!wget https://pjreddie.com/media/files/yolov3.weights')
# cmd.add('Download - yolo v3 conv', '!wget https://pjreddie.com/media/files/darknet53.conv.74')

## '.data', '.names', 'train.txt', & 'test.txt' file creation

In [None]:

import os, glob
from sklearn.model_selection import train_test_split

def get_img_lab_files_paths(config_dict):
    '''
    Collect the image and label files located directly inside the training
    data directory.

    Side effect: every image whose extension is '.jpeg' (any letter case) is
    renamed on disk to '.jpg', unless a '.jpg' file with the same stem already
    exists, in which case the file is left untouched.

    Parameters:
        config_dict: dict providing
            config_dict['paths']['input']['training_data_dir'].

    Returns:
        (img_files_path, lab_files_path): the image paths (jpg / jpeg / png)
        as they exist on disk AFTER the renaming step, and the '.txt' paths.
    '''
    training_data_dir = config_dict['paths']['input']['training_data_dir']
    files = glob.glob(os.path.join(training_data_dir, '*'))

    img_files_path, lab_files_path = [], []
    for file_path in files:
        stem, ext = os.path.splitext(file_path)
        ext = ext.lower()
        if ext == '.txt':
            lab_files_path.append(file_path)
        elif ext in ('.jpg', '.jpeg', '.png'):
            if ext == '.jpeg':
                # Only the extension is rewritten (never another part of the
                # path), and the returned entry follows the rename so callers
                # never receive a path that no longer exists.
                jpg_path = stem + '.jpg'
                if not os.path.exists(jpg_path):
                    os.rename(file_path, jpg_path)
                    file_path = jpg_path
            img_files_path.append(file_path)

    return img_files_path, lab_files_path


def write_txt_having_paths_to_train_test_images(config_dict):
    '''
    Split the available image paths into a train and a test subset and write
    each subset (absolute paths, one per line) to its own text file.

    Reads from config_dict:
        ['paths']['output']['txt_having_paths_to_train_images']
        ['paths']['output']['txt_having_paths_to_test_images']
        ['train_size']  (converted with float())
    '''
    output_paths = config_dict['paths']['output']
    train_txt_path = output_paths['txt_having_paths_to_train_images']
    test_txt_path = output_paths['txt_having_paths_to_test_images']
    train_size = float(config_dict['train_size'])

    image_files_path = get_img_lab_files_paths(config_dict)[0]
    absolute_image_paths = list(map(os.path.abspath, image_files_path))
    train_li, test_li = train_test_split(
        absolute_image_paths, train_size=train_size, shuffle=True)

    destinations = (train_txt_path, test_txt_path)
    subsets = (train_li, test_li)
    for path, content in zip(destinations, subsets):
        file_name = path.split('/')[-1]
        with open(path, 'w+') as file:
            file.write('\n'.join(content))
            print('[File Written] "{}" has been written at :\n\t{}'.format(file_name, path))
    

def write_data_and_names_files(config_dict):
    '''
    Create the darknet '.names' and '.data' files for the labelled data set.

    The class names are read from the 'classes.txt' file found in the training
    data directory; the class ids are collected from every other label file
    (lines made of exactly five whitespace separated fields, the first one
    being the class id).

    Reads from config_dict:
        ['paths']['input']['training_data_dir']
        ['paths']['output']['.name'], ['.data'],
            ['txt_having_paths_to_train_images'],
            ['txt_having_paths_to_test_images'], ['backup_directory']

    Raises:
        FileNotFoundError: no 'classes.txt' exists in the training data directory.
        Exception: the number of class names differs from the number of
            distinct class ids found in the label files.
    '''
    training_data_dir = config_dict['paths']['input']['training_data_dir']
    output_paths = config_dict['paths']['output']
    name_file_path = output_paths['.name']
    data_file_path = output_paths['.data']
    train_txt_path = output_paths['txt_having_paths_to_train_images']
    test_txt_path = output_paths['txt_having_paths_to_test_images']
    backup_dir = output_paths['backup_directory']
    _, lab_files_path = get_img_lab_files_paths(config_dict)

    ## Get labels name
    candidates = [
        p for p in glob.glob(os.path.join(training_data_dir, '*.txt'))
        if 'classes.txt' in p
    ]
    if not candidates:
        raise FileNotFoundError(
            'No "classes.txt" file found in: {}'.format(training_data_dir))
    category_names_file_path = candidates[0]
    with open(category_names_file_path) as file:
        label_name = [line.strip() for line in file.read().splitlines() if line.strip()]

    ## Get labels id
    # The ids are accumulated over ALL label files; previously the list was
    # re-created for every file, so only the last file was taken into account.
    classes_file_abs = os.path.abspath(category_names_file_path)
    all_label_id = []
    for lab_file_path in lab_files_path:
        # The class-name file also ends in '.txt' but is not an annotation file.
        if os.path.abspath(lab_file_path) == classes_file_abs:
            continue
        with open(lab_file_path) as file:
            txt = file.read()
        for line in txt.split('\n'):
            fields = line.split()
            if len(fields) != 5:
                continue
            label_id = str(int(float(fields[0])))
            if label_id not in all_label_id:
                all_label_id.append(label_id)

    ## check
    if len(label_name) != len(all_label_id):
        print('Length of Label Name ({}) is NOT EQUAL to label_id({})'.format(
            len(label_name), len(all_label_id)))
        raise Exception('There\'s some issue in total label ids which are present')

    ## Generating Label ID Mapping
    label_mapping_dict = dict(enumerate(label_name))
    print('Label Mapping:', label_mapping_dict)

    ## Generating '.names' file
    with open(name_file_path, 'w+') as file:
        file.write('\n'.join(label_name))
        print('[File Written] "{}" has been written at :\n\t{}'.format(
            os.path.basename(name_file_path), name_file_path))

    ## Generating '.data' file
    n_cls = len(all_label_id)
    os.makedirs(backup_dir, exist_ok=True)

    # Every path gets an extra '../' prefix, the same way the registered
    # darknet commands prefix the configured paths after `cd`-ing into the
    # darknet directory.
    # One entry per line; the previous literal leaked a stray '"    "' into
    # the 'valid' line.
    txt_for_data = (
        f'classes = {n_cls}\n'
        f'train =../{train_txt_path}\n'
        f'valid =../{test_txt_path}\n'
        f'names =../{name_file_path}\n'
        f'backup =../{backup_dir}'
    )

    with open(data_file_path, 'w+') as file:
        file.write(txt_for_data)
        print('[File Written] "{}" has been written at :\n\t{}'.format(
            os.path.basename(data_file_path), data_file_path))

## Creating Required File

def main():
    '''
    Assemble the configuration dictionary from the notebook-level settings and
    generate, in this order, the train / test listings and the '.data' /
    '.names' files.

    Shape of the configuration dictionary:
        {
            'paths': {
                'input':  {'training_data_dir': <dir with images + labels>},
                'output': {
                    '.name': <path of the .names file>,
                    '.data': <path of the .data file>,
                    'txt_having_paths_to_train_images': <train listing path>,
                    'txt_having_paths_to_test_images': <test listing path>,
                    'backup_directory': <backup directory>,
                },
            },
            'train_size': <fraction, e.g. 0.8 or '0.8'>,
        }
    '''
    input_paths = {'training_data_dir': IMAGE_LABEL_DIR}
    output_paths = {
        '.name': DOT_NAMES_FILE_PATH,
        '.data': DOT_DATA_FILE_PATH,
        'txt_having_paths_to_train_images': TRAIN_IMAGES_PATH_FILE_PATH,
        'txt_having_paths_to_test_images': TEST_IMAGES_PATH_FILE_PATH,
        'backup_directory': BACKUP_DIR,
    }
    config_dict = {
        'paths': {'input': input_paths, 'output': output_paths},
        'train_size': TRAIN_SIZE,
    }

    ## Train / test listing files first ...
    write_txt_having_paths_to_train_test_images(config_dict)

    ## ... then the '.data' & '.name' files
    write_data_and_names_files(config_dict)


main()

## Clone The Repo

In [1]:
class CatalogCmdOfDarknet:
    '''Ordered record of (message, command) pairs; each pair is echoed when registered.'''

    def __init__(self):
        # (msg, cmd) tuples in registration order.
        self.msg_and_cmd_li = list()

    def add(self, msg, cmd):
        '''Store the pair and print the message followed by the command on an indented line.'''
        entry = (msg, cmd)
        self.msg_and_cmd_li += [entry]
        print(msg, '\n\t', cmd)


cmd = CatalogCmdOfDarknet()

In [5]:
# Register (and echo) the shell commands used to fetch and inspect the darknet checkout.
# NOTE(review): the clone command targets 'alexey_darknet' while the listing
# command below enters 'darknet/' -- confirm which directory name is intended.
cmd.add(
    msg='Get Darknet Repository',
    cmd=f'!cd {DARKNET_DOWNLOAD_DIR} && git clone https://github.com/AlexeyAB/darknet alexey_darknet',
)
cmd.add(msg='Current Directory', cmd='!pwd')
cmd.add(msg='Files In Current Directory', cmd='!ls -a')
cmd.add(
    msg='Files In Framework Directory',
    cmd=f'!cd {DARKNET_DOWNLOAD_DIR}darknet/ && ls -a',
)


Get Darknet Repository 
	 !cd ../Frameworks/ && git clone https://github.com/AlexeyAB/darknet alexey_darknet
Current Directory 
	 !pwd
Files In Current Directory 
	 !ls -a
Files In Framework Directory 
	 !cd ../Frameworks/darknet/ && ls -a


In [None]:
!cd ../Frameworks/ && git clone https://github.com/AlexeyAB/darknet alexey_darknet

In [None]:
!pwd

In [None]:
!cd ../Frameworks/darknet/ && ls -a

## Edit The Configuration for Compilation

In [None]:
# Magics for pulling the darknet Makefile into a cell and writing it back.
cmd.add(
    msg='Compile Darknet - Configuration - Load MakeFile',
    cmd=f'%load {DARKNET_DOWNLOAD_DIR}darknet/Makefile',
)
cmd.add(
    msg='Compile Darknet - Configuration - Save MakeFile',
    cmd=f'%%writefile {DARKNET_DOWNLOAD_DIR}darknet/Makefile',
)


## Compiling 

In [None]:
# Build command: run `make` inside the darknet directory.
cmd.add(msg='Compile Darknet', cmd=f'!cd {DARKNET_DOWNLOAD_DIR}darknet && make')

In [None]:
!cd ../Frameworks/darknet && make

## Checking Successful Build

In [None]:
# Smoke test: invoke the freshly built binary without arguments.
cmd.add(msg='Check Sucessful Build', cmd=f'!cd {DARKNET_DOWNLOAD_DIR}darknet && ./darknet')

In [None]:
!cd ../Frameworks/darknet && ./darknet

## Get Weight Config

### Get Location of Configs and Weights 

In [None]:
# List the downloaded cfg / weights / conv files.
cmd.add(
    msg='Directories inside framework/settings/darknet',
    cmd='!ls ../config/darknet_cfgs_convs_weights/downloaded/',
)
# cmd.add('Directories inside framework/settings/darknet', '!ls -lsh settings/darknet/configs/')
# cmd.add('Directories inside framework/settings/darknet', '!ls -lsh settings/darknet/weights/')

In [None]:
!ls ../config/darknet_cfgs_convs_weights/downloaded/

## Detect

In [None]:
## Detection Using A Pre-Trained Model


# Single-image detection, variant 1: `darknet detect <cfg> <weights> <image>`.
cmd.add(
    msg='Testing Model - Single Image - Way1',
    cmd=(
        f'!cd {DARKNET_DOWNLOAD_DIR}darknet && ./darknet detect'
        f' ../{CONFIG_TO_USE_FILE_PATH}'
        f' ../{WEIGHTS_TO_LOAD_FILE_PATH}'
        f' ../{TEST_IMAGE_PATH}'
        f' -thresh {THRESHOLD_TO_USE}'
    ),
)

# Single-image detection, variant 2: `darknet detector test <data> <cfg> <weights> <image>`.
# Reference arguments: cfg/obj.data cfg/yolov3-voc.cfg backup/yolov3-voc_last.weights data/bc.JPG
cmd.add(
    msg='Testing Model - Single Image - Way2',
    cmd=(
        f'!cd {DARKNET_DOWNLOAD_DIR}darknet && ./darknet detector test'
        f' ../{DOT_DATA_FILE_PATH}'
        f' ../{CONFIG_TO_USE_FILE_PATH}'
        f' ../{WEIGHTS_TO_LOAD_FILE_PATH}'
        f' ../{TEST_IMAGE_PATH}'
        f' -thresh {THRESHOLD_TO_USE}'
    ),
)


In [None]:
!cd ../Frameworks/darknet && ./darknet detect ../../config/darknet_cfgs_convs_weights/downloaded/YOLOv3-608.cfg ../../config/darknet_cfgs_convs_weights/downloaded/YOLOv3-608.weights ../../data/processed_data/images_labelled/172_0_2834.jpeg -thresh 0.25

In [None]:
!cd ../Frameworks/darknet && ./darknet detector test ../../data/processed_data/nuggets.data ../../config/darknet_cfgs_convs_weights/downloaded/YOLOv3-608.cfg ../../config/darknet_cfgs_convs_weights/downloaded/YOLOv3-608.weights ../../data/processed_data/images_labelled/172_0_2834.jpeg -thresh 0.25

In [None]:
## Testing on Multiple Images

# No image argument is given here: the image path is entered in the console.
cmd.add(
    msg='Testing Model - Multiple Image',
    cmd=(
        f'!cd {DARKNET_DOWNLOAD_DIR}darknet && ./darknet detect'
        f' ../{CONFIG_TO_USE_FILE_PATH}'
        f' ../{WEIGHTS_TO_LOAD_FILE_PATH}'
        f' -thresh {THRESHOLD_TO_USE}'
    ),
)

## Real Time Detection on Webcam
# -c <num> to pick (OpenCV uses webcam 0 by default).
cmd.add(
    msg='Testing Model - OpenCV - using default camera',
    cmd=(
        f'!cd {DARKNET_DOWNLOAD_DIR}darknet && ./darknet detector demo'
        f' ../{DOT_DATA_FILE_PATH}'
        f' ../{CONFIG_TO_USE_FILE_PATH}'
        f' ../{WEIGHTS_TO_LOAD_FILE_PATH}'
        f' -c 0'
    ),
)

# '<video file>' is a placeholder to be replaced by hand.
cmd.add(
    msg='Testing Model - OpenCV - Video File',
    cmd=(
        f'!cd {DARKNET_DOWNLOAD_DIR}darknet && ./darknet detector demo'
        f' ../{DOT_DATA_FILE_PATH}'
        f' ../{CONFIG_TO_USE_FILE_PATH}'
        f' ../{WEIGHTS_TO_LOAD_FILE_PATH}'
        f' <video file>'
    ),
)


In [None]:
!cd ../Frameworks/darknet && ./darknet detect ../../config/darknet_cfgs_convs_weights/downloaded/YOLOv3-608.cfg ../../config/darknet_cfgs_convs_weights/downloaded/YOLOv3-608.weights -thresh 0.25

## Training

In [None]:
# !nohup ./darknet detector train cfg/obj.data cfg/yolov3-voc.cfg darknet53.conv.74>training_yolov3_voc_defect_1.txt

# Plain training run starting from the convolutional weights.
cmd.add(
    msg='Training the Model',
    cmd=(
        f'!cd {DARKNET_DOWNLOAD_DIR}darknet && ./darknet detector train'
        f' ../{DOT_DATA_FILE_PATH}'
        f' ../{CONFIG_TO_USE_FILE_PATH}'
        f' ../{CONVOLUTION_LAYER_FILE_PATH}'
    ),
)

## multiple gpus run:
cmd.add(
    msg='Training the Model',
    cmd=(
        f'!cd {DARKNET_DOWNLOAD_DIR}darknet && ./darknet detector train'
        f' ../{DOT_DATA_FILE_PATH}'
        f' ../{CONFIG_TO_USE_FILE_PATH}'
        f' ../{CONVOLUTION_LAYER_FILE_PATH}'
        f' -gpus 0,1,2,3'
    ),
)

## stop and restart training from a checkpoint ('<backup>' is a placeholder)
cmd.add(
    msg='Training the Model',
    cmd=(
        f'!cd {DARKNET_DOWNLOAD_DIR}darknet && ./darknet detector train'
        f' ../{DOT_DATA_FILE_PATH}'
        f' ../{CONFIG_TO_USE_FILE_PATH}'
        f' <backup> -gpus 0,1,2,3'
    ),
)

# ./darknet detector train cfg/coco.data cfg/yolov3.cfg backup/yolov3.backup -gpus 0,1,2,3

In [None]:
!cd ../Frameworks/darknet && ./darknet detector train ../../data/processed_data/nuggets.data ../../config/darknet_cfgs_convs_weights/downloaded/YOLOv3-608.cfg ../../config/darknet_cfgs_convs_weights/downloaded/darknet53.conv.74

## Training YOLO on VOC

In [None]:
## Get The Pascal VOC Data

wget https://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar
wget https://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
wget https://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
tar xf VOCtrainval_11-May-2012.tar
tar xf VOCtrainval_06-Nov-2007.tar
tar xf VOCtest_06-Nov-2007.tar

## Generate Labels for VOC
# <object-class> <x> <y> <width> <height>
wget https://pjreddie.com/media/files/voc_label.py
python voc_label.py

ls
'''
2007_test.txt   VOCdevkit
2007_train.txt  voc_label.py
2007_val.txt    VOCtest_06-Nov-2007.tar
2012_train.txt  VOCtrainval_06-Nov-2007.tar
2012_val.txt    VOCtrainval_11-May-2012.tar
'''
cat 2007_train.txt 2007_val.txt 2012_*.txt > train.txt

## Modify Cfg for Pascal Data:  cfg/voc.data
classes= 20
train  = <path-to-voc>/train.txt
valid  = <path-to-voc>/2007_test.txt
names = data/voc.names
backup = backup

## Download Pretrained Convolutional Weights
wget https://pjreddie.com/media/files/darknet53.conv.74

## Train The Model
./darknet detector train cfg/voc.data cfg/yolov3-voc.cfg darknet53.conv.74




In [None]:
# Absolute, machine-specific directory holding the darknet settings used to
# build the VOC training command below.
directory = '/home/mohit/Documents/CargillWorkspace/ChickenNuggetProblem/ChickenNugget_CV/Frameworks/settings/darknet/'
config_path = directory + 'configs/yolov3.cfg'
# weights_path = directory + 'weights/yolov3.weights'
# image_path = directory + 'sample_image/dog.jpg'
data_path = directory + 'voc.data'
# threshold_score = 0.25

conv_path = directory + 'configs/darknet53.conv.74'
# NOTE(review): `cmd_to_run` is only assigned in a later cell of this file, so
# this line raises NameError unless that cell has been executed first.
cmd_to_run.append(f'!cd darknet && ./darknet detector train {data_path} {config_path} {conv_path}')
# Bare expression: shows the accumulated command list as the cell output.
cmd_to_run

In [None]:
!cd darknet && ./darknet detector train /home/mohit/Documents/CargillWorkspace/ChickenNuggetProblem/ChickenNugget_CV/Frameworks/settings/darknet/voc.data /home/mohit/Documents/CargillWorkspace/ChickenNuggetProblem/ChickenNugget_CV/Frameworks/settings/darknet/configs/yolov3.cfg /home/mohit/Documents/CargillWorkspace/ChickenNuggetProblem/ChickenNugget_CV/Frameworks/settings/darknet/configs/darknet53.conv.74

## Training YOLO on COCO

In [None]:
## Get The COCO Data
cp scripts/get_coco_dataset.sh data
cd data
bash get_coco_dataset.sh

## Modify cfg for COCO : cfg/coco.data
classes= 80
train  = <path-to-coco>/trainvalno5k.txt
valid  = <path-to-coco>/5k.txt
names = data/coco.names
backup = backup

## Modify cfg for COCO :  cfg/yolo.cfg
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=64
subdivisions=8
....

## Train The Model
./darknet detector train cfg/coco.data cfg/yolov3.cfg darknet53.conv.74

-- multiple gpus run:
    ./darknet detector train cfg/coco.data cfg/yolov3.cfg darknet53.conv.74 -gpus 0,1,2,3
-- stop and restart training from a checkpoint
    ./darknet detector train cfg/coco.data cfg/yolov3.cfg backup/yolov3.backup -gpus 0,1,2,3



In [None]:
## YOLOv3 on the Open Images dataset

In [None]:
wget https://pjreddie.com/media/files/yolov3-openimages.weights

./darknet detector test cfg/openimages.data cfg/yolov3-openimages.cfg yolov3-openimages.weights


# old Other Code Chunk

In [None]:
# %run $filename.py {args[0]} {args[1][-2:]}

# import subprocess
# # cmd = cmd_to_run.split()
# print(f'Running the command: {cmd_to_run}')
# output = subprocess.run( cmd_to_run, shell=True, check=False)#, capture_output=True)
# print(output.stdout.decode)

In [None]:
WHICH_REPO = 'Original_Darknet' # Original_Darknet, AlexeyAB_Darknet, ultralytics_Darknet, matterport_Mask_RCNN

# Repository cloned for each supported WHICH_REPO value.
REPO_URLS = {
    'Original_Darknet': 'https://github.com/pjreddie/darknet',
    'AlexeyAB_Darknet': 'https://github.com/AlexeyAB/darknet',
    'ultralytics_Darknet': 'https://github.com/ultralytics/yolov3',
    'matterport_Mask_RCNN': 'https://github.com/matterport/Mask_RCNN',
}

# The URL is looked up from WHICH_REPO. The previous `if 'Original_Darknet':`
# chain tested non-empty string literals, which are always truthy, so the
# first branch was taken whatever WHICH_REPO was set to.
# An unknown WHICH_REPO leaves the clone command without a URL.
cmd_to_run = ['!git clone ' + REPO_URLS.get(WHICH_REPO, '')]

print('Clone Repository:\n\t', '\n\t'.join(cmd_to_run))

cmd_to_run.append('!ls -a')

####################### Image Format Changing #################################

In [None]:
import os
# from os import walk, getcwd
from PIL import Image
import glob

cls = "stopsign"

In [None]:
in_img_path = '../data/processed_data/images_labelled/'
out_path = '../data/processed_data/'
classes = ["stopsign"] ## can be read from data.names


def check_allowed_class(cls):
    '''Raise an Exception when ``cls`` is not listed in ``classes``; otherwise do nothing.'''
    if cls in classes:
        return
    raise Exception(f'Error: {cls} is not present in the {classes}.')


def convert_to_class_id(cls):
    '''Return the position of ``cls`` inside ``classes`` (ValueError if it is absent).'''
    class_id = classes.index(cls)
    return class_id


# Every '.txt' file that sits directly in the labelled-image directory.
img_txt_files_path = glob.glob(f'{in_img_path}*.txt')
img_txt_files_path


In [None]:
img_path = '../data/processed_data/images_labelled/172_0_6255.jpeg'#img_txt_files_path[0]
img_path

import cv2

def convert_labels(size, x1, y1, x2, y2):
    """
    Convert a corner-style box (x1, y1, x2, y2) into the normalized YOLO
    representation (x_center, y_center, width, height).

    Parameters:
        size: image shape as (height, width[, channels]) -- the layout
            returned by get_img_shape. A string is treated as an image path
            and resolved through get_img_shape.
        x1, y1, x2, y2: corner coordinates in pixels; the two corners may be
            given in either order.

    Returns:
        (x, y, w, h): box center and box size, each divided by the matching
        image dimension.

    Raises:
        ValueError: the image height or width is missing or zero (e.g. the
            image could not be read).
    """
    # The shape now comes from the `size` argument; it used to be overwritten
    # through an undefined name, so every call failed with NameError.
    if isinstance(size, str):
        size = get_img_shape(size)
    img_height, img_width = size[0], size[1]
    if not img_height or not img_width:
        raise ValueError('Invalid image size: {}'.format(size))

    xmin, xmax = sorted((x1, x2))
    ymin, ymax = sorted((y1, y2))

    x = (xmin + xmax) / 2.0 / img_width
    y = (ymin + ymax) / 2.0 / img_height
    w = (xmax - xmin) / float(img_width)
    h = (ymax - ymin) / float(img_height)
    return (x, y, w, h)


def get_img_shape(path):
    """
    Read the image at ``path`` with cv2 and return its ``shape`` attribute.

    When the loaded object has no ``shape`` attribute, the path is reported on
    stdout and ``(None, None, None)`` is returned instead.
    """
    image = cv2.imread(path)
    try:
        shape = image.shape
    except AttributeError:
        print('error! ', path)
        shape = (None, None, None)
    return shape


# NOTE(review): exploratory scratch code -- as written it cannot run.
# `size`, `x1`, `y1`, `x2` and `y2` are not assigned anywhere in this cell.
convert_labels(size, x1, y1, x2, y2)

img_dict = {}
img_dict['path'] = img_path
# NOTE(review): img_dict['path'] holds the path string assigned just above, so
# the item assignments below raise TypeError; a nested dict was presumably
# intended -- confirm the desired layout.
img_dict['path']['shape'] = get_img_shape(img_path)
img_dict['path']['sections'] = {} 

## categories [options - labels / sections in a image]
# NOTE(review): `s` is not defined in this cell.
img_dict['path']['sections'+s]['Normalized'] = {} 


# bbox_img['x'], bbox_img['y'], bbox_img['width'], bbox_img['height'] = zip(*bbox_img.progress_apply(lambda row: convert_labels(row['Path'], row['x1'], row['y1'], row['x2'], row['y2']), axis=1)) # Like python for one lone code.

In [None]:


# NOTE: an unfinished `def get_ima` stub used to precede this function; it had
# no signature or body and made the whole cell unparsable, so it was dropped.
def convert(size, box_coordinate):
    '''
    Convert a corner-style box to the normalized YOLO format.

    Parameters:
        size: image shape as (height, width[, channels]).
        box_coordinate: (x1, y1, x2, y2) in pixels; the two corners may be
            given in either order.

    Returns:
        ((xmin, ymin, xmax, ymax), (x, y, w, h)): the ordered pixel corners,
        and the box center / size divided by the matching image dimension.

    Raises:
        ValueError: the image height or width is missing or zero.
    '''
    x1, y1, x2, y2 = box_coordinate
    img_height, img_width = size[0], size[1]
    if not img_height or not img_width:
        raise ValueError('Invalid image size: {}'.format(size))

    xmin, xmax = sorted((x1, x2))
    ymin, ymax = sorted((y1, y2))

    x = (xmin + xmax) / 2.0 / img_width
    y = (ymin + ymax) / 2.0 / img_height
    w = (xmax - xmin) / float(img_width)
    h = (ymax - ymin) / float(img_height)
    return (xmin, ymin, xmax, ymax), (x, y, w, h)


def process_a_file(file_path):
    '''
    Read one label file and parse every line made of exactly five whitespace
    separated fields; all other lines are ignored. Each accepted line is
    echoed to stdout.

    Parameters:
        file_path: path of the label '.txt' file to read.

    Returns:
        list of (label, xmin, ymin, x_max, y_max) tuples, the label kept as
        text and the four coordinates converted to float.

    Raises:
        ValueError: a coordinate field of an accepted line is not numeric.
    '''
    # The file named by the argument is read (the body used to index a global
    # list with a global loop counter instead).
    with open(file_path) as file:
        txt = file.read()

    parsed_rows = []
    for line in txt.split('\n'):
        fields = line.split()
        if len(fields) != 5:
            continue
        print(line)
        # NOTE(review): the first field is assumed to be the class label,
        # as in the label lines parsed when the '.data' file is generated --
        # confirm against the actual annotation format.
        label, xmin, ymin, x_max, y_max = fields
        parsed_rows.append(
            (label, float(xmin), float(ymin), float(x_max), float(y_max)))
    return parsed_rows
        


# Progress-reporting skeleton over the label files. The per-file conversion is
# not wired in yet; the draft that used to sit here as commented-out code is
# the same conversion loop that appears as live code in a later cell.
for i in range(len(img_txt_files_path)):

    ## Progress Msg
    if not i:
        print('Data Processing Initiated')
    if not (i + 1) % 10:
        print('Processed {:03d}/{}'.format(i + 1, len(img_txt_files_path)))
    if len(img_txt_files_path) - i == 1:
        print('[Complete] Processed {o}/{o}'.format(o=len(img_txt_files_path)))


In [None]:
# import os
# from os import walk, getcwd
# from PIL import Image

# classes = ["stopsign"]

# def convert(size, box):
#     dw = 1./size[0]
#     dh = 1./size[1]
#     x = (box[0] + box[1])/2.0
#     y = (box[2] + box[3])/2.0
#     w = box[1] - box[0]
#     h = box[3] - box[2]
#     x = x*dw
#     w = w*dw
#     y = y*dh
#     h = h*dh
#     return (x,y,w,h)
    
    
# """-------------------------------------------------------------------""" 

# """ Configure Paths"""   
# mypath = "labels/stopsign_original/"
# outpath = "labels/stopsign/"

# cls = "stopsign"
# if cls not in classes:
#     exit(0)
# cls_id = classes.index(cls)

# wd = getcwd()
# list_file = open('%s/%s_list.txt'%(wd, cls), 'w')

# """ Get input text file list """
# txt_name_list = []
# for (dirpath, dirnames, filenames) in walk(mypath):
#     txt_name_list.extend(filenames)
#     break
# print(txt_name_list)

# Process: convert every label file named in `txt_name_list` from corner boxes
# to YOLO format and list the images that have at least one box.
# NOTE(review): txt_name_list, mypath, outpath, wd, cls_id and list_file are
# only assigned in the commented-out setup above this loop -- restore that
# setup before running this cell.
for txt_name in txt_name_list:
    # Open input text file. splitlines() copes with both LF and CRLF line
    # endings; splitting on '\r\n' left an LF-only file as one single line.
    txt_path = mypath + txt_name
    print("Input:" + txt_path)
    with open(txt_path, "r") as txt_file:
        lines = txt_file.read().splitlines()

    # Open output text file (closed automatically, even on errors).
    txt_outpath = outpath + txt_name
    print("Output:" + txt_outpath)

    # Convert the data to YOLO format.
    ct = 0
    with open(txt_outpath, "w") as txt_outfile:
        for line in lines:
            if len(line) >= 2:
                ct = ct + 1
                print(line + "\n")
                elems = line.split(' ')
                print(elems)
                xmin = elems[0]
                xmax = elems[2]
                ymin = elems[1]
                ymax = elems[3]

                img_path = str('%s/images/%s/%s.JPEG' % (wd, cls, os.path.splitext(txt_name)[0]))
                # Only the image dimensions are needed; release the file right away.
                with Image.open(img_path) as im:
                    w = int(im.size[0])
                    h = int(im.size[1])
                print(w, h)
                b = (float(xmin), float(xmax), float(ymin), float(ymax))
                bb = convert((w, h), b)
                print(bb)
                txt_outfile.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

    # Save those images with bb into list.
    if ct != 0:
        list_file.write('%s/images/%s/%s.JPEG\n' % (wd, cls, os.path.splitext(txt_name)[0]))

list_file.close()

In [None]:
## Data Preparation For Yolo

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]

classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]


def convert(size, box):
    '''
    Turn a pixel box into center / size values scaled by the image dimensions.

    Parameters:
        size: indexable whose items 0 and 1 scale the x and y values
            (called with (w, h) by convert_annotation).
        box: indexable laid out as (xmin, xmax, ymin, ymax).

    Returns:
        (x, y, w, h): x-center and width multiplied by 1/size[0], y-center and
        height multiplied by 1/size[1].
    '''
    dw = 1. / size[0]
    dh = 1. / size[1]
    left, right = box[0], box[1]
    top, bottom = box[2], box[3]
    x_center = (left + right) / 2.0
    y_center = (top + bottom) / 2.0
    box_width = right - left
    box_height = bottom - top
    return (x_center * dw, y_center * dh, box_width * dw, box_height * dh)

def convert_annotation(year, image_id):
    '''
    Convert one VOC XML annotation into a YOLO label file.

    Reads  'VOCdevkit/VOC<year>/Annotations/<image_id>.xml' and writes
    'VOCdevkit/VOC<year>/labels/<image_id>.txt' with one
    "<class id> <x> <y> <w> <h>" line per kept object. Objects whose name is
    not in `classes`, or that are flagged difficult (== 1), are skipped.

    Parameters:
        year: value inserted into the 'VOC<year>' directory name.
        image_id: annotation / label file name without extension.
    '''
    annotation_path = 'VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id)
    label_path = 'VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id)

    # Parsing from the path lets ElementTree open and close the file itself.
    root = ET.parse(annotation_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # Context manager: the label file is flushed and closed even on errors.
    with open(label_path, 'w') as out_file:
        for obj in root.iter('object'):
            # A missing <difficult> tag is treated as "not difficult".
            difficult = obj.findtext('difficult', default='0')
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

# Working directory; used to write absolute image paths into the listing files.
wd = getcwd()

# For every (year, image set) pair: make sure the labels directory exists,
# write the '<year>_<image_set>.txt' image listing and convert each annotation.
for year, image_set in sets:
    os.makedirs('VOCdevkit/VOC%s/labels/' % (year), exist_ok=True)

    # Both files are handled by context managers so no handle is leaked,
    # including when a conversion fails half-way.
    with open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)) as id_file:
        image_ids = id_file.read().strip().split()

    with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id))
            convert_annotation(year, image_id)

