# Import Section
---

In [1]:
from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw, ImageFile
from distutils.dir_util import copy_tree
import time

import fiftyone as fo
import fiftyone.zoo as foz
import os

import ipywidgets as widgets
from ipywidgets import interact, interact_manual, interactive
from ipywidgets import AppLayout, Button, Layout, Box, FloatText, Textarea, Dropdown, Label, IntSlider
from IPython.display import display, HTML
from IPython.display import clear_output
from ipyfilechooser import FileChooser

# Data Processing
---

In [8]:
class fo_download():
    """Download (a subset of) a FiftyOne zoo dataset into a folder under the current directory.

    Args:
        dataset: Zoo dataset name handed to ``foz.load_zoo_dataset`` (e.g. ``'coco-2017'``).
        max_val: Maximum number of samples per split; anything ``int()`` accepts.
        user_dir_name: Download folder name, resolved relative to ``os.getcwd()``.
    """

    def __init__(self, dataset, max_val, user_dir_name):
        self.dataset_name = dataset
        self.max_numbers = int(max_val)

        self.home_path = os.getcwd()
        self.user_dir = os.path.join(self.home_path, user_dir_name)  # download dir
        self.create_user_folder(self.user_dir)

    def create_user_folder(self, dir_path):
        """Create ``dir_path`` together with any missing parent folders.

        An already existing folder is accepted silently. Any other ``OSError``
        is printed and swallowed so that the notebook keeps running.
        """
        try:
            # makedirs instead of mkdir: a nested name such as 'data/coco' must work too.
            os.makedirs(dir_path, exist_ok=True)
        except OSError as error:
            print(error)
            print('skip create')

    def _load_split(self, split_type, classes_list=None):
        """Shared implementation of both public download methods."""
        options = dict(
            label_types=["detections"],
            only_matching=True,
            split=split_type,               # train, validation or test
            max_samples=self.max_numbers,   # download size
            seed=51,
            shuffle=True,
            dataset_dir=self.user_dir,
        )
        # Only forward a class filter when the caller actually supplied one.
        if classes_list is not None:
            options["classes"] = classes_list
        return foz.load_zoo_dataset(self.dataset_name, **options)

    def dataset_download(self, classes_list, split_type):
        """Download one split restricted to ``classes_list`` (e.g. ``["person"]``).

        Returns:
            Whatever ``foz.load_zoo_dataset`` returns for the request.
        """
        return self._load_split(split_type, classes_list)

    def dataset_download_all_class(self, split_type):
        """Download one split without any class filter.

        Returns:
            Whatever ``foz.load_zoo_dataset`` returns for the request.
        """
        return self._load_split(split_type)


class json_2_xml():
    
    def __init__(self, dataset, user_dir_name, classes_list):
        """Remember the folders, class filter and XML templates used by the conversion.

        Args:
            dataset: Accepted for the caller's convenience; not read by this constructor.
            user_dir_name: Download folder name, resolved relative to ``os.getcwd()``.
            classes_list: Class names whose annotations should be converted.
        """
        # Pillow flag: allow truncated image files to be loaded.
        ImageFile.LOAD_TRUNCATED_IMAGES = True

        # Working folder = <current directory>/<user_dir_name>
        self.choose_folder = user_dir_name
        self.home_path = os.path.join(os.getcwd(), self.choose_folder)
        print('Working Folder: {}'.format(self.home_path))

        # FiftyOne puts the annotation json files in the 'raw' sub-folder.
        self.instances_json_folder = 'raw'

        # Images and XML annotations share the same output root.
        output_root = os.path.join(self.home_path, "output_xml")
        self.img_dir = output_root
        self.anno_dir = output_root

        self.datasets_list = ['train', 'validation']
        self.classes_names = classes_list  # COCO has 80 classes

        # Root of the COCO data. The original note described this layout:
        #   $COCO_PATH
        #   ----|annotations
        #   ----|train2017
        #   ----|val2017
        #   ----|test2017
        self.dataDir = os.path.join(self.home_path)

        # Pascal-VOC style templates, filled in with the % operator.
        self.headstr = """\
        <annotation>
            <folder>VOC</folder>
            <filename>%s</filename>
            <source>
                <database>My Database</database>
                <annotation>COCO</annotation>
                <image>flickr</image>
                <flickrid>NULL</flickrid>
            </source>
            <owner>
                <flickrid>NULL</flickrid>
                <name>company</name>
            </owner>
            <size>
                <width>%d</width>
                <height>%d</height>
                <depth>%d</depth>
            </size>
            <segmented>0</segmented>
        """
        self.objstr = """\
            <object>
                <name>%s</name>
                <pose>Unspecified</pose>
                <truncated>0</truncated>
                <difficult>0</difficult>
                <bndbox>
                    <xmin>%d</xmin>
                    <ymin>%d</ymin>
                    <xmax>%d</xmax>
                    <ymax>%d</ymax>
                </bndbox>
            </object>
        """

        self.tailstr = '''\
        </annotation>
        '''
     
    # Create the directory (including missing parents) only when it does not exist yet.
    def mkr(self, path):
        """Create ``path`` (with intermediate folders) unless something already exists there."""
        if os.path.exists(path):
            return
        os.makedirs(path)  # creates every missing level
            
    def id2name(self, coco):
        """Return a ``{category id: category name}`` dict built from ``coco.dataset['categories']``."""
        return {category['id']: category['name'] for category in coco.dataset['categories']}
     
    def write_xml(self, anno_path, head, objs, tail):
        f = open(anno_path, "w")
        f.write(head)
        for obj in objs:
            f.write(self.objstr%(obj[0],obj[1],obj[2],obj[3],obj[4]))
        f.write(tail)
     
     
    def save_annotations_and_imgs(self, coco, dataset, img_dataset_loc, filename, objs):
        """Write the XML annotation for one image and place the image next to it.

        Args:
            coco: Unused here; kept so existing callers keep working.
            dataset: Split name; ``'validation'`` is stored under ``'test'``.
            img_dataset_loc: Image folder relative to ``self.dataDir``.
            filename: Image file name, e.g. ``000000196610.jpg`` -> ``000000196610.xml``.
            objs: ``[name, xmin, ymin, xmax, ymax]`` entries for this image.
        """
        # TensorFlow Object Detection API folder style: validation data lives in 'test'.
        if dataset == 'validation':
            dataset = 'test'

        dst_anno_dir = os.path.join(self.anno_dir, dataset)
        self.mkr(dst_anno_dir)
        # splitext copes with any extension length ('.jpeg', '.png', ...) and
        # os.path.join uses the separator of the current platform.
        anno_path = os.path.join(dst_anno_dir, os.path.splitext(filename)[0] + '.xml')

        src_img_path = os.path.join(self.dataDir, img_dataset_loc, filename)
        dst_img_dir = os.path.join(self.img_dir, dataset)
        self.mkr(dst_img_dir)
        dst_imgpath = os.path.join(dst_img_dir, filename)

        img = cv2.imread(src_img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None instead of raising.
            print("unreadable image, skip!")
            print(src_img_path)
            return

        if not os.path.isfile(dst_imgpath):
            try:
                # A hard link avoids duplicating the image data ...
                os.link(src_img_path, dst_imgpath)
            except OSError:
                # ... but is not possible everywhere (e.g. across drives), so copy instead.
                shutil.copy(src_img_path, dst_imgpath)

        depth = img.shape[2] if img.ndim == 3 else 1
        head = self.headstr % (filename, img.shape[1], img.shape[0], depth)
        self.write_xml(anno_path, head, objs, self.tailstr)
     
     # Arguments: COCO annotation handle, train/val image folder, image info, all classes (id -> name), category ids to look up
    def showimg(self, coco, img_dataset_loc, img, classes, cls_id, show=True):
        img_path = os.path.join(self.dataDir, img_dataset_loc, img['file_name'])
        #print(img_path)
        objs = []
        if os.path.exists(img_path):
            # open img
            #print(self.dataDir, img_dataset_loc)
            try:
                I = Image.open('%s/%s/%s'%(self.dataDir, img_dataset_loc, img['file_name']))
            except UnidentifiedImageError:
                print("bad image, skip!")
                print(img_path)
            
            annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None) #get the ann info by the id
            # print(annIds)
            anns = coco.loadAnns(annIds) #load the ann info
            # print(anns)
            # coco.showAnns(anns)
       
            for ann in anns: # iteration of anns
                class_name = classes[ann['category_id']]#得到这个标注的类别
                if class_name in self.classes_names:#如果是我们想要的
                    #print(class_name)
                    if 'bbox' in ann:#如果标注信息里有bbox
                        bbox=ann['bbox']
                        xmin = int(bbox[0])
                        ymin = int(bbox[1])
                        xmax = int(bbox[2] + bbox[0])
                        ymax = int(bbox[3] + bbox[1])
                        obj = [class_name, xmin, ymin, xmax, ymax]
                        objs.append(obj)
                        draw = ImageDraw.Draw(I)
                        draw.rectangle([xmin, ymin, xmax, ymax])
            if show:
                plt.figure()
                plt.axis('off')
                plt.imshow(I)
                plt.show()
                
        #else:
        #    print("no such file!!!!!!!!!!!!!!!!!!")
        return objs
     
    # Iterate over the data in the instances_train2017 and instances_val2017 annotation files
    def json_2_xml_run(self):
    
        for dataset in self.datasets_list:
            #./COCO/annotations/instances_train2017.json, 51 json naming
            if dataset == 'validation':
                dataset_coco_json = 'val2017'
            else:    
                dataset_coco_json = dataset + '2017'
            
            annFile = '{}\\{}\\instances_{}.json'.format(self.dataDir, self.instances_json_folder, dataset_coco_json)
            #print(annFile)
         
            #use COCO API
            coco = COCO(annFile)
            classes = self.id2name(coco)
            #print(classes) #ex:{1: 'person', 2: 'bicycle',...
            classes_ids = coco.getCatIds(catNms = self.classes_names) #self.classes_names:person
            #print(classes_ids) #[1]
            miss = 0
            for cls in self.classes_names:
                
                cls_id = coco.getCatIds(catNms = [cls])
                img_ids = coco.getImgIds(catIds = cls_id)
                #print(cls,len(img_ids)) #ex: person there are total 64115 plts has person
                
                saved_count = 0
                for imgId in tqdm(img_ids):
                    img = coco.loadImgs(imgId)[0]
                    #print(img)
                    filename = img['file_name']
                    #print(filename)
                    img_dataset_loc = os.path.join(dataset, 'data') ##The 51 apps' folder is train/data style.
                    #print("img_dataset_loc:{}".format(img_dataset_loc))
                    objs = self.showimg(coco, img_dataset_loc, img, classes, classes_ids, show=False)
                    if(objs):
                        self.save_annotations_and_imgs(coco, dataset, img_dataset_loc, filename, objs)
                        saved_count = saved_count+1
                    else:
                        miss = miss+1
                print("The {} {} class has {} plots".format(dataset, cls, saved_count))        
            #print(miss)
        return self.anno_dir 

# Widgets Control Section
---

In [6]:
class init_download_img_widgets():
    def __init__(self):
        """Create every input widget and group them into the five bordered form boxes."""
        form_item_layout = Layout(
            display='flex',
            flex_flow='row',
            justify_content='space-between',
        )

        # --- small factories for the widget shapes that repeat below ---
        def text_box(default):
            return widgets.Text(value=default, placeholder='Type something', disabled=False)

        def text_area(default):
            return widgets.Textarea(value=default, placeholder='Type something', disabled=False)

        def check_box(default):
            return widgets.Checkbox(value=default, disabled=False, indent=False)

        def action_button(caption):
            return widgets.Button(description=caption, layout=Layout(width='30%', height='30px'),
                                  button_style='success')

        def labelled_row(caption, control):
            return Box([Label(value=caption), control], layout=form_item_layout)

        def bordered_column(rows):
            return Box(rows, layout=Layout(
                display='flex',
                flex_flow='column',
                border='solid 3px lightgreen',
                align_items='stretch',
                width='50%',
            ))

        ### open source data download ###
        self.A_ta = text_box('coco-2017')
        self.B_ta = text_box('50')
        self.C_ta = text_box('coco-2017-50-2class')
        self.D_ta = check_box(False)
        self.E_ta = text_area('dog,cat')
        self.F_ta = Dropdown(options=['train & validation', 'train', 'validation', 'test'])
        self.G_ta = action_button('Download')
        self.H_ta = action_button('Create .xml')
        self.I_ta = check_box(True)

        self.form_box_train_para = bordered_column([
            labelled_row('Zoo Dataset Name', self.A_ta),
            labelled_row('Max Download Number', self.B_ta),
            labelled_row('User Dir Name', self.C_ta),
            labelled_row('All Classes', self.D_ta),
            labelled_row('Classes List', self.E_ta),
            labelled_row('Split Type', self.F_ta),
            labelled_row('Super Lazy Mode', self.I_ta),
            labelled_row('Start to Download', self.G_ta),
            labelled_row('Create Annotation Files(COCO only)', self.H_ta),
        ])

        ### custom data labeling ###
        self.A_da = action_button('LabelImg')
        self.F_da = text_area('person')
        self.B_da = widgets.FloatSlider(value=0.1, min=0.0, max=1.0, step=0.05)
        self.C_da = check_box(True)
        self.E_da = check_box(True)
        self.D_da = action_button('FolderChooser')

        self.form_box_data_para = bordered_column([
            labelled_row('Label Image Tool', self.A_da),
            labelled_row('Classes List (custom)', self.F_da),
            labelled_row('Testing Percentage', self.B_da),
            labelled_row('Copy .XML', self.C_da),
            labelled_row('Super Lazy Mode', self.E_da),
            labelled_row('Choose the Partition Folder', self.D_da),
        ])

        ### dataset moving ###
        self.A_dm = action_button('FolderChooser')

        self.form_box_data_move = bordered_column([
            labelled_row('Dataset moving', self.A_dm),
        ])

        ### tfrecord setting ###
        self.A_tfr = text_area(r'workspace\training_demo_1000\images\train')
        self.B_tfr = text_area(r'workspace\training_demo_1000\annotations\label_map.pbtxt')
        self.C_tfr = action_button('Create')
        self.D_tfr = action_button('Create')

        self.form_box_tfr = bordered_column([
            labelled_row('Label Map Location', self.B_tfr),
            labelled_row('Create Label Map', self.C_tfr),
            labelled_row('Image Folder', self.A_tfr),
            labelled_row('Create tfrecord Files(.record)', self.D_tfr),
        ])

        ### working directory creation ###
        self.A_work = text_box('training_demo')
        self.B_work = action_button('Create')
        self.C_work = Dropdown(options=['tf2', 'tf1'])

        self.form_box_work = bordered_column([
            labelled_row('Your Working Directory Name', self.A_work),
            labelled_row('Tensorflow Version', self.C_work),
            labelled_row('Create Your New Working Directory', self.B_work),
        ])
        
    def move_allfiles(self, src_folder, dst_folder):
        """Copy the whole ``src_folder`` tree into ``dst_folder``.

        Despite the name, nothing is removed from the source. When
        ``src_folder`` is not an existing directory, a message is printed and
        nothing is copied.
        """
        # Check first: os.listdir raises for a missing path or a plain file,
        # which used to make the message below unreachable.
        if not os.path.isdir(src_folder):
            print("The source location is not a folder.")
            return

        print("Copy: {} files".format(len(os.listdir(src_folder))))
        print("Copying...")
        # NOTE(review): distutils is removed in Python 3.12; consider
        # shutil.copytree(..., dirs_exist_ok=True) when upgrading.
        copy_tree(src_folder, dst_folder)
        print("Finish!")
    
    def create_folder(self, dir_path):
        """Create ``dir_path`` together with any missing parent folders.

        An already existing folder is accepted silently. Any other ``OSError``
        is printed and swallowed so that the notebook keeps running.
        """
        try:
            # makedirs instead of mkdir: the parent (e.g. '../workspace') may not exist yet.
            os.makedirs(dir_path, exist_ok=True)
        except OSError as error:
            print(error)
            print('skip create')
            
    def copy_files(self, src_file, dst_file):
        """Copy ``src_file`` to ``dst_file``, printing a message instead of raising on I/O errors."""
        try:
            shutil.copy(src_file, dst_file)
        except shutil.SameFileError:
            print("Source and destination represents the same file.")
        except PermissionError:
            print("Permission denied.")
        except OSError as error:
            # OSError covers the remaining file-system failures (missing source, ...)
            # without also swallowing KeyboardInterrupt or programming errors.
            print("Error occurred while copying file.")
            print(error)
    
    def show_headline(self, output):
        """Display ``output`` as a bold, light-blue, size-2 HTML headline."""
        display(widgets.HTML(value = f"<b><font color='lightblue'><font size=2>{output}</b>"))
    
    def show_main(self):   
        
        intro_text = 'Please Choose the open source images download setting'
        htmlWidget = widgets.HTML(value = f"<b><font color='lightgreen'><font size=4>{intro_text}</b>")
        display(htmlWidget)
        
        #Create an accordion and put the 2 boxes
        accordion = widgets.Accordion(children=[self.form_box_work, self.form_box_train_para, self.form_box_data_para, 
                                                self.form_box_data_move, self.form_box_tfr]).add_class("parentstyle")
        #Add a custom style tag to the notebook, you can use dev tool to inspect the class names
        display(HTML("<style>.parentstyle > .p-Accordion-child > .p-Collapse-header{background-color:green}</style>"))
        accordion.set_title(0, 'Working Directory')
        accordion.set_title(1, 'Download Dataset')
        accordion.set_title(2, 'Labeling (Custom Dataset)')
        accordion.set_title(3, 'Dataset moving')
        accordion.set_title(4, 'tfrecord Setting')
        
        
        def act_para(dataset,max_val,user_dir_name,all_classes,classes_string,split_type,test_ratio,xml_copy,img_fdr,labelmap_loc,
                    download_lazy,custom_lazy): 
            #------------------#
            # create classes_list we want to download from classes_string
            #------------------#
            classes_list = classes_string.split(',')
            
            if download_lazy and custom_lazy:
                self.form_box_data_move.layout.visibility = 'hidden'
                self.form_box_tfr.layout.visibility = 'hidden'
            else:
                self.form_box_data_move.layout.visibility = 'visible'
                self.form_box_tfr.layout.visibility = 'visible'
        #------------------#
        # widgets.Accordion's interactive input with action function `act_para()`
        #------------------#
        out_inter = widgets.interactive_output(act_para, {'dataset': self.A_ta, 'max_val': self.B_ta, 'user_dir_name': self.C_ta,
                                                          'all_classes': self.D_ta, 'classes_string': self.E_ta, 'split_type': self.F_ta,
                                                          'test_ratio' : self.B_da, 'xml_copy' : self.C_da,
                                                          'img_fdr': self.A_tfr, 'labelmap_loc': self.B_tfr,
                                                          'download_lazy': self.I_ta, 'custom_lazy': self.E_da})

        display(accordion, out_inter)
        
        #------------------#
        # buttoms event control in widgets.Accordion
        #------------------#
        output_labelimg = widgets.Output(layout=Layout(border = '1px solid green'))
        display(output_labelimg)
        def on_button_clicked_LabelImg(b):
                with output_labelimg:
                    clear_output()
                    #print("Open LabelImg...") ##ToDo: split the dataset into train&test
                    self.open_labelImg()
        self.A_da.on_click(on_button_clicked_LabelImg)
        
        def on_button_clicked_download_data(b):
            with output_labelimg:
                clear_output()
                classes_list = self.E_ta.value.split(',')
                
                self.show_headline("The data download setting is finish and saved..., please wait for downloading")
                        
                if not self.D_ta.value:
                    if self.F_ta.value == 'train & validation':
                        my_fo = fo_download(self.A_ta.value, self.B_ta.value, self.C_ta.value)
                        my_fo.dataset_download(classes_list, "train")
                        my_fo.dataset_download(classes_list, "validation")
                    else:
                        my_fo = fo_download(self.A_ta.value, self.B_ta.value, self.C_ta.value)
                        my_fo.dataset_download(classes_list, self.F_ta.value)
                else:
                    if self.F_ta.value == 'train & validation':
                        my_fo = fo_download(self.A_ta.value, self.B_ta.value, self.C_ta.value)
                        my_fo.dataset_download_all_class("train")
                        my_fo.dataset_download_all_class("validation")
                    else:
                        my_fo = fo_download(self.A_ta.value, self.B_ta.value, self.C_ta.value)
                        my_fo.dataset_download_all_class(self.F_ta.value)
                        
                ### Lazy mode is finish .xml, moving train/test folders, label_map, tfrecord at once.
                if self.I_ta.value:
                    self.super_lazy_event(False, None)
                    
                print('Finish')      
        self.G_ta.on_click(on_button_clicked_download_data)
        
        def on_button_clicked_convert_xml(b):
            with output_labelimg:
                clear_output()
                classes_list = self.E_ta.value.split(',')
                self.show_headline("xml creating...")
                trans_xml = json_2_xml(self.A_ta.value, self.C_ta.value, classes_list)
                trans_xml.json_2_xml_run()
                print('Finish the xml create') 
        self.H_ta.on_click(on_button_clicked_convert_xml)
        
        def on_button_clicked_FileChooser_partition(b):
            with output_labelimg:
                clear_output()
                self.dataset_partition_event(self.B_da.value, self.C_da.value)           
        self.D_da.on_click(on_button_clicked_FileChooser_partition)
         
        def on_button_clicked_FileChooser(b):
            with output_labelimg:
                clear_output()
                self.dataset_moving_event()           
        self.A_dm.on_click(on_button_clicked_FileChooser)
        
        def on_button_clicked_tfRecord(b):
            with output_labelimg:
                clear_output()
                
                path_main_folder = os.path.abspath('..')  ##The training folder is in ../workspace/user_train_folder
                abs_img_fdr = os.path.join(path_main_folder, self.A_tfr.value)
                abs_labelmap_loc = os.path.join(path_main_folder, self.B_tfr.value)
                
                output_filename = self.A_tfr.value.split('\\')[-1]
                abs_output_record = abs_labelmap_loc.split(abs_labelmap_loc.split('\\')[-1])[0] + output_filename + r'.record'
                
                # run the generate_tfrecord.py
                self.show_headline("start generate tfrecord...")
                ! python generate_tfrecord.py -x "{abs_img_fdr}" -l "{abs_labelmap_loc}" -o "{abs_output_record}"    
        self.D_tfr.on_click(on_button_clicked_tfRecord)
        
        def on_button_clicked_make_labelmap(b):
            self.show_headline("Make Label Map...")
            with output_labelimg:
                clear_output()
                classes_list = self.E_ta.value.split(',')
                self.make_labelmap_txt(classes_list, self.B_tfr.value)
        self.C_tfr.on_click(on_button_clicked_make_labelmap)
        
        def on_button_clicked_create_working_dir(b):
            with output_labelimg:
                clear_output()
                self.create_working_directory(self.A_work.value)
                         
        self.B_work.on_click(on_button_clicked_create_working_dir)
    
    #------------------#
    # event job functions
    #------------------#
    def super_lazy_event(self, __custom_dataset, __custom_dataset_loc):
        self.show_headline("Super Lazy Mode:")
        
        if __custom_dataset:
            classes_list = self.F_da.value.split(',')
            output_folder = __custom_dataset_loc
        else:    
            self.show_headline("xml creating...")
            classes_list = self.E_ta.value.split(',')
            trans_xml = json_2_xml(self.A_ta.value, self.C_ta.value, classes_list)
            output_folder = trans_xml.json_2_xml_run()
        
        self.show_headline("Move to workspace...")
        my_work_folder = os.path.join(os.path.abspath('..'), 'workspace', self.A_work.value)
        dst_folder = os.path.join(my_work_folder, 'images', 'train') # ../workspace/<dir_name>/images
        src_folder = os.path.join(output_folder, 'train') # only copy the train&test cause of tfobj_api
        #self.create_folder(dst_folder)
        #self.move_allfiles(src_folder, dst_folder)
        shutil.move(src_folder, dst_folder)
        
        dst_folder = os.path.join(my_work_folder, 'images', 'test') # ../workspace/<dir_name>/images
        src_folder = os.path.join(output_folder, 'test')
        #self.create_folder(dst_folder)
        #self.move_allfiles(src_folder, dst_folder)
        shutil.move(src_folder, dst_folder)
        
        self.show_headline("Make Label Map...")
        self.make_labelmap_txt(classes_list, 
                               os.path.join('workspace', self.A_work.value, 'annotations', 'label_map.pbtxt'))
        
        self.show_headline("Generate tfrecord...")
        abs_img_fdr = os.path.join(my_work_folder, 'images', 'train')  #train.record
        abs_labelmap_loc = os.path.join(my_work_folder, 'annotations', 'label_map.pbtxt')
        abs_output_record = os.path.join(my_work_folder, 'annotations', 'train.record')
        ! python generate_tfrecord.py -x "{abs_img_fdr}" -l "{abs_labelmap_loc}" -o "{abs_output_record}"
        abs_img_fdr = os.path.join(my_work_folder, 'images', 'test')  #test.record
        abs_labelmap_loc = os.path.join(my_work_folder, 'annotations', 'label_map.pbtxt')
        abs_output_record = os.path.join(my_work_folder, 'annotations', 'test.record')
        ! python generate_tfrecord.py -x "{abs_img_fdr}" -l "{abs_labelmap_loc}" -o "{abs_output_record}"
    
    def dataset_partition_event(self, test_ratio, xml_copy):
        path_fc = os.getcwd() ##The image dataset location
        fc = FileChooser(path_fc)
        fc.show_only_dirs = True
        fc.title = f"<b><font color='lightblue'><font size=4>Choose image dataset to partition.</b>"
        display(fc)
        
        def act_partition_images():
            input_folder = fc.selected_path
            print("The test ration: {}".format(test_ratio))
            print("Selected folder: {}".format(input_folder))
            
            print("partition...")
            if xml_copy:
                !python partition_dataset.py -x -i "{input_folder}" -r "{test_ratio}"
            else:
                !python partition_dataset.py -i "{input_folder}" -r "{test_ratio}"
                
            if self.E_da.value:  
                self.super_lazy_event(True, input_folder)
                
            print("Finish!")
        evt = interact_manual(act_partition_images)
        evt.widget.children[0].description = 'Start partition'  #because there are 3 parameter of the evt
        evt.widget.children[0].button_style = 'primary'
        
    def dataset_moving_event(self):
        """Show source and destination folder choosers plus a button that copies between them.

        The destination chooser starts in ``../workspace``, which is created
        when it does not exist yet.
        """
        source_picker = FileChooser(os.getcwd())  # browsing starts at the image dataset location
        source_picker.show_only_dirs = True
        source_picker.title = f"<b><font color='lightblue'><font size=4>Choose image dataset.</b>"
        display(source_picker)

        # the training folders live in ../workspace/<user_train_folder>
        workspace_dir = os.path.join(os.path.abspath('..'), 'workspace')
        if not os.path.isdir(workspace_dir):
            os.makedirs(workspace_dir)
            print("<Create a new folder: {}>".format(workspace_dir))

        target_picker = FileChooser(workspace_dir)
        target_picker.show_only_dirs = True
        target_picker.title = f"<b><font color='lightblue'><font size=4>Copy to specify train folder.</b>"
        display(target_picker)

        def act_copy_folder():
            print("Selected folder: {}".format(source_picker.selected_path))

            copied_dst = target_picker.selected_path
            print("Copy to: {}".format(copied_dst))
            self.move_allfiles(source_picker.selected_path, copied_dst)

        run_control = interact_manual(act_copy_folder)
        # the first child of the generated widget is its run button
        start_button = run_control.widget.children[0]
        start_button.description = 'Start Moving'
        start_button.button_style = 'primary'
        
    def make_labelmap_txt(self, classes_list, labelmap_loc):
        """Write a label map with one ``item { id, name }`` entry per class.

        Args:
            classes_list: Class names; ids are assigned in list order, starting at 1.
            labelmap_loc: Target file, resolved relative to the parent of the
                current directory (an absolute path is used as given).
        """
        print(classes_list)
        path_main_folder = os.path.abspath('..')  # the training folder is in ../workspace/user_train_folder
        pbtxt_path = os.path.join(path_main_folder, labelmap_loc)
        print(pbtxt_path)

        # The target folder (e.g. .../annotations) may not have been created yet.
        os.makedirs(os.path.dirname(pbtxt_path), exist_ok=True)

        with open(pbtxt_path, 'w') as pbtxt:
            for idx, val in enumerate(classes_list, start=1):
                pbtxt.write(f"item {{\n    id: {idx}\n    name: '{val}'\n}}\n")
                
    def create_working_directory(self, dir_name):
        """Create the training working directory ``../workspace/<dir_name>``.

        The directory is created together with its standard sub-folders, then
        the training/export scripts matching the TensorFlow version selected
        in ``self.C_work`` are copied into it from
        ``../models/research/object_detection``.

        Args:
            dir_name: Name of the working directory to create inside
                ``workspace``.
        """
        parent_dir = os.path.abspath('..')
        path_work_dir = os.path.join(parent_dir, "workspace", dir_name)
        print("Start to create {} ...".format(path_work_dir))

        # The working directory itself goes first, then each sub-folder.
        self.create_folder(path_work_dir)
        sub_folders = (
            "annotations",
            "exported-models",
            "images",
            "models",
            "pre-trained-models",
        )
        for sub_folder in sub_folders:
            self.create_folder(os.path.join(path_work_dir, sub_folder))

        # Scripts needed depend on the TensorFlow version chosen in the widget.
        if self.C_work.value == "tf2":
            needed_scripts = (
                "model_main_tf2.py",
                "export_tflite_graph_tf2.py",
                "exporter_main_v2.py",
            )
            version_msg = "This working is running at tensorflow2"
        else:
            needed_scripts = (
                "model_main.py",
                "export_tflite_ssd_graph.py",
                "export_inference_graph.py",
            )
            version_msg = "This working is running at tensorflow1"
        print(version_msg)

        scripts_src_dir = os.path.join(
            parent_dir, "models", "research", "object_detection"
        )
        for script_name in needed_scripts:
            self.copy_files(
                os.path.join(scripts_src_dir, script_name),
                os.path.join(path_work_dir, script_name),
            )
        print("Finish!")
        
    def open_labelImg(self):   
        """Run the ``labelImg`` command in a system shell."""
        # `!` is an IPython shell escape, so this method only works when the
        # class is defined inside a notebook / IPython session, and it needs
        # the `labelImg` command to be available on the PATH.
        !labelImg

# Run Section
---
- A detailed description of all the parameters and folders can be found [here](#id-PDIH).
- To prepare the next step's working folder for training, please update your `Folder Name` and click `Create Working Folder`.
- All working folders are located in the `workspace` folder.
- The structure of a working directory is as follows:
    <pre>
    workspace/
    └─training_demo/ 
    │  ├─ annotations/
    │  ├─ exported-models/
    │  ├─ images/
    │  ├─ models/
    │  ├─ pre-trained-models/
    │  ├
    │  ├─ model_main_tf2.py
    │  ├─ exporter_main_v2.py
    │  └─ export_tflite_graph_tf2.py
    │
    └─my_training_dir/
       ├─ annotations/
       └─ ...
</pre>

In [9]:
# Create the widget controller and call its main entry point; presumably this
# is what renders the HTML/Accordion/Output widgets shown in the cell output.
act = init_download_img_widgets()
act.show_main()

HTML(value="<b><font color='lightgreen'><font size=4>Please Choose the open source images download setting</b>…

Accordion(children=(Box(children=(Box(children=(Label(value='Your Working Directory Name'), Text(value='traini…

Output()

Output(layout=Layout(border_bottom='1px solid green', border_left='1px solid green', border_right='1px solid g…

<a id="id-PDIH"></a>
# Parameter Description & Issue Help
---

## Parameter Description
   1. If you want to search for more specialized open source image datasets, you can go to [roboflow](https://universe.roboflow.com/) to download more cool datasets.
   2. There are 5 main functions in the tabs:
        1. Working Directory:
            - Please enter your own defined working folder name at `Your Working Directory Name`.
            - `Tensorflow Version` depends on whether the model you are using is tensorflow1 or tensorflow2.
        1. Download Setting:
            - `The Zoo Dataset Name` is one of the names listed in [fiftyone_dataset_zoo](https://voxel51.com/docs/fiftyone/user_guide/dataset_zoo/datasets.html). `Max Download Number` is the number of pictures you want to download when downloading partially. `User Dir Name` is the name you give to this downloaded dataset. `All Classes` downloads all the classes. `Classes List` downloads only the classes you want. `Split Type` is the dataset split to download, if the dataset has been split into test, train and validation. Please note that if the dataset doesn't support these attributes, they will be skipped.
            - After finishing your settings, click the `Start to Download` button.
            - If the dataset doesn’t have `*.xml` files, convert the annotation file from `*.json` to `*.xml` (only the COCO dataset is supported so far) by clicking `Create .xml`.
      2. Labeling:
          - `Label Image Tool` opens the labeling tool.
          - `Testing Percentage` splits the original dataset into testing and training sets randomly. `Copy .XML` copies the XML files at the same time when splitting. `Choose the Partition Folder` is the folder you want to split.
      3. Dataset Moving:
          - This will help you move the finished dataset to your training folder.
          - For example, you only need to choose `coco-2017_xml` to copy the inside folders `train/`&`test/` to `workspace/training_demo/images/` at the same time.
      4. tfrecord Setting:
          - `Label Map Location` is the place of creating label map file and should be in `annotations/`. It gets the value from `Classes List`. 
          - Because we are using tensorflow object detection, we need to convert our dataset to the `tfrecord` format and save it in `annotations/`.
          - `Image Folder` is the place of your dataset.
          
## Working Directory Description
   1. The procedure for training is described in `train_cmd.ipynb`. The following only introduces the structure of the working folder and its files.
   2. There are 5 folders in `training_demo`, and you can name this working directory as you like. (How to train is described in `train_cmd.ipynb`)
      - `annotations`: put the `label_map.pbtxt` and `train/test.record`.
      - `exported-models`: put the pre-trained model and its checkpoint.
      - `images`: put the image data and the corresponding .xml files.
      - `models`: a folder that saves the training weights, checkpoints and *.config files.
      - `pre-trained-models`: a folder that saves the downloaded pre-trained model checkpoints.
   3. There are 3 files in `training_demo`. 
      - `model_main_tf2.py`: The training script.
      - `exporter_main_v2.py`: The exporting normal model script.
      - `export_tflite_graph_tf2.py`: The exporting tflite model script.
    

## Issue Help
1. If an error occurs or the download gets stuck, please click `Start to Download` again. It will download the rest of the images. This may need to be repeated many times if the dataset is large.
2. Not all datasets support partial download in `Download Setting`. If not, the dataset will be downloaded in full.  
3. If there is an error when you download the `VOC` dataset on Windows, please run the registry setting `Remove 260 Character Path Limit.reg`
in `tool/LongPathNamesHacks/`