In [25]:
#TF2 Trainer
# Following tutorial on
#https://github.com/TNTran92/TensorFlow-Object-Detection-API-Tutorial-Train-Multiple-Objects-Windows-10
# Prerequisite: the images have been labeled with labelImg and placed in the images/train directory

In [26]:
import random
import array

# maximum length of password needed
# this can be changed to suit your password length
def gen_pwd(max_len=6):
    """Generate a random identifier made of lowercase letters and digits.

    The result is exactly ``max_len`` characters long, always starts with a
    lowercase letter and always contains at least one digit.

    Args:
        max_len: Total length of the generated string. Must be at least 3
            (one leading letter plus the guaranteed letter and digit).

    Returns:
        The generated string.

    Raises:
        ValueError: If ``max_len`` is smaller than 3.
    """
    if max_len < 3:
        raise ValueError("max_len must be at least 3")

    digits = "0123456789"
    # Same alphabet as the original character list, which does not contain 'l'.
    lowercase = "abcdefghijkmnopqrstuvwxyz"
    combined = lowercase + digits

    # Guarantee at least one digit and one letter in the shuffled part.
    rand_digit = random.choice(digits)
    rand_lower = random.choice(lowercase)

    body = [rand_digit, rand_lower]
    # One slot is reserved for the leading letter and two for the guaranteed
    # characters above; the remainder is filled from the combined alphabet.
    body.extend(random.choice(combined) for _ in range(max_len - 3))

    # Shuffle so the guaranteed characters do not sit at fixed positions.
    random.shuffle(body)

    # The first character must always be a letter.
    return rand_lower + "".join(body)

In [3]:
# Preprocess data
# Replace " " with "_" in every file name and in the <filename> field of every .xml annotation
import os
import glob
import xml.etree.ElementTree as ET
import shutil

def replace_spaces_in_xml_filenames(directory):
    """Replace spaces with underscores in the ``<filename>`` field of each XML file.

    Only files directly inside ``directory`` whose name ends with ``.xml`` are
    considered. A file is rewritten only when its ``<filename>`` value changes.

    Args:
        directory: Path of the directory holding the annotation files.
    """
    for filename in os.listdir(directory):
        if not filename.endswith('.xml'):
            continue
        xml_path = os.path.join(directory, filename)
        tree = ET.parse(xml_path)
        node = tree.getroot().find('filename')
        # Skip annotations that have no (or an empty) <filename> element.
        if node is None or not node.text:
            continue
        old_filename = node.text
        new_filename = old_filename.replace(" ", "_")
        if new_filename == old_filename:
            continue
        # Assign (the original compared with `==`) and persist the change.
        node.text = new_filename
        tree.write(xml_path)
        print(f"{old_filename} ->>>> {new_filename}")

def replace_png_in_xml_filenames(directory):
    """Replace ``.png`` with ``.jpg`` in the ``<filename>`` field of each XML file.

    Only files directly inside ``directory`` whose name ends with ``.xml`` are
    considered. A file is rewritten only when its ``<filename>`` value changes.

    Args:
        directory: Path of the directory holding the annotation files.
    """
    for filename in os.listdir(directory):
        if not filename.endswith('.xml'):
            continue
        xml_path = os.path.join(directory, filename)
        tree = ET.parse(xml_path)
        node = tree.getroot().find('filename')
        # Skip annotations that have no (or an empty) <filename> element.
        if node is None or not node.text:
            continue
        old_filename = node.text
        new_filename = old_filename.replace(".png", ".jpg")
        if new_filename == old_filename:
            continue
        # Assign (the original compared with `==`, so nothing was ever changed).
        node.text = new_filename
        tree.write(xml_path)
        print(f"{old_filename} ->>>> {new_filename}")

def replace_JPG_with_jpg_in_xml_filenames(directory):
    """Replace ``.JPG`` with ``.jpg`` in the ``<filename>`` field of each XML file.

    Only files directly inside ``directory`` whose name ends with ``.xml`` are
    considered. A file is rewritten only when its ``<filename>`` value changes.

    Args:
        directory: Path of the directory holding the annotation files.
    """
    for filename in os.listdir(directory):
        if not filename.endswith('.xml'):
            continue
        xml_path = os.path.join(directory, filename)
        tree = ET.parse(xml_path)
        node = tree.getroot().find('filename')
        # Skip annotations that have no (or an empty) <filename> element.
        if node is None or not node.text:
            continue
        old_filename = node.text
        new_filename = old_filename.replace(".JPG", ".jpg")
        if new_filename == old_filename:
            continue
        # Assign (the original compared with `==`) and persist the change.
        node.text = new_filename
        tree.write(xml_path)
        print(f"{old_filename} ->>>> {new_filename}")

def replace_space_with_underscore(directory):
    """Rename every regular file in ``directory`` so spaces become underscores.

    The original computed the new name but discarded it, so no file was ever
    renamed. A file is skipped (with a message) when the target name already
    exists, so no existing file is overwritten.

    Args:
        directory: Path of the directory whose files are renamed.
    """
    for filename in os.listdir(directory):
        new_filename = filename.replace(" ", "_")
        if new_filename == filename:
            continue
        old_file_path = os.path.join(directory, filename)
        if not os.path.isfile(old_file_path):
            continue
        new_file_path = os.path.join(directory, new_filename)
        if os.path.exists(new_file_path):
            print(f"Skipped {filename}: {new_filename} already exists")
            continue
        os.rename(old_file_path, new_file_path)
        print(f"Renamed {filename} into {new_filename}")

def replace_JPG_with_jpg(directory):
    """Rename every ``*.JPG`` file in ``directory`` to use a ``.jpg`` extension.

    The original computed the new name but discarded it, so no file was ever
    renamed. Only the extension is changed; a "JPG" inside the base name is
    left alone. A file is skipped (with a message) when a different file
    already has the target name.

    Args:
        directory: Path of the directory whose files are renamed.
    """
    for filename in os.listdir(directory):
        stem, ext = os.path.splitext(filename)
        if ext != ".JPG":
            continue
        old_file_path = os.path.join(directory, filename)
        if not os.path.isfile(old_file_path):
            continue
        new_filename = stem + ".jpg"
        new_file_path = os.path.join(directory, new_filename)
        # On a case-insensitive file system the target "exists" because it is
        # the very same file; that case must still be renamed.
        if os.path.exists(new_file_path) and not os.path.samefile(old_file_path, new_file_path):
            print(f"Skipped {filename}: {new_filename} already exists")
            continue
        os.rename(old_file_path, new_file_path)
        print(f"Renamed {filename} into {new_filename}")

def add_format_to_filename(directory, format):
    """Append ``format`` to the name of every entry found in ``directory``.

    Args:
        directory: Path of the directory whose entries are renamed.
        format: Text appended to each name, e.g. ``".heif"``.
    """
    for entry in os.listdir(directory):
        suffixed = entry + format
        os.rename(
            os.path.join(directory, entry),
            os.path.join(directory, suffixed),
        )
        print(f"Renamed {entry} into {suffixed}")

def move_files_starting_with_number(source_dir, dest_dir):
    """Move every regular file whose name begins with a digit to ``dest_dir``.

    ``dest_dir`` is created when missing. Sub-directories of ``source_dir``
    are never moved.

    Args:
        source_dir: Directory that is scanned.
        dest_dir: Directory that receives the matching files.
    """
    os.makedirs(dest_dir, exist_ok=True)

    # Collect the candidates first, then move them.
    numbered = []
    for entry in os.listdir(source_dir):
        if os.path.isfile(os.path.join(source_dir, entry)) and entry[0].isdigit():
            numbered.append(entry)

    for entry in numbered:
        shutil.move(os.path.join(source_dir, entry), os.path.join(dest_dir, entry))
        print(f"Moved: {entry}")

def add_prefix_to_xml_file_filename_field(directory, prefix):
    """Prefix digit-leading ``<filename>`` values in each XML file.

    Every ``filename`` element whose text starts with a digit gets ``prefix``
    prepended. Elements with empty text, or text that does not start with a
    digit, are left untouched. A file is rewritten only when something changed.

    Args:
        directory: Path of the directory holding the annotation files.
        prefix: Text prepended to each matching value.
    """
    for filename in os.listdir(directory):
        if not filename.endswith('.xml'):
            continue
        xml_path = os.path.join(directory, filename)
        tree = ET.parse(xml_path)
        changed = False
        for elem in tree.getroot().iter('filename'):
            old_filename = elem.text
            # Guard against empty elements before indexing the first character.
            if not old_filename or not old_filename[0].isdigit():
                continue
            new_filename = prefix + old_filename
            elem.text = new_filename
            changed = True
            # Report inside the loop: the names are only defined for a match.
            print(f"{old_filename} ->>>> {new_filename}")
        if changed:
            tree.write(xml_path)

def add_prefix_to_file_starting_with_number(directory, prefix):
    """Prepend ``prefix`` to every regular file whose name starts with a digit.

    Each file name is printed as it is visited; renamed files get a second
    message.

    Args:
        directory: Path of the directory whose files are inspected.
        prefix: Text prepended to each matching file name.
    """
    # Snapshot of the regular files before anything is renamed.
    regular_files = []
    for entry in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, entry)):
            regular_files.append(entry)

    for name in regular_files:
        print(name)
        if not name[0].isdigit():
            continue
        prefixed = prefix + name
        os.rename(os.path.join(directory, name), os.path.join(directory, prefixed))
        print(f"Renamed {name} into {prefixed}")


def rename_filenames(directory, old_char, new_char):
    """Replace ``old_char`` with ``new_char`` in the name of every regular file.

    Files whose name does not contain ``old_char`` are left untouched.

    Args:
        directory: Path of the directory whose files are renamed.
        old_char: Substring to look for in each file name.
        new_char: Replacement substring.
    """
    # Snapshot of the regular files before anything is renamed.
    filenames = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
    for filename in filenames:
        # The original referenced an undefined `prefix` here and raised NameError.
        new_filename = filename.replace(old_char, new_char)
        if new_filename == filename:
            continue
        old_file_path = os.path.join(directory, filename)
        new_file_path = os.path.join(directory, new_filename)
        os.rename(old_file_path, new_file_path)
        print(f"Renamed {filename} into {new_filename}")

def rename_in_xml_filenames(directory, old_char, new_char):
    """Replace ``old_char`` with ``new_char`` in every ``filename`` element.

    Each ``filename`` element of each ``*.xml`` file in ``directory`` is
    updated from its own text. The original read the first element's text for
    every element, so a second ``filename`` element would have been
    overwritten with the first one's value. A file is rewritten only when
    something changed.

    Args:
        directory: Path of the directory holding the annotation files.
        old_char: Substring to look for in each value.
        new_char: Replacement substring.
    """
    for filename in os.listdir(directory):
        if not filename.endswith('.xml'):
            continue
        xml_path = os.path.join(directory, filename)
        tree = ET.parse(xml_path)
        changed = False
        for elem in tree.getroot().iter('filename'):
            print(elem.text)
            # Empty elements have text None; there is nothing to replace.
            if not elem.text:
                continue
            new_filename = elem.text.replace(old_char, new_char)
            if new_filename != elem.text:
                elem.text = new_filename
                changed = True
        if changed:
            tree.write(xml_path)

def strip_extension(filenames, extension):
    """Return ``filenames`` with a trailing ``extension`` removed from each name.

    Only a suffix is removed: an occurrence of ``extension`` in the middle of a
    name is kept (the original removed every occurrence). Names that do not
    end with ``extension`` are returned unchanged.

    Args:
        filenames: Iterable of file names.
        extension: Suffix to remove, e.g. ``".jpg"``.

    Returns:
        A new list with the stripped names, in the input order.
    """
    # With an empty extension there is nothing to strip; this guard also
    # avoids the `name[:-0]` pitfall, which would yield an empty string.
    if not extension:
        return list(filenames)
    cut = len(extension)
    return [name[:-cut] if name.endswith(extension) else name for name in filenames]

def get_list_of_filename(directory):
    """Return the distinct base names of the regular files in ``directory``.

    The ``.jpg`` and ``.xml`` extensions are stripped via ``strip_extension``
    (in that order); duplicates are dropped while keeping first-seen order.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of unique names.
    """
    names = [
        entry
        for entry in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, entry))
    ]
    for ext in (".jpg", ".xml"):
        names = strip_extension(names, ext)
    # dict keys keep insertion order, which gives an order-preserving de-dup.
    return list(dict.fromkeys(names))

def get_list_of_xml(directory):
    """Return the base names of all ``*.xml`` entries in ``directory``.

    Only the trailing ``.xml`` is removed, so ``base + ".xml"`` always names
    the entry it came from (the original removed every ``.xml`` occurrence).

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of names without the ``.xml`` suffix, in ``os.listdir`` order.
    """
    suffix = ".xml"
    return [
        name[:-len(suffix)]
        for name in os.listdir(directory)
        if name.endswith(suffix)
    ]

def get_list_of_jpg(directory):
    """Return the base names of all ``*.jpg`` entries in ``directory``.

    Only the trailing ``.jpg`` is removed, so ``base + ".jpg"`` always names
    the entry it came from (the original removed every ``.jpg`` occurrence).

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of names without the ``.jpg`` suffix, in ``os.listdir`` order.
    """
    suffix = ".jpg"
    return [
        name[:-len(suffix)]
        for name in os.listdir(directory)
        if name.endswith(suffix)
    ]

def normalize_filename(directory, filenames, max_len=6):
    """Give each image/annotation pair a fresh random base name.

    For every base name in ``filenames`` the files ``<name>.jpg`` and
    ``<name>.xml`` inside ``directory`` are renamed to a name produced by
    ``gen_pwd(max_len)``, and every ``filename`` element in the XML is set to
    the new ``.jpg`` name.

    A generated name is rejected when a ``.jpg`` or ``.xml`` with that name
    already exists, so an existing pair is never overwritten. The XML is
    parsed and rewritten before any rename, so an unreadable annotation fails
    before the pair has been touched.

    Args:
        directory: Directory holding the image/annotation pairs.
        filenames: Iterable of base names (without extension).
        max_len: Length passed on to ``gen_pwd``.

    Raises:
        RuntimeError: If no unused name is found after many attempts.
    """
    for filename in filenames:
        # Draw names until one is free for both extensions.
        for _ in range(1000):
            new_name = gen_pwd(max_len)
            taken = any(
                os.path.exists(os.path.join(directory, new_name + ext))
                for ext in (".jpg", ".xml")
            )
            if not taken:
                break
        else:
            raise RuntimeError(f"Could not find an unused name for {filename}")

        old_jpg_path = os.path.join(directory, filename + ".jpg")
        old_xml_path = os.path.join(directory, filename + ".xml")
        new_jpg_path = os.path.join(directory, new_name + ".jpg")
        new_xml_path = os.path.join(directory, new_name + ".xml")

        # Update the filename field inside the XML first.
        tree = ET.parse(old_xml_path)
        for elem in tree.getroot().iter('filename'):
            elem.text = new_name + ".jpg"
        tree.write(old_xml_path)

        # Then rename the image and its annotation.
        os.rename(old_jpg_path, new_jpg_path)
        os.rename(old_xml_path, new_xml_path)
        print(f"Renamed {filename} into {new_name}")

def rename_file_to_lowercase(directory):
    """Lowercase every regular file name in ``directory``.

    For XML files, the text of every ``filename`` element is lowercased as
    well. The original wrote the lowercased name of the XML file itself into
    that field, and only when the value started with a digit. A file is
    skipped (with a message) when a different file already has the lowercase
    name.

    Args:
        directory: Path of the directory whose files are renamed.
    """
    # Snapshot of the regular files before anything is renamed.
    filenames = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
    for filename in filenames:
        old_file_path = os.path.join(directory, filename)
        new_filename = filename.lower()

        if new_filename.endswith('.xml'):
            tree = ET.parse(old_file_path)
            changed = False
            for elem in tree.getroot().iter('filename'):
                if elem.text and elem.text != elem.text.lower():
                    print(f"{elem.text} ->>>> {elem.text.lower()}")
                    elem.text = elem.text.lower()
                    changed = True
            if changed:
                tree.write(old_file_path)

        if new_filename == filename:
            continue
        new_file_path = os.path.join(directory, new_filename)
        # On a case-insensitive file system the target "exists" because it is
        # the very same file; that case must still be renamed.
        if os.path.exists(new_file_path) and not os.path.samefile(old_file_path, new_file_path):
            print(f"Skipped {filename}: {new_filename} already exists")
            continue
        os.rename(old_file_path, new_file_path)
        print(f"Renamed: {filename} to {new_filename}")


def remove_subdirectory_but_keep_content(parent_dir):
    """Flatten ``parent_dir`` by one level.

    The contents of every immediate sub-directory are moved up into
    ``parent_dir`` and the emptied sub-directory is then removed.

    Args:
        parent_dir: Directory whose immediate sub-directories are flattened.
    """
    for entry in os.listdir(parent_dir):
        subdir = os.path.join(parent_dir, entry)
        if not os.path.isdir(subdir):
            continue

        # Lift every child of the sub-directory into the parent.
        for child in os.listdir(subdir):
            child_path = os.path.join(subdir, child)
            shutil.move(child_path, parent_dir)
            print(f"Moved {child_path} to {parent_dir}")

        print(f"Removed {subdir}")
        os.rmdir(subdir)

import os
import pyheif
from PIL import Image
def _heif_to_jpg(input_path, output_path):
    """Decode one HEIF file with pyheif and save it as a JPEG.

    Args:
        input_path: Path of the HEIF file to read.
        output_path: Path the JPEG is written to.
    """
    heif_file = pyheif.read(input_path)

    # Build a PIL image from the raw decoded buffer.
    image = Image.frombytes(
        heif_file.mode,
        heif_file.size,
        heif_file.data,
        "raw",
        heif_file.mode,
        heif_file.stride,
    )

    # Pillow's JPEG writer rejects modes with an alpha channel (e.g. "RGBA"),
    # so anything it cannot store is converted to RGB first.
    if image.mode not in ("L", "RGB", "CMYK"):
        image = image.convert("RGB")

    # Save the image as JPG
    image.save(output_path, "JPEG")
    print(f"Convert {input_path} to {output_path}")

def convert_heif_to_jpg(directory):
    """Convert every ``.heif``/``.heic`` file in ``directory`` to ``.jpg``.

    Each matching file is converted with ``_heif_to_jpg`` and the source file
    is removed afterwards. Entries with any other extension are left
    untouched. The original processed every entry, and a name without
    ``.heif`` was converted onto itself and then deleted.

    Args:
        directory: Path of the directory holding the HEIF images.
    """
    for filename in os.listdir(directory):
        stem, ext = os.path.splitext(filename)
        if ext.lower() not in (".heif", ".heic"):
            continue
        file_path = os.path.join(directory, filename)
        if not os.path.isfile(file_path):
            continue
        # The output path always differs from the input path, so removing the
        # source below can never delete the freshly written JPEG.
        new_file_path = os.path.join(directory, stem + ".jpg")
        _heif_to_jpg(file_path, new_file_path)
        os.remove(file_path)

def resize_image(directory, max_size):
    """Shrink every JPEG in ``directory`` in place to fit within ``max_size``.

    Each ``.jpg``/``.jpeg`` file (extension matched case-insensitively) is
    resized with ``Image.thumbnail`` into a temporary ``*_lite`` file, which
    then replaces the original. Other entries are left untouched. The original
    processed every entry, and a name without ``.jpg`` was overwritten,
    deleted and then failed on the final rename.

    Args:
        directory: Path of the directory holding the images.
        max_size: ``(width, height)`` bound passed to ``Image.thumbnail``.
    """
    for filename in os.listdir(directory):
        stem, ext = os.path.splitext(filename)
        if ext.lower() not in (".jpg", ".jpeg"):
            continue
        file_path = os.path.join(directory, filename)
        # Also skips a stale *_lite file that an earlier iteration replaced.
        if not os.path.isfile(file_path):
            continue

        temp_name = stem + "_lite" + ext
        temp_file_path = os.path.join(directory, temp_name)
        with Image.open(file_path) as img:
            # Resampling.LANCZOS is previously ANTIALIAS
            img.thumbnail(max_size, Image.Resampling.LANCZOS)
            img.save(temp_file_path)
            print(f"Resized image saved to {temp_file_path}")

        # Swap the resized copy in for the original in a single step.
        os.replace(temp_file_path, file_path)
        print(f"Resized: {temp_name} to {file_path}")


# ---------------------------------------------------------------------------
# Driver: the preprocessing steps are toggled by commenting calls in or out.
# Only resize_image(...) is enabled right now; every other call is disabled.
# The paths below are absolute and specific to the author's machine.
# ---------------------------------------------------------------------------

# Step: normalise names (spaces, .png, .JPG) in the files and the XML annotations
#replace_space_with_underscore("/home/ttran/projects/TFmodels2/models/research/object_detection/images/train")
#replace_spaces_in_xml_filenames("/home/ttran/projects/TFmodels2/models/research/object_detection/images/train")
#replace_png_in_xml_filenames("/home/ttran/projects/TFmodels2/models/research/object_detection/images/train")
#replace_JPG_with_jpg("/home/ttran/projects/TFmodels2/models/research/object_detection/images/train")
#replace_JPG_with_jpg_in_xml_filenames(("/home/ttran/projects/TFmodels2/models/research/object_detection/images/train"))

# Directories the enabled/disabled calls below operate on
#source_dir = "/home/ttran/projects/TFmodels2/models/research/object_detection/images/train"
source_dir = "/mnt/sda1/Backup/heif_lite"
dest_dir = "/home/ttran/projects/TFmodels2/models/research/object_detection/images/train_num"
#move_files_starting_with_number(source_dir, dest_dir)
#test_dir = "/home/ttran/projects/TFmodels2/models/research/object_detection/test_images"
#test_dir = "/mnt/sda1/test"


# NOTE(review): `prefix` is not assigned anywhere in this cell; define it
# before re-enabling the next call.
#add_prefix_to_xml_file_filename_field(dest_dir, prefix)
#rename_in_xml_filenames(source_dir, "-", "_")
#rename_filenames(source_dir, "-", "_")

# convert_all_image_to_jpg()
# Enabled step: every file in source_dir is replaced by its resized copy
# (bounded by 800x800).
resize_image(source_dir, (800,800))
#remove_subdirectory_but_keep_content(source_dir)
#add_format_to_filename(source_dir, ".heif")
#convert_heif_to_jpg(source_dir)

#rename_file_to_lowercase(source_dir)
# resize all images()
# now we start labeling (labelIMG)
# After labeling: rename each image/annotation pair that has both a .jpg and an .xml
#xml = get_list_of_xml(source_dir)
#jpg = get_list_of_jpg(source_dir)
#common_filename = [item for item in jpg if item in xml]
#normalize_filename(source_dir,common_filename)




Resized image saved to /mnt/sda1/Backup/heif_lite/168c83d85b6730cc58798bab097d9a211a0e63f6_lite.jpg
Resized: 168c83d85b6730cc58798bab097d9a211a0e63f6_lite.jpg to /mnt/sda1/Backup/heif_lite/168c83d85b6730cc58798bab097d9a211a0e63f6.jpg
Resized image saved to /mnt/sda1/Backup/heif_lite/95ae1c23f4c925b3db84d921de5adee8e4740805_lite.jpg
Resized: 95ae1c23f4c925b3db84d921de5adee8e4740805_lite.jpg to /mnt/sda1/Backup/heif_lite/95ae1c23f4c925b3db84d921de5adee8e4740805.jpg
Resized image saved to /mnt/sda1/Backup/heif_lite/9c60590711e39ba8472dff3fc95c114c57daa957_lite.jpg
Resized: 9c60590711e39ba8472dff3fc95c114c57daa957_lite.jpg to /mnt/sda1/Backup/heif_lite/9c60590711e39ba8472dff3fc95c114c57daa957.jpg
Resized image saved to /mnt/sda1/Backup/heif_lite/3da52db220d0f07c0069c4138b3fd01c2fc20cfb_lite.jpg
Resized: 3da52db220d0f07c0069c4138b3fd01c2fc20cfb_lite.jpg to /mnt/sda1/Backup/heif_lite/3da52db220d0f07c0069c4138b3fd01c2fc20cfb.jpg
Resized image saved to /mnt/sda1/Backup/heif_lite/e258a1c1a3b6be

In [28]:
# Move a random 20% of the labeled files from images/train to images/test
# Only run this if image/test is empty
import os
import random
import shutil

def move_file_with_same_base(file_list, source_dir, dest_dir):
    """Move every file whose base name is listed in ``file_list``.

    A file in ``source_dir`` is moved to ``dest_dir`` when its name without
    the last extension equals one of the entries of ``file_list``.
    ``dest_dir`` is created when missing. The directory is listed once and
    the base names are looked up in a set, instead of re-listing the
    directory for every base name.

    Args:
        file_list: Iterable of base names (without extension).
        source_dir: Directory that is scanned.
        dest_dir: Directory that receives the matching files.
    """
    # Check if destination dir exist
    os.makedirs(dest_dir, exist_ok=True)

    wanted = set(file_list)
    for filename in os.listdir(source_dir):
        if os.path.splitext(filename)[0] not in wanted:
            continue
        source_path = os.path.join(source_dir, filename)
        # Only regular files are moved; matching directories stay in place.
        if os.path.isfile(source_path):
            shutil.move(source_path, os.path.join(dest_dir, filename))
            print(f"Moved {filename}")

def move_random_files(source_dir, dest_dir, percentage=0.2):
    """Move a random share of the annotated samples to ``dest_dir``.

    A fraction ``percentage`` of the ``.xml`` files in ``source_dir`` is drawn
    at random and moved; afterwards every remaining file that shares a base
    name with a moved annotation is moved through ``move_file_with_same_base``.

    Args:
        source_dir: Directory holding the samples.
        dest_dir: Directory receiving the selected samples (created if missing).
        percentage: Fraction of the ``.xml`` files to move.
    """
    os.makedirs(dest_dir, exist_ok=True)

    # Base names of all regular .xml files, in directory-listing order.
    xml_stems = []
    for entry in os.listdir(source_dir):
        if os.path.isfile(os.path.join(source_dir, entry)) and entry.endswith('.xml'):
            xml_stems.append(entry.replace('.xml', ''))

    sample_size = int(len(xml_stems) * percentage)
    chosen = random.sample(xml_stems, sample_size)

    # Annotations first ...
    for stem in chosen:
        xml_name = stem + '.xml'
        shutil.move(os.path.join(source_dir, xml_name), os.path.join(dest_dir, xml_name))
        print(f"Moved: {xml_name}")

    # ... then whatever else carries the same base name (the photos).
    move_file_with_same_base(chosen, source_dir, dest_dir)

source_directory = "/home/ttran/projects/TFmodels2/models/research/object_detection/images/train"
destination_directory = "/home/ttran/projects/TFmodels2/models/research/object_detection/images/test"

# NOTE: this call is live, so running the cell moves files out of source_directory.
# Comment it out once the train/test split has been made; a second run would move
# a further share of the files that remain.
move_random_files(source_directory,destination_directory)

Moved: p9ep0b.xml
Moved: n5onn4.xml
Moved: a0d4va.xml
Moved: aq3ga7.xml
Moved: yyv051.xml
Moved: wwz1at.xml
Moved: ff81uy.xml
Moved: ds07ds.xml
Moved: x2xdk2.xml
Moved: t8nt6a.xml
Moved: hha072.xml
Moved: z87zss.xml
Moved: m1onm9.xml
Moved: aar21c.xml
Moved: hyhen7.xml
Moved: ww53e0.xml
Moved: zzaz9h.xml
Moved: t7864t.xml
Moved: jg07dj.xml
Moved: v8zddv.xml
Moved: kt6kxc.xml
Moved: a656a4.xml
Moved: h7hfwh.xml
Moved: jjxsh3.xml
Moved: p7kepp.xml
Moved: rxar42.xml
Moved: hh3qhm.xml
Moved: shs3ak.xml
Moved: q3rqbj.xml
Moved: bxu8bb.xml
Moved: jijaw3.xml
Moved: uu8aau.xml
Moved: cco94a.xml
Moved: wb16ww.xml
Moved: qaq2hk.xml
Moved: yyzvn5.xml
Moved: dmg0kd.xml
Moved: mxr7om.xml
Moved: h5zh1b.xml
Moved: t5gtyt.xml
Moved: tgntg5.xml
Moved: cc24u7.xml
Moved: c1ikci.xml
Moved: u6pumq.xml
Moved: w3awpi.xml
Moved: g12igk.xml
Moved: rn76kr.xml
Moved: g4p4g0.xml
Moved: t7utkt.xml
Moved: jt6bj5.xml
Moved: v68cv0.xml
Moved: kc5k0h.xml
Moved: h1hnnk.xml
Moved: wyw0g8.xml
Moved: ffdz83.xml
Moved: c2c

In [29]:
#Generate training data
#import from xml_to_csv.py

import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET


def xml_to_csv(path):
    """Collect the bounding boxes of all XML annotations in ``path``.

    Every ``object`` element of every ``*.xml`` file yields one row. Values
    are looked up by tag name (``size/width``, ``object/name``,
    ``bndbox/xmin`` ...) rather than by child position, so the result does not
    depend on the order in which the annotation tool wrote the elements.
    Files are processed in sorted order so the row order is reproducible.

    Args:
        path: Directory holding the annotation files.

    Returns:
        A ``pandas.DataFrame`` with the columns ``filename``, ``width``,
        ``height``, ``class``, ``xmin``, ``ymin``, ``xmax``, ``ymax``.
    """
    xml_list = []
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()
        objects = root.findall('object')
        # Annotations without objects contribute no rows.
        if not objects:
            continue
        image_name = root.find('filename').text
        size = root.find('size')
        width = int(size.findtext('width'))
        height = int(size.findtext('height'))
        for member in objects:
            box = member.find('bndbox')
            xml_list.append((
                image_name,
                width,
                height,
                member.findtext('name'),
                int(box.findtext('xmin')),
                int(box.findtext('ymin')),
                int(box.findtext('xmax')),
                int(box.findtext('ymax')),
            ))
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df

def generate_csv_from_xml():
    """Write one label CSV per dataset split.

    For ``train`` and ``test`` the annotations under ``<cwd>/images/<split>``
    are converted with ``xml_to_csv`` and stored, without the index column, as
    ``images/<split>_labels.csv`` relative to the current working directory.
    """
    for split in ('train', 'test'):
        relative_dir = 'images/' + split
        annotations = xml_to_csv(os.path.join(os.getcwd(), relative_dir))
        annotations.to_csv(relative_dir + '_labels.csv', index=None)
        print('Successfully converted xml to csv.')

# NOTE: this call is live; running the cell writes images/train_labels.csv and
# images/test_labels.csv relative to the current working directory.
generate_csv_from_xml()

Successfully converted xml to csv.
Successfully converted xml to csv.
