In [46]:
import os
import shutil
import pandas as pd
import re

In [47]:
def get_box(image_file):
    '''
    Read the bounding-box size of a tracked object from its track image filename.
    image_file: name of image file; every run of digits in it is parsed as an integer

    Returns a (rect_w, rect_h) tuple: the last integer found in the name, followed
    by the second-to-last one. Raises IndexError if the name holds fewer than two numbers.
    '''
    # Collect all integers embedded in the name, in order of appearance
    numbers = list(map(int, re.findall(r'\d+', image_file)))

    # NOTE(review): width is read from the final number and height from the one
    # before it -- confirm this matches the order used when the images are named.
    return numbers[-1], numbers[-2]

In [48]:
def get_category(row):
    '''
    Look up the integer class id of a track image.
    row: mapping (e.g. a dataframe row) from category name to its flag value

    Returns the position, within the fixed category list below, of the first
    category whose value equals 1. Returns None when no category equals 1.
    '''
    cats = ['bird','cable','panel','plant','car','human','other_animal','insect','aircraft','other','unknown']
    # Lazy scan: stops at the first flagged category, so later columns are never read
    return next((class_id for class_id, name in enumerate(cats) if row[name] == 1), None)

In [49]:
def gen_labels(data_file, output_dir, dim=400):
    '''
    Used to generate yolo formatted labels 
    data_file: Path to structured datafile. Tracks should be labeled beforehand.
    output_dir: Output directory of labels. Created if it does not exist.
    dim: Dimension of images. The way track images are generated, should be square with the obj being tracked in the center.

    One label file is written per kept row, named after the row's image file with
    '.png' replaced by '.txt', holding a single line:
    "class center_x center_y width height" (all but class normalized by dim).

    Rows are skipped when:
      * another row shares the same ('video_dir', 'frame') pair, or
      * get_category finds no category for the row (otherwise the label would
        start with the literal text "None").

    Returns the list of image files for which a label was written, so that the
    matching images can be copied over.
    '''
    # Ensure the destination directory exists
    os.makedirs(output_dir, exist_ok=True)

    df = pd.read_csv(data_file)

    # Find non-unique values based on frame and video name
    # NOTE 2024-01-23: Frame numbering has issue at the moment, but can still be used to tell if multiple object in a frame
    # TODO: FIND better way to create labels for potential overlap
    shares_frame = df.duplicated(['video_dir', 'frame'], keep=False)
    skipped_overlap = int(shares_frame.sum())
    # Keep only rows that are alone in their frame
    df = df[~shares_frame]

    # The object sits in the middle of the square image, so the normalized center
    # is 0.5 on both axes regardless of whether dim is odd or even.
    center = 0.5

    labeled_images = []
    skipped_unlabeled = 0

    # iterate through each row of remaining dataframe
    for _, row in df.iterrows():
        image_file = row['image_file']

        category = get_category(row)
        if category is None:
            # No category flagged: a label line cannot be formed for this row
            skipped_unlabeled += 1
            continue

        rect_w, rect_h = get_box(image_file)
        output_file = os.path.join(output_dir, image_file.replace('.png', '.txt'))

        # class, centerx, centery, width, height (box size normalized for yolo format)
        label_line = "{} {:.3f} {:.3f} {:.3f} {:.3f}".format(
            category, center, center, rect_w / dim, rect_h / dim
        )
        with open(output_file, "w") as file:
            file.write(label_line)

        labeled_images.append(image_file)

    print(f'{len(labeled_images)} labels generated.')
    print(f'{skipped_overlap} labels skipped.')
    if skipped_unlabeled:
        print(f'{skipped_unlabeled} rows without a category skipped.')

    # return list of image files so that we can copy the correct ones over
    return labeled_images

In [50]:
def copy_png(target_dir, output_dir, imgs):
    '''
    Copies png files to a target directory. Only copies files if in specified list.
    target_dir: root directory of tracks/track images (doesn't matter too much since only copies png)
    output_dir: output directory of images. Single dir for all images. Created if it does not exist.
    imgs: list of images to copy over (file names, matched against each file's base name)

    The whole tree under target_dir is searched. Files are copied flat into
    output_dir, so two files with the same name in different subdirectories end
    up as one file there. Prints how many copies were made.
    '''
    # Ensure the destination directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Set lookup keeps the per-file membership check constant time
    wanted = set(imgs)
    count = 0

    # Recursively iterate through all files in the source directory and its subdirectories
    for root, _dirs, files in os.walk(target_dir):
        for file in files:
            if not (file.endswith(".png") and file in wanted):
                continue

            source_path = os.path.join(root, file)
            destination_path = os.path.join(output_dir, file)

            # When output_dir lies inside target_dir the walk reaches files that
            # were already copied; copying a file onto itself would raise
            # shutil.SameFileError, so skip those.
            if os.path.exists(destination_path) and os.path.samefile(source_path, destination_path):
                continue

            shutil.copy2(source_path, destination_path)
            count += 1

    print(f'{count} images copied over.')

In [51]:
# NOTE(review): the paths below are absolute and machine-specific; adjust them before running elsewhere.

# Inputs: the labeled track datafile and the root folder searched for track images
data_file = "C:/Users/Aaron/Desktop/uchicago-aviansolar-detect-track/data/20240114_data_mod.csv"
target_dir = "C:/Users/Aaron/Desktop/uchicago-aviansolar-detect-track/data/"

# Outputs: one folder for the label files, one for the copied images
output_dir_labels = "C:/Users/Aaron/Desktop/uchicago-aviansolar-detect-track/custom/labels"
output_dir_imgs = "C:/Users/Aaron/Desktop/uchicago-aviansolar-detect-track/custom/images"

# gen_labels returns the image file names it wrote labels for; copy exactly those images
labels = gen_labels(data_file=data_file, output_dir=output_dir_labels)
copy_png(target_dir=target_dir, output_dir=output_dir_imgs, imgs=labels)


1512 labels generated.
516 labels skipped.
1512 images copied over.
