In [1]:
%load_ext autoreload
%autoreload 2

import os

import pandas as pd

In [2]:
def rename_labels_and_files(file_path, labels):
    """Rename exported images to zero-padded frame numbers and re-key ``labels`` to match.

    Every file in ``file_path`` whose name ends with ``jpg`` must look like
    ``<prefix>_<number>_jpg.rf.<suffix>`` (for example
    ``frame_398_jpg.rf.<hash>.jpg``). It is renamed to ``<number>`` left-padded
    with zeros to four characters plus ``.jpg`` (``0398.jpg``), and the same
    old-name -> new-name mapping is applied to the index of ``labels``.

    All names are validated and the complete rename plan is built before any
    file is touched, so a bad name can no longer leave the directory
    half-renamed.

    Args:
        file_path: Directory containing the images.
        labels: DataFrame indexed by image file name. It is not modified.

    Returns:
        A copy of ``labels`` whose index uses the new file names. Index entries
        that do not correspond to a renamed file are left unchanged.

    Raises:
        ValueError: If a jpg does not follow the expected naming pattern (for
            example because the directory was already renamed), or if two files
            would be renamed to the same target. Nothing on disk has been
            changed when this is raised.
    """
    marker = "_jpg.rf"
    planned = {}   # old file name -> new file name
    claimed = {}   # new file name -> old file name that produces it

    # Sorted so that the file reported in an error does not depend on the
    # arbitrary order returned by os.listdir.
    for name in sorted(os.listdir(file_path)):
        if not name.endswith('jpg'):
            continue
        if marker not in name:
            raise ValueError(
                f"{name!r} does not contain {marker!r}; "
                f"has {file_path!r} already been renamed?"
            )
        stem = name[:name.index(marker)]
        if "_" not in stem:
            raise ValueError(f"cannot find a frame number in {name!r}")
        # Everything after the first underscore of the stem is the frame number.
        frame_number = stem[stem.index("_") + 1:]
        new_name = frame_number.zfill(4) + ".jpg"

        # os.rename may silently replace an existing target, which would drop
        # one image and merge the labels of two different images.
        if new_name in claimed:
            raise ValueError(
                f"{claimed[new_name]!r} and {name!r} would both be renamed to {new_name!r}"
            )
        claimed[new_name] = name
        planned[name] = new_name

    for old_name, new_name in planned.items():
        os.rename(os.path.join(file_path, old_name), os.path.join(file_path, new_name))

    # One index rename for the whole mapping instead of one DataFrame copy per file.
    return labels.rename(index=planned)

In [3]:
def create_label_file(file_path, orig_df):
    """Write ``Label.csv`` into ``file_path`` with one centre point per bounding box.

    The output keeps the index of ``orig_df`` (renamed to ``file name``), sorted
    ascending, and replaces the columns ``width``, ``height``, ``class``,
    ``xmin``, ``ymin``, ``xmax`` and ``ymax`` with:

    * ``visibility``   - constant 1
    * ``x-coordinate`` - ``(xmin + xmax) // 2``
    * ``y-coordinate`` - ``(ymin + ymax) // 2``
    * ``status``       - constant 0

    Args:
        file_path: Directory (as a string) in which ``Label.csv`` is written.
        orig_df: Annotation frame with the bounding-box columns listed above.
            It is not modified.

    Returns:
        None. The only result is the CSV file on disk.
    """
    # Floor-divided midpoints of each bounding box.
    center_x = (orig_df['xmin'] + orig_df['xmax']) // 2
    center_y = (orig_df['ymin'] + orig_df['ymax']) // 2

    # assign() returns a new frame, so orig_df stays untouched; the dict keeps
    # the new columns in the order they appear in the CSV.
    label_table = (
        orig_df
        .assign(**{
            'visibility': 1,
            'x-coordinate': center_x,
            'y-coordinate': center_y,
            'status': 0,
        })
        .drop(columns=['width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'])
        .rename_axis('file name')
        .sort_index(ascending=True)
    )

    destination = file_path + "/Label.csv"
    label_table.to_csv(destination, index=True)

In [None]:
def clean_train_files(train_path):
    """Keep one image per training frame and return labels keyed by the final file names.

    Images in ``train_path`` named ``<frame>_jpg.rf.<suffix>`` are grouped by the
    part before ``_jpg.rf``, i.e. all exported variants of one frame form a
    group. Within each group the first name in sorted order is kept; the other
    files are deleted from disk and their rows are dropped from the labels. If
    the kept image has no label rows of its own, the rows of the first variant
    that does have labels are re-keyed to the kept image before that variant is
    dropped. Finally the kept files are renamed with ``rename_labels_and_files``.

    Grouping by frame name rather than by position in the listing means a frame
    with fewer or more than three variants cannot be paired with files of a
    different frame, and jpgs without the ``_jpg.rf`` marker are never deleted
    here.

    Args:
        train_path: Directory containing the training images and
            ``_annotations.csv`` (which must have a ``filename`` column).

    Returns:
        DataFrame of annotations indexed by the renamed image file names.

    Side effects:
        Deletes and renames image files inside ``train_path``. Prints
        ``error inserting rand_im`` for a frame whose kept image has no labels
        and no labelled variant to copy them from.
    """
    # Annotations excluded by hand. The reason is not recorded in this notebook
    # (presumably faulty boxes -- TODO confirm).
    excluded_annotations = [
        "frame_398_jpg.rf.5b6dcedcdc86b09ca3940aa7652840b7.jpg",
        "frame_398_jpg.rf.6c041dac651680ca34b010590d505f06.jpg",
        "frame_631_jpg.rf.4ae2683c895698b0d8f742614e873a48.jpg",
        "frame_631_jpg.rf.7364ef9fd1fa4c803d034c9650b3a861.jpg",
        "frame_209_jpg.rf.3852b93b863602394c7eac1afe1c8da5.jpg",
        "frame_209_jpg.rf.e49699f0471fe67adf4ee65a358bdb8f.jpg",
        "frame_209_jpg.rf.f465b65ee5aa9fb93a26a47d633b6820.jpg",
        "frame_358_jpg.rf.7a0a27f461768c90df6691ce70686f7a.jpg",
        "frame_358_jpg.rf.5394cdea78a07f5a75c0b14a21c1a977.jpg",
        "frame_358_jpg.rf.e84af07b7d623ef3f3521b48a81fd896.jpg",
        "frame_380_jpg.rf.4acb5a211a462551c12e4994b8ec1715.jpg",
        "frame_380_jpg.rf.4f85f4ec42e79bd5cedd1368e432645c.jpg",
        "frame_380_jpg.rf.d8e813d1d166454f3258b887e810df63.jpg",
        "frame_655_jpg.rf.8a8c255e655b5aa9a603c54c8a9e8abf.jpg",
        "frame_655_jpg.rf.a8342a39e5d4e002e46344ced62ca8c5.jpg",
        "frame_655_jpg.rf.ae547a60531710977cb530ed9009b71e.jpg",
    ]
    raw_marker = "_jpg.rf"

    train_labels = pd.read_csv(os.path.join(train_path, '_annotations.csv'))
    train_labels = train_labels.set_index('filename')
    train_labels = train_labels.drop(excluded_annotations, errors='ignore')

    # frame name -> sorted list of its exported variants. Dicts keep insertion
    # order, so frames are visited in sorted order as well.
    variants_by_frame = {}
    for name in sorted(os.listdir(train_path)):
        if name.endswith('jpg') and raw_marker in name:
            frame_key = name[:name.index(raw_marker)]
            variants_by_frame.setdefault(frame_key, []).append(name)

    for kept, *extras in variants_by_frame.values():
        if kept not in train_labels.index:
            donor = next((v for v in extras if v in train_labels.index), None)
            if donor is None:
                print("error inserting rand_im")
            else:
                # .loc[[donor]] keeps a DataFrame even for a single row, and the
                # renamed copy must be the thing that is concatenated. The index
                # is preserved because every later step looks rows up by file name.
                donated = train_labels.loc[[donor]].rename(index={donor: kept})
                train_labels = pd.concat([train_labels, donated])

        if extras:
            train_labels = train_labels.drop(extras, errors='ignore')

        for extra in extras:
            extra_path = os.path.join(train_path, extra)
            if os.path.exists(extra_path):
                os.remove(extra_path)

    train_labels = rename_labels_and_files(train_path, train_labels)
    return train_labels

In [None]:
# Destructive, in-place step: clean_train_files deletes image files from
# train_path and renames the ones it keeps, so keep a backup of the directory.
train_path = "padel_dataset/train"
train_labels = clean_train_files(train_path)

In [None]:
# Write padel_dataset/train/Label.csv from the cleaned training labels.
create_label_file(train_path, train_labels)

In [None]:
val_path = "padel_dataset/valid"

val_labels = pd.read_csv(os.path.join(val_path, '_annotations.csv'))
val_labels = val_labels.set_index('filename')

val_files = [f for f in os.listdir(val_path) if f.endswith('jpg')]
# _annotations.csv always lists the exported "<frame>_jpg.rf.<hash>.jpg" names.
# Once the images have been renamed (e.g. to 0012.jpg) none of them appears in
# that index any more, so only files that still carry the export marker may be
# treated as "unlabelled" -- otherwise re-running this cell deletes every image.
val_unprocessed = [f for f in val_files if "_jpg.rf" in f]

if val_files and not val_unprocessed:
    print(f"{val_path}: images already renamed; skipping")
else:
    for f in val_unprocessed:
        if f not in val_labels.index:
            print(f"{f} index not found in labels; removing file")
            os.remove(os.path.join(val_path, f))

    val_labels = rename_labels_and_files(val_path, val_labels)

    create_label_file(val_path, val_labels)

In [None]:
test_path = "padel_dataset/test"

test_labels = pd.read_csv(os.path.join(test_path, '_annotations.csv'))
test_labels = test_labels.set_index('filename')

test_files = [f for f in os.listdir(test_path) if f.endswith('jpg')]
# _annotations.csv always lists the exported "<frame>_jpg.rf.<hash>.jpg" names.
# Once the images have been renamed (e.g. to 0012.jpg) none of them appears in
# that index any more, so only files that still carry the export marker may be
# treated as "unlabelled" -- otherwise re-running this cell deletes every image.
test_unprocessed = [f for f in test_files if "_jpg.rf" in f]

if test_files and not test_unprocessed:
    print(f"{test_path}: images already renamed; skipping")
else:
    for f in test_unprocessed:
        if f not in test_labels.index:
            print(f"{f} index not found in labels; removing file")
            os.remove(os.path.join(test_path, f))

    test_labels = rename_labels_and_files(test_path, test_labels)

    create_label_file(test_path, test_labels)

In [None]:
# Merge the validation and test label files into a single evaluation label file.
# NOTE(review): rows from both splits end up under one 'file name' index, so this
# assumes the two splits never share a file name -- confirm against the dataset.
val_labels = pd.read_csv('padel_dataset/valid/Label.csv')
test_labels = pd.read_csv('padel_dataset/test/Label.csv')

combined_labels = pd.concat([val_labels, test_labels], ignore_index=True).set_index('file name')
combined_labels = combined_labels.sort_index(ascending=True)

# to_csv does not create missing parent directories, so make sure the output
# directory exists before writing.
final_dir = './padel_dataset/final_testing'
os.makedirs(final_dir, exist_ok=True)
combined_labels.to_csv(os.path.join(final_dir, 'Label.csv'), index=True)

# print(combined_labels.to_string())