In [1]:
import pandas as pd
import numpy as np
import warnings
from pprint import pprint
from tqdm import tqdm
from glob import glob
from datetime import datetime

# Silence every warning for the rest of the session: no category, module or
# message filter is given, so this also hides deprecation notices from libraries.
warnings.filterwarnings('ignore')

In [2]:
def get_objects(bbox_df):
    """Collect the label and bounding-box coordinates of every row of ``bbox_df``.

    Args:
        bbox_df: DataFrame providing the columns ``LabelName``, ``XMin``,
            ``XMax``, ``YMin`` and ``YMax``.

    Returns:
        A list holding one ``[LabelName, XMin, XMax, YMin, YMax]`` list per
        row, in row order. Empty when ``bbox_df`` has no rows.

    Raises:
        KeyError: if one of the five columns is missing.
    """
    feature_columns = ['LabelName', 'XMin', 'XMax', 'YMin', 'YMax']
    # Read each column once and zip them together instead of using iterrows():
    # iterrows() builds a Series per row (slow on large frames) and upcasts the
    # row's values to a common dtype, e.g. integer labels become floats.
    column_values = [bbox_df[column].tolist() for column in feature_columns]
    return [list(object_features) for object_features in zip(*column_values)]

In [3]:
def get_relationship_pairs(objects_count):
    """Enumerate every unordered pair of object indices.

    Args:
        objects_count: number of objects to pair up.

    Returns:
        A list of ``[first, second]`` index pairs with
        ``0 <= first < second < objects_count``, ordered by ``first`` and then
        by ``second``. Empty when fewer than two objects are available.
    """
    return [
        [first, second]
        for first in range(objects_count)
        for second in range(first + 1, objects_count)
    ]

In [28]:
def get_new_rows(pairs, objects, relationship_groups, image_id):
    """Build one output row per object pair of a single image.

    For each pair the two objects are written in the order
    ``(objects[x], objects[y])`` when that label combination is already listed
    in ``relationship_groups``; otherwise the order is reversed and the
    reversed label combination is registered in ``relationship_groups``.

    Args:
        pairs: iterable of ``[x, y]`` index pairs into ``objects``.
        objects: sequence of ``[LabelName, XMin, XMax, YMin, YMax]`` entries.
        relationship_groups: list of known ``(LabelName1, LabelName2)`` tuples.
            It is extended in place with newly registered combinations; the
            labels must be hashable.
        image_id: value stored under ``'ImageID'`` in every row.

    Returns:
        ``(rows, relationship_groups)`` where ``rows`` is a list of dicts with
        the keys ``ImageID`` and ``LabelName{n}``, ``XMin{n}``, ``XMax{n}``,
        ``YMin{n}``, ``YMax{n}`` for ``n`` in (1, 2), and ``relationship_groups``
        is the same list object that was passed in.
    """
    coordinate_fields = ('XMin', 'XMax', 'YMin', 'YMax')
    # Set mirror of the list so that membership tests do not scan the whole
    # list for every pair.
    known_groups = set(relationship_groups)
    rows = []
    for x, y in pairs:
        first, second = objects[x], objects[y]
        if (first[0], second[0]) not in known_groups:
            first, second = second, first
            reversed_key = (first[0], second[0])
            # Register the reversed combination only once. Appending it on
            # every miss made the list grow with duplicates.
            if reversed_key not in known_groups:
                known_groups.add(reversed_key)
                relationship_groups.append(reversed_key)
        row = {'ImageID': image_id}
        for position, object_features in enumerate((first, second), start=1):
            row[f'LabelName{position}'] = object_features[0]
            for offset, field in enumerate(coordinate_fields, start=1):
                row[f'{field}{position}'] = object_features[offset]
        rows.append(row)
    return rows, relationship_groups

In [23]:
# Existing relationship annotations; only the first 450,000 rows are read.
relationship_df = pd.read_csv(
    filepath_or_buffer='../metadata/relationship_final/relationship_final.csv',
    nrows=450_000,
)
# Bounding-box predictions that the new relationship pairs are built from.
bbox_df = pd.read_csv(filepath_or_buffer='../metadata/predictions_cleaned_v3.csv')

In [27]:
# Distinct (LabelName1, LabelName2) combinations found in the loaded
# relationships; iterating the groups mapping yields its keys.
relationship_groups = list(
    relationship_df.groupby(by=['LabelName1', 'LabelName2']).groups
)
# One group of bounding-box rows per image.
bbox_grouped = bbox_df.groupby(by='ImageId')

In [1]:
# The output leads with the first 11 columns of the loaded relationship frame.
base_columns = relationship_df.columns.tolist()[:11]

# Collect the rows of every image in a plain list and build the frame once.
# DataFrame.append inside the loop copied the whole frame on each iteration
# (quadratic cost) and no longer exists in pandas >= 2.0.
new_rows = []
for name, group in tqdm(bbox_grouped):
    objects = get_objects(group)
    relationship_pairs = get_relationship_pairs(len(objects))
    rows, relationship_groups = get_new_rows(relationship_pairs, objects, relationship_groups, name)
    new_rows.extend(rows)

pairs_df = pd.DataFrame(new_rows)
# Keep the column layout the appended frame had: template columns first, then
# any row keys that are not part of the template, in order of appearance.
extra_columns = [column for column in pairs_df.columns if column not in base_columns]
relationship_df_new = pairs_df.reindex(columns=base_columns + extra_columns)

In [26]:
# Write the generated pairs to disk without the index column.
relationship_df_new.to_csv(
    path_or_buf='../metadata/relationship_pairs_v2.csv',
    index=False,
)