In [1]:
import pandas as pd
from tqdm import tqdm
from datetime import datetime

# Register tqdm's pandas integration so that `progress_apply` (used later for
# the row-wise IoU computation) is available on DataFrames.
tqdm.pandas()

In [20]:
# Earlier checkpoint (without the "dimensions" suffix), kept for reference:
# checkpoint_path = '../checkpoints/relationship_final/relationship_final_2020-04-16_21-00-58.csv'
checkpoint_path = '../checkpoints/relationship_final/relationship_final_dimensions_2020-04-16_21-21-24.csv'
df_final = pd.read_csv(checkpoint_path)

# Adding Relationship Bounding Boxes:

In [21]:
# Box 3 is the smallest box enclosing both box 1 and box 2:
# the row-wise minimum of the two lower bounds and maximum of the two upper bounds.
x_lower_bounds = df_final[['XMin1', 'XMin2']]
x_upper_bounds = df_final[['XMax1', 'XMax2']]
df_final['XMin3'] = x_lower_bounds.min(axis=1)
df_final['XMax3'] = x_upper_bounds.max(axis=1)

y_lower_bounds = df_final[['YMin1', 'YMin2']]
y_upper_bounds = df_final[['YMax1', 'YMax2']]
df_final['YMin3'] = y_lower_bounds.min(axis=1)
df_final['YMax3'] = y_upper_bounds.max(axis=1)

# Adding Area:

In [22]:
# Area of each of the three boxes (1, 2 and the enclosing box 3): width * height.
for i in (1, 2, 3):
    box_width = df_final[f'XMax{i}'] - df_final[f'XMin{i}']
    box_height = df_final[f'YMax{i}'] - df_final[f'YMin{i}']
    df_final[f'Area{i}'] = box_width * box_height

# Adding Distance:

In [23]:
# Euclidean distance between the matching corners of box 1 and box 2.
# Each corner distance pairs the SAME coordinate of both boxes. In these column
# names "Top" corresponds to YMax and "Bottom" to YMin.
# NOTE(review): get_box maps YMin to 'top'; the column names are kept unchanged
# for compatibility with downstream consumers.
dx_min = df_final['XMin2'] - df_final['XMin1']
dx_max = df_final['XMax2'] - df_final['XMax1']
dy_min = df_final['YMin2'] - df_final['YMin1']
dy_max = df_final['YMax2'] - df_final['YMax1']

# Fix: the top-left term previously used (YMin2 - YMax1), mixing two different
# edges; it now uses (YMax2 - YMax1) like DistanceTopRight.
df_final['DistanceTopLeft'] = (dx_min ** 2 + dy_max ** 2) ** 0.5
df_final['DistanceTopRight'] = (dx_max ** 2 + dy_max ** 2) ** 0.5
df_final['DistanceBottomLeft'] = (dx_min ** 2 + dy_min ** 2) ** 0.5
df_final['DistanceBottomRight'] = (dx_max ** 2 + dy_min ** 2) ** 0.5

# 'DistanceCenter' is the mean of the four corner distances (not the distance
# between the two box centres).
df_final['DistanceCenter'] = (
    df_final['DistanceTopLeft']
    + df_final['DistanceTopRight']
    + df_final['DistanceBottomLeft']
    + df_final['DistanceBottomRight']
) / 4

# Adding IOU:

In [24]:
def get_box(row, i):
    """Describe bounding box ``i`` of ``row`` as a left/top/width/height dict.

    Reads ``XMin{i}``, ``XMax{i}``, ``YMin{i}`` and ``YMax{i}`` from ``row``.
    ``left`` and ``top`` are the two minima; ``width`` and ``height`` are the
    max-minus-min extents along X and Y.
    """
    x_min, x_max = row[f'XMin{i}'], row[f'XMax{i}']
    y_min, y_max = row[f'YMin{i}'], row[f'YMax{i}']
    return dict(
        left=x_min,
        top=y_min,
        width=x_max - x_min,
        height=y_max - y_min,
    )

In [25]:
def intersection_over_union(row, offset=1):
    """Return the intersection-over-union (IoU) of boxes 1 and 2 of ``row``.

    Parameters
    ----------
    row : mapping-like
        Must provide the columns read by ``get_box(row, 1)`` and
        ``get_box(row, 2)``.
    offset : number, default 1
        Value added to every width/height before areas are computed. The
        default keeps the original ``+ 1`` terms.
        NOTE(review): ``+ 1`` is the inclusive integer-pixel convention; if the
        coordinates are continuous (e.g. normalised to [0, 1]) ``offset=0`` is
        presumably what is wanted - confirm against the input data.

    Returns
    -------
    number
        Intersection area divided by union area; 0 when the boxes do not
        overlap or the union area is not positive.
    """
    box_a = get_box(row, 1)
    box_b = get_box(row, 2)

    # Edges of the candidate intersection rectangle.
    xA = max(box_a['left'], box_b['left'])
    yA = max(box_a['top'], box_b['top'])
    xB = min(box_a['left'] + box_a['width'], box_b['left'] + box_b['width'])
    yB = min(box_a['top'] + box_a['height'], box_b['top'] + box_b['height'])

    # Clamp each extent at zero. Without this, disjoint boxes yield a negative
    # intersection (one negative extent) or a spurious positive one (two
    # negative extents multiplied together).
    overlap_width = max(0, xB - xA + offset)
    overlap_height = max(0, yB - yA + offset)
    area_of_intersection = overlap_width * overlap_height

    # Areas of the two boxes, using the same offset convention.
    box_a_area = (box_a['width'] + offset) * (box_a['height'] + offset)
    box_b_area = (box_b['width'] + offset) * (box_b['height'] + offset)

    # Area of union = sum of both areas less the area of intersection.
    area_of_union = box_a_area + box_b_area - area_of_intersection
    if area_of_union <= 0:
        # Degenerate boxes (possible when offset is 0): avoid dividing by zero.
        return 0.0

    return area_of_intersection / float(area_of_union)

In [26]:
# Row-wise IoU of box 1 vs box 2; progress_apply is the tqdm-instrumented apply.
iou_scores = df_final.progress_apply(intersection_over_union, axis=1)
df_final['IoU'] = iou_scores

100%|████████████████████████████████████████████████████████████████████████| 700000/700000 [01:11<00:00, 9821.73it/s]


# Adding Relationship Frequency:

In [27]:
# Relationship frequency table, later joined onto df_final.
freq_checkpoint_path = '../checkpoints/relationship_freq/relationships_freq_2020-04-10_18-08-07.csv'
df_freq = pd.read_csv(freq_checkpoint_path)

In [28]:
# Inner join on the (LabelName1, LabelName2, RelationshipLabel) triple.
# Both sides are passed through reset_index(), so the merged frame carries the
# two former indexes as suffixed columns (index_x / index_y), removed later.
merge_keys = ['LabelName1', 'LabelName2', 'RelationshipLabel']
df_final = df_final.reset_index().merge(
    df_freq.reset_index(),
    how='inner',
    on=merge_keys,
)

In [29]:
# Share of the label count accounted for by this relationship count.
df_final['RelationshipFrequency'] = df_final['RelationshipCount'].div(df_final['LabelCount'])

In [30]:
# Drop the merge helper columns and the raw counts, and move
# 'RelationshipLabel' to the last position.
columns = list(df_final.columns)
for unwanted in ('index_y', 'index_x', 'RelationshipCount', 'LabelCount', 'RelationshipLabel'):
    # list.remove raises ValueError if an expected column is missing.
    columns.remove(unwanted)
columns += ['RelationshipLabel']
df_final = df_final[columns]

In [31]:
# Alternative target (without the "dimensions" suffix), kept for reference:
# output_path = '../metadata/relationship_final/relationship_final.csv'
output_path = '../metadata/relationship_final/relationship_final_dimensions.csv'
df_final.to_csv(output_path, index=False)