In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import Polygon

from tqdm import tqdm
from helper_functions.displayFunctions import *

In [2]:
# Load the bounding-box annotations. The cells below read the columns
# 'id', 'file_name', 'Index' and 'BBox_#' from this frame.
df = pd.read_csv('data/experiment_boxes.csv')

# Disabled scratch code for inspecting one participant / image by hand.
# ID 169     claude-viallat_untitled-2007-235-2007.jpg
# ID 4       edward-hopper_five-a-m.jpg
# df_unique = df[df['id'] == 169]
# df_upsi = df_unique[df_unique['file_name'] == 'claude-viallat_untitled-2007-235-2007.jpg']

# disp = displayAllBB('edward-hopper_five-a-m.jpg', df_unique)

# plt.imshow(disp)
# plt.show()

## Get Nestings

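The following function detects nestings among the boxes drawn by a single participant on a single image. Two boxes form a nesting when their intersection covers more than `threshold` of the smaller box's area; the larger box is the parent and the smaller one the child.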

In [3]:
def getNestings(df: pd.DataFrame, threshold: float) -> pd.DataFrame:
    """Find nested bounding-box pairs among the rows of `df`.

    Every unordered pair of rows is compared. A pair counts as a nesting when
    the area of the intersection, divided by the area of the smaller box, is
    strictly greater than `threshold`. The box with the larger area is the
    parent; on equal areas the row that comes first in `df` is the parent.

    Args:
        df: Boxes to compare. Must provide the columns 'Index', 'BBox_#',
            'id' and 'file_name', plus whatever `getBBPoints` reads from a row
            to build the polygon corners. The caller is expected to pass the
            boxes of one participant on one image; this is not checked.
        threshold: Minimum overlap ratio (exclusive) for a pair to be reported.

    Returns:
        One row per nesting with the columns 'Parent', 'Child', 'Parent_size',
        'Child_size', 'Parent_box_no', 'Child_box_no', 'Order', 'Overlap_perc',
        'Participant_id' and 'Image'. 'Order' is 'Parent to Child' when the
        parent's 'BBox_#' is smaller than the child's, otherwise
        'Child to Parent' (presumably 'BBox_#' is the drawing order - confirm
        against the data export). The frame is empty when nothing is nested.
    """
    # Insertion order of the keys defines the column order of the result.
    records = {'Parent': [],
               'Child': [],
               'Parent_size': [],
               'Child_size': [],
               'Parent_box_no': [],
               'Child_box_no': [],
               'Order': [],
               'Overlap_perc': [],
               'Participant_id': [],
               'Image': []}

    n_boxes = len(df)

    # Materialise each row, its polygon and its area once. Doing this inside
    # the pair loop would rebuild two polygons for every single comparison.
    # With fewer than two boxes there is nothing to compare, so nothing is built.
    box_rows = [df.iloc[k] for k in range(n_boxes)] if n_boxes > 1 else []
    polygons = [Polygon(getBBPoints(row)) for row in box_rows]
    areas = [poly.area for poly in polygons]

    # Check every combination of boxes
    for i in range(len(box_rows)):
        for j in range(i + 1, len(box_rows)):
            smaller_area = min(areas[i], areas[j])
            # Degenerate boxes cannot be normalised against; skip them before
            # asking for an intersection of a zero-area geometry.
            if smaller_area == 0:
                continue

            overlap = polygons[i].intersection(polygons[j]).area / smaller_area
            if not overlap > threshold:
                continue

            # Larger box is the parent; ties go to the earlier row (i).
            parent, child = (i, j) if areas[i] >= areas[j] else (j, i)
            parent_row = box_rows[parent]
            child_row = box_rows[child]

            records['Parent'].append(int(parent_row['Index']))
            records['Child'].append(int(child_row['Index']))
            records['Parent_size'].append(areas[parent])
            records['Child_size'].append(areas[child])
            records['Parent_box_no'].append(parent_row['BBox_#'])
            records['Child_box_no'].append(child_row['BBox_#'])
            if parent_row['BBox_#'] < child_row['BBox_#']:
                records['Order'].append('Parent to Child')
            else:
                records['Order'].append('Child to Parent')
            records['Overlap_perc'].append(overlap)
            # Participant and image are taken from row i of the pair.
            records['Participant_id'].append(box_rows[i]['id'])
            records['Image'].append(box_rows[i]['file_name'])

    return pd.DataFrame(records)

# getNestings(df_upsi, 0.8)

### Analyze full dataset

In [4]:
def getNestingsFullDataset(df: pd.DataFrame, threshold: float = 0.8) -> pd.DataFrame:
    """Run `getNestings` for every (participant, image) group in `df`.

    Groups are formed from the 'id' and 'file_name' columns. Groups with fewer
    than two boxes are skipped because they cannot contain a nesting.

    Args:
        df: All boxes of all participants; needs 'id' and 'file_name' plus the
            columns `getNestings` requires.
        threshold: Overlap ratio passed through to `getNestings`.

    Returns:
        The nesting pairs of all groups stacked into one frame with a fresh
        0..n-1 index. When no group yields a frame, an empty frame with the
        `getNestings` column layout is returned.
    """
    nesting_frames = []

    for participant in tqdm(df['id'].unique()):
        df_participant = df[df['id'] == participant]

        for image in df_participant['file_name'].unique():
            df_image = df_participant[df_participant['file_name'] == image]
            # A nesting needs at least two boxes.
            if len(df_image) < 2:
                continue

            try:
                nesting_frames.append(getNestings(df_image, threshold))
            except Exception as exc:
                # Best effort: report the offending group and move on. Nothing
                # is appended, so a failed group can neither crash the run nor
                # re-add the previous group's result.
                print(f'getNestings failed for participant {participant}, image {image}: {exc!r}')
                print(df_image)

    if not nesting_frames:
        # Ask getNestings for its empty result so callers still get the
        # expected columns.
        return getNestings(df.iloc[:0], threshold)

    # Single concat instead of growing a frame inside the loop.
    return pd.concat(nesting_frames, ignore_index=True)

# Detect nestings across the whole dataset (default threshold 0.8), write the
# pairs to CSV and show the frame as the cell output.
nestings = getNestingsFullDataset(df)
nestings.to_csv('data_out/nesting/nesting_pairs.csv', index=False)
nestings

  0%|          | 0/901 [00:00<?, ?it/s]

100%|██████████| 901/901 [00:16<00:00, 53.19it/s]


Unnamed: 0,Parent,Child,Parent_size,Child_size,Parent_box_no,Child_box_no,Order,Overlap_perc,Participant_id,Image
0,39.0,40.0,132728.000000,25578.000000,0.0,1.0,Parent to Child,0.954319,7.0,EVA - 3272.jpg
1,52.0,53.0,92843.000000,45400.000000,0.0,1.0,Parent to Child,0.819080,7.0,EVA - 35760.jpg
2,52.0,54.0,92843.000000,30000.000000,0.0,2.0,Parent to Child,1.000000,7.0,EVA - 35760.jpg
3,52.0,55.0,92843.000000,14742.000000,0.0,3.0,Parent to Child,1.000000,7.0,EVA - 35760.jpg
4,115.0,117.0,80599.999097,80599.999097,0.0,2.0,Parent to Child,0.944336,13.0,mario-schifano_interno-esterno-1976.jpg
...,...,...,...,...,...,...,...,...,...,...
2525,38933.0,38934.0,20000.000000,20000.000000,0.0,1.0,Parent to Child,1.000000,3991.0,arman-manookian_old-kahala-home-1928.jpg
2526,38940.0,38941.0,20000.000000,20000.000000,0.0,1.0,Parent to Child,1.000000,3991.0,edward-hopper_five-a-m.jpg
2527,38973.0,38972.0,193040.000000,13600.000000,1.0,0.0,Child to Parent,1.000000,3994.0,paul-feeley_alioth-1964.jpg
2528,38985.0,38986.0,102000.000000,102000.000000,0.0,1.0,Parent to Child,0.998039,3997.0,ad-reinhardt_abstract-painting-1957.jpg


## Analyze nestings per participant

In [5]:
def getParticipantNestingData(df_boxes: pd.DataFrame, df_nestings: pd.DataFrame) -> pd.DataFrame:
    """Count boxes and nestings for every (participant, image) combination.

    Args:
        df_boxes: All drawn boxes; needs the columns 'id' and 'file_name'.
        df_nestings: Nesting pairs; needs the columns 'Participant_id',
            'Image' and 'Order'.

    Returns:
        One row per participant/image combination found in `df_boxes`, in
        order of first appearance (participants first, then their images),
        with the columns 'Participant_id', 'Image', 'Num_boxes',
        'Num_nestings', 'Num_nestings_pc' (Order == 'Parent to Child') and
        'Num_nestings_cp' (Order == 'Child to Parent').
    """
    summary = {'Participant_id': [],
               'Image': [],
               'Num_boxes': [],
               'Num_nestings': [],
               'Num_nestings_pc': [],
               'Num_nestings_cp': []}

    for participant in tqdm(df_boxes['id'].unique()):
        # Narrow both frames to this participant once; the per-image filters
        # below then scan only these rows instead of the full frames.
        boxes_participant = df_boxes[df_boxes['id'] == participant]
        nestings_participant = df_nestings[df_nestings['Participant_id'] == participant]

        for image in boxes_participant['file_name'].unique():
            boxes_image = boxes_participant[boxes_participant['file_name'] == image]
            nestings_image = nestings_participant[nestings_participant['Image'] == image]
            order = nestings_image['Order']

            summary['Participant_id'].append(participant)
            summary['Image'].append(image)
            summary['Num_boxes'].append(len(boxes_image))
            summary['Num_nestings'].append(len(nestings_image))
            summary['Num_nestings_pc'].append(int((order == 'Parent to Child').sum()))
            summary['Num_nestings_cp'].append(int((order == 'Child to Parent').sum()))

    return pd.DataFrame(summary)

# Summarise box and nesting counts per participant/image, write the summary to
# CSV and show the frame as the cell output.
participant_nestings = getParticipantNestingData(df, nestings)
participant_nestings.to_csv('data_out/nesting/participant_nesting_data.csv', index=False)
participant_nestings

100%|██████████| 901/901 [00:50<00:00, 17.79it/s]


Unnamed: 0,Participant_id,Image,Num_boxes,Num_nestings,Num_nestings_pc,Num_nestings_cp
0,4,camille-bombois_port-de-boulogne-1927.jpg,1,0,0,0
1,4,albrecht-durer_courtyard-of-the-former-castle-...,1,0,0,0
2,4,gulacsy-lajos_return-of-the-pilgrims-1907.jpg,1,0,0,0
3,4,edward-hopper_five-a-m.jpg,2,0,0,0
4,4,anne-appleby_jasmine-2000.jpg,1,0,0,0
...,...,...,...,...,...,...
20010,4018,henri-edmond-cross_a-garden-in-provence-1901.jpg,1,0,0,0
20011,4018,eugene-delacroix_a-corner-of-the-studio(1).jpg,1,0,0,0
20012,4018,arkhip-kuindzhi_view-of-the-isaac-cathedral-at...,1,0,0,0
20013,4018,boris-kustodiev_autumn-1915.jpg,1,0,0,0
