In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from shapely.geometry import Polygon

from tqdm import tqdm
from helper_functions.displayFunctions import *

In [2]:
# Load the box table; `df` is the input passed to getNestingsFullDataset and
# getParticipantNestingData further down.
df = pd.read_csv('data/experiment_boxes.csv')

# Disabled debugging snippet (kept commented out): narrows the table to one
# participant / image and displays the boxes for a visual check.
# NOTE(review): displayAllBB presumably comes from helper_functions.displayFunctions — confirm.
# ID 169     claude-viallat_untitled-2007-235-2007.jpg
# ID 4       edward-hopper_five-a-m.jpg
# df_unique = df[df['id'] == 169]
# df_upsi = df_unique[df_unique['file_name'] == 'claude-viallat_untitled-2007-235-2007.jpg']

# disp = displayAllBB('edward-hopper_five-a-m.jpg', df_unique)

# plt.imshow(disp)
# plt.show()

## Get Nestings

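The following function detects nestings among the boxes of a single participant on a single image. Two boxes form a nesting when the area of their intersection, divided by the area of the smaller box, exceeds the given threshold; the larger box is treated as the parent and the smaller one as the child.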

In [3]:
def getNestings(df: pd.DataFrame, threshold: float) -> pd.DataFrame:
    """
    Detects nested bounding-box pairs among the rows of a DataFrame.

    Every unordered pair of rows is compared once. A pair is reported as a
    nesting when intersection_area / area_of_smaller_box is strictly greater
    than the threshold. The box with the larger area is the parent; when both
    areas are equal, the row that comes first in df is the parent. Pairs in
    which the smaller box has zero area are skipped.

    Parameters:
    df (pd.DataFrame): Boxes to compare (the callers pass the boxes of one
        participant on one image). Must contain the columns 'Index',
        'BBox_#', 'id' and 'file_name', plus whatever getBBPoints reads.
    threshold (float): Minimum overlap ratio (exclusive) for a pair to count
        as a nesting.

    Returns:
    pd.DataFrame: One row per nesting with the columns 'Parent', 'Child',
        'Parent_size', 'Child_size', 'Parent_box_no', 'Child_box_no',
        'Order', 'Overlap_perc', 'Participant_id' and 'Image'. 'Order' is
        'Parent to Child' when the parent's 'BBox_#' is lower than the
        child's, otherwise 'Child to Parent'. The frame is empty (same
        columns) when no nesting is found.
    """
    rows = len(df)

    idParent = []
    idChild = []
    box_no_Parent = []
    box_no_Child = []
    Parent_size = []
    Child_size = []
    orders = []
    perc_overlap = []
    participant_id = []
    image = []

    # Look up each row and build its polygon once instead of once per pair.
    # With fewer than two rows there is no pair, so no geometry is built.
    records = [df.iloc[k] for k in range(rows)] if rows > 1 else []
    polygons = [Polygon(getBBPoints(record)) for record in records]
    areas = [polygon.area for polygon in polygons]

    # Check every combination of boxes
    for i in range(rows):
        for j in range(i + 1, rows):
            min_area = min(areas[i], areas[j])

            # A degenerate (zero-area) box cannot be normalised against, so
            # skip the pair before doing any intersection work on it.
            if min_area == 0:
                continue

            intersection_perc = polygons[i].intersection(polygons[j]).area / min_area

            # Only overlaps strictly above the threshold count as nestings
            if not (intersection_perc > threshold):
                continue

            # The larger box is the parent; ties go to the earlier row
            if areas[i] >= areas[j]:
                parent, child = i, j
            else:
                parent, child = j, i

            parent_box_no = records[parent]['BBox_#']
            child_box_no = records[child]['BBox_#']

            idParent.append(int(records[parent]['Index']))
            idChild.append(int(records[child]['Index']))
            box_no_Parent.append(parent_box_no)
            box_no_Child.append(child_box_no)
            Parent_size.append(areas[parent])
            Child_size.append(areas[child])

            # NOTE(review): 'BBox_#' is presumably the drawing order of the
            # boxes — confirm against the data description.
            if parent_box_no < child_box_no:
                orders.append('Parent to Child')
            else:
                orders.append('Child to Parent')

            perc_overlap.append(intersection_perc)
            participant_id.append(records[i]['id'])
            image.append(records[i]['file_name'])

    # Create DataFrame
    data = {'Parent': idParent,
            'Child': idChild,
            'Parent_size': Parent_size,
            'Child_size': Child_size,
            'Parent_box_no': box_no_Parent,
            'Child_box_no': box_no_Child,
            'Order': orders,
            'Overlap_perc': perc_overlap,
            'Participant_id': participant_id,
            'Image': image}

    return pd.DataFrame(data)

# getNestings(df_upsi, 0.8)

### Analyze full dataset

In [4]:
def getNestingsFullDataset(df: pd.DataFrame, threshold: float = 0.8) -> pd.DataFrame:
    """
    Runs getNestings for every (participant, image) combination in the data.

    Images on which a participant has fewer than two boxes are skipped, since
    no pair can be formed. If getNestings raises for an image, the rows of
    that image are printed and the image is skipped; nothing is added to the
    result for it.

    Parameters:
    df (pd.DataFrame): Box table with at least the columns 'id' and
        'file_name', plus the columns getNestings needs.
    threshold (float): Overlap threshold forwarded to getNestings.

    Returns:
    pd.DataFrame: All per-image results of getNestings stacked with a fresh
        index. When no image produced a result, an empty frame with the
        columns of getNestings is returned.
    """
    collected = []

    for i in tqdm(df['id'].unique()):
        df_unique = df[df['id'] == i]

        for image in df_unique['file_name'].unique():
            df_image = df_unique[df_unique['file_name'] == image]
            if len(df_image) < 2:
                continue

            try:
                collected.append(getNestings(df_image, threshold))
            except Exception:
                # Show the offending rows and move on; appending only on
                # success keeps a failed image from re-using the previous
                # image's result.
                print(df_image)

    if not collected:
        # Zero rows in -> zero rows out, but with the regular column layout
        return getNestings(df.iloc[0:0], threshold)

    # Concatenate once at the end instead of growing a frame inside the loop
    return pd.concat(collected, ignore_index=True)

# Detect nestings across the whole box table using the default threshold (0.8)
nestings = getNestingsFullDataset(df)
# Save the pairs to disk; 'data_out/nesting/nesting_pairs.csv' is read back in the Depths section
nestings.to_csv('data_out/nesting/nesting_pairs.csv', index=False)
nestings

100%|██████████| 901/901 [00:50<00:00, 17.90it/s]


Unnamed: 0,Parent,Child,Parent_size,Child_size,Parent_box_no,Child_box_no,Order,Overlap_perc,Participant_id,Image
0,39.0,40.0,132728.000000,25578.000000,0.0,1.0,Parent to Child,0.954319,7.0,EVA - 3272.jpg
1,52.0,53.0,92843.000000,45400.000000,0.0,1.0,Parent to Child,0.819080,7.0,EVA - 35760.jpg
2,52.0,54.0,92843.000000,30000.000000,0.0,2.0,Parent to Child,1.000000,7.0,EVA - 35760.jpg
3,52.0,55.0,92843.000000,14742.000000,0.0,3.0,Parent to Child,1.000000,7.0,EVA - 35760.jpg
4,115.0,117.0,80599.999097,80599.999097,0.0,2.0,Parent to Child,0.944336,13.0,mario-schifano_interno-esterno-1976.jpg
...,...,...,...,...,...,...,...,...,...,...
2525,38933.0,38934.0,20000.000000,20000.000000,0.0,1.0,Parent to Child,1.000000,3991.0,arman-manookian_old-kahala-home-1928.jpg
2526,38940.0,38941.0,20000.000000,20000.000000,0.0,1.0,Parent to Child,1.000000,3991.0,edward-hopper_five-a-m.jpg
2527,38973.0,38972.0,193040.000000,13600.000000,1.0,0.0,Child to Parent,1.000000,3994.0,paul-feeley_alioth-1964.jpg
2528,38985.0,38986.0,102000.000000,102000.000000,0.0,1.0,Parent to Child,0.998039,3997.0,ad-reinhardt_abstract-painting-1957.jpg


## Analyze nestings per participant

In [5]:
def getParticipantNestingData(df_boxes, df_nestings):
    """
    Summarises boxes and nestings per participant and image.

    Parameters:
    df_boxes (pd.DataFrame): Box table with the columns 'id' and 'file_name'.
    df_nestings (pd.DataFrame): Nesting pairs with the columns
        'Participant_id', 'Image' and 'Order' (the layout produced by
        getNestingsFullDataset).

    Returns:
    pd.DataFrame: One row per (participant, image) found in df_boxes, in
        order of first appearance, with the columns:
          - 'Participant_id', 'Image': the combination described by the row.
          - 'Num_boxes': number of rows in df_boxes for that combination.
          - 'Num_nestings': number of rows in df_nestings for it.
          - 'Num_nestings_pc': nestings whose 'Order' is 'Parent to Child'.
          - 'Num_nestings_cp': nestings whose 'Order' is 'Child to Parent'.
    """
    participant_id = []
    images = []
    num_boxes = []
    num_nestings = []
    num_nestings_pc = []
    num_nestings_cp = []

    for i in tqdm(df_boxes['id'].unique()):
        # Narrow both tables to this participant once, so the per-image
        # filters below scan a handful of rows instead of the full tables.
        boxes_i = df_boxes[df_boxes['id'] == i]
        nestings_i = df_nestings[df_nestings['Participant_id'] == i]

        for image in boxes_i['file_name'].unique():
            nestings_image = nestings_i[nestings_i['Image'] == image]
            order = nestings_image['Order']

            participant_id.append(i)
            images.append(image)
            num_boxes.append(int((boxes_i['file_name'] == image).sum()))
            num_nestings.append(len(nestings_image))
            num_nestings_pc.append(int((order == 'Parent to Child').sum()))
            num_nestings_cp.append(int((order == 'Child to Parent').sum()))

    data = {'Participant_id': participant_id,
            'Image': images,
            'Num_boxes': num_boxes,
            'Num_nestings': num_nestings,
            'Num_nestings_pc': num_nestings_pc,
            'Num_nestings_cp': num_nestings_cp}

    return pd.DataFrame(data)

# Build the per-participant, per-image summary from the box table and the detected pairs
participant_nestings = getParticipantNestingData(df, nestings)
# Save the summary alongside the other nesting outputs
participant_nestings.to_csv('data_out/nesting/participant_nesting_data.csv', index=False)
participant_nestings

100%|██████████| 901/901 [02:46<00:00,  5.42it/s]


Unnamed: 0,Participant_id,Image,Num_boxes,Num_nestings,Num_nestings_pc,Num_nestings_cp
0,4,camille-bombois_port-de-boulogne-1927.jpg,1,0,0,0
1,4,albrecht-durer_courtyard-of-the-former-castle-...,1,0,0,0
2,4,gulacsy-lajos_return-of-the-pilgrims-1907.jpg,1,0,0,0
3,4,edward-hopper_five-a-m.jpg,2,0,0,0
4,4,anne-appleby_jasmine-2000.jpg,1,0,0,0
...,...,...,...,...,...,...
20010,4018,henri-edmond-cross_a-garden-in-provence-1901.jpg,1,0,0,0
20011,4018,eugene-delacroix_a-corner-of-the-studio(1).jpg,1,0,0,0
20012,4018,arkhip-kuindzhi_view-of-the-isaac-cathedral-at...,1,0,0,0
20013,4018,boris-kustodiev_autumn-1915.jpg,1,0,0,0


# Size Trends

In [6]:
# Count, per participant and image, how often a box is larger / smaller than
# the box on the row before it.
participant_boxes = pd.read_csv('data/experiment_boxes.csv')

participant_ids = []
file_names = []
increasing_sizes = []
decreasing_sizes = []

# groupby(sort=True) walks the ids, and within each id the file names, in
# ascending order. Only pandas is needed here: the notebook does not import
# numpy itself, so `np` resolved only through the wildcard helper import.
for participant_id, participant_df in tqdm(participant_boxes.groupby('id', sort=True)):
    for img_name, img_df in participant_df.groupby('file_name', sort=True):
        # Box areas in the row order of the table.
        # NOTE(review): this treats row order as the order the boxes were
        # drawn in — confirm against how the CSV is produced.
        box_sizes = (img_df['width_box'] * img_df['height_box']).to_numpy()

        # Compare every box with its predecessor; a single box has no
        # predecessor, so both slices are empty and both counts are 0.
        increasing = int((box_sizes[1:] > box_sizes[:-1]).sum())
        decreasing = int((box_sizes[1:] < box_sizes[:-1]).sum())

        participant_ids.append(participant_id)
        file_names.append(img_name)
        increasing_sizes.append(increasing)
        decreasing_sizes.append(decreasing)

size_trend_df = pd.DataFrame({
    'id': participant_ids,
    'file_name': file_names,
    'Increasing_sizes': increasing_sizes,
    'Decreasing_sizes': decreasing_sizes
})
size_trend_df.to_csv('data_out/nesting/size_trends.csv', index=False)
size_trend_df


100%|██████████| 901/901 [00:22<00:00, 40.44it/s]


Unnamed: 0,id,file_name,Increasing_sizes,Decreasing_sizes
0,4,adam-baltatu_still-life-with-apples-and-pipkin...,0,0
1,4,afro_abstract-composition-1975.jpg,0,0
2,4,albrecht-durer_courtyard-of-the-former-castle-...,0,0
3,4,andre-bauchant_unknown-title.jpg,0,0
4,4,andrei-ryabushkin_red-house.jpg,0,0
...,...,...,...,...
20010,4018,henri-edmond-cross_a-garden-in-provence-1901.jpg,0,0
20011,4018,joan-miro_bird-s-flight-in-moonlight.jpg,0,0
20012,4018,joan-miro_not_detected_227976.jpg,0,0
20013,4018,maurice-de-vlaminck_house-on-the-banks-of-a-ri...,0,0


# Depths

In [7]:
# Reload the nesting pairs written by the nesting-detection cell
nesting_pairs = pd.read_csv('data_out/nesting/nesting_pairs.csv')
# Keep only the edge columns, cast to int so they can serve as graph node ids
nesting_pairs_int = nesting_pairs[['Parent', 'Child']].astype(int)
# NOTE(review): this bare expression is not the last statement of the cell, so it displays nothing
nesting_pairs_int

def find_all_graphs_with_depths_and_sizes(df):
    """
    Finds all connected subgraphs, their depths, and sizes in a directed graph.

    The depth of a subgraph is the number of edges on its longest directed
    path. Shortest-path distances from the roots are not used for this,
    because a parent that is also linked directly to a grandchild would then
    flatten a chain parent -> child -> grandchild to depth 1.

    Parameters:
    df (pd.DataFrame): A DataFrame with two columns representing edges (parent, child).

    Returns:
    list: A list of dictionaries, each containing:
          - 'nodes': A set of nodes in the connected subgraph.
          - 'depth': The maximum depth of the subgraph.
          - 'size': The number of nodes in the subgraph.
    """
    # Create a directed graph using NetworkX
    G = nx.DiGraph()
    G.add_edges_from(df.values)

    results = []

    # Each weakly connected component is one independent nesting structure
    for component in nx.weakly_connected_components(G):
        # Subgraph containing the component
        subgraph = G.subgraph(component)

        if nx.is_directed_acyclic_graph(subgraph):
            # Longest chain of edges in the component
            max_depth = nx.dag_longest_path_length(subgraph)
        else:
            # A longest path is not defined when the component has a cycle.
            # Fall back to the largest breadth-first distance from any root
            # (a node with no incoming edges); this is 0 when there is no root.
            root_nodes = [node for node in subgraph if subgraph.in_degree(node) == 0]
            max_depth = 0
            for root in root_nodes:
                lengths = nx.single_source_shortest_path_length(subgraph, root)
                max_depth = max(max_depth, max(lengths.values()))

        # Store the result
        results.append({
            'nodes': set(component),
            'depth': max_depth,
            'size': len(component)  # Number of nodes in the subgraph
        })

    return results

# Find all connected subgraphs with depths and sizes
subgraphs_with_details = find_all_graphs_with_depths_and_sizes(nesting_pairs_int)

# Pull the two summary values out of every subgraph record
depths = [details['depth'] for details in subgraphs_with_details]
sizes = [details['size'] for details in subgraphs_with_details]

# One row per connected subgraph: its depth and its number of nodes
depths_sizes_df = pd.DataFrame({'Depth': depths, 'Size': sizes})
depths_sizes_df.to_csv('data_out/nesting/depths_and_sizes.csv', index=False)
depths_sizes_df

Unnamed: 0,Depth,Size
0,1,2
1,1,4
2,1,2
3,1,2
4,1,6
...,...,...
1506,1,2
1507,1,2
1508,1,2
1509,1,2
