In [3]:
from shapely.geometry import Polygon
import pandas as pd
import json
import numpy as np


In [4]:
# Load the annotation table; each row carries a JSON-encoded
# 'region_shape_attributes' string that later cells parse.
annotation_csv_path = "../data/bh-annotation.csv"
df = pd.read_csv(annotation_csv_path)

In [None]:
# Placeholder recorded for rows that do not yield a usable polygon.
NAN_FEATURES = (np.nan, np.nan, np.nan, np.nan)


def _polygon_shape_features(raw_attributes):
    """Compute shape descriptors for one 'region_shape_attributes' value.

    Parameters
    ----------
    raw_attributes : str
        JSON text describing the region. Only regions whose 'name' is
        'polygon' are measured; they must provide 'all_points_x' and
        'all_points_y' coordinate lists.

    Returns
    -------
    tuple
        (area, perimeter, aspect_ratio, circularity). All four are NaN when
        the region is not a polygon (e.g. an ellipse or rectangle), when the
        two coordinate lists differ in length, or when there are fewer than
        three vertices.

    Raises
    ------
    Exception
        Whatever json.loads, the key lookups, or the Polygon constructor
        raise for malformed input; the caller decides how to handle it.
    """
    shape_info = json.loads(raw_attributes)

    # Non-polygon annotations (such as ellipses and rectangles) are recorded as NaN.
    if shape_info.get('name') != 'polygon':
        return NAN_FEATURES

    all_x = shape_info['all_points_x']
    all_y = shape_info['all_points_y']

    # zip() would silently drop the surplus coordinates of the longer list and
    # produce a polygon that does not match the annotation, so mismatched
    # lists are treated as unusable. Fewer than 3 vertices cannot form a polygon.
    if len(all_x) != len(all_y) or len(all_x) < 3:
        return NAN_FEATURES

    # Combine x and y into vertices, e.g. [(x1, y1), (x2, y2), ...],
    # and build the polygon object from them.
    poly = Polygon(list(zip(all_x, all_y)))

    area = poly.area
    perimeter = poly.length

    # bounds is (min_x, min_y, max_x, max_y); aspect ratio is width / height.
    min_x, min_y, max_x, max_y = poly.bounds
    width = max_x - min_x
    height = max_y - min_y
    aspect_ratio = width / height if height != 0 else 0

    # 4*pi*A / P^2 equals 1 for a perfect circle and is smaller otherwise.
    circularity = 4 * np.pi * area / (perimeter ** 2) if perimeter != 0 else 0

    return (area, perimeter, aspect_ratio, circularity)


# Used to store the features of holds: one tuple per row of df, in row order.
shape_features = []
# (row index, error description) for every row whose parsing raised.
failed_rows = []

for idx, raw_attributes in df['region_shape_attributes'].items():
    try:
        shape_features.append(_polygon_shape_features(raw_attributes))
    except Exception as exc:
        # Best effort: a single bad row must not abort the whole pass, but the
        # failure is remembered so it can be inspected instead of vanishing.
        shape_features.append(NAN_FEATURES)
        failed_rows.append((idx, repr(exc)))

if failed_rows:
    print(f"{len(failed_rows)} row(s) raised while parsing and were recorded as NaN; inspect `failed_rows`.")

In [8]:
# Column names in the same order as the values inside each shape_features tuple.
feature_columns = (
    'shape_area',
    'shape_perimeter',
    'shape_aspect_ratio',
    'shape_circularity',
)

# Attach each tuple position to df as its own column.
for position, column in enumerate(feature_columns):
    df[column] = [features[position] for features in shape_features]

In [9]:
# Keep only the rows for which a polygon area was computed (shape_area is not NaN).
has_shape_area = df['shape_area'].notna()
df_cleaned = df[has_shape_area]

In [18]:
# Show the filtered table (rows with a non-NaN shape_area) as the cell output.
df_cleaned

Unnamed: 0,filename,file_size,file_attributes,region_count,region_id,region_shape_attributes,region_attributes,shape_area,shape_perimeter,shape_aspect_ratio,shape_circularity
0,0000.jpg,4501555,{},76,0,"{""name"":""polygon"",""all_points_x"":[895,888,879,...","{""label_type"":""handlabeled"",""hold_type"":""hold""}",8463.0,350.548866,0.790698,0.865440
1,0000.jpg,4501555,{},76,1,"{""name"":""polygon"",""all_points_x"":[2058,1973,19...","{""label_type"":""handlabeled"",""hold_type"":""hold""}",35619.5,698.205728,0.836066,0.918186
2,0000.jpg,4501555,{},76,2,"{""name"":""polygon"",""all_points_x"":[2352,2335,23...","{""label_type"":""handlabeled"",""hold_type"":""hold""}",5830.0,296.728826,1.207317,0.832068
3,0000.jpg,4501555,{},76,3,"{""name"":""polygon"",""all_points_x"":[908,899,895,...","{""label_type"":""handlabeled"",""hold_type"":""hold""}",2008.0,166.220468,0.827586,0.913281
4,0000.jpg,4501555,{},76,4,"{""name"":""polygon"",""all_points_x"":[614,626,665,...","{""label_type"":""handlabeled"",""hold_type"":""hold""}",15616.0,561.903101,1.321918,0.621522
...,...,...,...,...,...,...,...,...,...,...,...
2505,1037.jpg,4951223,{},84,79,"{""name"":""polygon"",""all_points_x"":[2444,2444,24...","{""label_type"":""handlabeled"",""hold_type"":""hold""}",5617.5,304.220222,1.120879,0.762740
2506,1037.jpg,4951223,{},84,80,"{""name"":""polygon"",""all_points_x"":[2450,2451,24...","{""label_type"":""handlabeled"",""hold_type"":""hold""}",5185.0,279.747596,1.197531,0.832580
2507,1037.jpg,4951223,{},84,81,"{""name"":""polygon"",""all_points_x"":[1755,1750,17...","{""label_type"":""handlabeled"",""hold_type"":""hold""}",9680.5,358.154237,0.744186,0.948347
2508,1037.jpg,4951223,{},84,82,"{""name"":""polygon"",""all_points_x"":[2589,2590,25...","{""label_type"":""handlabeled"",""hold_type"":""hold""}",42090.0,822.913427,0.820000,0.781053
