In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from skimage.measure import label, regionprops_table
from tqdm.notebook import tqdm
import os

# --- Setup & Helper Function ---
# Root directory of the dataset; the relative path assumes the data folder
# sits next to the folder this script is run from.
BASE_PATH = "../data"
# Training metadata CSV located directly inside BASE_PATH.
METADATA_FILE = os.path.join(BASE_PATH, "train.csv")

def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded mask into a binary array.

    The encoding is a whitespace-separated sequence of ``start length``
    pairs, where ``start`` is a 1-based index into the flattened mask and
    ``length`` is the number of consecutive pixels set to 1.

    Args:
        mask_rle: The RLE string. ``None``, NaN and blank strings are
            treated as "no annotation" and yield an all-zero mask.
        shape: Two-element sequence; the flat buffer holds
            ``shape[0] * shape[1]`` pixels.

    Returns:
        A ``np.uint8`` array of zeros and ones. The flat buffer is reshaped
        to ``shape`` and then transposed, so the result has dimensions
        ``(shape[1], shape[0])``.

    Raises:
        ValueError: If the tokens are not integers, or if there is an odd
            number of tokens (a start without a matching length).

    NOTE(review): reshape-then-transpose only yields a mask with the
    dimensions given in ``shape`` when ``shape`` is square - confirm that
    callers never pass non-square shapes.
    """
    # Missing annotations arrive from pandas as None/NaN; str() would turn
    # them into the literal tokens 'None'/'nan' and break integer parsing.
    if mask_rle is None or (isinstance(mask_rle, float) and np.isnan(mask_rle)):
        tokens = []
    else:
        tokens = str(mask_rle).split()

    # With an odd token count NumPy would silently broadcast the shorter
    # lengths array against starts and paint the wrong runs.
    if len(tokens) % 2:
        raise ValueError(
            "RLE string must contain an even number of tokens "
            f"(start/length pairs), got {len(tokens)}"
        )

    starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)

    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, starts + lengths):
        img[lo:hi] = 1
    return img.reshape(shape).T

# --- Analysis ---
# Measure per-region shape descriptors (area, eccentricity, solidity) for the
# annotated regions of a random subset of images, tagged with the image's organ.
df = pd.read_csv(METADATA_FILE)

# DataFrame.sample raises when n exceeds the number of rows (sampling without
# replacement), so cap the request at the size of the table.
sample_size = min(100, len(df))
sample_df = df.sample(n=sample_size, random_state=42)

ftu_properties_list = []
print(f"Analyzing FTU properties for {sample_size} random samples...")

for _, row in tqdm(sample_df.iterrows(), total=len(sample_df)):
    shape = (int(row['img_height']), int(row['img_width']))
    mask = rle_decode(row['rle'], shape)
    # Split the binary mask into connected components so that every region
    # is measured on its own.
    labeled_mask = label(mask)

    properties = regionprops_table(
        labeled_mask,
        properties=('label', 'area', 'eccentricity', 'solidity')
    )

    props_df = pd.DataFrame(properties)
    # Images without any labelled region contribute no rows.
    if not props_df.empty:
        props_df['organ'] = row['organ']
        ftu_properties_list.append(props_df)

# pd.concat fails with an opaque "No objects to concatenate" on an empty
# list; report the actual cause instead.
if not ftu_properties_list:
    raise ValueError("No FTU regions were found in the sampled masks; nothing to analyze.")

# Each per-image frame starts its index at 0; ignore_index gives the combined
# table a unique index instead of many repeated labels.
all_ftu_properties = pd.concat(ftu_properties_list, ignore_index=True)
print("Analysis complete.")

# --- Visualization ---
# One violin panel per morphology metric, each grouped by organ on the x axis.
# Every entry is (column to plot, panel title, whether to use a log y axis).
panel_specs = (
    ('area', 'FTU Area (Size)', True),
    ('eccentricity', 'FTU Eccentricity (0=Circle, 1=Line)', False),
    ('solidity', 'FTU Solidity (1=Solid, <1=Irregular)', False),
)

fig, axes = plt.subplots(1, 3, figsize=(20, 6))
fig.suptitle('Exploratory Data Analysis: FTU Morphology by Organ', fontsize=16)

for panel, (metric, panel_title, log_y) in zip(axes, panel_specs):
    sns.violinplot(data=all_ftu_properties, x='organ', y=metric, ax=panel)
    panel.set_title(panel_title)
    if log_y:
        panel.set_yscale('log')
    # Tilt the organ names so neighbouring tick labels do not collide.
    panel.tick_params(axis='x', rotation=45)

# Keep the top 4% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()