In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from src.semantic_features import extract_semantic_features
from src.mask_utils import crop_mask_contained

# === Configuration ===
# Metadata CSV: one row per mask; must contain the columns in `meta_columns`.
input_csv = 'data/meta_info_classification.csv'
# Directory that each `mask_image` entry (given without extension) is joined to.
mask_base = 'data/Detected/Mask'
# Destination of the per-mask feature table.
output_csv = 'outputs/semantic_features.csv'
# Passed through to crop_mask_contained as its `crop_size` argument.
crop_size = 64

# === Load DataFrame ===
df = pd.read_csv(input_csv)
rows = []

# === Process Each Mask ===
# Only these columns are read per row. Selecting them up front fails fast with
# a KeyError if one is missing, and lets itertuples() yield plain tuples
# instead of building a Series per row (iterrows() is slower and does not
# preserve column dtypes).
meta_columns = ['mask_image', 'patient_id', 'slice_no', 'malignancy']
n_failed = 0  # masks whose .npy file could not be loaded
n_empty = 0   # masks that loaded but sum to zero

for mask_rel_path, patient_id, slice_no, malignancy in tqdm(
    df[meta_columns].itertuples(index=False, name=None),
    total=len(df),
):
    full_path = os.path.join(mask_base, mask_rel_path + '.npy')

    # Best-effort loading: one unreadable file must not abort the whole run,
    # so the failure is reported, counted, and the row skipped.
    try:
        mask = np.load(full_path)
    except Exception as e:
        print(f"Failed to load: {full_path} ({e})")
        n_failed += 1
        continue

    # A mask that sums to zero has nothing to crop or measure; skip it.
    if np.sum(mask) == 0:
        n_empty += 1
        continue

    cropped_mask = crop_mask_contained(mask, crop_size=crop_size)
    features = extract_semantic_features(cropped_mask)

    # `features` is unpacked last, so a feature key that collides with one of
    # the metadata keys overrides the metadata value.
    rows.append({
        'patient_id': patient_id,
        'slice_no': slice_no,
        'mask_image': mask_rel_path,
        'malignancy': malignancy,
        **features,
    })

# === Save Output CSV ===
df_out = pd.DataFrame(rows)
# os.path.dirname() returns '' for a bare filename, and os.makedirs('') raises
# FileNotFoundError, so only create the directory when there is one.
output_dir = os.path.dirname(output_csv)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df_out.to_csv(output_csv, index=False)
print(f"Saved {len(df_out)} rows to {output_csv}")
if n_failed or n_empty:
    # Make dropped rows visible so a short output file is not a surprise.
    print(f"Skipped {n_failed} unreadable and {n_empty} empty masks")

