
# Organelle Contacts Analysis — Suite (v2)

This notebook generates **tables (CSV + single Excel with multiple sheets)** and **figures (PNG)** for:

1. **GLOBAL per class — ANY vs NO_contact** (stacked to 100%)  
2. **PER-Class TOTAL MASS — ANY vs NO_contact** (stacked to 100%)  
3. **Per class object VOLUME — ANY vs NO_contact** (mean of per-cell means ± SD)  
4. **Contact-only (COUNTS) — partner prevalence by class** (denominator: objects with ≥1 contact; **sum across partners can exceed 100%**)  
5. **Contact-only — DEGREE distribution per class** (stacked to 100%)

**Conventions**
- **ER** is **excluded as focal** (1 object/cell) but **included as a partner**.  
- For partner attribution, the `volume` of each interaction row is **split equally** among the distinct partner classes listed in that row.  
- All per-cell tables are saved, and aggregates (mean ± SD across cells) are computed.  
- At the end, all outputs are written to **one Excel workbook** with multiple sheets.


In [49]:

# =====================
# Parameters
# =====================
# Input tables: one row per segmented object, and one row per interaction.
# Both paths must be edited for every new dataset.
ORGANELLES_CSV = "J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/iNday14 new perox and new lyso/neurites/01082026_iNday14_neurites_organelles.csv"
INTERACTIONS_CSV = "J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/iNday14 new perox and new lyso/neurites/01082026_iNday14_neurites_interactions.csv"

# Organelle classes analysed as the focal object (ER is deliberately absent).
FOCAL_CLASSES = [
    'LD',
    'lyso',
    'mito',
    'golgi',
    'perox',
]
# Organelle classes counted as contact partners (ER is included here).
PARTNERS = [
    'LD',
    'lyso',
    'mito',
    'golgi',
    'perox',
    'ER',
]

# Output naming: OUT_PREFIX is a directory plus a file-name prefix shared by
# every PNG/CSV; OUT_XLSX is the single workbook holding all tables.
OUT_PREFIX = "J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/Contact analysis v2/iNday14_neurit"
OUT_XLSX = f"{OUT_PREFIX}_contact_analysis_v2.xlsx"
# Free-text tag appended to every plot title.
TITLE_TAG = "iN day14 neurit"

# Color-blind palette, keyed by organelle class (plus the NO_contact category)
COLORBLIND = {
    'ER': '#56B4E9',
    'lyso': '#0072B2',
    'mito': '#D55E00',
    'golgi': '#F0E442',
    'perox': '#CC79A7',
    'LD': '#009E73',
    'NO_contact': '#7F7F7F',
}

# Seed handed to NumPy's global random generator.
RANDOM_SEED = 0


In [50]:

import pandas as pd, numpy as np, matplotlib.pyplot as plt
from pathlib import Path
from collections import defaultdict

def excel_writer(path):
    """Return a ``pd.ExcelWriter`` for *path*, preferring xlsxwriter over openpyxl.

    The xlsxwriter engine is tried first; if that raises ``ImportError`` the
    openpyxl engine is used instead (opened in write mode).

    Raises:
        ImportError: if neither engine can be used.
    """
    try:
        import xlsxwriter  # noqa: F401
        writer = pd.ExcelWriter(path, engine="xlsxwriter")
    except ImportError:
        try:
            import openpyxl  # noqa: F401
            writer = pd.ExcelWriter(path, engine="openpyxl", mode="w")
        except ImportError:
            # User-facing message is reproduced verbatim (Italian): it asks
            # the user to install at least one of the two packages.
            hint = (
                "Installare almeno uno tra 'xlsxwriter' o 'openpyxl':\n"
                "  pip install XlsxWriter   oppure   pip install openpyxl"
            )
            raise ImportError(hint)
    return writer

# Seed NumPy's global random generator with RANDOM_SEED.
# NOTE(review): no random draw is visible in the cells shown here — confirm
# whether the seed is still needed.
np.random.seed(RANDOM_SEED)

def _parse_tokens(s):
    """Split the string form of *s* on the uppercase letter ``'X'``.

    Used on the interactions ``object`` column to obtain one token per
    participant. The split is case-sensitive, so a lowercase ``x`` (as in
    ``perox``) is left intact.
    """
    text = str(s)
    return text.split('X')
def _parse_labels(lbl):
    """Return the integer pieces of an underscore-separated label string.

    *lbl* is converted with ``str`` and split on ``'_'``. Every piece that
    ``int`` accepts is kept, in order; pieces that are not valid integers are
    skipped, so the result may be shorter than the number of pieces.
    """
    out = []
    for piece in str(lbl).split('_'):
        try:
            out.append(int(piece))
        except ValueError:
            # Only a failed integer conversion is ignored; a bare ``except``
            # would also have swallowed KeyboardInterrupt / SystemExit.
            continue
    return out

def _ensure_dir(path: str):
    """Create the parent directory of *path*, including missing ancestors.

    Nothing happens if the directory already exists; *path* itself is not
    created.
    """
    parent_dir = Path(path).parent
    parent_dir.mkdir(parents=True, exist_ok=True)


In [51]:

# =====================
# Load & preprocess
# =====================
# Only the four columns used below are read from each table.
org = pd.read_csv(ORGANELLES_CSV, usecols=['image_name','object','label','volume'])
inter = pd.read_csv(INTERACTIONS_CSV, usecols=['image_name','object','label','volume'])

# Explode interactions into (image, focal class, focal label, partner class) edges
inter2 = inter.copy()
inter2['tokens'] = inter2['object'].map(_parse_tokens)
inter2['label_list'] = inter2['label'].map(_parse_labels)
# Keep only rows whose 'object' and 'label' strings describe the same number
# of participants; other rows cannot be paired position by position.
# Comparing two length Series (rather than a row-wise apply) keeps the mask a
# boolean Series even when the interactions table has no rows.
n_tokens = inter2['tokens'].map(len)
n_labels = inter2['label_list'].map(len)
inter2 = inter2[n_tokens == n_labels].reset_index(drop=True)

# Explicit column lists keep both frames well-formed (all columns present)
# even when no interaction row survives the filter above.
_edge_cols = ['image_name','focal','focal_label','partner']
edge_rows=[]; vol_rows=[]
for img, toks, labs, raw_vol in zip(inter2['image_name'], inter2['tokens'],
                                    inter2['label_list'], inter2['volume']):
    vol = float(raw_vol)
    # Every participant of the row takes a turn as the focal object.
    for i, focal in enumerate(toks):
        flab = labs[i]
        # Distinct partner classes = the class tokens at all other positions.
        uniq = sorted({t for j, t in enumerate(toks) if j != i})
        # split volume equally among distinct partners in this row
        share = vol/len(uniq) if uniq else 0.0
        for p in uniq:
            edge_rows.append({'image_name': img, 'focal': focal, 'focal_label': flab, 'partner': p})
            vol_rows.append({'image_name': img, 'focal': focal, 'focal_label': flab, 'partner': p, 'v_share': share})

edges = pd.DataFrame(edge_rows, columns=_edge_cols).drop_duplicates()
vparts = pd.DataFrame(vol_rows, columns=_edge_cols + ['v_share'])

# Per-object total volumes
obj_vol = (org.groupby(['image_name','object','label'])['volume']
             .sum().reset_index()
             .rename(columns={'object':'focal','label':'focal_label','volume':'object_volume'}))

# Flag ANY-contact
any_df = edges[['image_name','focal','focal_label']].drop_duplicates().copy()
any_df['contact_any'] = True

obj_any = obj_vol.merge(any_df, on=['image_name','focal','focal_label'], how='left')
# After the left merge the flag is True for matched objects and missing for
# the rest. notna() yields a strict boolean column without relying on the
# deprecated silent downcast that fillna(False) performs on object columns.
obj_any['contact_any'] = obj_any['contact_any'].notna()

# Restrict to desired focal classes (ER excluded as focal)
obj_any = obj_any[obj_any['focal'].isin(FOCAL_CLASSES)].reset_index(drop=True)


In [52]:

# =====================
# (1) GLOBAL per class — ANY vs NO_contact (stacked to 100%)
# =====================
# One record per (image_name, focal class): number of objects, number of
# objects flagged contact_any, and the two complementary percentages.
rows=[]
for (img, focal), sub in obj_any.groupby(['image_name','focal']):
    labels = set(sub['focal_label'])
    N_total = len(labels)
    N_any = sub[sub['contact_any']].shape[0]
    pct_any = (N_any / N_total * 100.0) if N_total>0 else 0.0
    rows.append({'image_name': img, 'focal': focal, 'N_total': N_total, 'N_any': N_any,
                 'pct_contact_any': pct_any, 'pct_NO_contact': 100.0 - pct_any})
# Explicit columns so the frame keeps its schema even when there is no group.
global_per_cell = pd.DataFrame(rows, columns=['image_name','focal','N_total','N_any',
                                              'pct_contact_any','pct_NO_contact'])

# Mean and SD across images, per focal class, ordered as in FOCAL_CLASSES.
agg1 = (global_per_cell.groupby('focal')[['pct_contact_any','pct_NO_contact']]
        .agg(['mean','std']).reset_index())
agg1.columns = ['focal','pct_contact_any_mean','pct_contact_any_sd','pct_NO_contact_mean','pct_NO_contact_sd']
agg1['focal'] = pd.Categorical(agg1['focal'], categories=FOCAL_CLASSES, ordered=True)
agg1 = agg1.sort_values('focal')

csv1 = f"{OUT_PREFIX}_GLOBAL_perClass_ANYvsNO.csv"
_ensure_dir(csv1); agg1.to_csv(csv1, index=False)

# Plot
fig, ax = plt.subplots(figsize=(10,5))
x = np.arange(len(agg1))
any_vals = agg1['pct_contact_any_mean'].to_numpy(float); any_sds = agg1['pct_contact_any_sd'].to_numpy(float)
no_vals  = agg1['pct_NO_contact_mean'].to_numpy(float);  no_sds  = agg1['pct_NO_contact_sd'].to_numpy(float)

ax.bar(x, any_vals, yerr=any_sds, capsize=4, label='ANY contact', color=COLORBLIND['ER'])
ax.bar(x, no_vals,  bottom=any_vals, yerr=no_sds, capsize=4, label='NO_contact', color=COLORBLIND['NO_contact'])

# Percentage text inside each segment; segments below 2% are left unlabelled.
for i,(a,n) in enumerate(zip(any_vals, no_vals)):
    if a>=2: ax.text(i, a/2, f"{a:.1f}%", ha='center', va='center', fontsize=9)
    if n>=2: ax.text(i, a + n/2, f"{n:.1f}%", ha='center', va='center', fontsize=9)

# Label ticks from the rows actually plotted: a class of FOCAL_CLASSES that is
# absent from the data would otherwise give a tick/label count mismatch.
ax.set_xticks(x); ax.set_xticklabels(agg1['focal'].astype(str).tolist())
ax.set_ylabel("% of objects in class (mean ± SD)")
ax.set_title(f"GLOBAL per class — ANY vs NO_contact (stacked to 100%) — {TITLE_TAG}")
ax.legend()
fig.tight_layout()
png1 = f"{OUT_PREFIX}_GLOBAL_perClass_ANYvsNO_stacked100.png"
fig.savefig(png1, dpi=300, bbox_inches='tight'); plt.close(fig)


In [53]:

# =====================
# (2) PER-Class TOTAL MASS — ANY vs NO_contact (stacked to 100%)
# =====================
# Summed object volume per (image_name, focal class), split by contact flag
# and pivoted so that each flag value becomes its own column.
sum_cell = (obj_any.groupby(['image_name','focal','contact_any'])['object_volume']
                 .sum().reset_index()
                 .pivot_table(index=['image_name','focal'], columns='contact_any', values='object_volume', aggfunc='first')
                 .reset_index().rename(columns={False:'sum_vol_NO', True:'sum_vol_ANY'}).fillna(0.0))
# If no object (or every object) in the dataset has a contact, the pivot has
# only one of the two columns; add the missing one as zero volume.
for _col in ('sum_vol_NO', 'sum_vol_ANY'):
    if _col not in sum_cell.columns:
        sum_cell[_col] = 0.0

sum_cell['sum_vol_TOTAL'] = sum_cell['sum_vol_ANY'] + sum_cell['sum_vol_NO']
sum_cell['pct_mass_ANY'] = np.where(sum_cell['sum_vol_TOTAL']>0, sum_cell['sum_vol_ANY']/sum_cell['sum_vol_TOTAL']*100.0, 0.0)
sum_cell['pct_mass_NO']  = 100.0 - sum_cell['pct_mass_ANY']

# Mean and SD across images, per focal class, ordered as in FOCAL_CLASSES.
agg2 = (sum_cell.groupby('focal')[['pct_mass_ANY','pct_mass_NO']]
        .agg(['mean','std']).reset_index())
agg2.columns = ['focal','pct_mass_ANY_mean','pct_mass_ANY_sd','pct_mass_NO_mean','pct_mass_NO_sd']
agg2['focal'] = pd.Categorical(agg2['focal'], categories=FOCAL_CLASSES, ordered=True)
agg2 = agg2.sort_values('focal')

csv2 = f"{OUT_PREFIX}_PERCLASS_TOTALMASS_ANYvsNO.csv"
_ensure_dir(csv2); agg2.to_csv(csv2, index=False)

# Plot
fig, ax = plt.subplots(figsize=(10,5))
x = np.arange(len(agg2))
any_vals = agg2['pct_mass_ANY_mean'].to_numpy(float); any_sds = agg2['pct_mass_ANY_sd'].to_numpy(float)
no_vals  = agg2['pct_mass_NO_mean'].to_numpy(float);  no_sds  = agg2['pct_mass_NO_sd'].to_numpy(float)

ax.bar(x, any_vals, yerr=any_sds, capsize=4, label='ANY contact mass', color=COLORBLIND['ER'])
ax.bar(x, no_vals,  bottom=any_vals, yerr=no_sds, capsize=4, label='NO_contact mass', color=COLORBLIND['NO_contact'])

# Percentage text inside each segment; segments below 2% are left unlabelled.
for i,(a,n) in enumerate(zip(any_vals, no_vals)):
    if a>=2: ax.text(i, a/2, f"{a:.1f}%", ha='center', va='center', fontsize=9)
    if n>=2: ax.text(i, a + n/2, f"{n:.1f}%", ha='center', va='center', fontsize=9)

# Label ticks from the rows actually plotted: a class of FOCAL_CLASSES that is
# absent from the data would otherwise give a tick/label count mismatch.
ax.set_xticks(x); ax.set_xticklabels(agg2['focal'].astype(str).tolist())
ax.set_ylabel("% of class total volume (mean ± SD)")
ax.set_title(f"PER-Class TOTAL MASS — ANY vs NO_contact (stacked to 100%) — {TITLE_TAG}")
ax.legend()
fig.tight_layout()
png2 = f"{OUT_PREFIX}_PERCLASS_TOTALMASS_ANYvsNO_stacked100.png"
fig.savefig(png2, dpi=300, bbox_inches='tight'); plt.close(fig)


In [54]:

# =====================
# (3) Per class object VOLUME — ANY vs NO_contact (mean of per-cell means ± SD)
# =====================
# Mean object volume per (image_name, focal class), split by contact flag.
# A group with no object in one category keeps NaN for that category: its mean
# volume is undefined, and filling it with 0 would drag the mean of per-cell
# means towards zero and inflate the SD. mean()/std() below skip NaN.
percell_means = (obj_any.groupby(['image_name','focal','contact_any'])['object_volume']
                          .mean().reset_index()
                          .pivot_table(index=['image_name','focal'], columns='contact_any', values='object_volume', aggfunc='first')
                          .reset_index().rename(columns={False:'mean_vol_NO', True:'mean_vol_ANY'}))
# If no object (or every object) in the dataset has a contact, the pivot has
# only one of the two columns; add the missing one as all-undefined.
for _col in ('mean_vol_NO', 'mean_vol_ANY'):
    if _col not in percell_means.columns:
        percell_means[_col] = np.nan

# Mean and SD across images, per focal class, ordered as in FOCAL_CLASSES.
agg3 = (percell_means.groupby('focal')[['mean_vol_ANY','mean_vol_NO']]
               .agg(['mean','std']).reset_index())
agg3.columns = ['focal','mean_vol_ANY_mean','mean_vol_ANY_sd','mean_vol_NO_mean','mean_vol_NO_sd']
agg3['focal'] = pd.Categorical(agg3['focal'], categories=FOCAL_CLASSES, ordered=True)
agg3 = agg3.sort_values('focal')

csv3 = f"{OUT_PREFIX}_PERCLASS_ObjectVOLUME_ANYvsNO_meanOfCellMeans.csv"
_ensure_dir(csv3); agg3.to_csv(csv3, index=False)

# Plot grouped
fig, ax = plt.subplots(figsize=(10,5))
x = np.arange(len(agg3)); width=0.35
b1 = ax.bar(x - width/2, agg3['mean_vol_ANY_mean'], width, yerr=agg3['mean_vol_ANY_sd'], capsize=4, label='ANY contact')
b2 = ax.bar(x + width/2, agg3['mean_vol_NO_mean'],  width, yerr=agg3['mean_vol_NO_sd'],  capsize=4, label='NO_contact')
# Value label on top of every bar with a positive height (NaN bars are skipped
# because the comparison is False for NaN).
for b in list(b1) + list(b2):
    h=b.get_height()
    if h>0: ax.text(b.get_x()+b.get_width()/2, h, f"{h:.1f}", ha='center', va='bottom', fontsize=9)

# Label ticks from the rows actually plotted: a class of FOCAL_CLASSES that is
# absent from the data would otherwise give a tick/label count mismatch.
ax.set_xticks(x); ax.set_xticklabels(agg3['focal'].astype(str).tolist())
ax.set_ylabel("Object volume (mean of per-cell means)")
ax.set_title(f"Per class object VOLUME — ANY vs NO_contact (mean of per-cell means ± SD) — {TITLE_TAG}")
ax.legend()
fig.tight_layout()
png3 = f"{OUT_PREFIX}_PERCLASS_ObjectVOLUME_ANYvsNO_meanOfCellMeans.png"
fig.savefig(png3, dpi=300, bbox_inches='tight'); plt.close(fig)


In [55]:

# =====================
# (4) Contact-only (COUNTS) — partner prevalence by class (sum can exceed 100%)
# =====================
# Index the edges once by (image_name, focal class) instead of re-filtering
# the whole edge table for every group.
edges_by_cell = {key: grp for key, grp in edges.groupby(['image_name','focal'])}
no_edges = edges.iloc[0:0]

counts_rows=[]
for (img, focal), sub in obj_any.groupby(['image_name','focal']):
    labels = set(sub['focal_label'])
    ecell = edges_by_cell.get((img, focal), no_edges)
    any_labels = set(ecell['focal_label'].unique().tolist())
    # Denominator: objects of this class that appear in at least one edge.
    N_contact = len(labels & any_labels)
    row = {'image_name': img, 'focal': focal, 'N_contact': N_contact}
    for p in PARTNERS:
        lab_with_p = set(ecell[ecell['partner']==p]['focal_label'].unique().tolist())
        N_with_p = len(labels & lab_with_p)
        # With no contacting object the percentage is undefined. NaN keeps
        # such groups out of the mean/SD below instead of counting them as 0%
        # (the degree analysis likewise only covers contacting objects).
        row[f'pct_with_{p}_contactOnly'] = (N_with_p / N_contact * 100.0) if N_contact>0 else np.nan
    counts_rows.append(row)

counts_per_cell = pd.DataFrame(counts_rows)
# Mean and SD across images, per focal class, ordered as in FOCAL_CLASSES.
mean_counts = counts_per_cell.groupby('focal')[[f'pct_with_{p}_contactOnly' for p in PARTNERS]].mean().reset_index()
sd_counts   = counts_per_cell.groupby('focal')[[f'pct_with_{p}_contactOnly' for p in PARTNERS]].std(ddof=1).reset_index()
sd_counts = sd_counts.rename(columns={c: f"{c}__sd" for c in sd_counts.columns if c!='focal'})
agg4 = mean_counts.merge(sd_counts, on='focal', how='left')
agg4['focal'] = pd.Categorical(agg4['focal'], categories=FOCAL_CLASSES, ordered=True)
agg4 = agg4.sort_values('focal')

csv4 = f"{OUT_PREFIX}_CONTACTONLY_counts_partnerPrevalence_byClass.csv"
_ensure_dir(csv4); agg4.to_csv(csv4, index=False)

# Plot stacked (sum may exceed 100)
fig, ax = plt.subplots(figsize=(12,6))
x = np.arange(len(agg4)); bottom = np.zeros(len(agg4))
for p in PARTNERS:
    col = f'pct_with_{p}_contactOnly'; sdc = f"{col}__sd"
    vals = agg4[col].to_numpy(float)
    sds  = agg4.get(sdc, pd.Series([np.nan]*len(agg4))).to_numpy(float)
    bars = ax.bar(x, vals, bottom=bottom, yerr=sds, capsize=3, label=p, color=COLORBLIND.get(p,'#999999'))
    # Percentage text inside each segment; segments below 3% are unlabelled.
    for i,b in enumerate(bars):
        h=b.get_height()
        if h>=3:
            ax.text(b.get_x()+b.get_width()/2, bottom[i]+h/2, f"{h:.0f}%", ha='center', va='center', fontsize=8)
    # Undefined (NaN) segments contribute zero height to the running stack.
    bottom += np.nan_to_num(vals)

# Label ticks from the rows actually plotted: a class of FOCAL_CLASSES that is
# absent from the data would otherwise give a tick/label count mismatch.
ax.set_xticks(x); ax.set_xticklabels(agg4['focal'].astype(str).tolist())
ax.set_ylabel("% of contacting objects (mean ± SD)")
ax.set_title(f"Contact-only (COUNTS) — partner prevalence by class — {TITLE_TAG}\nDenominator: objects with ≥1 contact (sum across partners can exceed 100%)")
ax.legend(ncol=6, fontsize=8)
fig.tight_layout()
png4 = f"{OUT_PREFIX}_CONTACTONLY_counts_partnerPrevalence_byClass.png"
fig.savefig(png4, dpi=300, bbox_inches='tight'); plt.close(fig)


In [56]:

# =====================
# (5) Contact-only — DEGREE distribution per class (stacked to 100%)
# =====================
# Degree of an object = number of distinct partner classes it touches.
# Collect, per (image_name, focal class, focal label), the set of partners.
degree_map = defaultdict(set)
for rec in edges.itertuples(index=False):
    if rec.focal in FOCAL_CLASSES and rec.partner in PARTNERS:
        degree_map[(rec.image_name, rec.focal, rec.focal_label)].add(rec.partner)

deg_rows = [
    {'image_name': img, 'focal': focal, 'focal_label': flab, 'degree': len(pset)}
    for (img, focal, flab), pset in degree_map.items()
    if len(pset) > 0
]
deg_df = pd.DataFrame(deg_rows)

degree_per_cell = pd.DataFrame(); agg5 = pd.DataFrame(); png5 = None
if not deg_df.empty:
    # Degrees 1..cap get their own bin; anything above cap is pooled.
    max_deg = int(deg_df['degree'].max()); cap = 5
    bins = list(range(1, min(max_deg, cap)+1)); use_5plus = max_deg > cap

    # Per (image_name, focal class): share of contacting objects in each bin.
    cell_rows=[]
    for (img, focal), sub in deg_df.groupby(['image_name','focal']):
        counts = sub['degree'].value_counts()
        total = counts.sum()
        rec={'image_name': img, 'focal': focal}
        for k in bins:
            rec[f'pct_deg_{k}'] = counts.get(k, 0) / total * 100.0 if total>0 else 0.0
        if use_5plus:
            rec['pct_deg_5plus'] = counts[counts.index > cap].sum() / total * 100.0 if total>0 else 0.0
        cell_rows.append(rec)
    degree_per_cell = pd.DataFrame(cell_rows).fillna(0.0)

    deg_cols = [c for c in degree_per_cell.columns if c.startswith('pct_deg_')]
    mean_df = degree_per_cell.groupby('focal')[deg_cols].mean().reset_index()
    sd_df   = degree_per_cell.groupby('focal')[deg_cols].std(ddof=1).reset_index().rename(columns={c:f"{c}__sd" for c in deg_cols})
    agg5 = mean_df.merge(sd_df, on='focal', how='left')
    # groupby returns the classes in alphabetical order; reorder them as in
    # FOCAL_CLASSES (as the other sections do) so bars and tick labels agree.
    agg5['focal'] = pd.Categorical(agg5['focal'], categories=FOCAL_CLASSES, ordered=True)
    agg5 = agg5.sort_values('focal')

    # Plot stacked 100%
    fig, ax = plt.subplots(figsize=(12,6))
    x = np.arange(len(agg5)); bottom = np.zeros(len(agg5))
    plot_cols = [f'pct_deg_{k}' for k in bins] + (['pct_deg_5plus'] if use_5plus else [])
    display_names = [c.replace('pct_deg_','degree=') for c in plot_cols]

    for c, disp in zip(plot_cols, display_names):
        vals = agg5[c].to_numpy(float)
        sds  = agg5.get(f"{c}__sd", pd.Series([np.nan]*len(agg5))).to_numpy(float)
        bars = ax.bar(x, vals, bottom=bottom, yerr=sds, capsize=3, label=disp)
        # Percentage text inside each segment; segments below 3% are unlabelled.
        for i,b in enumerate(bars):
            h=b.get_height()
            if h>=3:
                ax.text(b.get_x()+b.get_width()/2, bottom[i]+h/2, f"{h:.0f}%", ha='center', va='center', fontsize=8)
        bottom += np.nan_to_num(vals)

    # Tick labels come from the plotted rows, not from FOCAL_CLASSES directly,
    # so each bar is named after the class whose data it shows and a class
    # without any contacting object cannot cause a count mismatch.
    ax.set_xticks(x); ax.set_xticklabels(agg5['focal'].astype(str).tolist())
    ax.set_ylabel("% of contacting objects (mean ± SD)")
    ax.set_title(f"Contact-only — DEGREE distribution per class (stacked to 100%) — {TITLE_TAG}")
    ax.legend(ncol=6, fontsize=8)
    fig.tight_layout()
    png5 = f"{OUT_PREFIX}_CONTACTONLY_degree_byClass_stacked100.png"
    fig.savefig(png5, dpi=300, bbox_inches='tight'); plt.close(fig)


In [57]:

# =====================
# Write ONE Excel with all per-cell and aggregated tables
# =====================
# Make sure the target directory exists, as is done before each CSV write, so
# this cell does not depend on an earlier cell having created it.
_ensure_dir(OUT_XLSX)

# Sheets that are always written, in workbook order: (sheet name, table).
_required_sheets = [
    # (1) GLOBAL
    ("GLOBAL_ANYvsNO_per_cell", global_per_cell),
    ("GLOBAL_ANYvsNO_agg", agg1),
    # (2) TOTAL MASS
    ("TOTALMASS_ANYvsNO_per_cell", sum_cell),
    ("TOTALMASS_ANYvsNO_agg", agg2),
    # (3) Object VOLUME per-object, per-cell means
    ("ObjVOL_ANYvsNO_percell", percell_means),
    ("ObjVOL_ANYvsNO_agg", agg3),
    # (4) Contact-only counts partner
    ("CONTACTONLY_counts_percell", counts_per_cell),
    ("CONTACTONLY_counts_agg", agg4),
]
# (5) Degree tables are written only if they are defined and non-empty:
# (sheet name, name of the global variable holding the table).
_optional_sheets = [
    ("DEGREE_percell", 'degree_per_cell'),
    ("DEGREE_agg", 'agg5'),
]

with excel_writer(OUT_XLSX) as xw:
    for _sheet, _table in _required_sheets:
        _table.to_excel(xw, sheet_name=_sheet, index=False)
    for _sheet, _var in _optional_sheets:
        if _var in globals() and not globals()[_var].empty:
            globals()[_var].to_excel(xw, sheet_name=_sheet, index=False)

# Bare expression: shows the workbook path as the notebook cell output.
OUT_XLSX


'J:/Cohen Lab/Maria Clara/2_Lab data/9_Napari/OUTPUT new code/Contact analysis v2/iNday14_neurit_contact_analysis_v2.xlsx'