# Mean nucleus value analysis

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
import re
import matplotlib.pyplot as plt

In [2]:
def base_name_from_csv(filename: str) -> str:
    """Return the base name of a CSV file name.

    Two trailing pieces are stripped, in this order:
    1. a ``.csv`` extension;
    2. a ``_<digits>-<digits>`` suffix (e.g. ``_0262-0212``), if present.

    Parameters
    ----------
    filename : str
        File name (not a full path), e.g. ``"sample_0262-0212.csv"``.

    Returns
    -------
    str
        The name with the extension and numeric suffix removed; names that
        match neither pattern are returned unchanged.
    """
    stem = filename
    # Order matters: the numeric suffix is only at the end once ".csv" is gone.
    for trailing_pattern in (r"\.csv$", r"_\d+-\d+$"):
        stem = re.sub(trailing_pattern, "", stem)
    return stem

In [3]:
# Input folders for the analysis. Both are absolute, machine-specific paths:
# edit them before running this notebook on another computer or dataset.
path1 = "/mnt/c/Users/Elena/Desktop/Data_processing/020226_U2OS2_fixed_MGS1" # folder with the nucleus CSV files (their "Area" and "Mean" columns are read as nucleus area and MFI)
path2 = "/mnt/c/Users/Elena/Desktop/Data_processing/020226_U2OS2_fixed_MGS1/res8" # folder with the foci CSV files (rows are counted as foci and "intensity [photon]" is averaged per file); final.csv is also written here

In [4]:
# ---- NUCLEI TABLE (path1) ----
# Every CSV directly inside path1 is read; its "Area" and "Mean" columns become
# Nucleus_area and Nucleus_MFI, and each row is tagged with a key derived from
# the file name so it can be matched to the foci summary below.
dir_path1 = Path(path1)
files1 = sorted(dir_path1.glob("*.csv"))
if not files1:
    # pd.concat([]) would otherwise fail with "No objects to concatenate".
    raise FileNotFoundError(f"No nucleus CSV files found in {dir_path1}")

dfs1 = []
for f in files1:
    key = base_name_from_csv(f.name)
    # Drop the last 4 characters so the nucleus key lines up with the foci key.
    # NOTE(review): which 4-character suffix this removes is not visible in the
    # notebook — confirm against the actual file naming scheme.
    key = key[:-4]
    df = pd.read_csv(f)
    df["File_name"] = key
    dfs1.append(df)

final = pd.concat(dfs1, ignore_index=True)
final = final.rename(columns={"Area": "Nucleus_area", "Mean": "Nucleus_MFI"})
final = final[["File_name", "Nucleus_area", "Nucleus_MFI"]]

# ---- FOCI SUMMARY (path2) ----
dir_path2 = Path(path2)
output_file = dir_path2 / "final.csv"
# The export at the end of this cell lands in the folder globbed here; skip it
# so a re-run does not read the previous output as if it were a foci table.
files2 = [f for f in sorted(dir_path2.glob("*.csv")) if f.name != output_file.name]
foci_rows = []
for f in files2:
    key = base_name_from_csv(f.name)
    df = pd.read_csv(f)
    has_intensity = "intensity [photon]" in df.columns
    # One summary row per file: number of rows (foci) + mean intensity.
    foci_rows.append({
        "File_name": key,
        "Foci_number": int(df.shape[0]),
        # np.nan (not pd.NA) keeps the column a plain float column.
        "Foci_MFI": float(df["intensity [photon]"].mean()) if has_intensity else np.nan,
    })

# Explicit columns so the merge key exists even when no foci file was found.
foci_summary = pd.DataFrame(foci_rows, columns=["File_name", "Foci_number", "Foci_MFI"])

# ---- MERGE ----
# Left join: every nucleus row is kept; nuclei without a foci file get NaN.
final = final.merge(foci_summary, on="File_name", how="left")
for col in ("Foci_number", "Foci_MFI"):
    final[col] = pd.to_numeric(final[col], errors="coerce")

# Make silent key mismatches visible instead of only showing up as NaN.
unmatched = final.loc[final["Foci_number"].isna(), "File_name"].unique()
if len(unmatched):
    print(f"Warning: no foci CSV matched {len(unmatched)} nucleus key(s): {list(unmatched)}")

# Export file
final.to_csv(output_file, index=False)

In [5]:
# Display the merged table: one row per nucleus measurement, with the foci
# summary of the matching file attached.
final

Unnamed: 0,File_name,Nucleus_area,Nucleus_MFI,Foci_number,Foci_MFI
0,C2_MP_U2OS_fixed_20nMJF549_ORC1_MGS1.nd2_(seri...,355.54,94.377,31,3393.089074
1,C2_MP_U2OS_fixed_20nMJF549_ORC1_MGS1.nd2_(seri...,345.568,48.913,31,4159.511457
2,C2_MP_U2OS_fixed_20nMJF549_ORC1_MGS1.nd2_(seri...,385.315,34.386,11,5125.034442
3,C2_MP_U2OS_fixed_20nMJF549_ORC1_MGS1.nd2_(seri...,253.089,30.647,16,3953.589222
4,C2_MP_U2OS_fixed_20nMJF549_ORC1_MGS1.nd2_(seri...,443.101,24.777,21,4454.09199
5,C2_MP_U2OS_fixed_20nMJF549_ORC1_MGS1.nd2_(seri...,210.428,31.383,21,4159.06139
6,C2_MP_U2OS_fixed_20nMJF549_ORC1_MGS1.nd2_(seri...,218.375,86.691,22,2990.766335
7,C2_MP_U2OS_fixed_20nMJF549_ORC1_MGS1.nd2_(seri...,375.486,24.418,10,10886.74314
8,C2_MP_U2OS_fixed_20nMJF549_ORC1_MGS1.nd2_(seri...,470.805,134.282,14,5976.578689
9,C2_MP_U2OS_fixed_20nMJF549_ORC1_MGS1.nd2_(seri...,335.651,63.387,31,4029.130603


# Boxplots

In [None]:
# One box plot per numeric column of `final`, side by side, with the individual
# data points overlaid as jittered dots.
numeric_cols = final.select_dtypes(include="number").columns
n = len(numeric_cols)
if n == 0:
    raise ValueError("`final` has no numeric columns to plot")

# Seeded generator so the horizontal jitter (and thus the figure) is the same
# on every run instead of depending on the global NumPy random state.
rng = np.random.default_rng(0)

fig, axes = plt.subplots(
    nrows=1,
    ncols=n,
    figsize=(4 * n, 4),   # controls size (smaller plots)
    sharey=False,
    squeeze=False,        # always a 2-D array, even for a single column
)
axes = axes.ravel()

for ax, column_name in zip(axes, numeric_cols):
    # Boxplot (outliers hidden: every point is drawn by the overlay below)
    final.boxplot(
        column=column_name,
        showfliers=False,
        ax=ax
    )

    # Jittered dots, centred on x=1 where the single box is drawn
    y = final[column_name].dropna().values
    x = rng.normal(loc=1, scale=0.04, size=len(y))
    ax.plot(x, y, "o", alpha=0.6, markersize=4)

    # Labels
    label = column_name.replace("_", " ")
    ax.set_ylabel(label)
    ax.set_xlabel("")
    ax.set_xticks([])
    ax.set_title(label, fontsize=10)

plt.tight_layout()
plt.show()


# Spearman correlation coefficient

In [None]:
# Pairwise Spearman rank correlation between all numeric columns of `final`.
cols = final.select_dtypes(include="number").columns

# Symmetric matrices of coefficients / p-values; entries stay NaN on the
# diagonal and for pairs with too few complete observations.
corr = pd.DataFrame(index=cols, columns=cols, dtype=float)
pvals = pd.DataFrame(index=cols, columns=cols, dtype=float)

pairs = []
for i, c1 in enumerate(cols):
    for c2 in cols[i+1:]:
        # Read from `final`, the merged table; `df` is only a leftover loop
        # variable from the loading cell and holds a single raw CSV.
        x, y = final[c1], final[c2]
        # Use only rows where both variables are present.
        mask = x.notna() & y.notna()
        n = int(mask.sum())
        if n > 2:
            r, p = spearmanr(x[mask], y[mask])
            pairs.append({"var1": c1, "var2": c2, "n": n, "spearman_r": r, "p_value": p})
            corr.loc[c1, c2] = corr.loc[c2, c1] = r
            pvals.loc[c1, c2] = pvals.loc[c2, c1] = p

# Explicit columns so an empty result still has a header when exported.
pairs_df = pd.DataFrame(pairs, columns=["var1", "var2", "n", "spearman_r", "p_value"])

# Written to the current working directory (not to path2).
pairs_df.to_csv("spearman_pairs.csv", index=False)


In [21]:
# Display the pairwise Spearman results: one row per pair of numeric columns
# that had more than two complete observations.
pairs_df

Unnamed: 0,var1,var2,n,spearman_r,p_value
0,Nucleus_area,Nucleus_MFI,24,-0.086957,0.686195
1,Nucleus_area,Foci_number,24,-0.286215,0.175139
2,Nucleus_area,Foci_MFI,24,0.563478,0.00414
3,Nucleus_MFI,Foci_number,24,0.198518,0.352413
4,Nucleus_MFI,Foci_MFI,24,-0.286957,0.173974
5,Foci_number,Foci_MFI,24,-0.529672,0.00777


In [None]:
# Scatter plot of two columns of `final` against each other
# (col1 on the x-axis, col2 on the y-axis).
col1 = "Foci_MFI"
col2 = "Foci_number"

# New figure with a single Axes, drawn on explicitly instead of through the
# pyplot "current axes" state.
scatter_ax = plt.figure().add_subplot()
scatter_ax.scatter(final[col1], final[col2], alpha=0.6)
scatter_ax.set_xlabel(col1)
scatter_ax.set_ylabel(col2)
plt.tight_layout()
plt.show()