In [3]:
import os
import pandas as pd

def analyze_redsea_combined(parent_folder, top_n=20):
    """
    Produces one combined table of marker-level changes across all folders.
    Prints directly in notebook.
    """

    all_rows = []

    for folder_name in sorted(os.listdir(parent_folder)):
        sub_path = os.path.join(parent_folder, folder_name)

        if folder_name.startswith(".") or not os.path.isdir(sub_path):
            continue

        before_file = os.path.join(sub_path, "single_cell_before_redsea.csv")
        after_file  = os.path.join(sub_path, "single_cell_after_redsea.csv")

        if not (os.path.exists(before_file) and os.path.exists(after_file)):
            continue

        before = pd.read_csv(before_file)
        after  = pd.read_csv(after_file)

        metadata_cols = {"CellID", "cell_size", "x_centroid", "y_centroid"}
        marker_cols = [c for c in before.columns if c not in metadata_cols]

        for marker in marker_cols:
            mean_before = before[marker].mean()
            mean_after  = after[marker].mean()
            mean_change = mean_after - mean_before

            pct_before = (before[marker] > 0).mean() * 100
            pct_after  = (after[marker] > 0).mean() * 100
            pct_change = pct_after - pct_before

            all_rows.append({
                "Folder": folder_name,
                "Marker": marker,
                "MeanBefore": mean_before,
                "MeanAfter": mean_after,
                "MeanChange": mean_change,
                "PctBefore(>0)": pct_before,
                "PctAfter(>0)": pct_after,
                "PctChange": pct_change
            })

    # Combine all folders
    df = pd.DataFrame(all_rows)

    # Aggregate by marker (average change across folders)
    combined = (
        df.groupby("Marker")
          .agg({
              "MeanChange": "mean",
              "PctChange": "mean"
          })
          .reset_index()
    )

    combined["AbsMeanChange"] = combined["MeanChange"].abs()
    combined["AbsPctChange"]  = combined["PctChange"].abs()

    # Sort by magnitude of change
    combined_sorted = combined.sort_values(
        by=["AbsMeanChange", "AbsPctChange"],
        ascending=False
    ).head(top_n)

    print("ðŸ“Š Combined REDSEA Changes Across All Folders")
    display(combined_sorted[["Marker", "MeanChange", "PctChange"]])

    return combined_sorted  # in case you want to use programmatically


In [4]:
parent_folder = "/mnt/jwh83-data/Confetti/output/Redsea/MicroSAM/"
analyze_redsea_combined(parent_folder, top_n=10)


ðŸ“Š Combined REDSEA Changes Across All Folders


Unnamed: 0,Marker,MeanChange,PctChange
25,CD49a,720.11023,-7.273391
39,Cytokeratin,407.424785,-7.034186
10,CD163,320.698398,-14.980736
19,CD36,317.886336,-11.77774
23,CD45,306.322227,-8.474097
43,Hoechst1,306.136116,-0.823138
34,CD90,303.952548,-12.939004
38,CollIV,270.50963,-12.747162
59,aSMA,242.784022,-5.330299
17,CD31,242.075235,-20.662636


Unnamed: 0,Marker,MeanChange,PctChange,AbsMeanChange,AbsPctChange
25,CD49a,720.11023,-7.273391,720.11023,7.273391
39,Cytokeratin,407.424785,-7.034186,407.424785,7.034186
10,CD163,320.698398,-14.980736,320.698398,14.980736
19,CD36,317.886336,-11.77774,317.886336,11.77774
23,CD45,306.322227,-8.474097,306.322227,8.474097
43,Hoechst1,306.136116,-0.823138,306.136116,0.823138
34,CD90,303.952548,-12.939004,303.952548,12.939004
38,CollIV,270.50963,-12.747162,270.50963,12.747162
59,aSMA,242.784022,-5.330299,242.784022,5.330299
17,CD31,242.075235,-20.662636,242.075235,20.662636
