In [None]:
# This script combines the results from the cluster nodes and creates the final metric reported in the manuscript. It assumes that the individual CSV files have already been generated by the experiment scripts.

import os, glob
import numpy as np
import pandas as pd

# === Setup directory ===
results_dir = "/home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration"  # <-- UPDATE THIS PATH
pattern = os.path.join(results_dir, "reconstruction_*_lam*_rho*.csv")
all_csvs = sorted(glob.glob(pattern))
# Files whose name contains "_iters" also match the pattern but are not aggregated here.
csv_files = [fn for fn in all_csvs if "_iters" not in os.path.basename(fn)]
if not csv_files:
    # Fail early rather than silently overwriting the combined CSV with an empty table.
    raise FileNotFoundError(f"No result CSVs matched: {pattern}")

N_TRIALS = 50  # used below to turn the stored std into a standard error: std / sqrt(N_TRIALS)
EPS = 0.0  # if your metric must be nonnegative, keep 0.0; change if needed

# Display labels (Unicode) for the raw `setting` values; any other value is kept as-is.
setting_labels = {"std": "γ₂ = 0.25", "sparse": "γ₂ = 0.05"}

# Mean-error column -> (modality, method).
# Only MEAN columns belong here: the matching "<column>_std" column is looked up
# automatically inside the loop. Listing a "*_std" column as a key would emit its
# std as if it were a mean and duplicate the (modality, method) records.
method_map = {
    "E1_joint": ("Modality 1", "Joint AMP"),
    "E2_joint": ("Modality 2", "Joint AMP"),
    "E3_joint": ("Modality 3", "Joint AMP"),
    "E1_sep":   ("Modality 1", "Separate AMP"),
    "E2_sep":   ("Modality 2", "Separate AMP"),
    "E3_sep":   ("Modality 3", "Separate AMP"),
    "E1_svd":   ("Modality 1", "SVD"),
    "E2_svd":   ("Modality 2", "SVD"),
    "E3_svd":   ("Modality 3", "SVD"),
    "E2ab_joint": ("Modality 2 first two cols", "Joint AMP"),
    "E2ab_sep":   ("Modality 2 first two cols", "Separate AMP"),
    "E2ab_svd":   ("Modality 2 first two cols", "SVD"),
}

records = []

for file in csv_files:
    df = pd.read_csv(file)

    for idx, row in df.iterrows():
        setting = setting_labels.get(row["setting"], row["setting"])
        lam = float(row["lambda"])
        rho = float(row["rho"])

        for base_col, (modality, method) in method_map.items():
            # Per-modality normalization: "Modality 2" is divided by 3, every other modality by 2.
            # NOTE(review): presumably the number of columns of each modality — confirm.
            divisor = 3 if modality == "Modality 2" else 2

            # Mean reconstruction error on ORIGINAL scale (no log)
            mean_val = row[base_col] / divisor

            # Convert the stored std (if present and not NaN) to a standard error
            std_col = base_col + "_std"
            if std_col in df.columns and pd.notna(row[std_col]):
                std_val = row[std_col] / divisor
                se_val = std_val / np.sqrt(N_TRIALS)
                # ±2 SE bands on ORIGINAL scale; the lower bound is clipped at EPS
                ci_lb = max(mean_val - 2 * se_val, EPS)
                ci_ub = mean_val + 2 * se_val
            else:
                se_val = np.nan
                ci_lb = np.nan
                ci_ub = np.nan

            records.append({
                "Modality": modality,
                "Method": method,
                "lam": lam,
                "rho": rho,
                "setting": setting,
                "rec_error_mean": mean_val,
                "rec_error_se": se_val,
                "rec_error_ci_lb": ci_lb,
                "rec_error_ci_ub": ci_ub,
            })

# === Create final DataFrame ===
final_df = pd.DataFrame(records)

# === Save to CSV ===
out_path = os.path.join(results_dir, "combined_reconstruction_errors_original_scale_with_SE_new_set_up.csv")
final_df.to_csv(out_path, index=False)
print(f"Saved: {out_path}")


Saved: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/combined_reconstruction_errors_original_scale_with_SE_new_set_up.csv


In [2]:
import os
import pandas as pd
from typing import List

# ======= USER CONFIG =======
# Combined results CSV to read; every output of this cell is written next to it.
combined_csv = "/home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/combined_reconstruction_errors_original_scale_with_SE_new_set_up.csv"  # <-- UPDATE THIS PATH
out_dir = os.path.dirname(combined_csv)

# Table layout
RHO_ORDER: List[float] = [0.8, 0.85, 0.9, 0.95, 1.0]  # rho values shown as columns, left to right
LAMBDA_ORDER: List[float] | None = None                # lambda row groups; None = auto-detect & sort
METHOD_ORDER = ["Joint AMP", "Separate AMP", "SVD"]    # sub-rows inside each lambda group
PANELS = ["γ₂ = 0.05", "γ₂ = 0.25"]                    # top panel first, then bottom panel

# Number formatting
MEAN_FMT = "{:.6f}"   # mean to SIX decimals
SE_FMT   = "{:.6f}"   # standard error, same precision

# LaTeX compactness knobs
FONTSIZE_CMD = "\\scriptsize"  # or \\footnotesize
TABCOLSEP_PT = 3      # smaller = tighter columns
ARRAY_STRETCH = 0.9   # <1 = tighter rows

# ======= LOAD =======
# Cast the two grid columns to float so they can be compared with RHO_ORDER / LAMBDA_ORDER.
df = pd.read_csv(combined_csv).astype({"lam": float, "rho": float})

def fmt_cell(mean: float, se: float, bold: bool) -> str:
    """Format one table cell as ``mean (se)``.

    The two numbers are rendered with the module-level ``MEAN_FMT`` and
    ``SE_FMT`` templates. When ``bold`` is true the whole cell is wrapped in
    a LaTeX ``\\textbf{...}``.
    """
    text = MEAN_FMT.format(mean) + " (" + SE_FMT.format(se) + ")"
    if bold:
        return "\\textbf{" + text + "}"
    return text

def panel_block(df_mod: pd.DataFrame, setting: str,
                lambda_order: List[float], rho_order: List[float]) -> str:
    """Render the LaTeX rows of one panel (one ``setting``) of a modality table.

    Parameters
    ----------
    df_mod : pd.DataFrame
        Rows to draw from; the columns "setting", "lam", "rho", "Method",
        "rec_error_mean" and "rec_error_se" are read.
    setting : str
        Panel label; only rows whose "setting" equals it are used.
    lambda_order : list of float
        Lambda values, one three-row group each, in output order.
    rho_order : list of float
        Rho values, one table column each, in output order.

    Returns
    -------
    str
        Newline-joined LaTeX lines (panel title, header row, then one row per
        lambda/method), or "" when no row matches ``setting``.

    Within each (lambda, rho) cell group the method with the lowest mean is
    passed to ``fmt_cell`` with ``bold=True``. A missing combination is
    printed as "--".
    """
    panel = df_mod[df_mod["setting"] == setting].copy()
    if panel.empty:
        return ""  # nothing to print for this panel

    def select(lam_value, rho_value, method_name):
        # All rows of one (lambda, rho, method) cell; only the first one is used.
        at_point = panel[(panel["lam"] == lam_value) & (panel["rho"] == rho_value)]
        return at_point[at_point["Method"] == method_name]

    # Panel title spanning every column, then the header row (blank corner + rho labels).
    n_columns = 1 + len(rho_order)
    rho_headers = [f"$\\rho={r:g}$" for r in rho_order]
    out: List[str] = [
        "\\midrule",
        "\\multicolumn{" + f"{n_columns}" + "}{l}{\\textit{Setting: " + f"{setting}" + "}}\\\\",
        "\\midrule",
        " & ".join([" "] + rho_headers) + " \\\\",
        "\\midrule",
    ]

    for lam in lambda_order:
        # Position (within METHOD_ORDER) of the lowest mean for every rho column.
        # Missing methods count as +inf so they can never win against real data.
        winners = []
        for rho in rho_order:
            candidate_means = []
            for name in METHOD_ORDER:
                hit = select(lam, rho, name)
                if hit.empty:
                    candidate_means.append(float("inf"))
                else:
                    candidate_means.append(float(hit["rec_error_mean"].values[0]))
            winners.append(int(pd.Series(candidate_means).idxmin()))

        # One output row per method.
        for position, method in enumerate(METHOD_ORDER):
            if position == 0:
                # First row of the group: multirow cell holding the lambda label and the method name.
                lead = "\\multirow{3}{*}{\\(\\lambda=" + f"{lam:g}" + "\\)\\\\ " + f"{method}" + "}"
            else:
                lead = method

            cells = [lead]
            for column, rho in enumerate(rho_order):
                hit = select(lam, rho, method)
                if hit.empty:
                    cells.append("--")
                    continue
                cells.append(fmt_cell(
                    float(hit["rec_error_mean"].values[0]),
                    float(hit["rec_error_se"].values[0]),
                    bold=(winners[column] == position),
                ))
            out.append(" & ".join(cells) + " \\\\")
        out.append("\\addlinespace[2pt]")  # small gap between lambda blocks

    return "\n".join(out)

def make_landscape_table(df: pd.DataFrame, modality: str) -> str:
    """Build the full landscape LaTeX table for one modality.

    Rows of ``df`` whose "Modality" equals ``modality`` are rendered as one
    ``panel_block`` per entry of ``PANELS``, wrapped in a ``tabular`` inside
    ``table`` inside ``landscape``, followed by a caption and a label.

    Row groups follow ``LAMBDA_ORDER`` (or the sorted lambdas found in the data
    when it is None); columns are the entries of ``RHO_ORDER`` that occur in
    the data.

    Returns the LaTeX source as one string, or "" when the modality has no
    rows or none of the panels produced any output.
    """
    rows = df[df["Modality"] == modality].copy()
    if rows.empty:
        return ""

    # Row-group and column orders.
    if LAMBDA_ORDER is None:
        lambda_order = sorted(rows["lam"].unique().tolist())
    else:
        lambda_order = LAMBDA_ORDER
    present_rhos = set(rows["rho"].unique())
    rho_order = [r for r in RHO_ORDER if r in present_rhos]

    # Panels that actually have data, in PANELS order.
    panels: List[str] = []
    for setting in PANELS:
        block = panel_block(rows, setting, lambda_order, rho_order)
        if block:
            panels.append(block)

    # 1 left-aligned label column + one centered column per rho, no outer padding.
    column_spec = "@{}l" + len(rho_order) * "c" + "@{}"

    caption = (
        f"Reconstruction error (SE) for {modality} with columns indexed by $\\rho$ "
        "and rows grouped by $\\lambda$ (three sub-rows: Joint AMP, Separate AMP, SVD). "
        "Best (lowest mean) bolded. Top panel $\\gamma_2=0.05$, bottom panel $\\gamma_2=0.25$."
    )
    label = "tab:" + modality.lower().replace(" ", "_") + "_lambda_by_rho"

    if not panels:
        return ""

    opening = [
        "% Requires: \\usepackage{booktabs,multirow,pdflscape}",
        "\\begin{landscape}",
        "\\begin{table}[p]",
        "\\centering",
        FONTSIZE_CMD,
        "\\setlength{\\tabcolsep}{" + f"{TABCOLSEP_PT}" + "pt}",
        "\\renewcommand{\\arraystretch}{" + f"{ARRAY_STRETCH}" + "}",
        "\\begin{tabular}{" + column_spec + "}",
        "\\toprule",
    ]
    closing = [
        "\\bottomrule",
        "\\end{tabular}",
        "\\caption{" + caption + "}",
        "\\label{" + label + "}",
        "\\end{table}",
        "\\end{landscape}",
    ]
    return "\n".join(opening + panels + closing)

# ======= WRITE FILES =======
# One LaTeX file per modality, written to out_dir.
for modality in sorted(df["Modality"].unique()):
    tex = make_landscape_table(df, modality)
    if not tex:
        print(f"[WARN] No data for modality: {modality}")
        continue
    out_path = os.path.join(out_dir, f"{modality.lower().replace(' ', '_')}_lambda_by_rho_landscape.tex")
    # The table text contains non-ASCII characters (the "γ₂" panel labels), so the
    # encoding is pinned instead of relying on the platform default, which can
    # raise UnicodeEncodeError on non-UTF-8 locales.
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(tex)
    print(f"Wrote: {out_path}")


Wrote: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/modality_1_lambda_by_rho_landscape.tex
Wrote: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/modality_2_lambda_by_rho_landscape.tex
Wrote: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/modality_2_first_two_cols_lambda_by_rho_landscape.tex
Wrote: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/modality_3_lambda_by_rho_landscape.tex


In [3]:
def make_panel_dataframe(df, modality, setting, lambda_order, rho_order):
    """Build a plain-text version of one table panel as a DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        Source rows; the columns "Modality", "setting", "lam", "rho",
        "Method", "rec_error_mean" and "rec_error_se" are read.
    modality, setting : str
        Select the panel: only rows matching both are used.
    lambda_order, rho_order : list of float
        Lambda values (row groups) and rho values (columns), in output order.

    Returns
    -------
    pd.DataFrame or None
        Indexed by (lambda, method) with one "rho=<value>" column per rho.
        Each cell is "mean (se)" with six decimals, "--" when the combination
        is missing, and a trailing " *" on the method with the lowest mean for
        that (lambda, rho). None when no row matches the panel.
    """
    in_panel = (df["Modality"] == modality) & (df["setting"] == setting)
    panel = df[in_panel].copy()
    if panel.empty:
        return None

    def cell_rows(lam_value, rho_value, method_name):
        # All rows of one (lambda, rho, method) cell; only the first one is used.
        mask = (
            (panel["lam"] == lam_value)
            & (panel["rho"] == rho_value)
            & (panel["Method"] == method_name)
        )
        return panel[mask]

    row_keys = []    # (lambda, method) tuples for the row MultiIndex
    table_rows = []  # one list of rendered cells per row key
    for lam in lambda_order:
        # Position (within METHOD_ORDER) of the lowest mean for every rho;
        # missing methods count as +inf.
        winners = []
        for rho in rho_order:
            means = []
            for name in METHOD_ORDER:
                hit = cell_rows(lam, rho, name)
                means.append(float("inf") if hit.empty else float(hit["rec_error_mean"].values[0]))
            winners.append(int(pd.Series(means).idxmin()))

        for position, method in enumerate(METHOD_ORDER):
            row_keys.append((lam, method))
            rendered = []
            for column, rho in enumerate(rho_order):
                hit = cell_rows(lam, rho, method)
                if hit.empty:
                    rendered.append("--")
                    continue
                mean = float(hit["rec_error_mean"].values[0])
                se = float(hit["rec_error_se"].values[0])
                # An asterisk marks the best method of this column.
                marker = " *" if winners[column] == position else ""
                rendered.append(f"{mean:.6f} ({se:.6f})" + marker)
            table_rows.append(rendered)

    row_index = pd.MultiIndex.from_tuples(row_keys, names=["lambda", "method"])
    column_labels = [f"rho={r:g}" for r in rho_order]
    return pd.DataFrame(table_rows, index=row_index, columns=column_labels)

# Example usage for all modalities and settings:
# build one panel DataFrame per (modality, setting), keep it in `dfs`, and save it as CSV.
# Relies on names defined by earlier cells: df, LAMBDA_ORDER, RHO_ORDER, PANELS (table
# config cell) and results_dir (results-combining cell).
dfs = {}
lambda_order = sorted(df["lam"].unique().tolist()) if LAMBDA_ORDER is None else LAMBDA_ORDER
present_rhos = set(df["rho"].unique())
rho_order = [r for r in RHO_ORDER if r in present_rhos]
for modality in sorted(df["Modality"].unique()):
    for setting in PANELS:
        panel_df = make_panel_dataframe(df, modality, setting, lambda_order, rho_order)
        if panel_df is None:
            continue  # no rows for this (modality, setting)
        dfs[(modality, setting)] = panel_df
        # Save each panel DataFrame to CSV.
        # The file-name parts are computed outside the f-string: reusing the same quote
        # character inside an f-string expression is a SyntaxError before Python 3.12.
        modality_slug = modality.lower().replace(" ", "_")
        setting_slug = setting.replace(" ", "_").replace("γ₂", "gamma2")
        csv_name = f"{modality_slug}_{setting_slug}_panel.csv"
        csv_path = os.path.join(results_dir, csv_name)
        panel_df.to_csv(csv_path)
        print(f"Saved panel CSV: {csv_path}")



Saved panel CSV: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/modality_1_gamma2_=_0.05_panel.csv
Saved panel CSV: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/modality_1_gamma2_=_0.25_panel.csv
Saved panel CSV: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/modality_2_gamma2_=_0.05_panel.csv
Saved panel CSV: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/modality_2_gamma2_=_0.25_panel.csv
Saved panel CSV: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/modality_2_first_two_cols_gamma2_=_0.05_panel.csv
Saved panel CSV: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/modality_2_first_two_cols_gamma2_=_0.25_panel.csv
Saved panel CSV: /home/nandy.15/Research/Experiments_revision/Results/Effect_of_data_integration/modality_3_gamma2_=_0.05_panel.csv
Saved panel CSV: /home/nandy.15/Research/Exper