In [1]:
import numpy as np
import pandas as pd

In [2]:
run = 100  # number of repetitions

# ---------- allocate ----------
# One zero-filled buffer of length `run` per (statistic, source, side):
#   - side 1: testdata1 vs model_outputtest1
#   - side 2: testdata2 vs model_outputtest2
# The generator yields a fresh array for every target name, so no two
# names share storage.
(
    mean_test1, mean_test2,
    mean_QRGMM_1, mean_QRGMM_2,
    mean_CWGAN_1, mean_CWGAN_2,
    mean_Diff_1, mean_Diff_2,
    mean_Rect_1, mean_Rect_2,
) = (np.zeros(run) for _ in range(10))

(
    std_test1, std_test2,
    std_QRGMM_1, std_QRGMM_2,
    std_CWGAN_1, std_CWGAN_2,
    std_Diff_1, std_Diff_2,
    std_Rect_1, std_Rect_2,
) = (np.zeros(run) for _ in range(10))

# ---------- loop ----------
for runi in range(run):
    # Files on disk are numbered from 1, hence `runi+1` in every path.

    # --- ground truth -------------------------------------------------
    # Read without a header row; column F is taken by position (5th column).
    # If your files already have headers, set header=0 and use test1["F"] directly.
    test1 = pd.read_csv(f'./data/testdata1/testdata1_{runi+1}.csv', header=None)
    test2 = pd.read_csv(f'./data/testdata2/testdata2_{runi+1}.csv', header=None)
    y_test1, y_test2 = test1.iloc[:, 4], test2.iloc[:, 4]

    # --- generated samples (test1 side / test2 side) --------------------
    # QRGMM outputs are typically a single headerless column -> F is col 0.
    # CWGAN / Diffusion / RectFlow outputs are full frames (A,B,C,D,F) whose
    # first row is consumed as the header -> F is col 4.
    qrgmm_test1 = pd.read_csv(f'./data/QRGMMoutputtest1/QRGMMoutputtest1_{runi+1}.csv', header=None)
    qrgmm_test2 = pd.read_csv(f'./data/QRGMMoutputtest2/QRGMMoutputtest2_{runi+1}.csv', header=None)
    cwgan_test1 = pd.read_csv(f'./data/CWGANoutputtest1/CWGANoutputtest1_{runi+1}.csv')
    cwgan_test2 = pd.read_csv(f'./data/CWGANoutputtest2/CWGANoutputtest2_{runi+1}.csv')
    diff_test1  = pd.read_csv(f'./data/Diffusionoutputtest1/Diffusionoutputtest1_{runi+1}.csv')
    diff_test2  = pd.read_csv(f'./data/Diffusionoutputtest2/Diffusionoutputtest2_{runi+1}.csv')
    rect_test1  = pd.read_csv(f'./data/RectFlowoutputtest1/RectFlowoutputtest1_{runi+1}.csv')
    rect_test2  = pd.read_csv(f'./data/RectFlowoutputtest2/RectFlowoutputtest2_{runi+1}.csv')

    y_qrgmm_1, y_qrgmm_2 = qrgmm_test1.iloc[:, 0], qrgmm_test2.iloc[:, 0]
    y_cwgan_1, y_cwgan_2 = cwgan_test1.iloc[:, 4], cwgan_test2.iloc[:, 4]
    y_diff_1,  y_diff_2  = diff_test1.iloc[:, 4],  diff_test2.iloc[:, 4]
    y_rect_1,  y_rect_2  = rect_test1.iloc[:, 4],  rect_test2.iloc[:, 4]

    # --- per-run statistics ---------------------------------------------
    # For each source, store the sample mean and the sample std (ddof=1)
    # of F at position `runi` of the matching buffers.
    mean_test1[runi],   std_test1[runi]   = y_test1.mean(),   y_test1.std(ddof=1)
    mean_test2[runi],   std_test2[runi]   = y_test2.mean(),   y_test2.std(ddof=1)

    mean_QRGMM_1[runi], std_QRGMM_1[runi] = y_qrgmm_1.mean(), y_qrgmm_1.std(ddof=1)
    mean_QRGMM_2[runi], std_QRGMM_2[runi] = y_qrgmm_2.mean(), y_qrgmm_2.std(ddof=1)

    mean_CWGAN_1[runi], std_CWGAN_1[runi] = y_cwgan_1.mean(), y_cwgan_1.std(ddof=1)
    mean_CWGAN_2[runi], std_CWGAN_2[runi] = y_cwgan_2.mean(), y_cwgan_2.std(ddof=1)

    mean_Diff_1[runi],  std_Diff_1[runi]  = y_diff_1.mean(),  y_diff_1.std(ddof=1)
    mean_Diff_2[runi],  std_Diff_2[runi]  = y_diff_2.mean(),  y_diff_2.std(ddof=1)

    mean_Rect_1[runi],  std_Rect_1[runi]  = y_rect_1.mean(),  y_rect_1.std(ddof=1)
    mean_Rect_2[runi],  std_Rect_2[runi]  = y_rect_2.mean(),  y_rect_2.std(ddof=1)

# -------------------------------------------------------------------
# Build summary tables
# We provide:
#   (A) test1-side summary: Truth(testdata1) vs generated_outputtest1
#   (B) test2-side summary: Truth(testdata2) vs generated_outputtest2
# Each table reports:
#   - mean(mean): average of per-run means
#   - std(mean):  std  of per-run means
#   - mean(std):  average of per-run stds
#   - std(std):   std  of per-run stds
# Both tables share one builder so the four columns are defined in a
# single place instead of being copied once per row and per table.
# -------------------------------------------------------------------
def _std_across_runs(per_run_values):
    """Return the sample std (ddof=1) of a per-run statistic.

    Parameters
    ----------
    per_run_values : np.ndarray
        1-D array holding one value per run.

    Returns
    -------
    float
        Sample standard deviation, or NaN when there are not at least two
        runs (the ddof=1 estimator is undefined for a single value).
    """
    # The per-run buffers are allocated with length `run`, so this guard is
    # the same condition as `run > 1`.
    return per_run_values.std(ddof=1) if per_run_values.size > 1 else np.nan


def _summarize_runs(rows):
    """Build one summary table from per-run means and stds.

    Parameters
    ----------
    rows : list of (str, np.ndarray, np.ndarray)
        One entry per table row: (row label, per-run means, per-run stds).

    Returns
    -------
    pd.DataFrame
        Indexed by the row labels (in the given order) with columns
        "mean(mean)", "std(mean)", "mean(std)" and "std(std)".
    """
    return pd.DataFrame(
        {
            "mean(mean)": [means.mean() for _, means, _ in rows],
            "std(mean)": [_std_across_runs(means) for _, means, _ in rows],
            "mean(std)": [stds.mean() for _, _, stds in rows],
            "std(std)": [_std_across_runs(stds) for _, _, stds in rows],
        },
        index=[label for label, _, _ in rows],
    )


summary_test1 = _summarize_runs([
    ("Truth(test1)",     mean_test1,   std_test1),
    ("QRGMM(test1)",     mean_QRGMM_1, std_QRGMM_1),
    ("CWGAN(test1)",     mean_CWGAN_1, std_CWGAN_1),
    ("Diffusion(test1)", mean_Diff_1,  std_Diff_1),
    ("RectFlow(test1)",  mean_Rect_1,  std_Rect_1),
])

summary_test2 = _summarize_runs([
    ("Truth(test2)",     mean_test2,   std_test2),
    ("QRGMM(test2)",     mean_QRGMM_2, std_QRGMM_2),
    ("CWGAN(test2)",     mean_CWGAN_2, std_CWGAN_2),
    ("Diffusion(test2)", mean_Diff_2,  std_Diff_2),
    ("RectFlow(test2)",  mean_Rect_2,  std_Rect_2),
])

print("\nSummary statistics (Y1):\n")
print(summary_test1.round(4))

print("\nSummary statistics (Y2):\n")
print(summary_test2.round(4))



Summary statistics (Y1):

                  mean(mean)  std(mean)  mean(std)  std(std)
Truth(test1)         15.6000     0.0894     9.5288    0.0553
QRGMM(test1)         15.5018     0.1227     9.5026    0.0821
CWGAN(test1)         15.4213     2.7809     8.0462    1.7469
Diffusion(test1)     15.7320     0.2996     9.9453    0.1748
RectFlow(test1)      15.7210     0.2738     9.9604    0.1845

Summary statistics (Y2):

                  mean(mean)  std(mean)  mean(std)  std(std)
Truth(test2)         15.6290     0.0921     9.5357    0.0533
QRGMM(test2)         15.5427     0.1178     9.5127    0.0747
CWGAN(test2)         15.8217     2.6824     8.0826    1.7796
Diffusion(test2)     15.7170     0.2859     9.9406    0.1730
RectFlow(test2)      15.7563     0.2511     9.9406    0.1712


In [3]:
# ---------------------------
# Timing of CWGAN / Diffusion / RectFlow
# Long format, one row per measurement: run, model, split (Y1/Y2), gen_time
# ---------------------------
df_other = (
    pd.read_csv("./results/timing_online_generation_GMMs.csv")
    [["run", "model", "split", "gen_time"]]
    .assign(run=lambda frame: frame["run"].astype(int))
)

# ---------------------------
# QRGMM timing exported from MATLAB:
# onlinetime_QRGMM_x_1 (run x 1), onlinetime_QRGMM_x_2 (run x 1)
# saved as CSV with shape (run x 2): col0=Y1, col1=Y2
# ---------------------------
# atleast_2d keeps a single-row file (run=1) two-dimensional so the column
# slices below still work.
q = np.atleast_2d(np.loadtxt("./data/savetime/onlinetime_QRGMM.csv", delimiter=","))

# Reshape wide (one column per split) into the same long layout as df_other;
# runs are numbered from 1 in row order.
df_qrgmm = (
    pd.DataFrame({
        "run": np.arange(1, q.shape[0] + 1),
        "Y1": q[:, 0],
        "Y2": q[:, 1],
    })
    .melt(id_vars="run", var_name="split", value_name="gen_time")
    .assign(model="QRGMM")
)

# ---------------------------
# Stack all models, then mean/std of gen_time per (model, split)
# ---------------------------
df_all = pd.concat([df_other, df_qrgmm], ignore_index=True)

summary = df_all.groupby(["model", "split"])[["gen_time"]].agg(["mean", "std"])
print("\nSummary time (model x split):\n", summary)

# Optional: persist the merged timings and their summary
df_all.to_csv("./results/timing_online_generation_all_models.csv", index=False)
summary.to_csv("./results/timing_summary_all_models.csv")



Summary time (model x split):
                  gen_time          
                     mean       std
model     split                    
CWGAN     Y1     0.228664  0.019916
          Y2     0.209175  0.029999
Diffusion Y1     0.840236  0.015134
          Y2     0.838391  0.014034
QRGMM     Y1     0.001674  0.000565
          Y2     0.001703  0.000583
RectFlow  Y1     0.775913  0.009647
          Y2     0.772597  0.009465
