In [33]:
# Mount Google Drive at /content/drive (Colab only) so files stored there can be accessed.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [34]:
cd '/content/drive/MyDrive/TFG-AnalisisResultados'

/content/drive/MyDrive/TFG-AnalisisResultados


In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

# LD

In [36]:
# LD experiments: for each combined log file, build one summary row per
# (acquisition, seed) run, aggregate per acquisition function and write two CSV tables.
for df_path in ['./LD_SGP_combined_logs.parquet', './LD_MGP_combined_logs.parquet', './LD_MGPR1_combined_logs.parquet']:
  df = pd.read_parquet(df_path)
  summary_records = []

  # === 3. Group by acquisition and seed (i.e., per run) ===
  grouped = df.groupby(['acquisition', 'seed'])

  # For each acquisition + seed/run
  for (acq, seed), run_df in grouped:
      run_df = run_df.sort_values('iteration')
      last_iter = run_df['iteration'].max()

      # Row(s) logged at the last iteration; the first one supplies the final Pareto front
      final_front = run_df.loc[run_df['iteration'] == last_iter]

      # Assumes the front is a list of [accuracy, depth] points, i.e. [:, 0] = accuracy, [:, 1] = depth
      final_obj = np.stack(final_front['pareto_front'].values[0])  # shape (num_points, 2)

      # Spread of the final front (np.std is the population std, ddof=0)
      acc_mean = np.mean(final_obj[:, 0])
      acc_std = np.std(final_obj[:, 0])
      depth_mean = np.mean(final_obj[:, 1])
      depth_std = np.std(final_obj[:, 1])

      # NOTE(review): argmax on the depth column selects the *minimum* depth only if
      # depths are stored negated (the logged values are negative, e.g. -2.0) -- confirm.
      mindepth_index = np.argmax(final_obj[:, 1])
      maxacc_index = np.argmax(final_obj[:, 0])
      # Extract the corresponding [accuracy, depth] rows
      min_depth_pair = final_obj[mindepth_index]
      max_accuracy_pair = final_obj[maxacc_index]

      # Sobol runs are always reported as NaN; any other run without logged
      # posterior stds now also yields NaN instead of crashing np.stack on an empty input.
      std_vals = run_df['posterior_std'].dropna().tolist() if acq != "Sobol" else []

      if std_vals:
          # Stack each iteration separately (expected shape (batch_size, 2)) so that
          # iterations with different batch sizes do not break a single global np.stack.
          per_iter = [np.stack(arr) for arr in std_vals]

          # Mean over the batch, per objective and per iteration
          obj0_stds = [arr[:, 0].mean() for arr in per_iter]
          obj1_stds = [arr[:, 1].mean() for arr in per_iter]

          # Mean / std across iterations
          posterior_std_obj0_mean = np.mean(obj0_stds)
          posterior_std_obj1_mean = np.mean(obj1_stds)
          posterior_std_obj0_std = np.std(obj0_stds)
          posterior_std_obj1_std = np.std(obj1_stds)
      else:
          posterior_std_obj0_mean = posterior_std_obj1_mean = np.nan
          posterior_std_obj0_std = posterior_std_obj1_std = np.nan

      summary_records.append({
          "acquisition": acq,
          "seed": seed,
          "pareto_acc_mean": acc_mean,
          "pareto_acc_std": acc_std,
          "pareto_depth_mean": depth_mean,
          "pareto_depth_std": depth_std,
          "min_depth_pair": min_depth_pair,
          "max_accuracy_pair": max_accuracy_pair,
          "posterior_std_obj0_mean": posterior_std_obj0_mean,
          "posterior_std_obj0_std": posterior_std_obj0_std,
          "posterior_std_obj1_mean": posterior_std_obj1_mean,
          "posterior_std_obj1_std": posterior_std_obj1_std,
      })

  per_run_summary = pd.DataFrame(summary_records)

  # === 4. Aggregate Stats per Acquisition Function ===
  # 'std' here is the pandas sample std (ddof=1) across runs.
  final_summary = per_run_summary.groupby('acquisition').agg({
      'pareto_acc_mean': ['mean', 'std'],
      'pareto_acc_std': ['mean'],  # mean of the per-run stds
      'pareto_depth_mean': ['mean', 'std'],
      'pareto_depth_std': ['mean'],  # mean of the per-run stds
      'min_depth_pair': list,
      'max_accuracy_pair': list,
      'posterior_std_obj0_mean': ['mean', 'std'],
      'posterior_std_obj0_std': ['mean'],
      'posterior_std_obj1_mean': ['mean', 'std'],
      'posterior_std_obj1_std': ['mean'],
  }).reset_index()

  # Flatten the (column, aggregation) MultiIndex into 'column_aggregation' names
  final_summary.columns = ['_'.join(col).strip('_') for col in final_summary.columns]

  # Best pair across runs: largest value in the depth column / in the accuracy column
  final_summary['best_min_depth_pair'] = final_summary['min_depth_pair_list'].apply(
    lambda lst: max(lst, key=lambda x: x[1]) if lst else np.nan
  )

  final_summary['best_max_accuracy_pair'] = final_summary['max_accuracy_pair_list'].apply(
    lambda lst: max(lst, key=lambda x: x[0]) if lst else np.nan
  )

  # === 5. Save summary ===
  # Output prefix = input path without the '_combined_logs.parquet' tail
  # (e.g. './LD_SGP'); independent of how many '_' the experiment name contains.
  name = df_path.rsplit('_combined_logs', 1)[0]

  # Table pareto front
  obj_cols = ['acquisition', 'pareto_acc_mean_mean', 'pareto_acc_mean_std',
              'pareto_depth_mean_mean', 'pareto_depth_mean_std', 'best_min_depth_pair', 'best_max_accuracy_pair']
  final_summary[obj_cols].to_csv(name + "_final_objectives.csv", index=False)

  # Table gp uncertainty
  unc_cols = ['acquisition', 'posterior_std_obj0_mean_mean', 'posterior_std_obj0_mean_std', 'posterior_std_obj1_mean_mean', 'posterior_std_obj1_mean_std']
  final_summary[unc_cols].to_csv(name + "_gp_uncertainty.csv", index=False)


# Displays only the summary of the last file processed by the loop above.
final_summary.head()

Unnamed: 0,acquisition,pareto_acc_mean_mean,pareto_acc_mean_std,pareto_acc_std_mean,pareto_depth_mean_mean,pareto_depth_mean_std,pareto_depth_std_mean,min_depth_pair_list,max_accuracy_pair_list,posterior_std_obj0_mean_mean,posterior_std_obj0_mean_std,posterior_std_obj0_std_mean,posterior_std_obj1_mean_mean,posterior_std_obj1_mean_std,posterior_std_obj1_std_mean,best_min_depth_pair,best_max_accuracy_pair
0,Sobol,0.694535,0.012882,0.008337,-39.074286,9.045653,41.80664,"[[0.6913092125954291, -2.0], [0.66396335822157...","[[0.7154748806057313, -141.0], [0.682297674226...",,,,,,,"[0.6913092125954291, -2.0]","[0.7268195673247976, -198.0]"
1,qParEGO,0.69321,0.016669,0.009725,-32.236667,14.04757,29.764195,"[[0.6986466426147627, -2.0], [0.66026284133007...","[[0.7143697455588598, -89.0], [0.6842371421203...",62.05219,13.642816,17.898096,71.509461,11.498704,21.293429,"[0.6986466426147627, -2.0]","[0.7175272648255199, -25.0]"
2,qQEHVI,0.693223,0.014586,0.009542,-47.041667,10.580462,46.646032,"[[0.7003057547846309, -2.0], [0.66576686649979...","[[0.711302471888677, -160.0], [0.6852095196702...",61.913901,8.299829,18.76386,66.64986,15.298347,19.547822,"[0.7003057547846309, -2.0]","[0.7149438992865244, -77.0]"
3,qQNEHVI,0.692628,0.016483,0.008376,-49.447778,19.663105,53.279294,"[[0.698934447676717, -2.0], [0.660262841330078...","[[0.711302471888677, -160.0], [0.6824712230049...",60.802445,20.371652,14.156569,66.58836,13.437907,18.557631,"[0.698934447676717, -2.0]","[0.7164327567407028, -200.0]"


# LD_NEURO

In [37]:
# LD_NEURO experiments: for each combined log file, build one summary row per
# (acquisition, seed) run, aggregate per acquisition function and write two CSV tables.
for df_path in ['./LD_NEURO_SGP_combined_logs.parquet', './LD_NEURO_MGP_combined_logs.parquet', './LD_NEURO_MGPR1_combined_logs.parquet']:
  df = pd.read_parquet(df_path)
  summary_records = []

  # === 3. Group by acquisition and seed (i.e., per run) ===
  grouped = df.groupby(['acquisition', 'seed'])

  # For each acquisition + seed/run
  for (acq, seed), run_df in grouped:
      run_df = run_df.sort_values('iteration')
      last_iter = run_df['iteration'].max()

      # Row(s) logged at the last iteration; the first one supplies the final Pareto front
      final_front = run_df.loc[run_df['iteration'] == last_iter]

      # Assumes the front is a list of [accuracy, depth] points, i.e. [:, 0] = accuracy, [:, 1] = depth
      final_obj = np.stack(final_front['pareto_front'].values[0])  # shape (num_points, 2)

      # Spread of the final front (np.std is the population std, ddof=0)
      acc_mean = np.mean(final_obj[:, 0])
      acc_std = np.std(final_obj[:, 0])
      depth_mean = np.mean(final_obj[:, 1])
      depth_std = np.std(final_obj[:, 1])

      # NOTE(review): argmax on the depth column selects the *minimum* depth only if
      # depths are stored negated (the logged values are negative, e.g. -2.0) -- confirm.
      mindepth_index = np.argmax(final_obj[:, 1])
      maxacc_index = np.argmax(final_obj[:, 0])
      # Extract the corresponding [accuracy, depth] rows
      min_depth_pair = final_obj[mindepth_index]
      max_accuracy_pair = final_obj[maxacc_index]

      # Sobol runs are always reported as NaN; any other run without logged
      # posterior stds now also yields NaN instead of crashing np.stack on an empty input.
      std_vals = run_df['posterior_std'].dropna().tolist() if acq != "Sobol" else []

      if std_vals:
          # Stack each iteration separately (expected shape (batch_size, 2)) so that
          # iterations with different batch sizes do not break a single global np.stack.
          per_iter = [np.stack(arr) for arr in std_vals]

          # Mean over the batch, per objective and per iteration
          obj0_stds = [arr[:, 0].mean() for arr in per_iter]
          obj1_stds = [arr[:, 1].mean() for arr in per_iter]

          # Mean / std across iterations
          posterior_std_obj0_mean = np.mean(obj0_stds)
          posterior_std_obj1_mean = np.mean(obj1_stds)
          posterior_std_obj0_std = np.std(obj0_stds)
          posterior_std_obj1_std = np.std(obj1_stds)
      else:
          posterior_std_obj0_mean = posterior_std_obj1_mean = np.nan
          posterior_std_obj0_std = posterior_std_obj1_std = np.nan

      summary_records.append({
          "acquisition": acq,
          "seed": seed,
          "pareto_acc_mean": acc_mean,
          "pareto_acc_std": acc_std,
          "pareto_depth_mean": depth_mean,
          "pareto_depth_std": depth_std,
          "min_depth_pair": min_depth_pair,
          "max_accuracy_pair": max_accuracy_pair,
          "posterior_std_obj0_mean": posterior_std_obj0_mean,
          "posterior_std_obj0_std": posterior_std_obj0_std,
          "posterior_std_obj1_mean": posterior_std_obj1_mean,
          "posterior_std_obj1_std": posterior_std_obj1_std,
      })

  per_run_summary = pd.DataFrame(summary_records)

  # === 4. Aggregate Stats per Acquisition Function ===
  # 'std' here is the pandas sample std (ddof=1) across runs.
  final_summary = per_run_summary.groupby('acquisition').agg({
      'pareto_acc_mean': ['mean', 'std'],
      'pareto_acc_std': ['mean'],  # mean of the per-run stds
      'pareto_depth_mean': ['mean', 'std'],
      'pareto_depth_std': ['mean'],  # mean of the per-run stds
      'min_depth_pair': list,
      'max_accuracy_pair': list,
      'posterior_std_obj0_mean': ['mean', 'std'],
      'posterior_std_obj0_std': ['mean'],
      'posterior_std_obj1_mean': ['mean', 'std'],
      'posterior_std_obj1_std': ['mean'],
  }).reset_index()

  # Flatten the (column, aggregation) MultiIndex into 'column_aggregation' names
  final_summary.columns = ['_'.join(col).strip('_') for col in final_summary.columns]

  # Best pair across runs: largest value in the depth column / in the accuracy column
  final_summary['best_min_depth_pair'] = final_summary['min_depth_pair_list'].apply(
    lambda lst: max(lst, key=lambda x: x[1]) if lst else np.nan
  )

  final_summary['best_max_accuracy_pair'] = final_summary['max_accuracy_pair_list'].apply(
    lambda lst: max(lst, key=lambda x: x[0]) if lst else np.nan
  )

  # === 5. Save summary ===
  # Output prefix = input path without the '_combined_logs.parquet' tail
  # (e.g. './LD_NEURO_SGP'); independent of how many '_' the experiment name contains.
  name = df_path.rsplit('_combined_logs', 1)[0]

  # Table pareto front
  obj_cols = ['acquisition', 'pareto_acc_mean_mean', 'pareto_acc_mean_std',
              'pareto_depth_mean_mean', 'pareto_depth_mean_std', 'best_min_depth_pair', 'best_max_accuracy_pair']
  final_summary[obj_cols].to_csv(name + "_final_objectives.csv", index=False)

  # Table gp uncertainty
  unc_cols = ['acquisition', 'posterior_std_obj0_mean_mean', 'posterior_std_obj0_mean_std', 'posterior_std_obj1_mean_mean', 'posterior_std_obj1_mean_std']
  final_summary[unc_cols].to_csv(name + "_gp_uncertainty.csv", index=False)


# Displays only the summary of the last file processed by the loop above.
final_summary.head()

Unnamed: 0,acquisition,pareto_acc_mean_mean,pareto_acc_mean_std,pareto_acc_std_mean,pareto_depth_mean_mean,pareto_depth_mean_std,pareto_depth_std_mean,min_depth_pair_list,max_accuracy_pair_list,posterior_std_obj0_mean_mean,posterior_std_obj0_mean_std,posterior_std_obj0_std_mean,posterior_std_obj1_mean_mean,posterior_std_obj1_mean_std,posterior_std_obj1_std_mean,best_min_depth_pair,best_max_accuracy_pair
0,Sobol,0.684149,0.009093,0.011897,-36.971825,20.787861,38.104252,"[[0.6301063888263303, -2.0], [0.65424474922088...","[[0.7002313094689402, -80.0], [0.6931944022262...",,,,,,,"[0.6301063888263303, -2.0]","[0.7091411604397216, -189.0]"
1,qParEGO,0.678134,0.007289,0.01763,-20.02,13.152215,22.894004,"[[0.6442234212219066, -2.0], [0.65639890040222...","[[0.7029878331599809, -150.0], [0.693576366078...",58.426263,15.648676,15.577895,61.649391,10.006473,21.240016,"[0.6442234212219066, -2.0]","[0.7042147459131958, -27.0]"
2,qQEHVI,0.679354,0.005974,0.020339,-29.650476,20.504735,34.012126,"[[0.6400522171617423, -2.0], [0.65442649137786...","[[0.7054994909327834, -182.0], [0.691841336153...",52.044032,7.873817,22.501111,58.717302,16.34761,23.81991,"[0.6400522171617423, -2.0]","[0.709822065771705, -11.0]"
3,qQNEHVI,0.679543,0.007784,0.015183,-41.599351,18.821862,43.005726,"[[0.6442234212219066, -2.0], [0.64802391838387...","[[0.7008836022370205, -86.0], [0.6981739192583...",58.138262,10.401275,18.614753,65.924303,15.538949,19.222268,"[0.6442234212219066, -2.0]","[0.7087572257909536, -59.0]"




# HD

In [38]:
# HD experiments: for each combined log file, build one summary row per
# (acquisition, seed) run, aggregate per acquisition function and write two CSV tables.
for df_path in ['./HD_SGP_combined_logs.parquet', './HD_MGP_combined_logs.parquet', './HD_MGPR1_combined_logs.parquet']:
  df = pd.read_parquet(df_path)
  summary_records = []

  # === 3. Group by acquisition and seed (i.e., per run) ===
  grouped = df.groupby(['acquisition', 'seed'])

  # For each acquisition + seed/run
  for (acq, seed), run_df in grouped:
      run_df = run_df.sort_values('iteration')
      last_iter = run_df['iteration'].max()

      # Row(s) logged at the last iteration; the first one supplies the final Pareto front
      final_front = run_df.loc[run_df['iteration'] == last_iter]

      # Assumes the front is a list of [accuracy, depth] points, i.e. [:, 0] = accuracy, [:, 1] = depth
      final_obj = np.stack(final_front['pareto_front'].values[0])  # shape (num_points, 2)

      # Spread of the final front (np.std is the population std, ddof=0)
      acc_mean = np.mean(final_obj[:, 0])
      acc_std = np.std(final_obj[:, 0])
      depth_mean = np.mean(final_obj[:, 1])
      depth_std = np.std(final_obj[:, 1])

      # NOTE(review): argmax on the depth column selects the *minimum* depth only if
      # depths are stored negated (the logged values are negative, e.g. -2.0) -- confirm.
      mindepth_index = np.argmax(final_obj[:, 1])
      maxacc_index = np.argmax(final_obj[:, 0])
      # Extract the corresponding [accuracy, depth] rows
      min_depth_pair = final_obj[mindepth_index]
      max_accuracy_pair = final_obj[maxacc_index]

      # Sobol runs are always reported as NaN; any other run without logged
      # posterior stds now also yields NaN instead of crashing np.stack on an empty input.
      std_vals = run_df['posterior_std'].dropna().tolist() if acq != "Sobol" else []

      if std_vals:
          # Stack each iteration separately (expected shape (batch_size, 2)) so that
          # iterations with different batch sizes do not break a single global np.stack.
          per_iter = [np.stack(arr) for arr in std_vals]

          # Mean over the batch, per objective and per iteration
          obj0_stds = [arr[:, 0].mean() for arr in per_iter]
          obj1_stds = [arr[:, 1].mean() for arr in per_iter]

          # Mean / std across iterations
          posterior_std_obj0_mean = np.mean(obj0_stds)
          posterior_std_obj1_mean = np.mean(obj1_stds)
          posterior_std_obj0_std = np.std(obj0_stds)
          posterior_std_obj1_std = np.std(obj1_stds)
      else:
          posterior_std_obj0_mean = posterior_std_obj1_mean = np.nan
          posterior_std_obj0_std = posterior_std_obj1_std = np.nan

      summary_records.append({
          "acquisition": acq,
          "seed": seed,
          "pareto_acc_mean": acc_mean,
          "pareto_acc_std": acc_std,
          "pareto_depth_mean": depth_mean,
          "pareto_depth_std": depth_std,
          "min_depth_pair": min_depth_pair,
          "max_accuracy_pair": max_accuracy_pair,
          "posterior_std_obj0_mean": posterior_std_obj0_mean,
          "posterior_std_obj0_std": posterior_std_obj0_std,
          "posterior_std_obj1_mean": posterior_std_obj1_mean,
          "posterior_std_obj1_std": posterior_std_obj1_std,
      })

  per_run_summary = pd.DataFrame(summary_records)

  # === 4. Aggregate Stats per Acquisition Function ===
  # 'std' here is the pandas sample std (ddof=1) across runs.
  final_summary = per_run_summary.groupby('acquisition').agg({
      'pareto_acc_mean': ['mean', 'std'],
      'pareto_acc_std': ['mean'],  # mean of the per-run stds
      'pareto_depth_mean': ['mean', 'std'],
      'pareto_depth_std': ['mean'],  # mean of the per-run stds
      'min_depth_pair': list,
      'max_accuracy_pair': list,
      'posterior_std_obj0_mean': ['mean', 'std'],
      'posterior_std_obj0_std': ['mean'],
      'posterior_std_obj1_mean': ['mean', 'std'],
      'posterior_std_obj1_std': ['mean'],
  }).reset_index()

  # Flatten the (column, aggregation) MultiIndex into 'column_aggregation' names
  final_summary.columns = ['_'.join(col).strip('_') for col in final_summary.columns]

  # Best pair across runs: largest value in the depth column / in the accuracy column
  final_summary['best_min_depth_pair'] = final_summary['min_depth_pair_list'].apply(
    lambda lst: max(lst, key=lambda x: x[1]) if lst else np.nan
  )

  final_summary['best_max_accuracy_pair'] = final_summary['max_accuracy_pair_list'].apply(
    lambda lst: max(lst, key=lambda x: x[0]) if lst else np.nan
  )

  # === 5. Save summary ===
  # Output prefix = input path without the '_combined_logs.parquet' tail
  # (e.g. './HD_SGP'); independent of how many '_' the experiment name contains.
  name = df_path.rsplit('_combined_logs', 1)[0]

  # Table pareto front
  obj_cols = ['acquisition', 'pareto_acc_mean_mean', 'pareto_acc_mean_std',
              'pareto_depth_mean_mean', 'pareto_depth_mean_std', 'best_min_depth_pair', 'best_max_accuracy_pair']
  final_summary[obj_cols].to_csv(name + "_final_objectives.csv", index=False)

  # Table gp uncertainty
  unc_cols = ['acquisition', 'posterior_std_obj0_mean_mean', 'posterior_std_obj0_mean_std', 'posterior_std_obj1_mean_mean', 'posterior_std_obj1_mean_std']
  final_summary[unc_cols].to_csv(name + "_gp_uncertainty.csv", index=False)


# Displays only the summary of the last file processed by the loop above.
final_summary.head()

Unnamed: 0,acquisition,pareto_acc_mean_mean,pareto_acc_mean_std,pareto_acc_std_mean,pareto_depth_mean_mean,pareto_depth_mean_std,pareto_depth_std_mean,min_depth_pair_list,max_accuracy_pair_list,posterior_std_obj0_mean_mean,posterior_std_obj0_mean_std,posterior_std_obj0_std_mean,posterior_std_obj1_mean_mean,posterior_std_obj1_mean_std,posterior_std_obj1_std_mean,best_min_depth_pair,best_max_accuracy_pair
0,Sobol,0.721341,0.006818,0.008229,-123.953333,101.634734,109.894283,"[[0.6852258993973568, -3.0], [0.69178326761615...","[[0.7333716534203215, -267.0], [0.729211634826...",,,,,,,"[0.6917832676161513, -2.0]","[0.7369019796989017, -120.0]"
1,qParEGO,0.71378,0.00284,0.011751,-141.526667,61.816491,161.837118,"[[0.6960183789759306, -2.0], [0.68786340733944...","[[0.7258885059740576, -586.0], [0.730836898809...",181.282051,30.53354,52.220089,204.556227,46.237486,56.542242,"[0.6960183789759306, -2.0]","[0.7308368988091022, -115.0]"
2,qQEHVI,0.715528,0.004037,0.011871,-83.127619,62.153127,100.999321,"[[0.6905017227500567, -2.0], [0.69073241787116...","[[0.7258885059740576, -586.0], [0.729612331494...",183.698212,37.009862,54.653058,180.029484,23.437882,65.395062,"[0.6905017227500567, -2.0]","[0.7360190682635, -145.0]"
3,qQNEHVI,0.716212,0.005007,0.010364,-109.209524,52.437068,94.929158,"[[0.6960183789759306, -2.0], [0.69573171577852...","[[0.7323217023991403, -212.0], [0.730290709006...",175.557515,52.756665,48.372619,198.019061,27.272356,58.304483,"[0.6960183789759306, -2.0]","[0.7338845588970334, -242.0]"
