In [44]:
import pandas as pd

In [45]:
import re
def extract_all_times(log: str, total_steps: int) -> list:
    """
    Extracts all occurrences of the time taken for the final step from a progress log.

    Args:
        log (str): The string containing the progress log.
        total_steps (int): The total number of steps (e.g., 17 in "17/17").

    Returns:
        list: A list of times (strings between `[` and `<`) for all occurrences of the final step.
    """
    # Construct the regex pattern dynamically based on total_steps
    pattern = rf"{total_steps}/{total_steps} \[([^\]]+)<"
    matches = re.findall(pattern, log)
    return [match.strip() for match in matches]

In [46]:
extract_all_times(s, 17)

['56:41', '00:00', '21:19', '03:43', '03:11', '02:46', '04:04', '04:49']

In [None]:
scenario_numers = [1,2,3,4,5,6,7]
scenario_names = [
    "ig",
    "ig_intercept",
    "Laplace_ig",
    "Laplace_ig_intercept",
    "Gamma_ig",
    "logreg_ig",
    "ig_gamma_response_reparam"
]

notebook_names = [
    "PFN_Experiments_CFM_InverseGamma_v2_tabpfnX",
    "PFN_Experiments_CFM_InverseGamma_intercept_tabpfn_v4",
    "PFN_Experiments_CFM_InverseGammaLaplace_TabpfnX",
    "PFN_Experiments_CFM_InverseGammaLaplace_intercept_TabpfnX",
    "PFN_Experiments_CFM_InverseGammaGamma_tabpfnX",
    "PFN_Experiments_CFM_LogReg_TabpfnXt",
    "PFN_Experiments_CFM_InverseGamma_GammaResponse_raparam_TabpfnX"
]

runtime_training = [
    "365.1309976577759s * 150",
    "311 * 150",
    "363 * 150",
    "362 * 150",
    "367 * 150",
    "366 * 150",
    "369 * 150"
]

runtime_synthetic_eval = [
   "1:12:45/100",
   '57:41/50',
   '48:45/100',
   '1:04:00/50',
   '1:07:09/100',
   '40:04/100',
   '41:45/50'
]

runtime_hmc_synthetic_eval = [
    "2:21:14/100",
    "2:42:37/50",
   '2:50:38/100',
    '2:11:21/50',
    '2:58:34/100',
    '1:24:32/100',
    '1:48:51/50'
]

runtime_vi_multivariate_normal_synthetic_eval = [
    "22:27/100",
    "12:25/50",
    '20:14/100',
     '11:35/50',
     '22:15/100',
     '22:23/100',
     '11:09/50'
]

runtime_vi_diag_normal_synthetic_eval = [
    "19:14/100",
    "10:50/50",
    '16:46/100',
     '09:45/50',
     '19:05/100',
     '19:09/100',
     '09:35/50'
]

runtime_laplace_vi_synthetic_eval = [
    "16:57/100",
    "09:39/50",
    '14:29/100',
     '08:33/50',
     '16:31/100',
     '16:57/100',
     '08:25/50'
]

runtime_vi_auto_IAF_synthetic_eval = [
    "25:11/100",
    '13:44/50',
    '22:16/100',
     '12:37/50',
     '24:49/100',
     '24:55/100',
     '12:21/50'
]

runtime_vi_autostructerd_IAF_synthetic_eval = [
     "29:19/100",
     "20:38/50",
     '27:00/100',
      '19:43/50',
      '28:48/100',
      '27:51/100',
      '14:34/50'
]


runtime_real_world_eval = [
    "56:41/17",
    "50:55/18",
    "46:07/17",
    '54:14/17',
    '28:57/17',
    '48:04/17',
    '50:58/17'
]

runtime_hmc_real_world_eval = [
    "21:19/17",
    '1:12:51/18',
    "23:00/17",
     '38:43/17',
     '22:09/17',
     '21:50/17',
     '46:25/17'
]

runtime_vi_multivariate_normal_real_world_eval = [
    '03:43/17',
    '04:29/18',
    '03:42/17' ,
     '04:12/17',
     '03:12/17',
     '04:09/17',
     '04:20/17'
]

runtime_vi_diag_normal_real_world_eval = [
    '03:11/17',
    '03:56/18',
    '03:06/17',
     '03:39/17',
     '03:50/17',
     '03:35/17',
     '03:44/17'
]


runtime_laplace_vi_real_world_eval = [
    '02:46/17',
    '03:28/18',
    '02:43/17',
     '03:15/17',
     '03:12/17',
     '03:19/17',
]

runtime_vi_auto_IAF_real_world_eval = [
    '04:04/17',
    '04:52/18',
    '04:02/17',
     '04:36/17',
     '05:00/17',
     '04:33/17',
     '04:43/17',
]

runtime_vi_autostructerd_IAF_real_world_eval = [
    '04:49/17',
    '07:14/18',
    '04:47/17',
    '06:52/17',
    '04:09/17',
    '06:19/17',
    '06:57/17',
]




In [48]:
runtime_lists = {
    "runtime_training": runtime_training,
    "runtime_synthetic_eval": runtime_synthetic_eval,
    "runtime_hmc_synthetic_eval": runtime_hmc_synthetic_eval,
    "runtime_vi_multivariate_normal_synthetic_eval": runtime_vi_multivariate_normal_synthetic_eval,
    "runtime_vi_diag_normal_synthetic_eval": runtime_vi_diag_normal_synthetic_eval,
    "runtime_laplace_vi_synthetic_eval": runtime_laplace_vi_synthetic_eval,
    "runtime_vi_auto_IAF_synthetic_eval": runtime_vi_auto_IAF_synthetic_eval,
    "runtime_vi_autostructerd_IAF_synthetic_eval": runtime_vi_autostructerd_IAF_synthetic_eval,
    "runtime_real_world_eval": runtime_real_world_eval,
    "runtime_hmc_real_world_eval": runtime_hmc_real_world_eval,
    "runtime_vi_multivariate_normal_real_world_eval": runtime_vi_multivariate_normal_real_world_eval,
    "runtime_vi_diag_normal_real_world_eval": runtime_vi_diag_normal_real_world_eval,
    "runtime_laplace_vi_real_world_eval": runtime_laplace_vi_real_world_eval,
    "runtime_vi_auto_IAF_real_world_eval": runtime_vi_auto_IAF_real_world_eval,
    "runtime_vi_autostructerd_IAF_real_world_eval": runtime_vi_autostructerd_IAF_real_world_eval

}


In [49]:
def convert_to_seconds(s):
    """
    Take an string of the form "1:12:45/100" and convert it to seconds
    """
    num_colons = s.count(":")

    if num_colons == 2:
        h, m, s = s.split(":")
        h = int(h)
        m = int(m)
        s, d= s.split("/")
        d = float(d)
        s = int(s)
        return (h * 3600 + m * 60 + s)/d
    
    if num_colons == 1:
        m, s = s.split(":")
        m = int(m)
        s, d = s.split("/")
        d = float(d)
        s = int(s)
        return (m * 60 + s)/d

    if num_colons == 0:
        s = s.replace("s", "")
        a, b = s.split("*")

        return float(a) * float(b)


runtime_lists = {k: [convert_to_seconds(x) for x in v] for k, v in runtime_lists.items()}

# convert everything to minutes

#runtime_lists = {k: [x/60 for x in v] for k, v in runtime_lists.items()}


In [50]:
runtime_lists

{'runtime_training': [54769.64964866638,
  46650.0,
  54450.0,
  54300.0,
  55050.0,
  54900.0,
  55350.0],
 'runtime_synthetic_eval': [43.65, 69.22, 29.25, 76.8, 40.29, 24.04, 50.1],
 'runtime_hmc_synthetic_eval': [84.74,
  195.14,
  102.38,
  157.62,
  107.14,
  50.72,
  130.62],
 'runtime_vi_multivariate_normal_synthetic_eval': [13.47,
  14.9,
  12.14,
  13.9,
  13.35,
  13.43,
  13.38],
 'runtime_vi_diag_normal_synthetic_eval': [11.54,
  13.0,
  10.06,
  11.7,
  11.45,
  11.49,
  11.5],
 'runtime_laplace_vi_synthetic_eval': [10.17,
  11.58,
  8.69,
  10.26,
  9.91,
  10.17,
  10.1],
 'runtime_vi_auto_IAF_synthetic_eval': [15.11,
  16.48,
  13.36,
  15.14,
  14.89,
  14.95,
  14.82],
 'runtime_vi_autostructerd_IAF_synthetic_eval': [17.59,
  24.76,
  16.2,
  23.66,
  17.28,
  16.71,
  17.48],
 'runtime_real_world_eval': [200.05882352941177,
  169.72222222222223,
  162.76470588235293,
  191.41176470588235,
  102.17647058823529,
  169.64705882352942,
  179.88235294117646],
 'runtime_hm

In [52]:
def conpute_stats(runtime_lists):
    stats = {}
    for k, v in runtime_lists.items():
        stats[k] = {
            "mean": sum(v)/len(v),
            "std": (sum([(x - sum(v)/len(v))**2 for x in v])/len(v))**0.5
        }
    return stats

conpute_stats(runtime_lists)

{'runtime_training': {'mean': 53638.52137838091, 'std': 2871.726534545249},
 'runtime_synthetic_eval': {'mean': 47.62142857142858,
  'std': 18.075816068178284},
 'runtime_hmc_synthetic_eval': {'mean': 118.33714285714287,
  'std': 44.196869221468525},
 'runtime_vi_multivariate_normal_synthetic_eval': {'mean': 13.51,
  'std': 0.7570242494700643},
 'runtime_vi_diag_normal_synthetic_eval': {'mean': 11.534285714285714,
  'std': 0.7891742002671823},
 'runtime_laplace_vi_synthetic_eval': {'mean': 10.125714285714285,
  'std': 0.7787693064400145},
 'runtime_vi_auto_IAF_synthetic_eval': {'mean': 14.964285714285714,
  'std': 0.8408984505168464},
 'runtime_vi_autostructerd_IAF_synthetic_eval': {'mean': 19.097142857142853,
  'std': 3.2769354773094},
 'runtime_real_world_eval': {'mean': 167.95191409897294,
  'std': 29.478508392303393},
 'runtime_hmc_real_world_eval': {'mean': 122.13585434173667,
  'std': 58.98912555575271},
 'runtime_vi_multivariate_normal_real_world_eval': {'mean': 13.8828197945845

In [51]:


df_runtime = pd.DataFrame(runtime_lists)

ValueError: All arrays must be of the same length