In [1]:
import os
import re
from datetime import datetime
from pathlib import Path
import pandas as pd
from loguru import logger
os.chdir('..')

In [2]:
log_path = Path('log')

BORF_ID = 987820
BORF_0_ID = 989914
MCELS_ID = 990458
GLACIER_ID = 984423

In [26]:
logger_marks = ['INFO', 'WARNING']
time_format = r"%Y-%m-%d %H:%M:%S"

def is_log(line):
    for mark in logger_marks:
        if mark in line:
            return True
    return False

def is_borf_succes_log(line):
    if 'Success after' in line:
        return True
    else:
        return False
    
def get_effective_ter(line):
    print(line)
    total_iter = re.findall('after (\d+) ', line)[0]
    discarded_iter = re.findall('in.?luding (\d+) ', line)[0]
    
    return int(total_iter) - int(discarded_iter)

def get_avg_effective_iter(lines):
    lines = lines.split('\n')
    lines = filter(is_borf_succes_log, lines)
    iters = map(get_effective_ter, lines)
    iters = list(iters)
    if len(iters) == 0:
        return -1
    return sum(iters) / len(iters)

def get_timestmap(line):
    timestamp_pattern = r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})"
    timestamps = re.findall(timestamp_pattern, line)
    if len(timestamps) == 0:
        return None
    else:
        return timestamps[0]
    
def is_borf_cnt_line(line):
    if 'borf.explainer.borf:counterfactual' in line:
        return True
    else:
        return False
    
def total_runtime(log_lines, is_borf=False):

    filtered_logs = filter(is_log, log_lines.split('\n'))
    log_with_timestamps = list(map(lambda x: (x, get_timestmap(x)), filtered_logs))
    log_with_timestamps = list(filter(lambda x: x[1] is not None, log_with_timestamps))
    
    if is_borf:
        borf_logs_with_timestamps = list(filter(lambda x: is_borf_cnt_line(x[0]), log_with_timestamps))
        
    dt1 = datetime.strptime(log_with_timestamps[0][1], time_format)
    dt2 = datetime.strptime(log_with_timestamps[-1][1], time_format)
    
    runtime = dt2 - dt1
    
    if is_borf:
        dt1_borf = datetime.strptime(borf_logs_with_timestamps[0][1], time_format)
        dt2_borf = datetime.strptime(borf_logs_with_timestamps[-1][1], time_format)
        n_trails = 5
        borf_trial_overhead = (dt2_borf - dt1_borf) * (n_trails - 1) / n_trails
        
        runtime = runtime - borf_trial_overhead
        
    runtime = f"{runtime.seconds // 3600:02}:{(runtime.seconds % 3600) // 60:02}:{runtime.seconds % 60:02}"
        
    return runtime
    
    

In [27]:
res = []
borf_iter = []
for path in log_path.glob(f'*{BORF_ID}*.log'):
    logger.info(path)
    with open(path, 'r') as f:
        loglines = f.read()
    res.append((path.stem, total_runtime(loglines, is_borf=True), r'MASCOTS, $\lambda=0.1$'))
    borf_iter.append((path.stem, get_avg_effective_iter(loglines), r'MASCOTS, $\lambda=0.1$'))
    
for path in log_path.glob(f'*{BORF_0_ID}*.log'):
    logger.info(path)
    with open(path, 'r') as f:
        loglines = f.read()
    res.append((path.stem, total_runtime(loglines, is_borf=True), r'MASCOTS, $\lambda=0.0$'))
    borf_iter.append((path.stem, get_avg_effective_iter(loglines), r'MASCOTS, $\lambda=0.0$'))
    
for path in log_path.glob(f'*{MCELS_ID}*.log'):
    logger.info(path)
    with open(path, 'r') as f:
        loglines = f.read()
    res.append((path.stem, total_runtime(loglines, is_borf=False), 'M-CELS'))
    
for path in log_path.glob(f'*{GLACIER_ID}*.log'):
    logger.info(path)
    with open(path, 'r') as f:
        loglines = f.read()
    res.append((path.stem, total_runtime(loglines, is_borf=False), 'Glacier'))

[32m2025-03-13 15:36:29.134[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mlog/create_borf_987820_1.log[0m
[32m2025-03-13 15:36:29.136[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mlog/create_borf_987820_4.log[0m
[32m2025-03-13 15:36:29.138[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mlog/create_borf_987820_0.log[0m
[32m2025-03-13 15:36:29.140[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mlog/create_borf_987820_8.log[0m
[32m2025-03-13 15:36:29.141[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mlog/create_borf_987820_12.log[0m
[32m2025-03-13 15:36:29.144[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mlog/create_borf_987820_7.log[0m
[32m2025-03-13 15:36:29.146[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mlog/create_borf_987820_9.log[0m
[32m2025-03-13 15:

2025-03-07 13:07:36.763 | INFO     | borf.explainer.borf:counterfactual:221 - Success after 6 iterations (inluding 4 failed)
2025-03-07 13:07:46.574 | INFO     | borf.explainer.borf:counterfactual:221 - Success after 5 iterations (inluding 3 failed)
2025-03-07 13:08:02.027 | INFO     | borf.explainer.borf:counterfactual:221 - Success after 7 iterations (inluding 5 failed)
2025-03-07 13:08:17.962 | INFO     | borf.explainer.borf:counterfactual:221 - Success after 7 iterations (inluding 4 failed)
2025-03-07 13:08:20.214 | INFO     | borf.explainer.borf:counterfactual:221 - Success after 1 iterations (inluding 0 failed)
2025-03-07 13:08:26.017 | INFO     | borf.explainer.borf:counterfactual:221 - Success after 3 iterations (inluding 0 failed)
2025-03-07 13:08:37.220 | INFO     | borf.explainer.borf:counterfactual:221 - Success after 5 iterations (inluding 0 failed)
2025-03-07 13:08:45.377 | INFO     | borf.explainer.borf:counterfactual:221 - Success after 3 iterations (inluding 1 failed)


In [37]:
borf_res = pd.DataFrame(borf_iter)
borf_res.columns = ['filename', 'avg_iter', 'method']
borf_res['data_id'] = borf_res['filename'].str.extract(r'\_(\d+)$').astype(int)
borf_res['type'] = borf_res['data_id'].apply(lambda x: 'univariate' if x <= 5 else 'multivariate')
borf_res[['avg_iter', 'method', 'type']].groupby(['method', 'type']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,avg_iter
method,type,Unnamed: 2_level_1
"MASCOTS, $\lambda=0.0$",multivariate,2.360419
"MASCOTS, $\lambda=0.0$",univariate,2.685162
"MASCOTS, $\lambda=0.1$",multivariate,1.749449
"MASCOTS, $\lambda=0.1$",univariate,2.993321


In [38]:
borf_res[['avg_iter', 'method', 'type']].groupby(['method', 'type']).std()

Unnamed: 0_level_0,Unnamed: 1_level_0,avg_iter
method,type,Unnamed: 2_level_1
"MASCOTS, $\lambda=0.0$",multivariate,3.645745
"MASCOTS, $\lambda=0.0$",univariate,1.287684
"MASCOTS, $\lambda=0.1$",multivariate,2.720214
"MASCOTS, $\lambda=0.1$",univariate,0.988047


In [5]:
df = pd.DataFrame(res)
df.columns = ['filename', 'runtime', 'ID']
df['data_id'] = df['filename'].str.extract(r'\_(\d+)$').astype(int)

In [6]:
summary = df.pivot(index='data_id', columns='ID', values='runtime')
summary.sort_index()

ID,Glacier,M-CELS,"MASCOTS, $\lambda=0.0$","MASCOTS, $\lambda=0.1$"
data_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,00:26:27,01:10:45,00:07:50,00:11:41
1,00:24:52,01:08:52,00:09:30,00:17:12
2,00:23:29,01:28:53,00:23:17,00:15:40
3,00:13:54,00:31:21,00:08:20,00:04:35
4,00:40:19,00:53:58,00:11:14,00:06:38
5,00:25:49,00:44:51,00:08:50,00:06:35
6,,00:52:22,00:52:33,00:50:45
7,,00:39:18,00:49:13,00:28:18
8,,01:12:30,00:52:57,00:51:01
9,,00:52:18,00:49:02,00:29:25


In [11]:
from experiments.data.data import UNI_DATASETS, MULTI_DATASETS
import numpy as np
datasets = UNI_DATASETS + MULTI_DATASETS

In [13]:
df['data-name'] = np.array(datasets)[df['data_id']]
df['method'] = df['ID']
df.to_csv('runtimes.csv', index=False)