In [1]:
import os
from datetime import datetime, timedelta
import pandas as pd

In [2]:
current_dir = os.getcwd()

config_name_pattern = 'setr_vit-l'

def listdir(*rel_paths):
    return os.listdir(os.path.join(current_dir, *rel_paths))

def isdir(*rel_paths):
    return os.path.isdir(os.path.join(current_dir, *rel_paths))


In [3]:
work_dirs = os.listdir()
print(work_dirs)
matching_work_dirs = filter(lambda x: config_name_pattern in x, work_dirs)

['setr_vit-l_pup-noaux_8xb2-160k_ade20k-512x512', 'setrvpt_vit-b-224_pup-noaux_8xb2-160k_ade20k-512x512', 'setrvpt_vit-l_pup-noaux_8xb2-160k_ade20k-512x512', 'setrvpt_vit-l_pup_8xb2-160k_ade20k-512x512', 'setrvpt_vit-l_pup_8xb2-160k_cbis-ddsm-binary-512x512', 'setr_vit-l_pup_8xb2-160k_ade20k-512x512', 'setrvpt_vit-l_pup_1xb16-160k_ade20k-512x512', 'results.ipynb', 'setr_vit-b-224_pup-noaux_8xb2-160k_ade20k-512x512']


In [4]:
logfiles = []
for work_dir in matching_work_dirs:
    for run_dir in listdir(work_dir):
        if not isdir(work_dir, run_dir):
            continue
        logfile = filter(lambda x: '.log' in x, listdir(work_dir, run_dir)).__next__()
        logfiles.append(os.path.join(work_dir, run_dir, logfile))
print(logfiles)



StopIteration: 

In [65]:
def parse_train_line(line):
    parsed_line = {}
    t_str = line.split('- mmengine -')[0].strip()
    t = datetime.strptime(t_str, '%Y/%m/%d %H:%M:%S')
    iteration_str = line.split('Iter(train) [')[1].split(']')[0].strip().split('/')[0]
    iteration = int(iteration_str)
    
    parsed_line['at_iteration'] = iteration
    parsed_line['time'] = t
    parsed_line['type'] = 'train'
    return parsed_line
    

def parse_val_line(line):
    parsed_line = {}
    t_str = line.split('- mmengine -')[0].strip()
    t = datetime.strptime(t_str, '%Y/%m/%d %H:%M:%S')
    iteration_str = line.split('Iter(val) [')[1].split(']')[0].strip().split('/')[0]
    iteration = int(iteration_str)
    
    parsed_line['at_iteration'] = iteration
    parsed_line['time'] = t
    parsed_line['type'] = 'val'
    return parsed_line

def calculate_time_after(parsed_lines):
    for i, line in enumerate(parsed_lines):
        line['duration_after'] = timedelta(0)
        if i < len(parsed_lines) - 1:
            line['duration_after'] = parsed_lines[i+1]['time'] - line['time']

def calculate_time_before(parsed_lines):
    for i, line in enumerate(parsed_lines):
        line['duration_before'] = timedelta(0)
        if i > 0:
            line['duration_before'] = line['time'] - parsed_lines[i-1]['time']
        
def calculate_inbetween_times(parsed_lines):
    calculate_time_after(parsed_lines)
    calculate_time_before(parsed_lines)

def calculate_iterations_per_line(parsed_lines):
    for i, line in enumerate(parsed_lines):
        if i > 0 and line['type'] == parsed_lines[i-1]['type']:
            line['iterations'] = line['at_iteration'] - parsed_lines[i-1]['at_iteration']
            continue
        line['iterations'] = line['at_iteration']

def calculate_duration_per_iteration(parsed_lines):
    for line in parsed_lines:
        line['duration_per_iteration'] = line['duration_before'] / line['iterations']        
    

def parse_logfile(rel_log_path):
    parsed_lines = []
    train_pattern = "Iter(train)"
    val_pattern = "Iter(val)"
    with open(os.path.join(current_dir, rel_log_path), 'r') as f:
        lines = f.readlines()
        for line in lines:
            parsed_line = None
            if train_pattern in line:
                parsed_line = parse_train_line(line)
            if val_pattern in line:
                parsed_line = parse_val_line(line)
            if parsed_line is None:
                continue
            parsed_lines.append(parsed_line)
    # for now, let's filter out lines that report the same iteration as a previous line
    parsed_lines = [parsed_line for i, parsed_line in enumerate(parsed_lines) if i == 0 or parsed_line['at_iteration'] != parsed_lines[i-1]['at_iteration']]
    calculate_inbetween_times(parsed_lines)
    calculate_iterations_per_line(parsed_lines)
    calculate_duration_per_iteration(parsed_lines)
    return parsed_lines
            
parsed_logfiles = {}
for logfile in logfiles:
    parsed_logfiles[logfile] = parse_logfile(logfile)
    


In [66]:

for logfile in logfiles:
    parsed_logfile = parsed_logfiles[logfile]
    if len(parsed_logfile) == 0:
        continue
    df = pd.DataFrame(parsed_logfiles[logfile])
    display(df)
    aggregate_df = df.groupby('type').agg({'duration_after': ['sum', 'mean', 'std'], 'duration_before': ['sum', 'mean', 'std'], 'iterations': ['sum', 'mean', 'std'], 'duration_per_iteration': ['mean', 'std']})
    display(aggregate_df)

    # percentage of time spent on training vs validation:
    print('percentage of time spent on training vs validation:')
    print(aggregate_df['duration_before']['sum'] / aggregate_df['duration_before']['sum'].sum())


Unnamed: 0,at_iteration,time,type,duration_after,duration_before,iterations,duration_per_iteration
0,50,2023-10-23 14:02:39,train,0 days 00:00:57,0 days 00:00:00,50,0 days 00:00:00
1,100,2023-10-23 14:03:36,train,0 days 00:00:58,0 days 00:00:57,50,0 days 00:00:01.140000
2,150,2023-10-23 14:04:34,train,0 days 00:00:57,0 days 00:00:58,50,0 days 00:00:01.160000
3,200,2023-10-23 14:05:31,train,0 days 00:00:56,0 days 00:00:57,50,0 days 00:00:01.140000
4,250,2023-10-23 14:06:27,train,0 days 00:00:57,0 days 00:00:56,50,0 days 00:00:01.120000
...,...,...,...,...,...,...,...
3295,300,2023-10-25 16:54:35,val,0 days 00:00:06,0 days 00:00:08,50,0 days 00:00:00.160000
3296,350,2023-10-25 16:54:41,val,0 days 00:00:08,0 days 00:00:06,50,0 days 00:00:00.120000
3297,400,2023-10-25 16:54:49,val,0 days 00:00:07,0 days 00:00:08,50,0 days 00:00:00.160000
3298,450,2023-10-25 16:54:56,val,0 days 00:00:07,0 days 00:00:07,50,0 days 00:00:00.140000


Unnamed: 0_level_0,duration_after,duration_after,duration_after,duration_before,duration_before,duration_before,iterations,iterations,iterations,duration_per_iteration,duration_per_iteration
Unnamed: 0_level_1,sum,mean,std,sum,mean,std,sum,mean,std,mean,std
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
train,2 days 02:32:47,0 days 00:00:56.864687500,0 days 00:00:02.158056626,2 days 02:38:12,0 days 00:00:56.966250,0 days 00:00:01.182390725,880000,275.0,4770.375918,0 days 00:00:01.136046960,0 days 00:00:00.064732105
val,0 days 00:19:37,0 days 00:00:11.770000,0 days 00:00:14.779767087,0 days 00:14:12,0 days 00:00:08.520000,0 days 00:00:04.083670363,5000,50.0,0.0,0 days 00:00:00.170400,0 days 00:00:00.081673407


percentage of time spent on training vs validation:
type
train    0.995348
val      0.004652
Name: sum, dtype: float64
