In [66]:
import re
from datetime import datetime

# Models whose training runs are timed, in the order they are matched against
# an initialization line (the first listed name contained in the line wins).
MODEL_NAMES = ['SAITS', 'Transformer', 'TimesNet', 'CSDI', 'GPVAE', 'USGAN', 'BRITS', 'MRNN']

# Regex patterns for initialization and completion messages
init_pattern = r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).* initialized with the given hyperparameters'
finish_pattern = r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).* Finished training.'

# Layout of the timestamp captured by group 1 of both patterns
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def parse_training_times(lines, model_names):
    """Collect start/end timestamps of training runs from log lines.

    Args:
        lines: iterable of log lines (strings).
        model_names: names to look for (by substring) in initialization lines.

    Returns:
        dict mapping each name in ``model_names`` to a list of
        ``{'start': datetime, 'end': datetime or None}`` dicts, one per
        initialization line found for that model, in log order. ``end`` stays
        ``None`` when no completion line followed before the next
        initialization line.
    """
    training_times = {name: [] for name in model_names}
    current_model = None

    for line in lines:
        # Check for model initialization
        init_match = re.search(init_pattern, line)
        if init_match:
            # Every initialization line opens a new context. If it names no
            # tracked model, current_model becomes None so that a later
            # "Finished training" line is not credited to a stale model.
            current_model = next((name for name in model_names if name in line), None)
            if current_model is not None:
                started_at = datetime.strptime(init_match.group(1), TIMESTAMP_FORMAT)
                training_times[current_model].append({'start': started_at, 'end': None})

        # Check for model training completion
        finish_match = re.search(finish_pattern, line)
        if finish_match and current_model is not None:
            finished_at = datetime.strptime(finish_match.group(1), TIMESTAMP_FORMAT)
            # The open run is always the last one appended for current_model.
            training_times[current_model][-1]['end'] = finished_at
            current_model = None

    return training_times


def summarize_durations(training_times):
    """Compute per-model training durations (in hours) and their average.

    Args:
        training_times: dict as returned by ``parse_training_times``.

    Returns:
        dict mapping model name to
        ``{'durations': [hours, ...], 'average_duration': hours}``.
        Runs without both a start and an end are ignored, and models with no
        complete run are omitted.
    """
    summary = {}
    for model, runs in training_times.items():
        durations = [
            (run['end'] - run['start']).total_seconds() / 3600.0  # Convert to hours
            for run in runs
            if run['start'] is not None and run['end'] is not None
        ]
        if durations:
            summary[model] = {
                'durations': durations,
                'average_duration': sum(durations) / len(durations)
            }
    return summary


# In a notebook kernel __name__ is "__main__", so this runs whenever the cell
# is executed; the guard only keeps the file I/O from firing if the helpers
# above are imported from elsewhere.
if __name__ == "__main__":
    # Path to the log file
    log_file_path = 'ettm1.log'

    # Read the file content; UTF-8 is given explicitly because the log contains
    # non-ASCII characters such as '±' and is read as UTF-8 elsewhere too.
    with open(log_file_path, 'r', encoding='utf-8') as log_file:
        log_content = log_file.read()

    # Split the log content into lines
    lines = log_content.split('\n')

    # Parse run timestamps, then calculate training durations and averages
    model_training_times = parse_training_times(lines, MODEL_NAMES)
    model_durations = summarize_durations(model_training_times)

    # Display the results
    for model, data in model_durations.items():
        print(f"Model: {model}")
        print(f"Training Durations (hours): {data['durations']}")
        print(f"Average Duration (hours): {data['average_duration']:.4f}h\n")


Model: SAITS
Training Durations (hours): [0.006388888888888889, 0.0033333333333333335, 0.008888888888888889, 0.008888888888888889, 0.006388888888888889]
Average Duration (hours): 0.0068h

Model: Transformer
Training Durations (hours): [0.004722222222222222, 0.005277777777777778, 0.004722222222222222, 0.005277777777777778, 0.0030555555555555557]
Average Duration (hours): 0.0046h

Model: TimesNet
Training Durations (hours): [0.0022222222222222222, 0.0022222222222222222, 0.0030555555555555557, 0.0025, 0.003611111111111111]
Average Duration (hours): 0.0027h

Model: CSDI
Training Durations (hours): [0.06333333333333334, 0.03805555555555556, 0.035555555555555556, 0.0425, 0.03111111111111111]
Average Duration (hours): 0.0421h

Model: GPVAE
Training Durations (hours): [0.008333333333333333, 0.0077777777777777776, 0.007222222222222222, 0.006944444444444444, 0.008333333333333333]
Average Duration (hours): 0.0077h

Model: USGAN
Training Durations (hours): [0.26944444444444443, 0.3238888888888889,

In [67]:
def format_metric_lines(content, dataset):
    """Extract per-model MAE/MSE summaries for one dataset from log text.

    Looks for entries of the form
    ``<model> on data_overlay_premask/<dataset>: MAE=a±b, MSE=c±d`` and
    re-formats each with three decimals.

    Args:
        content: full text of the log.
        dataset: dataset name as it appears after ``data_overlay_premask/``;
            it is matched literally (regex metacharacters are escaped).

    Returns:
        list of strings ``"<model>: MAE=a±b, MSE=c±d"`` in order of appearance;
        empty if nothing matches.
    """
    pattern = r'(\w+)\son\sdata_overlay_premask/{}:\sMAE=([0-9.]+)±([0-9.]+),\sMSE=([0-9.]+)±([0-9.]+)'.format(re.escape(dataset))
    return [
        f"{model}: MAE={float(mae):.3f}±{float(mae_err):.3f}, MSE={float(mse):.3f}±{float(mse_err):.3f}"
        for model, mae, mae_err, mse, mse_err in re.findall(pattern, content)
    ]


# In a notebook kernel __name__ is "__main__", so this runs whenever the cell
# is executed; the guard only keeps the file I/O from firing if the helper
# above is imported from elsewhere.
if __name__ == "__main__":
    # Log file and the dataset name used inside it; switch both together.
    file_path, dataset_name = 'ettm1.log', 'ettm1'
    # file_path, dataset_name = 'air.log', 'air_quality'

    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    results = format_metric_lines(content, dataset_name)

    for result in results:
        print(result)

SAITS: MAE=0.166±0.016, MSE=0.054±0.011
Transformer: MAE=0.137±0.007, MSE=0.039±0.004
TimesNet: MAE=0.115±0.001, MSE=0.029±0.000
CSDI: MAE=0.133±0.011, MSE=0.042±0.007
GPVAE: MAE=0.289±0.004, MSE=0.178±0.007
USGAN: MAE=0.159±0.006, MSE=0.063±0.004
BRITS: MAE=0.141±0.003, MSE=0.057±0.002
MRNN: MAE=0.653±0.116, MSE=1.121±0.265
LOCF: MAE=0.142±0.000, MSE=0.079±0.000
Median: MAE=0.653±0.000, MSE=0.815±1.110
Mean: MAE=0.659±0.000, MSE=0.799±0.000
