In [1]:
import re
from pathlib import Path

In [2]:
# Directory scanned for log files; the path is relative, so it resolves
# against the kernel's current working directory.
logs_path = Path("logs")

In [24]:
# Regular files directly inside logs_path whose suffix is ".log", in sorted
# path order (sub-directories are not descended into).
log_files = sorted(
    entry
    for entry in logs_path.iterdir()
    if entry.is_file() and entry.suffix == ".log"
)

In [51]:
# Regexes that classify one log line each. Every pattern captures the timestamp
# as group 1; any further groups are the payload fields of that message.
#
# The patterns are assembled from shared fragments so that the timestamp and
# level prefix are written once and stay consistent across all patterns.

# Timestamp "YYYY-MM-DD HH:MM:SS.fff". One or more fractional digits are accepted
# everywhere (previously some patterns required exactly three, others any number).
_TIMESTAMP = r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)"

# Level column: the level name padded with spaces, then the " | " separator.
_INFO = _TIMESTAMP + r" \| INFO     \| "
_DEBUG = _TIMESTAMP + r" \| DEBUG    \| "
_SUCCESS = _TIMESTAMP + r" \| SUCCESS  \| "

# Logger location prefixes ("module:function:"); the source line number is
# appended per pattern. Dots are escaped so they only match a literal ".",
# and the sub-module part may not run across a ":" separator.
_MAIN = r"__main__:run_evaluation:"
_LLM_INFERENCE = r"src\.llms\.[^:]*:inference:"
_TASK_EVALUATE = r"src\.tasks\.[^:]*:evaluate:"

# --- messages logged from __main__:run_evaluation ---------------------------
# Groups: timestamp, models, tasks, prompting (each as raw text).
main_program_pattern = re.compile(_INFO + _MAIN + r"42 - Models: (.*) \| Tasks: (.*) \| Prompting: (.*)")
# Groups: timestamp, then the three fields of "Running X on Y with Z".
loop_combination_pattern = re.compile(_INFO + _MAIN + r"78 - Running (.*) on (.*) with (.*)")
# Groups: timestamp, text after "Running ".
model_start_pattern = re.compile(_INFO + _MAIN + r"106 - Running (.*)")
# Groups: timestamp, integer after "Skipping ".
# (The original cell tagged this pattern "OR" - presumably an alternative to the
# prompt line below; confirm against the logging code.)
skip_evaluation_pattern = re.compile(_INFO + _MAIN + r"110 - Skipping (\d+)")
# Groups: timestamp, first line of the DEBUG message logged at line 116.
start_prompt_pattern = re.compile(_DEBUG + _MAIN + r"116 - (.*)")

# --- messages logged from src.llms.<module>:inference -----------------------
# Groups: timestamp, text after "Generating response from ".
inference_start_pattern = re.compile(_INFO + _LLM_INFERENCE + r"17 - Generating response from (.*)")
# Groups: timestamp only. (Tagged "OR" in the original cell - presumably an
# alternative to the answer line below; confirm.)
no_response_pattern = re.compile(_DEBUG + _LLM_INFERENCE + r"23 - No response generated, returning empty string")
# Groups: timestamp, first line of the DEBUG message logged at line 25.
start_answer_pattern = re.compile(_DEBUG + _LLM_INFERENCE + r"25 - (.*)")
# Groups: timestamp, text after "Response generated from ".
metadata_pattern = re.compile(_SUCCESS + _LLM_INFERENCE + r"26 - Response generated from (.*)")

# --- messages logged from src.tasks.<module>:evaluate -----------------------
# Groups: timestamp only. (Tagged "OR" in the original cell - presumably an
# alternative to the patterns below; confirm.)
failed_extraction_pattern = re.compile(_DEBUG + _TASK_EVALUATE + r"76 - Could not extract prediction from response")
# Groups: timestamp only.
failed_extraction_empty_pattern = re.compile(_DEBUG + _TASK_EVALUATE + r"55 - Could not extract prediction from response as response is empty")
# Groups: timestamp, prediction, answer.
prediction_pattern = re.compile(_DEBUG + _TASK_EVALUATE + r"74 - Prediction: (.*), Answer: (.*)")

# --- running score logged from __main__:run_evaluation ----------------------
# Groups: timestamp, correct count, total count, percentage (without the "%").
current_progress_pattern = re.compile(_INFO + _MAIN + r"140 - Correct: (\d+)/(\d+) \((\d+\.\d+)%\)")

In [None]:
# Walk the first log file and classify each line with the regexes compiled in
# the pattern cell. Lines that match no pattern are treated as continuation
# lines of a multi-line prompt/answer while `in_content_loop` is set; outside
# of that state an unmatched line is an error.
if not log_files:
    raise FileNotFoundError("No .log files to parse: log_files is empty")

# Read everything up front so the file handle is closed before parsing starts.
# NOTE(review): the line layout looks like loguru's, whose file sink writes
# UTF-8 by default - confirm; an explicit encoding avoids platform-dependent
# decoding of the logged prompts and responses.
with open(log_files[0], "r", encoding="utf-8") as f:
    log_lines = f.read().splitlines()

# Only this many leading lines are parsed.
MAX_LINES_TO_PARSE = 100

# Parser state. All flags are initialised here so they are always defined once
# the cell has run, whichever branches the loop happens to take.
current_model = None
current_task = None
current_prompting = None
current_content = None          # text of the prompt/answer being accumulated
in_content_loop = False         # True while unmatched lines belong to current_content
is_current_a_prompt = False
is_current_a_response = False
# `obj` and `counter` are not read anywhere in this cell; kept as defined.
obj = {
    "prompt": None,
}
counter = 0

# Ordered (kind, pattern) pairs. Order matters: a line is classified by the
# FIRST pattern that matches, exactly like an if/elif chain.
_LINE_PATTERNS = (
    ("main", main_program_pattern),
    ("loop", loop_combination_pattern),
    ("model_start", model_start_pattern),
    ("skip", skip_evaluation_pattern),
    ("prompt", start_prompt_pattern),
    ("inference", inference_start_pattern),
    ("answer", start_answer_pattern),
    ("metadata", metadata_pattern),
    ("no_response", no_response_pattern),
    ("failed_extraction", failed_extraction_pattern),
    ("failed_extraction_empty", failed_extraction_empty_pattern),
    ("prediction", prediction_pattern),
    ("progress", current_progress_pattern),
)


def _classify_line(line):
    """Return ``(kind, match)`` for the first entry of ``_LINE_PATTERNS`` matching ``line``.

    Each pattern is tried once, anchored at the start of the line.
    Returns ``(None, None)`` when no pattern matches.
    """
    for kind, pattern in _LINE_PATTERNS:
        match = pattern.match(line)
        if match:
            return kind, match
    return None, None


for line in log_lines[:MAX_LINES_TO_PARSE]:
    kind, match = _classify_line(line)

    if kind == "main":
        print("=> Main match")
        log_time, models, tasks, prompting = match.groups()
        # The three fields are comma-separated lists in the log message.
        models = models.split(", ")
        tasks = tasks.split(", ")
        prompting = prompting.split(", ")
    elif kind == "loop":
        print("=> Loop match")
        log_time, model, task, prompting = match.groups()
        current_model = model
        current_task = task
        current_prompting = prompting
    elif kind == "model_start":
        print("=> [Model start match] " + line)
        log_time, model = match.groups()
        current_model = model
        is_current_a_prompt = False
        is_current_a_response = False
    elif kind == "skip":
        # Recognised so it does not count as an unknown line; nothing is recorded.
        pass
    elif kind == "prompt":
        print("=> [Prompt match] " + line)
        log_time, content = match.groups()
        # First line of a (possibly multi-line) prompt: start accumulating.
        current_content = content + "\n"
        in_content_loop = True
        is_current_a_prompt = True
    elif kind == "inference":
        print("=> [Inference match] " + line)
        log_time, model = match.groups()
        in_content_loop = False
        is_current_a_prompt = False
    elif kind == "answer":
        print("=> [Answer match] " + line)
        log_time, content = match.groups()
        # First line of a (possibly multi-line) answer: start accumulating.
        current_content = content + "\n"
        in_content_loop = True
        is_current_a_response = True
    elif kind == "metadata":
        print("=> [Metadata match] " + line)
        log_time, model = match.groups()
        in_content_loop = False
        is_current_a_response = False
    elif kind == "no_response":
        print("=> [No response match] " + line)
        log_time = match.groups()[0]
        in_content_loop = False
        is_current_a_response = False
    elif kind == "failed_extraction":
        print("=> [Failed extraction match] " + line)
        log_time = match.groups()[0]
        in_content_loop = False
        is_current_a_response = False
    elif kind == "failed_extraction_empty":
        print("=> [Failed extraction empty match] " + line)
        log_time = match.groups()[0]
        in_content_loop = False
        is_current_a_response = False
    elif kind == "prediction":
        print("=> [Prediction match] " + line)
        log_time, prediction, answer = match.groups()
        in_content_loop = False
        is_current_a_response = False
    elif kind == "progress":
        print("=> [Progress match] " + line)
        log_time, correct, total, percentage = match.groups()
        in_content_loop = False
        is_current_a_response = False
    elif in_content_loop:
        # No pattern matched: continuation line of the current prompt/answer.
        print("=> [ADD] " + line)
        current_content += line + "\n"
    else:
        # ValueError is a subclass of Exception, so existing handlers still apply.
        raise ValueError("Unknown line: " + line)