In [203]:
import re
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timedelta
import gzip

In [209]:
# Matches one complete log entry of the form
#   [<asctime>] [<name>] [<levelname>] <message>
# where <name> is exactly 16 characters and <levelname> exactly 9 characters
# wide (presumably space-padded by the log formatter; parse_log_file strips
# both after matching). The pattern is applied with fullmatch(), so the lazy
# <message> group ends up covering everything to the end of the entry.
entry_pattern = re.compile(
    r"\[(?P<asctime>.+?)\]\s+\[(?P<name>.{16})\]\s+\[(?P<levelname>.{9})\]\s+(?P<message>.+?)",
    # DOTALL lets "." match newlines, so <message> can span several lines.
    flags=re.DOTALL,
)

@dataclass
class EntryData:
    """One parsed log entry, as produced by ``parse_log_file``."""

    # Timestamp parsed from the entry's ``asctime`` field.
    time: datetime
    # Logger name (``name`` field) with surrounding whitespace stripped.
    logger_name: str
    # Log level (``levelname`` field) with surrounding whitespace stripped.
    log_level_name: str
    # Message text; continuation lines are merged into a single string.
    message: str

In [174]:
@dataclass
class Interval:
    start: datetime | None = None
    end: datetime | None = None
    
    @property
    def duration(self) -> timedelta:
        return self.end - self.start
    
    def update(self, other: Interval):
        if self.start is None or self.start > other.start:
            self.start = other.start

        if self.end is None or self.end < other.end:
            self.end = other.end
        
        return self

In [210]:
def parse_log_file(file_path: str) -> "list[EntryData]":
    """Parse a gzip-compressed log file into a list of ``EntryData`` records.

    A line beginning with ``[`` starts a new entry. Lines beginning with
    ``│`` or ``└`` are continuation lines and are appended to the entry that
    precedes them. Each assembled entry must match ``entry_pattern``.

    Args:
        file_path: Path to the gzip-compressed log file. Its lines are
            decoded with the default codec of ``bytes.decode`` (UTF-8).

    Returns:
        One ``EntryData`` per log entry, in file order.

    Raises:
        ValueError: If a line is neither an entry start nor a continuation
            line, if a continuation line appears before any entry, or if an
            assembled entry does not match ``entry_pattern``.
    """
    entries: list[str] = []

    # Stream the file line by line instead of loading every line up front.
    with gzip.open(file_path) as file_handle:
        for line_number, raw_line in enumerate(file_handle, start=1):
            line = raw_line.decode()

            if line.startswith("["):
                entries.append(line)
            elif line.startswith(("│", "└")):
                if not entries:
                    raise ValueError(
                        f"{file_path}:{line_number}: continuation line before any log entry: {line!r}"
                    )
                entries[-1] += line
            else:
                raise ValueError(
                    f"{file_path}:{line_number}: unrecognised log line: {line!r}"
                )

    entry_data = []

    for entry in entries:
        # Check the match *before* touching it: fullmatch() returns None for
        # a malformed entry, and calling .groupdict() on None would hide the
        # real problem behind an AttributeError.
        match = entry_pattern.fullmatch(entry)
        if match is None:
            raise ValueError(f"log entry does not match the expected format: {entry!r}")
        fields = match.groupdict()

        # "00" is appended to the fractional part before parsing with %f.
        # NOTE(review): presumably asctime carries four fractional digits and
        # this pads them to microseconds - confirm against the log format.
        time = datetime.strptime(fields["asctime"] + "00", "%Y-%m-%d %H:%M:%S,%f")

        logger_name = fields["name"].strip()
        log_level_name = fields["levelname"].strip()

        # Drop the leading marker character of every continuation line and
        # glue the pieces together without any separator.
        message_lines = fields["message"].splitlines()
        message = message_lines[0] + "".join(part[1:] for part in message_lines[1:])

        entry_data.append(EntryData(
            time=time,
            logger_name=logger_name,
            log_level_name=log_level_name,
            message=message,
        ))

    return entry_data

In [211]:
# Logs numbered 0-2 plus the one named "3in1".
log_paths = [f"log.{suffix}.txt.gz" for suffix in (0, 1, 2, "3in1")]

durations = []

for log_path in log_paths:
    log = parse_log_file(log_path)

    # Time elapsed between the first and the last entry of this log.
    span = Interval(start=log[0].time, end=log[-1].time)
    duration = span.duration
    durations.append(duration)

    print(log_path, duration)

log.0.txt.gz 1:39:39.995800
log.1.txt.gz 1:36:11.078000
log.2.txt.gz 1:33:45.012900
log.3in1.txt.gz 1:43:29.991900


In [212]:
# Sum of the first three durations only; the "3in1" log (last in log_paths)
# is left out. timedelta() is the start value so sum() can add timedeltas.
total_duration = sum(durations[:3], timedelta())
print(total_duration)

4:49:36.086700
