In [1]:
# File-name prefixes of the input logs. The loading loop builds each path as
# '../<prefix>_<run>.txt', so set these to the prefix without the run suffix.
MCPERF_FILENAME = '[replace_me]mcperf'  # prefix of the mcperf measurement files
JOBS_FILENAME = '[replace_me]jobs'  # prefix of the job event log files

In [50]:
import re
import pandas as pd
from datetime import datetime

def parse_mcperf(filename):
    """Parse an mcperf output file into its timestamps and a results table.

    Expected layout: the first two lines each carry a timestamp as their
    third whitespace-separated token (start, then end), the third line is
    discarded, and the remainder is a whitespace-aligned table whose first
    non-blank line holds the column names.

    Args:
        filename: Path of the mcperf output file.

    Returns:
        A ``(mcperf_start, mcperf_end, mcperf_df)`` tuple. Both timestamps
        are returned as strings without surrounding whitespace.
        ``mcperf_df`` has one row per table line; the ``#type`` column is
        cast to ``str`` and ``p95`` / ``QPS`` to ``float``, all other
        columns keep the raw text.

    Raises:
        IndexError: If a timestamp line has fewer than three tokens or the
            file contains no table header.
    """
    with open(filename, 'r') as f:
        # split() without a separator collapses runs of whitespace and drops
        # the trailing newline, so the token comes back clean.
        mcperf_start = f.readline().split()[2]
        mcperf_end = f.readline().split()[2]
        f.readline()  # third line is not part of the table

        # Blank lines are skipped: they would otherwise become all-NaN rows
        # (with '#type' turned into the string 'nan' by the cast below).
        rows = [line.split() for line in f if line.strip()]

    headers, records = rows[0], rows[1:]
    datapoints = [dict(zip(headers, fields)) for fields in records]

    mcperf_df = pd.DataFrame(data=datapoints)
    mcperf_df = mcperf_df.astype(
        {"#type": str, 'p95': float, 'QPS': float})
    return mcperf_start, mcperf_end, mcperf_df

def parse_jobs(filename):
    """Parse a job event log into a DataFrame with one row per event.

    Every non-blank line is whitespace separated and starts with
    ``<ISO timestamp> <event type> <job>``. Additional fields are read for
    three event types:

    * ``start`` of any job other than ``scheduler``: a core list such as
      ``[1,2,3]`` and a thread count, stored as ``initial_cores`` and
      ``initial_threads``.
    * ``update_cores``: a core list, stored as ``cores``.
    * ``custom``: a single token, stored as ``comment``.

    Core lists are kept as lists of strings. Each parsed event is also
    printed together with its raw columns.

    Args:
        filename: Path of the job log file.

    Returns:
        A DataFrame with the columns ``timestamp``, ``type`` and ``job``
        plus whichever optional columns occurred in the log.

    Raises:
        ValueError: If a timestamp is not in ISO format or a thread count is
            not an integer.
        IndexError: If a line lacks the fields its event type requires.
    """
    def _parse_cores(field):
        # '[1,2,3]' -> ['1', '2', '3']; an empty list '[]' yields [] rather
        # than [''].
        return [core for core in field.strip('[]').split(',') if core]

    events = []
    with open(filename, 'r') as f:
        for line in f:
            # split() tolerates repeated separators and the trailing newline.
            columns = line.split()
            if not columns:
                # A blank (e.g. trailing) line has no timestamp to parse.
                continue
            event = {
                'timestamp': datetime.fromisoformat(columns[0]),
                'type': columns[1],
                'job': columns[2]
            }
            # Per-event trace output of this function.
            print(event, columns)
            if event['type'] == 'start' and event['job'] != 'scheduler':
                event['initial_cores'] = _parse_cores(columns[3])
                event['initial_threads'] = int(columns[4])
            elif event['type'] == 'update_cores':
                event['cores'] = _parse_cores(columns[3])
            elif event['type'] == 'custom':
                event['comment'] = columns[3]
            events.append(event)
    jobs_df = pd.DataFrame(data=events)
    return jobs_df

In [51]:

# Load the mcperf results and the job event log of each selected run. The
# results are bound to notebook-level names, so every iteration overwrites
# the data of the previous run.
for run in [1]: # FIXME: only run 1 is processed; intended list is [1, 2, 3]
    # Input files are looked up in the parent of the working directory.
    mcperf_path = f'../{MCPERF_FILENAME}_{run}.txt'
    jobs_path = f'../{JOBS_FILENAME}_{run}.txt'

    mcperf_start, mcperf_end, mcperf_df = parse_mcperf(mcperf_path)
    jobs_df = parse_jobs(jobs_path)
    # Show the column dtypes pandas inferred for the job events.
    print(jobs_df.dtypes)
    

{'timestamp': datetime.datetime(2023, 4, 12, 11, 13, 46, 591828), 'type': 'start', 'job': 'scheduler'} ['2023-04-12T11:13:46.591828', 'start', 'scheduler']
{'timestamp': datetime.datetime(2023, 4, 12, 11, 13, 46, 591857), 'type': 'start', 'job': 'memcached'} ['2023-04-12T11:13:46.591857', 'start', 'memcached', '[0]', '3']
{'timestamp': datetime.datetime(2023, 4, 12, 11, 14, 31, 663978), 'type': 'start', 'job': 'canneal'} ['2023-04-12T11:14:31.663978', 'start', 'canneal', '[1,2,3]', '8']
{'timestamp': datetime.datetime(2023, 4, 12, 11, 14, 40, 976039), 'type': 'pause', 'job': 'canneal'} ['2023-04-12T11:14:40.976039', 'pause', 'canneal']
{'timestamp': datetime.datetime(2023, 4, 12, 11, 15, 12, 245838), 'type': 'update_cores', 'job': 'memcached'} ['2023-04-12T11:15:12.245838', 'update_cores', 'memcached', '[2,3]']
{'timestamp': datetime.datetime(2023, 4, 12, 11, 15, 23, 687179), 'type': 'update_cores', 'job': 'canneal'} ['2023-04-12T11:15:23.687179', 'update_cores', 'canneal', '[0,1,2,3]'