In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the memory log: whitespace-separated fields, first line skipped,
# and no header row (columns are named in a later cell).
memory = pd.read_csv(
    '../dpss_memory_log.txt',
    header=None,
    skiprows=1,
    sep=r'\s+',
)

In [None]:
# Name the raw columns, then keep only `bytes`, indexed by the combined
# date + time timestamp.
# NOTE: this cell rebinds `memory`, so it must run exactly once after the load cell.
memory.columns = ['date', 'time', 'bytes', 'size', 'size_unit']
memory_timestamps = pd.to_datetime(memory['date'] + ' ' + memory['time'])
memory = (
    memory
    .drop(columns=['size', 'size_unit'])
    .set_index(memory_timestamps)
    .drop(columns=['date', 'time'])
)

In [None]:
# Preview the first rows of the cleaned memory frame
memory.head(n=5)

In [None]:
# Line plot of the memory frame; the datetime index provides the x-axis
memory.plot.line(figsize=(20, 6))

In [None]:
# Read the main log one line per row. '\0' is passed as the separator so that
# (presumably) no split happens and each whole line lands in a single column.
# `squeeze=True` was removed from read_csv in pandas 2.0, so the single column
# is selected explicitly instead.
raw_lines = pd.read_csv('../dpss_log.txt', sep='\0', header=None).iloc[:, 0]

# Split every line into at most three fields: date, time, and the rest as the message
main_log = raw_lines.str.split(' ', n=2, expand=True)
main_log.columns = ['date', 'time', 'message']

# Keep only lines whose first field looks like a YYYY-MM-DD date;
# na=False treats missing values as non-matching so the mask is strictly boolean.
main_log = main_log[main_log['date'].str.match(r'\d{4}-\d{2}-\d{2}', na=False)]

# Index by the combined timestamp and drop the now-redundant text columns
main_log = main_log.set_index(pd.to_datetime(main_log['date'] + ' ' + main_log['time'])).drop(['date', 'time'], axis=1)

# Drop duplicated time indices, keeping the first entry for each timestamp
main_log = main_log[~main_log.index.duplicated(keep='first')]
main_log.head()

In [None]:
# Keep only the log entries whose timestamp lies within the time span covered
# by the memory samples (both ends inclusive).
start, stop = memory.index.min(), memory.index.max()
# Vectorised comparison instead of a per-element Python lambda; the result is
# always a boolean array, so it is used as a row mask.
in_memory_window = (main_log.index >= start) & (main_log.index <= stop)
main_log = main_log[in_memory_window]

In [None]:
# For every log timestamp, find the insertion position in the memory index
# (searchsorted), read the `bytes` value at that position, and remember the
# log message to use as the annotation for that point.
indices = []
points = {}
annotations = {}
for log_time in main_log.index:
    sample_pos = memory.index.searchsorted(log_time)
    indices.append((log_time, sample_pos))
    points[log_time] = memory.iloc[sample_pos]['bytes']
    annotations[log_time] = main_log.loc[log_time, 'message']

In [None]:
# Overlay the log events on the memory curve: one numbered red vertical line
# per event, with the legend mapping each number back to its log message.
ax = memory.plot(figsize=(30, 6))
for i, (x, y) in enumerate(points.items()):
    # Plotted mainly so that the event's message gets a labelled legend entry
    ax.plot(x, y, label=f'{i}. {annotations[x]}', c='red')
    ax.axvline(x=x, color='red')
    # Pass the annotation text positionally: the keyword was renamed from `s`
    # to `text` in matplotlib 3.3, so `s=` fails on current releases while the
    # positional form works on both old and new versions.
    ax.annotate(str(i), xy=(x, y), ha='center')
ax.legend(bbox_to_anchor=(0.75, -0.15))