# Collectl Log Analysis

## Functionalities
- Plot CPU utilization graphs.
- Plot memory utilization graphs.
- Plot disk I/O utilization graphs.

## Input
Log files are read from the experiment directory in `../data` whose name is set by `EXPERIMENT_DIRNAME` in the notebook configuration. That directory is assumed to have the following structure:
```
logs/
  [node-1]/
    collectl.tar.gz
  ...
  [node-n]/
    collectl.tar.gz
```
Each `collectl.tar.gz` tarball contains the log files of one node. The log file extension identifies the type of resource monitored:
- `.cpu.gz`: CPU monitoring log file.
- `.numa.gz`: memory monitoring log file.
- `.dsk.gz`: disk I/O monitoring log file.

## Notebook Configuration

In [None]:
########## GENERAL
# Experiment directory to analyze (a directory name inside `../data`)
EXPERIMENT_DIRNAME = "BuzzBlogBenchmark_2021-11-11-16-06-13"

########## CPU
# CPU metric to analyze. One of:
#   "user", "nice", "system", "wait", "irq", "soft", "steal", "idle", "total",
#   "guest", "guest_n", "intrpt"
COLLECTL_CPU_METRIC = "total"
# Core numbers to analyze
COLLECTL_CPU_CORES = range(8)

########## MEMORY
# Memory metric to analyze. One of:
#   "used", "free", "slab", "mapped", "anon", "anonh", "inactive", "hits"
COLLECTL_MEM_METRIC = "free"

########## DISK I/O
# Disk I/O metric to analyze. One of:
#   "reads", "rmerge", "rkbytes", "waitr", "writes", "wmerge", "wkbytes", "waitw",
#   "request", "quelen", "wait", "svctim", "util"
COLLECTL_DSK_METRIC = "quelen"

## Notebook Setup

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import os
import pandas as pd
import sys
import warnings
warnings.filterwarnings("ignore")

sys.path.append(os.path.abspath(os.path.join("..")))
from parsers.collectl_parser import CollectlParser
from utils.utils import *

## Log Parsing

In [None]:
# Build one data frame per monitored resource (CPU, memory, disk)
def _load_collectl_frames(resource, node_logfiles):
    """Parse every (node name, log file) pair and stack the results into one frame.

    Each log file is parsed with ``CollectlParser(logfile, resource)``; the parsed
    output is turned into a data frame tagged with a ``node_name`` column, and all
    frames are concatenated under a fresh integer index.
    """
    frames = []
    for node_name, logfile in node_logfiles:
        parsed = CollectlParser(logfile, resource).parse()
        frames.append(pd.DataFrame.from_dict(parsed).assign(node_name=node_name))
    return pd.concat(frames, ignore_index=True)


cpu = _load_collectl_frames("cpu", get_collectl_cpu_logfiles(EXPERIMENT_DIRNAME))
mem = _load_collectl_frames("mem", get_collectl_mem_logfiles(EXPERIMENT_DIRNAME))
dsk = _load_collectl_frames("dsk", get_collectl_dsk_logfiles(EXPERIMENT_DIRNAME))

In [None]:
# Keep only the rows recorded after the experiment start that carry the configured metric
start_time = get_experiment_start_time(EXPERIMENT_DIRNAME)


def _after_start_with_metric(frame, metric):
    """Boolean mask: rows stamped later than `start_time` whose `hw_metric` equals `metric`."""
    return (frame["timestamp"] > start_time) & (frame["hw_metric"] == metric)


# CPU rows are additionally restricted to the configured core numbers
cpu = cpu[_after_start_with_metric(cpu, COLLECTL_CPU_METRIC) & cpu["hw_no"].isin(COLLECTL_CPU_CORES)]
mem = mem[_after_start_with_metric(mem, COLLECTL_MEM_METRIC)]
dsk = dsk[_after_start_with_metric(dsk, COLLECTL_DSK_METRIC)]

In [None]:
# (Re) Build columns: replace each timestamp by the number of seconds elapsed since
# the experiment start.
# The subtraction is applied to the whole column at once instead of calling a Python
# lambda per row through `DataFrame.apply(axis=1)`. `pd.to_timedelta` normalizes the
# difference to a timedelta dtype so that the `.dt` accessor is available.
cpu["timestamp"] = pd.to_timedelta(cpu["timestamp"] - start_time).dt.total_seconds()
mem["timestamp"] = pd.to_timedelta(mem["timestamp"] - start_time).dt.total_seconds()
dsk["timestamp"] = pd.to_timedelta(dsk["timestamp"] - start_time).dt.total_seconds()

In [None]:
# Re-index every frame by its "timestamp" column (the column itself is dropped)
cpu = cpu.set_index("timestamp")
mem = mem.set_index("timestamp")
dsk = dsk.set_index("timestamp")

In [None]:
# Node names of the experiment; the plotting cells below draw one subplot per entry
node_names = get_node_names(EXPERIMENT_DIRNAME)

## CPU Monitoring

In [None]:
########## LOCAL CONFIG
# Minimum time (in seconds); None disables the lower bound
MIN_TIME = None
# Maximum time (in seconds); None disables the upper bound
MAX_TIME = None

# Plot CPU utilization: one subplot per node, one line per `hw_no` value
fig = plt.figure(figsize=(24, len(node_names) * 12))
for (i, node_name) in enumerate(node_names):
    df = cpu[(cpu["node_name"] == node_name)]
    # "timestamp" is the index of `cpu` at this point, not a column, so the time
    # window is applied to the index. Testing against None lets 0 be a valid bound.
    if MIN_TIME is not None:
        df = df[df.index >= MIN_TIME]
    if MAX_TIME is not None:
        df = df[df.index <= MAX_TIME]
    # Average the samples sharing a (timestamp, hw_no) pair, then pivot `hw_no`
    # into columns so that each one is drawn as its own line
    df = df.groupby(["timestamp", "hw_no"])["value"].mean().unstack()
    ax = fig.add_subplot(len(node_names), 1, i + 1)
    ax.set_xlim((df.index.min(), df.index.max()))
    ax.set_ylim((0, 100))
    ax.grid(alpha=0.75)
    df.plot(ax=ax, kind="line", title=f"{node_name} - CPU Utilization", xlabel="Time (seconds)",
            ylabel=f"{COLLECTL_CPU_METRIC} (%)", grid=True, legend=False, yticks=range(0, 101, 10))

## Memory Monitoring

In [None]:
########## LOCAL CONFIG
# Minimum time (in seconds); None disables the lower bound
MIN_TIME = None
# Maximum time (in seconds); None disables the upper bound
MAX_TIME = None

# Plot memory utilization: one subplot per node, one line per `hw_no` value
fig = plt.figure(figsize=(24, len(node_names) * 12))
for (i, node_name) in enumerate(node_names):
    df = mem[(mem["node_name"] == node_name)]
    # "timestamp" is the index of `mem` at this point, not a column, so the time
    # window is applied to the index. Testing against None lets 0 be a valid bound.
    if MIN_TIME is not None:
        df = df[df.index >= MIN_TIME]
    if MAX_TIME is not None:
        df = df[df.index <= MAX_TIME]
    # Average the samples sharing a (timestamp, hw_no) pair, then pivot `hw_no`
    # into columns so that each one is drawn as its own line
    df = df.groupby(["timestamp", "hw_no"])["value"].mean().unstack()
    ax = fig.add_subplot(len(node_names), 1, i + 1)
    ax.set_xlim((df.index.min(), df.index.max()))
    # `df.max().max()` skips the NaN cells that `unstack` leaves for missing
    # (timestamp, hw_no) pairs; `df.values.max()` would return NaN in that case
    # and `set_ylim` rejects NaN limits.
    ax.set_ylim((0, df.max().max()))
    ax.grid(alpha=0.75)
    df.plot(ax=ax, kind="line", title=f"{node_name} - Mem Utilization", xlabel="Time (seconds)",
            ylabel=f"{COLLECTL_MEM_METRIC}", grid=True)

## Disk Monitoring

In [None]:
########## LOCAL CONFIG
# Minimum time (in seconds); None disables the lower bound
MIN_TIME = None
# Maximum time (in seconds); None disables the upper bound
MAX_TIME = None

# Plot disk I/O utilization: one subplot per node, one line per `hw_no` value
fig = plt.figure(figsize=(24, len(node_names) * 12))
for (i, node_name) in enumerate(node_names):
    df = dsk[(dsk["node_name"] == node_name)]
    # "timestamp" is the index of `dsk` at this point, not a column, so the time
    # window is applied to the index. Testing against None lets 0 be a valid bound.
    if MIN_TIME is not None:
        df = df[df.index >= MIN_TIME]
    if MAX_TIME is not None:
        df = df[df.index <= MAX_TIME]
    # Average the samples sharing a (timestamp, hw_no) pair, then pivot `hw_no`
    # into columns so that each one is drawn as its own line
    df = df.groupby(["timestamp", "hw_no"])["value"].mean().unstack()
    ax = fig.add_subplot(len(node_names), 1, i + 1)
    ax.set_xlim((df.index.min(), df.index.max()))
    # `df.max().max()` skips the NaN cells that `unstack` leaves for missing
    # (timestamp, hw_no) pairs; `df.values.max()` would return NaN in that case
    # and `set_ylim` rejects NaN limits.
    ax.set_ylim((0, df.max().max()))
    ax.grid(alpha=0.75)
    df.plot(ax=ax, kind="line", title=f"{node_name} - Disk I/O Utilization", xlabel="Time (seconds)",
            ylabel=f"{COLLECTL_DSK_METRIC}", grid=True)