# fun_alloc_forever experiment

In [None]:
# *NOTE*: run this command to clear the output cells and metadata.
# $ nb-clean clean ./measure_malloc_stat.ipynb 

In [None]:
# generate html
# $ python ./convert_nb.py --filename ./measure_malloc_stat.ipynb --execute

In [None]:
# TODO
# title print
# more descriptions
# move to package modules


In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from typing import Tuple
import yaml
import json
import os
import logging
import requests
from bs4 import BeautifulSoup
import re
from collections import Counter

import seaborn as sns


from typing import Iterable, Any, List, Optional, Union, Callable, TextIO, Dict, Tuple

from utils import *
from utils_fod import *
from utils_plot import *
from convert_nb import generate_report

%matplotlib inline

import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 22})

#https://stackoverflow.com/questions/36288670/how-to-programmatically-generate-markdown-output-in-jupyter-notebooks
from IPython.display import display, Markdown, Latex

pd.set_option('max_colwidth', 800)

def printmd(string):
    """Display ``string`` in the notebook output, rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)

In [None]:
# Feature switches for this notebook.
# Gates the "Experimental results" section (the cell that calls show_all_runs).
SHOW_MALLOC_FOREVER_STAT = True
# NOTE(review): not referenced in the visible cells -- confirm it is still needed.
SHOW_HU_INIT_STAT = False
# NOTE(review): not referenced in the visible cells; presumably toggles HTML
# report generation -- confirm against convert_nb usage.
GENERATE_HTML = True

In [None]:

# Experiment selection: exactly one CONFIG_FILE assignment below should be
# uncommented; the other entries are kept as a list of available configs.

# measurement of malloc time over 160 vps
# CONFIG_FILE = "configs/measure_malloc_stat_cluster_wp2_160vps.yml"

# measurement of spinlock wait time with benchmark and cluster wp 160 vps
CONFIG_FILE = "configs/measure_malloc_stat_cluster_spinlock_wait_160vp.yml"

# measurement of spinlock wait time with benchmark and cluster wp
# CONFIG_FILE = "configs/measure_malloc_stat_cluster_spinlock_wait.yml"

# measurement using cluster wp over more number VPs
# CONFIG_FILE = "configs/measure_malloc_stat_cluster_wp2.yml"

# measurement using cluster wp
# CONFIG_FILE = "configs/measure_malloc_stat_cluster_wp.yml"

# initial measurement
# CONFIG_FILE = "configs/measure_malloc_stat.yml"

# WDT_LONG_MSG = "WDT: Long-running WU warning"

In [None]:
# Show in the rendered output which configuration file this run used.
note_str = "Config file: {}".format(CONFIG_FILE)
printmd(note_str)

In [None]:
# Load the experiment settings selected by CONFIG_FILE.
config = read_config(CONFIG_FILE)

# "format_patch" is optional in the config and defaults to True.
format_patch = config.get("format_patch", True)

In [None]:
# Announce the job list, then collect the jobs described by the config.
note_str = "Here are the fod job links used for testing. "
printmd(note_str)

# Each collected job is later read via its "console_response" and "job_no" keys.
collected_jobs = fod_extract_pages(config)

In [None]:
def generate_summary(collected_jobs: List[Dict[str, Any]], pattern: str, patch: bool=False, plot_summary: bool=True, plot_hist: bool=True, skip_summary_state: bool=False) -> List[pd.DataFrame]:
    """Build one DataFrame per job from the console lines matching ``pattern``.

    Args:
        collected_jobs: Job records; each must provide "console_response" and,
            when anything is printed or plotted as a histogram, "job_no".
        pattern: Passed to ``fod_extract_lines_with_pattern_from_console`` to
            select the console lines of interest.
        patch: Forwarded to ``fod_extract_json_from_line``.
        plot_summary: When true, draw a bar chart of each job's DataFrame.
        plot_hist: When true, draw one histogram per statistic column
            ("avg", "min", "max", "dev") for each job.
        skip_summary_state: When true, suppress the per-job printout and the
            DataFrame display.

    Returns:
        The DataFrames, in the same order as ``collected_jobs``.
    """
    hist_bins = 40
    hist_columns = ("avg", "min", "max", "dev")

    dfs = []
    for job in collected_jobs:
        filtered_lines = fod_extract_lines_with_pattern_from_console(
            job["console_response"], pattern
        )
        extracted_json = fod_extract_json_from_line(filtered_lines, patch=patch)
        df = pd.DataFrame(extracted_json)
        dfs.append(df)

        if not skip_summary_state:
            print("job_no: {}".format(job["job_no"]))
            print("len of df: {}".format(len(df)))
            display(df)

        if plot_summary:
            df.plot(kind="bar", figsize=(20, 10), fontsize=10)
            plt.show()

        if plot_hist:
            # The title is derived from the column name so that every
            # histogram is labelled with the statistic it actually shows
            # (the "dev" histogram used to be titled "max").
            for column in hist_columns:
                title = "Histogram of {} of job {}".format(column, job["job_no"])
                plot_histogram(df, column, title, "usec", "count", bins=hist_bins)
    return dfs

In [None]:
# iterate over the different search patterns (one per number of parallel VPs)

def show_all_runs(npars: List[int], col_titles: List[str]):
    """Summarise and plot every collected job once per entry of ``npars``.

    For each value the search pattern and title are resolved from the
    module-level ``config`` (falling back to built-in defaults), a Markdown
    heading is printed, the per-job DataFrames are gathered with
    ``generate_summary`` over the module-level ``collected_jobs`` and then
    handed to ``plot_2_subplot``.

    Args:
        npars: Values substituted into the search pattern and the title.
        col_titles: Forwarded to ``plot_2_subplot``.

    Returns:
        A list with one entry per element of ``npars``; each entry is the
        list of DataFrames returned by ``generate_summary``.
    """
    per_run_frames = []
    for vp_count in npars:
        # A pattern without a "{}" placeholder is used verbatim.
        pattern_template = config.get("search_pattern", "TEST_2,nparallel {},")
        search_pattern = (
            pattern_template.format(vp_count)
            if "{}" in pattern_template
            else pattern_template
        )

        fallback_title = "Histogram of malloc_forever (2 kB) **durations** with {} parallel vp(s)".format(vp_count)
        title = config.get('title', fallback_title).format(vp_count)

        printmd("### " + title + "\n\n")

        frames = generate_summary(
            collected_jobs,
            search_pattern,
            patch=True,
            plot_summary=False,
            plot_hist=False,
            skip_summary_state=False,
        )

        bins = config.get("bins", 20)
        xlabel = config.get("xlabel", "usec")
        upper_y = vp_count // 2 + int(vp_count * 0.15)
        plot_2_subplot(frames, title, xlabel, col_titles, ylim=upper_y, bins=bins)
        per_run_frames.append(frames)
    return per_run_frames

In [None]:
def plot_scale_per_num_vp(all_dfs, single_plots, top_title, ylabel):
    """Scatter the first "avg" value of each run against the module-level ``npars``.

    A 1x2 grid of axes is created. For every index in ``single_plots`` the
    value ``all_dfs[run][index]["avg"].values[0]`` is collected for each run
    and plotted on ``axes[index]``. All plotted axes then share the same
    y-range, from 0 to 110% of the largest value seen.

    Reads the module-level names ``npars`` (x values) and ``col_titles``
    (used in the x-axis label).

    Args:
        all_dfs: Indexed as ``all_dfs[run][index]``; each item must expose
            an "avg" column.
        single_plots: Indices selecting both the DataFrame within each run
            and the axis to draw on.
        top_title: Figure-level title.
        ylabel: y-axis label applied to every plotted axis.
    """
    fig, axes = plt.subplots(1, 2, figsize=(20, 10))
    fig.suptitle(top_title)

    tallest = 0
    for idx in single_plots:
        first_avgs = [run_dfs[idx]["avg"].values[0] for run_dfs in all_dfs]
        tallest = max(max(first_avgs), tallest)

        panel = axes[idx]
        panel.scatter(npars, first_avgs)
        panel.set(
            xlabel="number of parallel VPs ({})".format(col_titles[idx]),
            ylabel=ylabel,
        )

    # Second pass: the common y-limit is only known once every panel is drawn.
    for idx in single_plots:
        axes[idx].set_ylim(0, tallest*1.1)

In [None]:
if SHOW_MALLOC_FOREVER_STAT:
    # Section heading followed by the explanatory notes, printed in order.
    for note_str in (
        "## Experimental results",
        "Histogram is based on the performance metrics measured by each VPs for fun_alloc_forever(). Raw metric is measured using [perf_metric_record_uint64()](https://github.com/fungible-inc/FunOS/blob/0956197d6f3d96b1ba8ed08f997ed702467e8251/utils/common/perf_metric.c#L96) runs per each VPs.",
        "Note: we did not apply x axis limit for when the max x values is too large to make it hard to read small x values.",
        "\n\n",
    ):
        printmd(note_str)

    # Both settings are optional in the config file.
    npars = config.get("npars", [1, 18, 54, 90, 144, 180])
    col_titles = config.get("col_titles", ["baseline", "skip unlock"])
    all_dfs = show_all_runs(npars, col_titles)

    # All runs are assumed to repeat the test the same number of times, so the
    # first row of the first DataFrame of the first run is taken as the count.
    malloc_count = all_dfs[0][0]["count"].values[0]

    note_str = "Number of test done for each VP: {}".format(malloc_count)
    printmd(note_str)


In [None]:
# Scaling plot: one scatter per index listed under "single_plots" in the config.
# all_dfs, npars and col_titles are only assigned when SHOW_MALLOC_FOREVER_STAT
# is enabled, so this cell is skipped (instead of failing with NameError) when
# that flag is off.
if SHOW_MALLOC_FOREVER_STAT and "single_plots" in config:
    single_plots = config["single_plots"]
    # Title and y-label are optional in the config.
    top_title = config.get(
        "single_plots_top_title",
        "Performance of malloc_forever() with different number of parallel VPs",
    )
    ylabel = config.get("single_plots_ylabel", "alloc_forever() run time avg (usec)")
    plot_scale_per_num_vp(all_dfs, single_plots, top_title, ylabel)

In [None]:
# Abort a "Run All" / scripted execution here: every cell after this one is
# scratch code kept for interactive debugging.
raise SystemExit("Stop here, the following is testing code")
# STOP HERE
# below is testing code

In [None]:
# Scratch: summary statistics of the first DataFrame.
# NOTE(review): `dfs` is only assigned inside functions in the visible cells;
# it must already exist in the interactive session for this cell to run.
df = dfs[0]
df.describe()

In [None]:
# Scratch: display the second DataFrame.
# NOTE(review): relies on a session-level `dfs` that the visible cells do not define.
df = dfs[1]
df

In [None]:
# Scratch: filter console output by pattern.
# NOTE(review): `console_response` and `pattern` are not assigned at top level
# in the visible cells. The result is stored as `filtered_line` (singular),
# while later scratch cells read `filtered_lines` -- confirm which is intended.
filtered_line = fod_extract_lines_with_pattern_from_console(console_response, pattern)

In [None]:
# Scratch: parse the JSON payload out of the filtered line(s), with patching enabled.
d = fod_extract_json_from_line(filtered_line, patch=True)

In [None]:
# Scratch: convert the extracted data `d` to a DataFrame and display it.
df = pd.DataFrame(d)
df


In [None]:
# Scratch: min / avg / max histograms of the current `df`, each titled with
# the job number of the first collected job.
title = "Histogram of min of job {}".format(collected_jobs[0]["job_no"])
plot_histogram(df, "min", title, "usec", "count", bins=20)

title = "Histogram of avg of job {}".format(collected_jobs[0]["job_no"])
plot_histogram(df, "avg", title, "usec", "count", bins=20)

title = "Histogram of max of job {}".format(collected_jobs[0]["job_no"])
plot_histogram(df, "max", title, "usec", "count", bins=20)

In [None]:
# Scratch: inspect the first filtered console line.
# NOTE(review): `filtered_lines` (plural) is not assigned at top level in the
# visible cells; the earlier scratch cell assigns `filtered_line`.
print(filtered_lines[0])

In [None]:
# Scratch: non-greedy search for the first "{...}" span in the line; group(1)
# holds the text between the braces.
m = re.search('{(.+?)}', filtered_lines[0])
m

In [None]:
# Scratch: put the braces back around the captured text to get the candidate JSON string.
j_str = "{"+ m.group(1)+"}"
j_str

In [None]:
# Scratch: insert the comma missing between the "usecs" and "count" tokens
# before the string is handed to the JSON parser.
j_str = j_str.replace('"usecs" "count"', '"usecs","count"')
j_str

In [None]:
# Scratch: parse the patched JSON string into a Python object and display it.
j = json.loads(j_str)
j

In [None]:
# NOTE(review): `console` is not defined in any visible cell; this raises
# NameError unless it exists in the interactive session.
console