# Memory Subsystem Benchmarking - Sysbench: Linux Host
---
## 1. Parse Sysbench Results

In [1]:
import re
import os
import csv
import pandas as pd
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [74]:
# Column layout of the results table, one row per sysbench output file.
# The first three columns are filled from constants / the file name; the
# remaining ones are filled in the order the values are parsed from the file.
result_columns = [
    # run identification
    'Exec Env',
    '#vCPUs',
    'Access Method',
    # test options
    'Number of Threads',
    'Block Size (KiB)',
    'Total Size (MiB)',
    'Operation',
    'Scope',
    # throughput figures
    'Total Ops',
    'Ops per sec',
    'Data Transfered (MiB)',
    'Throughput (MiB/sec)',
    # general statistics
    'Total Time',
    'Total #events',
    # latency figures
    'Min Latency (ms)',
    'Avg Latency (ms)',
    'Max Latency (ms)',
    '95th Percentile Latency (ms)',
    'Total Execution Time across Threads (s)',
    # per-thread averages
    'Avg Number of Events per Thread',
    'Avg Execution Time per Thread (s)',
]

# Empty frame that the parsing cell fills row by row.
df = pd.DataFrame(columns=result_columns)

In [75]:
# Show the results frame before parsing to confirm its column layout.
df

Unnamed: 0,Exec Env,#vCPUs,Access Method,Number of Threads,Block Size (KiB),Total Size (MiB),Operation,Scope,Total Ops,Ops per sec,...,Throughput (MiB/sec),Total Time,Total #events,Min Latency (ms),Avg Latency (ms),Max Latency (ms),95th Percentile Latency (ms),Total Execution Time across Threads (s),Avg Number of Events per Thread,Avg Execution Time per Thread (s)


In [78]:
# Directory holding the sysbench output files, one file per benchmark run.
BARE_METAL_OUTPUT_DIR = '../test_outputs/bareMetalOutputs'


def _parse_sysbench_line(line):
    """Extract the values carried by a single line of a sysbench output file.

    Only lines containing ':' or 'MiB' are considered. The text after the
    last ':' (or the whole line when there is no ':') is then interpreted
    according to its shape.

    Parameters
    ----------
    line : str
        One raw line read from an output file.

    Returns
    -------
    list
        Zero, one or two values to append to the current result row:
        - '<n> (<x> per second)'        -> [n, x]
        - '... transferred ...'         -> every number found in the line
        - value ending in 'KiB' / 'MiB' -> [number without the 3-char unit]
        - '<avg>/<stddev>'              -> [avg]
        - value containing 's'          -> [number without its last character]
        - any other non-empty value     -> [float(value)], or [value] as text
                                           when it is not numeric
        - anything else                 -> []

    Raises
    ------
    ValueError
        If a line matches one of the numeric shapes above but the extracted
        text cannot be converted to float.
    """
    if ':' not in line and 'MiB' not in line:
        return []

    parts = line.rsplit(':', 1)
    # Without a ':' the whole (unstripped) line is the value.
    result = parts[1].strip() if len(parts) > 1 else parts[0]

    if 'per second' in result:
        vals = result.split('(')
        # [:-11] drops the trailing 'per second)' before converting the rate.
        return [float(vals[0].strip()), float(vals[1][:-11].strip())]
    if 'transferred' in result:
        # The look-behind skips digits glued to letters or to a ':'.
        return [float(v) for v in re.findall(r"(?<![a-zA-Z:])[-+]?\d*\.?\d+", result)]
    if 'MiB' in result or 'KiB' in result:
        return [float(result[:-3])]
    if '/' in result:
        # Keep only the part before the '/'.
        return [float(result.split('/')[0])]
    if 's' in result:
        # Presumably the 'Total Time' value with a trailing 's' unit
        # (e.g. '30.0285s') -- TODO confirm no other value contains an 's'.
        return [float(result[:-1])]
    if result != '':
        try:
            return [float(result)]
        except ValueError:
            # Non-numeric option value: keep the text as-is.
            return [result]
    return []


# Start from an empty frame with the same columns so that re-running this
# cell replaces the parsed rows instead of appending duplicates.
df = pd.DataFrame(columns=df.columns)

for outputFile in os.listdir(BARE_METAL_OUTPUT_DIR):
    # Placeholder file that only keeps the directory under version control.
    if outputFile == '.gitkeep':
        continue
    filepath = os.path.join(BARE_METAL_OUTPUT_DIR, outputFile)

    # 'Exec Env' and '#vCPUs' are constants for this notebook; the access
    # method is the third '_'-separated token of the file name.
    testResult = ['Linux Host', 96, outputFile.split('_')[2]]
    with open(filepath, 'r') as f:
        for line in f:
            testResult.extend(_parse_sysbench_line(line))

    # Fail with the offending file name rather than an opaque pandas error
    # when a file does not yield exactly one value per column.
    if len(testResult) != len(df.columns):
        raise ValueError('{}: parsed {} values, expected {}'.format(
            filepath, len(testResult), len(df.columns)))
    df.loc[len(df)] = testResult

In [79]:
# Preview the first parsed rows (still in directory-listing order, not yet sorted).
df.head()

Unnamed: 0,Exec Env,#vCPUs,Access Method,Number of Threads,Block Size (KiB),Total Size (MiB),Operation,Scope,Total Ops,Ops per sec,...,Throughput (MiB/sec),Total Time,Total #events,Min Latency (ms),Avg Latency (ms),Max Latency (ms),95th Percentile Latency (ms),Total Execution Time across Threads (s),Avg Number of Events per Thread,Avg Execution Time per Thread (s)
0,Linux Host,96,rnd,30.0,4096.0,1024000.0,write,global,17264.0,574.89,...,2299.55,30.0285,17264.0,20.87,52.15,425.31,114.72,900309.12,575.4667,30.0103
1,Linux Host,96,rnd,20.0,4096.0,1024000.0,write,global,13376.0,445.46,...,1781.84,30.0253,13376.0,19.34,44.87,308.53,68.05,600235.32,668.8,30.0118
2,Linux Host,96,seq,26.0,4096.0,1024000.0,write,global,146078.0,4868.41,...,19473.64,30.0038,146078.0,0.53,5.12,98.8,15.83,748539.98,5618.3846,28.79
3,Linux Host,96,seq,37.0,4096.0,1024000.0,write,local,255966.0,32259.0,...,129035.99,7.931,255966.0,0.21,0.8,149.31,2.43,205906.0,6918.0,5.565
4,Linux Host,96,seq,14.0,4096.0,1024000.0,read,local,255990.0,83751.57,...,335006.26,3.0525,255990.0,0.09,0.15,37.86,0.11,38811.02,18285.0,2.7722


In [80]:
# Cast the thread count to int, then order the runs by thread count,
# operation, access method and scope, renumbering the rows from 0.
sort_keys = ['Number of Threads', 'Operation', 'Access Method', 'Scope']
df = df.astype({'Number of Threads': int})
df = df.sort_values(by=sort_keys)
df = df.reset_index(drop=True)

# Scale by 1/1000 so the stored value matches the "(s)" unit in the column
# name (presumably the parsed figure is in milliseconds -- TODO confirm).
# NOTE: this modifies df in place, so re-running this cell alone divides again.
total_time_col = 'Total Execution Time across Threads (s)'
df[total_time_col] = df[total_time_col] / 1000

In [81]:
# Inspect the first 20 rows after sorting and unit conversion.
df.head(20)

Unnamed: 0,Exec Env,#vCPUs,Access Method,Number of Threads,Block Size (KiB),Total Size (MiB),Operation,Scope,Total Ops,Ops per sec,...,Throughput (MiB/sec),Total Time,Total #events,Min Latency (ms),Avg Latency (ms),Max Latency (ms),95th Percentile Latency (ms),Total Execution Time across Threads (s),Avg Number of Events per Thread,Avg Execution Time per Thread (s)
0,Linux Host,96,rnd,1,4096.0,1024000.0,read,global,12323.0,410.73,...,1642.93,30.001,12323.0,2.41,2.43,7.92,2.48,29.99633,12323.0,29.9963
1,Linux Host,96,rnd,1,4096.0,1024000.0,read,local,12158.0,405.21,...,1620.85,30.0003,12158.0,2.43,2.47,7.41,2.52,29.99468,12158.0,29.9947
2,Linux Host,96,seq,1,4096.0,1024000.0,read,global,256000.0,10748.42,...,42993.67,23.8139,256000.0,0.09,0.09,0.68,0.09,23.77626,256000.0,23.7763
3,Linux Host,96,seq,1,4096.0,1024000.0,read,local,256000.0,10737.39,...,42949.56,23.8383,256000.0,0.09,0.09,0.55,0.09,23.80058,256000.0,23.8006
4,Linux Host,96,rnd,1,4096.0,1024000.0,write,global,5819.0,193.94,...,775.77,30.0004,5819.0,2.2,5.15,10.95,5.18,29.99567,5819.0,29.9957
5,Linux Host,96,rnd,1,4096.0,1024000.0,write,local,5819.0,193.93,...,775.73,30.0015,5819.0,2.19,5.15,9.87,5.18,29.99606,5819.0,29.9961
6,Linux Host,96,seq,1,4096.0,1024000.0,write,global,140837.0,4693.95,...,18775.81,30.0003,140837.0,0.21,0.21,2.14,0.22,29.97677,140837.0,29.9768
7,Linux Host,96,seq,1,4096.0,1024000.0,write,local,140850.0,4694.38,...,18777.53,30.0004,140850.0,0.21,0.21,0.77,0.22,29.97659,140850.0,29.9766
8,Linux Host,96,rnd,2,4096.0,1024000.0,read,global,24773.0,825.66,...,3302.64,30.002,24773.0,2.35,2.42,11.52,2.48,59.989,12386.5,29.9945
9,Linux Host,96,rnd,2,4096.0,1024000.0,read,local,24523.0,817.28,...,3269.12,30.0018,24523.0,2.35,2.45,14.17,2.52,59.98619,12261.5,29.9931


In [82]:
# Persist the results table; the row index is written as the first CSV column.
df.to_csv(path_or_buf='../result_CSVs/bareMetalResults.csv', index=True)

## 2. Visualization
- Variables: operation (read, write), scope (global, local), access method (rnd, seq)
- To visualize: avg latency, number of events per thread, amount of data transferred, ops per sec, throughput
