# Results Analysis Pipeline: Lambda Experiment

In [225]:
import string
import re
import math
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.legend import Legend
from statsmodels.distributions.empirical_distribution import ECDF

## Input handling


In [226]:
# Directory that holds the exported Lambda measurement CSV files.
lambda_path = "/home/soufianej/Documents/Bachelors_project/serverless-simulator/serverless/Lambda experiment/results"

# One DataFrame per scenario. Dictionary keys are "<workload>" for the
# concurrency runs and "<workload>_noconcurrency" for the runs without it;
# the matching file on disk is "<workload>-concurrency.csv" or
# "<workload>-noconcurrency.csv" respectively.
lambda_results = {
    f"{workload}{key_suffix}": pd.read_csv(f"{lambda_path}/{workload}-{mode}.csv")
    for key_suffix, mode in (("", "concurrency"), ("_noconcurrency", "noconcurrency"))
    for workload in ("timer", "frequent", "infrequent", "veryinfrequent")
}

# Directory that holds the OpenDC simulator output, one sub-folder per scenario.
opendc_path = "/home/soufianej/Documents/Bachelors_project/serverless-simulator/data/Lambda-Lambda"

# One DataFrame per scenario, keyed exactly like `lambda_results`.
# Each tuple is (dictionary key, sub-folder, timeout value embedded in the
# parquet file name); only the timeout differs between the file names.
opendc_results = {
    scenario: pd.read_parquet(
        f"{opendc_path}/{folder}/randomAlloc-randomRouting-fixed-keep-aliveResourceManagement-{timeout_ms}msTimeout-18VM's.parquet",
        engine="pyarrow",
    )
    for scenario, folder, timeout_ms in (
        ('timer', 'timer-concurrency', 600000),
        ('frequent', 'frequent-concurrency', 840000),
        ('infrequent', 'infrequent-concurrency', 1740000),
        ('veryinfrequent', 'veryinfrequent-concurrency', 600000),
        ('timer_noconcurrency', 'timer-noconcurrency', 600000),
        ('frequent_noconcurrency', 'frequent-noconcurrency', 300000),
        ('infrequent_noconcurrency', 'infrequent-noconcurrency', 600000),
        ('veryinfrequent_noconcurrency', 'veryinfrequent-noconcurrency', 600000),
    )
}


# Preprocessing

## Matching the timestamp format between the two result sets

In [227]:
# Give the Lambda export the same column names that the rest of the notebook
# uses, then collapse every positive 'ColdStarts' value to 1.
column_aliases = {
    'bin(1s)': 'Datetime',
    'avg(@timestamp)': 'Time',
    'avg(@duration)': 'ExecutionTime',
    'avg(@initDuration)': 'ColdStarts',
    'avg(@maxMemoryUsed /1024 / 1024)': 'MemoryUsage',
}
for key in lambda_results.keys():
    renamed = lambda_results[key].rename(columns=column_aliases)
    # Only values > 0 are overwritten; every other value is left as it is.
    renamed.loc[renamed['ColdStarts'] > 0, 'ColdStarts'] = 1
    # Replacing the value of an existing key does not resize the dict,
    # so this is safe while iterating over its keys.
    lambda_results[key] = renamed

In [228]:
# Rebase the simulator's 'Time' column and drop samples without invocations.
for key in opendc_results.keys():
    frame = opendc_results[key]
    # 'Time' is divided by 1000 and offset by 1596255027000, then truncated
    # to an integer. NOTE(review): the offset looks like an epoch timestamp in
    # milliseconds for the start of the experiment - confirm units and origin.
    # The column is overwritten in place, so re-running this cell shifts it again.
    frame['Time'] = (1596255027000 + frame['Time'] / 1000).astype(int)
    has_invocations = frame['Invocations'] > 0
    opendc_results[key] = frame[has_invocations].reset_index(drop=True)
    

In [229]:
# Expand aggregated samples into one row per invocation: a row whose
# 'Invocations' value is n > 1 ends up appearing n times in the frame.
#
# This is done with a single positional repeat instead of appending rows one
# by one: DataFrame.append no longer exists in pandas >= 2.0, and rebuilding,
# re-sorting and re-indexing the frame for every extra row was quadratic.
# Column dtypes are preserved because no row is round-tripped through a Series.
#
# Like the original loop, this is not idempotent: re-running the cell
# duplicates the already-duplicated rows again.
for key in list(opendc_results):
    frame = opendc_results[key]
    multi_invocation = frame['Invocations'] > 1
    if not multi_invocation.any():
        # Nothing to duplicate: leave the frame exactly as it is.
        continue

    # Rows with Invocations <= 1 (or missing) are kept once; the others are
    # repeated 'Invocations' times in total.
    repeats = frame['Invocations'].where(multi_invocation, 1).astype('int64')
    positions = np.arange(len(frame)).repeat(repeats.to_numpy())

    opendc_results[key] = (
        frame.iloc[positions]
        # A stable sort keeps rows that share a timestamp in their original order.
        .sort_values('Time', axis=0, kind='stable')
        .reset_index(drop=True)
    )

In [230]:
# Display the OpenDC frame for the 'frequent' scenario as the cell output.
opendc_results['frequent']

Unnamed: 0,Time,Function,Invocations,DelayedInvocations,TimelyInvocations,TotalInvocations,ColdStarts,ColdStartsPct,TotalColdStarts,MedianColdStartDelay,RunningInstances,FailedExecutions,IdleInstances,TerminatedInstances,ProvisionedCPU,ProvisionedMemory,CpuUsage,MemoryUsage,WastedMemoryTime,TotalCost
0,1596255027120,1,1,0,0,1,1,100.0,1,591,1,0,0,0,100,128,0.0,121.0,0.0,7.180563e-07
1,1596255027420,1,1,0,1,2,0,50.0,1,0,1,0,0,0,100,128,0.0,121.0,240000.0,1.637788e-07
2,1596255027480,1,1,0,1,3,0,33.0,1,0,1,0,0,0,100,128,0.0,121.0,240000.0,7.388938e-07
3,1596255027660,1,1,0,1,4,0,25.0,1,0,1,0,0,0,100,128,0.0,121.0,360000.0,2.575475e-07
4,1596255027720,1,1,0,1,5,0,20.0,1,0,1,0,0,0,100,128,0.0,121.0,360000.0,5.326025e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
549,1596255112920,1,1,0,1,550,0,3.0,19,0,1,0,1,0,100,128,0.0,145.2,126960000.0,8.847563e-07
550,1596255113100,1,1,0,1,551,0,3.0,19,0,1,0,1,0,100,128,0.0,145.2,127260000.0,4.721738e-07
551,1596255113220,1,1,0,2,553,0,3.0,19,0,2,0,0,0,100,128,0.0,242.0,127380000.0,2.448815e-06
552,1596255113220,1,1,0,2,553,0,3.0,19,0,2,0,0,0,100,128,0.0,242.0,127380000.0,2.448815e-06
