In [22]:
import glob, os
import pandas as pd
import numpy as np
import seaborn as sns
from itertools import product
import matplotlib.pyplot as plt
import utils

## Instructions
Set the global variable called SERVER to 'nuc' to obtain the data analysis for the NUC  or to 'server' for the data analysis of the server

In [23]:
SERVER = 'server'

In [24]:
DATAPATH = f"../data/{SERVER}_data.csv"

In [25]:
data = pd.read_csv(DATAPATH)
utils.clean_and_format_df(data)

In [26]:
subjects = list(set(data['subject'].values))
targets = list(set(data['target'].values))

GROUPS = list(
    product(subjects, targets)
)

In [27]:
data['subject'] = pd.Categorical(
    data['subject'],
    ['cpython', 'pypy', 'numba', 'pyston-lite', 'py3.13-jit', 'nuitka', 'cython', 'codon', 'mypyc']
)

In [28]:
# Convert to KJ and add Total Energy
data['PACKAGE_ENERGY (J)'] = data['PACKAGE_ENERGY (J)'] / 1000
data['DRAM_ENERGY (J)'] = data['DRAM_ENERGY (J)'] / 1000
data['TOTAL_ENERGY'] = data['PACKAGE_ENERGY (J)'] + data['DRAM_ENERGY (J)']
data.rename(columns={'PP0_ENERGY (J)': 'PP0_ENERGY (KJ)', 'DRAM_ENERGY (J)': 'DRAM_ENERGY (KJ)'}, inplace=True)

In [29]:
data['execution_time'] = data['execution_time'] / 60
data['PROCESS_MEMORY'] = data['PROCESS_MEMORY'] / 1000000
data['MEMORY_USAGE'] = (data['USED_MEMORY']) / (data["TOTAL_MEMORY"] / 1000000000) * 100

In [30]:
METRICS_INTEREST = ['TOTAL_ENERGY', 'execution_time', 'PROCESS_MEMORY', 'LLC-load-misses_percent']

In [31]:
def reform(dictionary):
    return {(outerKey, innerKey): values for outerKey, innerDict in dictionary.items() for innerKey, values in innerDict.items()}

In [32]:
descriptive = []
for subject in subjects:
    df = data[data['subject'] == subject]
    descriptive.append(
        reform(df[METRICS_INTEREST].describe().to_dict()) | {'subject' : subject}
    )

columns = pd.MultiIndex.from_product([METRICS_INTEREST, ['mean', 'std', 'min', '50%', 'max']])
table = pd.DataFrame(descriptive, columns = columns)
table[('subject', '')] = subjects

table['subject'] = pd.Categorical(
    table['subject'],
    ['cpython', 'pypy', 'numba', 'pyston-lite', 'py3.13-jit', 'nuitka', 'cython', 'codon', 'mypyc']
)

# move last column to first position
last_column = table.iloc[:, -1]
table = pd.concat([last_column, table.iloc[:, :-1]], axis=1)

# sort value according to the one fixed by us
table = table.sort_values(by='subject')

table.head()

Unnamed: 0_level_0,subject,TOTAL_ENERGY,TOTAL_ENERGY,TOTAL_ENERGY,TOTAL_ENERGY,TOTAL_ENERGY,execution_time,execution_time,execution_time,execution_time,...,PROCESS_MEMORY,PROCESS_MEMORY,PROCESS_MEMORY,PROCESS_MEMORY,PROCESS_MEMORY,LLC-load-misses_percent,LLC-load-misses_percent,LLC-load-misses_percent,LLC-load-misses_percent,LLC-load-misses_percent
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,min,50%,max,mean,std,min,50%,...,mean,std,min,50%,max,mean,std,min,50%,max
1,cpython,16.409164,12.968216,5.287662,11.149383,46.065032,24.983116,19.954936,8.165385,17.269434,...,3.554556,2.844827,0.004096,3.899392,7.213056,21.724785,24.486901,0.61292,12.905787,74.47671
4,pypy,1.538881,0.89239,0.356298,1.605287,3.040655,2.358685,1.388681,0.585438,2.24531,...,5.179977,6.974469,0.004096,1.009664,27.267072,13.24655,14.140134,0.154551,5.530717,36.927923
6,numba,1.334541,0.76256,0.229263,1.084086,2.688052,2.145817,1.277018,0.382412,1.735417,...,3.291604,2.722635,0.004096,1.564672,7.22944,18.224406,14.528662,3.519317,15.298604,53.296344
0,pyston-lite,16.866306,13.454379,5.237658,11.117178,46.377225,25.853084,20.829807,8.149443,17.191819,...,2.783056,2.685216,0.004096,1.01376,7.118848,20.67863,24.503908,0.404889,10.538657,74.122257
5,py3.13-jit,12.575454,7.426146,5.790524,10.289033,27.38522,19.413912,11.617345,8.933876,15.661831,...,3.879439,3.113461,0.004096,2.336768,8.798208,18.128894,24.822061,0.327973,5.677034,73.866716


In [33]:
latex_table = table.round(3).to_latex(index=False, float_format="%.2f")
print(latex_table)

\begin{tabular}{lrrrrrrrrrrrrrrrrrrrr}
\toprule
subject & \multicolumn{5}{r}{TOTAL_ENERGY} & \multicolumn{5}{r}{execution_time} & \multicolumn{5}{r}{PROCESS_MEMORY} & \multicolumn{5}{r}{LLC-load-misses_percent} \\
 & mean & std & min & 50% & max & mean & std & min & 50% & max & mean & std & min & 50% & max & mean & std & min & 50% & max \\
\midrule
cpython & 16.41 & 12.97 & 5.29 & 11.15 & 46.06 & 24.98 & 19.95 & 8.16 & 17.27 & 71.53 & 3.56 & 2.85 & 0.00 & 3.90 & 7.21 & 21.73 & 24.49 & 0.61 & 12.91 & 74.48 \\
pypy & 1.54 & 0.89 & 0.36 & 1.60 & 3.04 & 2.36 & 1.39 & 0.58 & 2.25 & 4.77 & 5.18 & 6.97 & 0.00 & 1.01 & 27.27 & 13.25 & 14.14 & 0.15 & 5.53 & 36.93 \\
numba & 1.33 & 0.76 & 0.23 & 1.08 & 2.69 & 2.15 & 1.28 & 0.38 & 1.74 & 4.52 & 3.29 & 2.72 & 0.00 & 1.56 & 7.23 & 18.22 & 14.53 & 3.52 & 15.30 & 53.30 \\
pyston-lite & 16.87 & 13.45 & 5.24 & 11.12 & 46.38 & 25.85 & 20.83 & 8.15 & 17.19 & 71.62 & 2.78 & 2.69 & 0.00 & 1.01 & 7.12 & 20.68 & 24.50 & 0.41 & 10.54 & 74.12 \\
py3.13-jit & 1