In [1]:
import glob, os
import pandas as pd
import numpy as np
import seaborn as sns
from itertools import product
import matplotlib.pyplot as plt
import utils
import statistics

In [2]:
# Name of the server whose CSV is analysed; it is substituted into the file
# name of the data path below.
SERVER = "nuc"
DATAPATH = "../experiments/{}_data.csv".format(SERVER)

In [3]:
# Load the CSV file at DATAPATH into a DataFrame.
data = pd.read_csv(filepath_or_buffer=DATAPATH)

In [4]:
# Discard every row whose execution_time value is missing.
data = data.dropna(axis="index", how="any", subset=["execution_time"])

In [5]:
# Unit conversion and derived columns, built as one chained expression rather
# than column-by-column in-place edits followed by rename(inplace=True):
#   1. divide the three energy readings by 1000 (J -> KJ);
#   2. derive TOTAL_ENERGY (package + DRAM) and CACHE_ENERGY (package - PP0)
#      from the already-rescaled values, and divide execution_time by 60
#      (presumably seconds -> minutes; TODO confirm the unit of the raw column);
#   3. rename the energy columns so their labels carry the new unit.
# Existing columns keep their position; TOTAL_ENERGY and CACHE_ENERGY are
# appended at the end, in that order.
data = (
    data
    .assign(**{
        column: data[column] / 1000
        for column in ('PACKAGE_ENERGY (J)', 'DRAM_ENERGY (J)', 'PP0_ENERGY (J)')
    })
    .assign(
        TOTAL_ENERGY=lambda df: df['PACKAGE_ENERGY (J)'] + df['DRAM_ENERGY (J)'],
        CACHE_ENERGY=lambda df: df['PACKAGE_ENERGY (J)'] - df['PP0_ENERGY (J)'],
        execution_time=lambda df: df['execution_time'] / 60,
    )
    .rename(
        columns={
            'PACKAGE_ENERGY (J)': 'PACKAGE_ENERGY (KJ)',
            'DRAM_ENERGY (J)': 'DRAM_ENERGY (KJ)',
            'PP0_ENERGY (J)': 'PP0_ENERGY (KJ)',
        }
    )
)

In [6]:
# Subjects and benchmark targets are listed explicitly, so their order is fixed
# by this cell and does not depend on the contents of the data file.
subjects = ['cpython', 'pypy', 'numba', 'pyston-lite', 'py3.13-jit', 'nuitka', 'cython', 'codon', 'mypyc']
targets = ['mandelbrot', 'fannkuch_redux', 'binary_trees', 'n_body', 'spectralnorm', 'k_nucleotide', 'fasta']
# Every (subject, target) pair for all subjects except 'cpython'.
# The subjects are filtered with a list comprehension so GROUPS follows the
# declared subject order; a set difference would iterate in hash order, which
# for strings can change from one interpreter session to the next.
GROUPS = list(
    product([subject for subject in subjects if subject != 'cpython'], targets)
)

In [10]:
def pearson_correlation(x, y):
    """Return the Pearson correlation coefficient of two paired samples.

    The coefficient is
    ``sum((x - mean(x)) * (y - mean(y)))`` divided by
    ``sqrt(sum((x - mean(x))**2) * sum((y - mean(y))**2))``.

    Parameters
    ----------
    x, y : array-like of numbers
        Paired observations. Both must have the same shape.

    Returns
    -------
    numpy.float64
        The correlation coefficient, or NaN when it is undefined: the inputs
        are empty, or at least one of them has zero variance.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same shape.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Without this check a length-1 sample would silently broadcast against a
    # longer one instead of being reported as a pairing error.
    if x.shape != y.shape:
        raise ValueError(
            f"x and y must have the same shape, got {x.shape} and {y.shape}"
        )

    # The mean of an empty sample is undefined; answer NaN without warnings.
    if x.size == 0:
        return np.float64(np.nan)

    # Deviations of each observation from its sample mean.
    x_centered = x - np.mean(x)
    y_centered = y - np.mean(y)

    numerator = np.sum(x_centered * y_centered)
    denominator = np.sqrt(np.sum(x_centered**2) * np.sum(y_centered**2))

    # A constant sample has zero variance, so the ratio would be 0/0.
    if denominator == 0:
        return np.float64(np.nan)

    return numerator / denominator

In [13]:
# For every subject, correlate its TOTAL_ENERGY values with its execution_time
# values and collect one record per subject; the resulting table is the cell's
# displayed output.
rows = []
for subject in subjects:
    subject_rows = data[data['subject'] == subject]
    rows.append({
        'subject': subject,
        'correlation': pearson_correlation(
            subject_rows['TOTAL_ENERGY'].to_list(),
            subject_rows['execution_time'].to_list(),
        ),
    })

pd.DataFrame(rows)

Unnamed: 0,subject,correlation
0,cpython,0.999989
1,pypy,0.99924
2,numba,0.997365
3,pyston-lite,0.999989
4,py3.13-jit,0.999884
5,nuitka,0.999233
6,cython,0.998394
7,codon,0.996863
8,mypyc,0.999832
