## Energy consumption of cleaning and preprocessing

In [1]:
import pandas as pd
import numpy as np

tools = ['preprocessing']
datasets = ['Adult', 'Student']

# Report the mean runtime and energy readings of every tool on every dataset.
for tool in tools:
    for data in datasets:
        print(data)

        csv_path = '../../Data/Energy/Measurement_'+str(tool)+'_'+str(data)+'.csv'
        measurements = pd.read_csv(csv_path)
        # Only rows labelled with the current dataset count towards the averages.
        runs = measurements[measurements['label'] == data]

        # Average each metric, then scale by 10**-6 so the values are reported
        # in seconds and Joules (presumably logged in micro-units -- confirm
        # against the measurement tool).
        duration, pkg, dram = (
            runs[column].mean() * (10**-6)
            for column in ('duration', 'pkg', 'dram')
        )

        print(f"Average duration for {tool}: {round(duration, 3)} in seconds")
        print(f"Average pkg for {tool}: {round(pkg, 3)} in Joules")
        print(f"Average dram for {tool}: {round(dram, 3)} in Joules\n")

Adult
Average duration for preprocessing: 0.684 in seconds
Average pkg for preprocessing: 19.411 in Joules
Average dram for preprocessing: 2.561 in Joules

Student
Average duration for preprocessing: 0.03 in seconds
Average pkg for preprocessing: 0.815 in Joules
Average dram for preprocessing: 0.113 in Joules



## Accuracy and energy consumption of baseline

In [2]:
models = ['knn','logreg','nn']
datasets = ['Adult', 'Student']

# For every baseline model, report the mean runtime / energy readings and the
# mean accuracy on each dataset.
for model in models:
    print(f"###### {model} ###### \n")

    # The energy log is keyed by model only and holds the rows for every
    # dataset, so it is read once per model rather than once per dataset.
    energy = pd.read_csv('Energy_'+str(model)+'.csv')

    for data in datasets:
        # Rows of the energy log that belong to the current dataset.
        runs = energy.loc[energy['label'] == data]

        # Average each metric, then scale by 10**-6 so the values are reported
        # in seconds and Joules (presumably logged in micro-units -- confirm
        # against the measurement tool).
        duration = runs['duration'].mean() * (10**-6)
        pkg = runs['pkg'].mean() * (10**-6)
        dram = runs['dram'].mean() * (10**-6)

        print(f"{data}:")
        print(f"Average duration for {model}: {round(duration, 3)} in seconds")
        print(f"Average pkg for {model}: {round(pkg, 3)} in Joules")
        print(f"Average dram for {model}: {round(dram, 3)} in Joules")

        accuracy_log = pd.read_csv('Accuracy_'+str(model)+'_'+str(data)+'.csv')
        # Keep every second row, starting with the first, and cast to float.
        # NOTE(review): presumably the skipped rows are not accuracy values --
        # confirm against the layout of the accuracy files.
        # Working on the column directly avoids assigning into a slice of the
        # frame.
        accuracy = accuracy_log['Accuracy'].iloc[::2].astype(float)
        average = round(accuracy.mean(),3)
        print(f"Average accuracy for {model} on {data} set: {average}\n")

###### knn ###### 

Adult:
Average duration for knn: 22.262 in seconds
Average pkg for knn: 805.91 in Joules
Average dram for knn: 117.471 in Joules
Average accuracy for knn on Adult set: 0.82

Student:
Average duration for knn: 0.081 in seconds
Average pkg for knn: 3.85 in Joules
Average dram for knn: 0.29 in Joules
Average accuracy for knn on Student set: 0.696

###### logreg ###### 

Adult:
Average duration for logreg: 1.414 in seconds
Average pkg for logreg: 66.896 in Joules
Average dram for logreg: 6.809 in Joules
Average accuracy for logreg on Adult set: 0.846

Student:
Average duration for logreg: 0.057 in seconds
Average pkg for logreg: 2.73 in Joules
Average dram for logreg: 0.216 in Joules
Average accuracy for logreg on Student set: 0.719

###### nn ###### 

Adult:
Average duration for nn: 9.85 in seconds
Average pkg for nn: 133.768 in Joules
Average dram for nn: 23.826 in Joules
Average accuracy for nn on Adult set: 0.845

Student:
Average duration for nn: 3.404 in seconds
A