In [13]:
# Imports
import pandas as pd
import os
import re
import statistics


In [14]:
# File-name pattern for the LLM-optimized variant of a 'main' script.
# The dots in ".m_" and ".csv" are escaped so that they only match a literal
# '.', instead of any character.
regex_llm_optimized_main = re.compile(r".*energy_metrics_(.+)-(.+)_optimized_(.+)_(\d+)\.m_(\d+)\.csv.*")
## Captured groups: folder / script / llm / main number / repetition_number

In [15]:
# File-name pattern for the LLM-optimized variant of a script whose name does
# not carry a trailing main number.
# The dots in ".m_" and ".csv" are escaped so that they only match a literal
# '.', instead of any character.
regex_llm_optimized_non_main = re.compile(r".*energy_metrics_(.+)-(.+)_optimized_([^_]+)\.m_(\d+)\.csv.*")
## Captured groups: folder / script / llm / repetition_number

In [16]:
# File-name pattern for an original (non-optimized) script named 'main'.
# The dots in ".m_" and ".csv" are escaped so that they only match a literal
# '.', instead of any character.
regex_original_main = re.compile(r".*energy_metrics_(.+)-(main)\.m_(\d+)\.csv.*")
## Captured groups: folder / script / repetition_number

In [17]:
# File-name pattern for an original (non-optimized) script with any name.
# The dots in ".m_" and ".csv" are escaped so that they only match a literal
# '.', instead of any character.
regex_original_non_main = re.compile(r".*energy_metrics_(.+)-(.+)\.m_(\d+)\.csv.*")
## Captured groups: folder / script / repetition_number

In [23]:
def get_name_energy_column_name_from_number_core(number_core):
    """Build the energy column name for a core.

    Args:
        number_core: Core identifier; it is converted with ``str()``.

    Returns:
        The string ``"CORE<number_core>_ENERGY (J)"``.
    """
    return f"CORE{number_core!s}_ENERGY (J)"

def get_execution_time(data_raw):
    """Return the span covered by the time column, truncated to an int.

    Args:
        data_raw: Table indexable by ``TIME_COLUMN_NAME`` whose column
            supports ``.iloc`` (presumably the pandas DataFrame read from an
            energy CSV - confirm against the caller).

    Returns:
        ``int(last - first)`` of the ``TIME_COLUMN_NAME`` column. The unit is
        whatever the column uses; it is not visible from this code.
    """
    timestamps = data_raw[TIME_COLUMN_NAME]
    elapsed = timestamps.iloc[-1] - timestamps.iloc[0]
    return int(elapsed)
    

# --- Configuration -----------------------------------------------------------
TOTAL_NUMBER_CORES = 10
TOTAL_NUMBER_REPETITIONS = 30
TIME_COLUMN_NAME = "Time"
OUTPUT_DIR = "../output/"

# --- Result containers -------------------------------------------------------
# Each dict is laid out as: folder -> script -> {'energy': [...], 'memory': [...]}
# Only the 'energy' list is filled in this cell.
df_original = {}
df_optimized_gpt3 = {}
df_optimized_gpt4 = {}
df_optimized_llama = {}
df_optimized_mixtral = {}

# LLM tag found in the file name -> container receiving its measurements.
results_by_llm = {
    "gpt3": df_optimized_gpt3,
    "gpt4": df_optimized_gpt4,
    "llama": df_optimized_llama,
    "mixtral": df_optimized_mixtral,
}

files_output = os.listdir(OUTPUT_DIR)
energy_files = [k for k in files_output if 'energy_metrics_' in k]
energy_consumption_by_script = {}
execution_time_by_script = {}

# --- Collect one energy value per measurement file ---------------------------
for energy_file in energy_files:
    # The patterns are tried from most specific to least specific.
    p_llm_optimized_main = regex_llm_optimized_main.match(energy_file)
    p_llm_optimized_non_main = regex_llm_optimized_non_main.match(energy_file)
    p_original_main = regex_original_main.match(energy_file)
    p_original_non_main = regex_original_non_main.match(energy_file)

    llm = None
    main_number = None
    if p_llm_optimized_main:
        folder, script, llm, main_number, repetition_number = p_llm_optimized_main.groups()
    elif p_llm_optimized_non_main:
        folder, script, llm, repetition_number = p_llm_optimized_non_main.groups()
    elif p_original_main:
        folder, script, repetition_number = p_original_main.groups()
    elif p_original_non_main:
        folder, script, repetition_number = p_original_non_main.groups()
    else:
        print("Error: no file matching the regex patterns!")
        print(energy_file)
        # Skip the file: otherwise it would be stored under folder=None /
        # script=None in df_original.
        continue

    if llm is None:
        results = df_original
    elif llm in results_by_llm:
        results = results_by_llm[llm]
    else:
        # An optimized run from an unknown LLM must not be counted as an
        # original measurement.
        print("Error: unknown LLM tag '" + str(llm) + "' in file name!")
        print(energy_file)
        continue

    data_raw = pd.read_csv(os.path.join(OUTPUT_DIR, energy_file))
    # Computed per file but not aggregated anywhere yet.
    execution_time = get_execution_time(data_raw)

    # Energy of a run = sum over cores of (last reading - first reading).
    # NOTE(review): this reads CORE0..CORE<TOTAL_NUMBER_CORES>, i.e.
    # TOTAL_NUMBER_CORES + 1 columns - confirm that matches the CSV layout.
    energy_consumption = 0.0
    for number_core in range(TOTAL_NUMBER_CORES + 1):
        core_energy = data_raw[get_name_energy_column_name_from_number_core(number_core)]
        energy_consumption += float(core_energy.iloc[-1] - core_energy.iloc[0])

    # Create the folder/script entry on first sight AND record the value, so
    # the first measurement of every folder/script is no longer dropped.
    measurements = results.setdefault(folder, {}).setdefault(script, {'energy': [], 'memory': []})
    measurements['energy'].append(energy_consumption)

# --- Build the per-script overview -------------------------------------------
columns = ["Folder", "Script", "Energy Original Script", "Energy Optimized gpt3", "Energy Optimized gpt4", "Energy Optimized llama", "Energy Optimized Mixtral"]
rows = []
for folder in df_original:
    for script in df_original[folder]:
        try:
            rows.append([
                folder,
                script,
                statistics.mean(df_original[folder][script]['energy']),
                statistics.mean(df_optimized_gpt3[folder][script]['energy']),
                statistics.mean(df_optimized_gpt4[folder][script]['energy']),
                statistics.mean(df_optimized_llama[folder][script]['energy']),
                statistics.mean(df_optimized_mixtral[folder][script]['energy']),
            ])
        except (KeyError, statistics.StatisticsError):
            # KeyError: folder/script missing for one of the LLMs.
            # StatisticsError: an entry exists but holds no measurements.
            print("Error: script in one of the dict.")
            print(script)

# Build the frame once from the collected rows instead of growing it per row.
df_global_energy = pd.DataFrame(rows, columns=columns)
df_global_energy.to_csv("Energy_data_overview.csv", sep=';', index=False)

Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_21.csv
Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_20.csv
Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_22.csv
Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_23.csv
Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_27.csv
Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_26.csv
Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_9.csv
Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_18.csv
Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_30.csv
Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_24.csv
Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_25.csv
Error: no file matching the regex patterns!
energy_metrics_baseline-baseline_