In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt

data_dir = "/workspaces/analysis/power-data"

In [2]:
def load_power_data(file_path):
    """
    Load power data from a CSV file.
    
    Returns:
        pd.DataFrame: DataFrame containing the power data.
    """

    df = pd.read_csv(file_path)
    # Baseline
    baseline_len = 50000
    baseline_data = df.iloc[:baseline_len]
    # print("Baseline data:")
    # print(baseline_data.describe())
    # print()

    # Rest
    data = df.iloc[baseline_len:]
    
    return data

### Analyse

In [3]:
from pandas import DataFrame

def save_describe_to_file(name: str, data: DataFrame, file_path):
    """
    Save the describe output of the DataFrame to a text file.
    
    Args:
        data (pd.DataFrame): DataFrame containing the power data.
        file_path (str): Path to the output text file.
    """
    with open(file_path, 'a') as f:
        f.write(f"\n\n{name}:\n")
        f.write("-----------------------------\n")
        f.write(str(data.describe()))
        f.write("\n\n")
        print(f"Describe output appended to {file_path}")

In [4]:
files = [f for f in os.listdir('./power-data')]

datasets = {}

for file in files:
    if file.endswith('.csv'):
        print(f"Processing {file}...")
        data = load_power_data(os.path.join(data_dir, file))
        datasets.update({file: data})
        #save_describe_to_file(file, data, 'describe_output.txt')
        print(f"Finished processing {file}.")


Processing python-flask-B-true_2025-04-02_14-37-53.csv...
Finished processing python-flask-B-true_2025-04-02_14-37-53.csv.
Processing python-flask-baseline_1_2025-04-07_10-52-15.csv...
Finished processing python-flask-baseline_1_2025-04-07_10-52-15.csv.
Processing python-flask-B-true_2025-04-02_15-34-59.csv...
Finished processing python-flask-B-true_2025-04-02_15-34-59.csv.
Processing python-flask-baseline_2_2025-04-07_11-08-36.csv...
Finished processing python-flask-baseline_2_2025-04-07_11-08-36.csv.
Processing python-flask-B-true_2025-04-02_15-53-48.csv...
Finished processing python-flask-B-true_2025-04-02_15-53-48.csv.
Processing python-flask-baseline_0_2025-04-07_10-35-46.csv...
Finished processing python-flask-baseline_0_2025-04-07_10-35-46.csv.
Processing python-flask-B-true_2025-04-02_15-17-05.csv...
Finished processing python-flask-B-true_2025-04-02_15-17-05.csv.
Processing python-flask-baseline_4_2025-04-07_11-41-18.csv...
Finished processing python-flask-baseline_4_2025-04-0

### Total energy consumption

In [5]:
for dataset in datasets:
    data = datasets[dataset]
    # print(f"Dataset {dataset}:\n")
    # print(data.describe())
    # print("\n\n")
    # Save the describe output to a text file
    save_describe_to_file(dataset, data, 'describe_output.txt')

Describe output appended to describe_output.txt
Describe output appended to describe_output.txt
Describe output appended to describe_output.txt
Describe output appended to describe_output.txt
Describe output appended to describe_output.txt
Describe output appended to describe_output.txt
Describe output appended to describe_output.txt
Describe output appended to describe_output.txt
Describe output appended to describe_output.txt
Describe output appended to describe_output.txt


In [13]:
import numpy as np

def calculate_energy(df):
    times = df['seconds']
    powers = df['power']
    energy = np.trapz(powers, times)  # Trapezoidal integration
    duration = times.iloc[-1] - times.iloc[0]
    avg_power = energy / duration if duration > 0 else 0
    return energy, avg_power

def calculate_group_energy(datasets, group_prefixes):
    group_energies = {prefix: 0 for prefix in group_prefixes}

    for dataset in datasets:
        data = datasets[dataset]
        energy, avg_power = calculate_energy(data)
        for prefix in group_prefixes:
            if dataset.startswith(prefix):
                group_energies[prefix] += energy
                break
        print(f"Dataset {dataset}:")
        print(f"  Energy: {energy:.2f} J")
        print(f"  Average Power: {avg_power:.2f} W")
        print()

    for prefix in group_prefixes:
        total_energy = group_energies[prefix]
        mean_energy = total_energy / (len(datasets) / len(group_prefixes))
        print(f"Total energy for {prefix}: {total_energy:.2f} J")
        print(f"Mean energy for {prefix}: {mean_energy:.2f} J")


In [14]:
calculate_group_energy(datasets, ["python-flask-baseline", "python-flask-B-true"])

  energy = np.trapz(powers, times)  # Trapezoidal integration


Dataset python-flask-B-true_2025-04-02_14-37-53.csv:
  Energy: 1670.16 J
  Average Power: 2.18 W

Dataset python-flask-baseline_1_2025-04-07_10-52-15.csv:
  Energy: 1616.84 J
  Average Power: 2.20 W

Dataset python-flask-B-true_2025-04-02_15-34-59.csv:
  Energy: 1682.71 J
  Average Power: 2.18 W

Dataset python-flask-baseline_2_2025-04-07_11-08-36.csv:
  Energy: 1615.90 J
  Average Power: 2.20 W

Dataset python-flask-B-true_2025-04-02_15-53-48.csv:
  Energy: 1674.71 J
  Average Power: 2.18 W

Dataset python-flask-baseline_0_2025-04-07_10-35-46.csv:
  Energy: 1609.22 J
  Average Power: 2.20 W

Dataset python-flask-B-true_2025-04-02_15-17-05.csv:
  Energy: 1677.92 J
  Average Power: 2.18 W

Dataset python-flask-baseline_4_2025-04-07_11-41-18.csv:
  Energy: 1619.38 J
  Average Power: 2.20 W

Dataset python-flask-B-true_2025-04-02_14-59-08.csv:
  Energy: 1667.24 J
  Average Power: 2.18 W

Dataset python-flask-baseline_3_2025-04-07_11-24-53.csv:
  Energy: 1616.75 J
  Average Power: 2.20 W



### Plot

In [55]:
from datetime import datetime

def plot(title, set:dict):
    # Generate labels from file names
    labels = [key.split('.')[0] for key in set.keys()]

    # Define colors for the plots
    colors = plt.cm.tab10.colors

    for name, df in set.items():
        #df['minute'] = (df['seconds'] // 60).astype(int)  # Convert seconds to minutes
        grouped = df.groupby('minute')['power'].mean().reset_index()
        set[name] = grouped

    # Plotting
    plt.figure(figsize=(12, 6))

    for i, (name, df) in enumerate(set.items()):
        plt.plot(df['minute'], df['power'], label=labels[i], color=colors[i % len(colors)], marker='o')

    # Add labels and title
    plt.title(f'Average Power Usage Grouped by Minute ({title}): ', fontsize=14)
    plt.xlabel('Minute', fontsize=12)
    plt.xlim(-1, 20)
    plt.ylabel('Average Power (Watts)', fontsize=12)
    plt.legend(title='Dataset')
    plt.grid(True)

    # Save the plot
    output_dir = 'plots'
    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_path = os.path.join(output_dir, f'{title}_power_usage_by_minute_plot_{timestamp}.png')
    plt.savefig(file_path, dpi=500) 
    plt.close() 

In [None]:
dotnet_datasets = {key: value for key, value in datasets.items() if key.startswith('python-flask-B')}
python_datasets = {key: value for key, value in datasets.items() if key.startswith('python-flask-baseline')}

plot("python -b", dotnet_datasets)
plot("python -baseline", python_datasets)