# In [None]:
import os
import pandas as pd
import numpy as np

# Name of the project being summarised; it selects the data sub-folder
# and labels each group in the report.
project = "rust"

# Directory that holds the measurement CSV files for that project.
folder = f"data/{project}"

def calculate_energy(df):
    """Integrate power over time for one measurement run.

    Rows missing either ``seconds`` or ``power`` are discarded, then the
    remaining power samples are integrated over their timestamps with the
    trapezoidal rule.

    Args:
        df: DataFrame containing ``seconds`` and ``power`` columns.

    Returns:
        A tuple ``(energy, avg_power, duration)`` where ``energy`` is the
        trapezoidal integral of power over time, ``duration`` is the last
        timestamp minus the first, and ``avg_power`` is ``energy / duration``
        (0 when the duration is not positive). All three are ``0.0`` when no
        complete sample remains after dropping missing values.
    """
    df = df.dropna(subset=['seconds', 'power'])
    if df.empty:
        # Nothing to integrate; indexing the first/last timestamp would raise.
        return 0.0, 0.0, 0.0

    times = df['seconds']
    powers = df['power']

    # np.trapezoid was introduced in NumPy 2.0; older releases only ship
    # np.trapz. The short-circuit avoids touching np.trapz when it is not needed.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    energy = integrate(powers, times)
    duration = times.iloc[-1] - times.iloc[0]
    avg_power = energy / duration if duration > 0 else 0
    return energy, avg_power, duration

# Prepare final results: one record per usable measurement CSV in `folder`.
records = []

files = sorted(os.listdir(folder))
for file in files:
    # Skip anything that is not a measurement CSV (test and temperature
    # files are excluded by name).
    if "test" in file or "temperature" in file or ".csv" not in file:
        continue
    file_path = os.path.join(folder, file)
    if os.path.getsize(file_path) == 0:
        continue

    df = pd.read_csv(file_path)
    if 'seconds' not in df.columns or 'power' not in df.columns:
        continue

    # Keep only the samples whose timestamp is at most 5 seconds.
    df = df[df['seconds'] <= 5]
    # Skip files without a single complete (seconds, power) sample in that
    # window. Checking only `df.empty` would let rows with missing power
    # through, leaving the energy calculation with nothing to integrate.
    if df.dropna(subset=['seconds', 'power']).empty:
        continue

    energy, avg_power, duration = calculate_energy(df)
    desc = df['power'].describe()

    # Extract group and name from filename (adjust if needed)
    # NOTE(review): "Date" is the last "_"-separated piece of the file name,
    # so it still carries whatever follows it (typically the ".csv" suffix).
    # Confirm whether that is intended.
    group = file.split("-")[0]
    name = file.split("_")[0]
    date = file.split("_")[-1]

    records.append({
        "Group": group,
        "Name": name,
        "Date": date,
        "Total energy (J)": round(energy, 4),
        "Average power (W)": round(avg_power, 4),
        "Duration (s)": round(duration, 4),
        "Max (W)": round(desc['max'], 4),
        "Min (W)": round(desc['min'], 4),
        "Std": round(desc['std'], 4),
        "50%": round(desc['50%'], 4)
    })

# Convert to DataFrame. The columns are listed explicitly so the frame keeps
# its schema even when no file produced a record; without them an empty
# frame has no "Name" column and the sort below raises KeyError.
df_out = pd.DataFrame(
    records,
    columns=[
        "Group",
        "Name",
        "Date",
        "Total energy (J)",
        "Average power (W)",
        "Duration (s)",
        "Max (W)",
        "Min (W)",
        "Std",
        "50%",
    ],
)

# Sort and group
df_out = df_out.sort_values(by=["Name"])

# Build the tab-separated report. For every distinct "Name" it emits a line
# carrying the project label, one line per run, a line with the per-column
# means of the numeric metrics (rounded to 3 decimals), and a blank separator.
report_columns = [
    "Group", "Name", "Date", "Total energy (J)", "Average power (W)",
    "Duration (s)", "Max (W)", "Min (W)", "Std", "50%",
]
metric_columns = report_columns[3:]

output_lines = ["Group\tName\tDate\tTotal energy (J)\tAverage power (W)\tDuration (s)\tMax (W)\tMin (W)\tStd\t50%"]
for _key, name_rows in df_out.groupby(by=["Name"]):
    output_lines.append(f"{project}\t\t\t\t\t\t\t\t\t")

    # One line per run, fields in report-column order.
    output_lines.extend(
        "\t".join(f"{run[column]}" for column in report_columns)
        for _, run in name_rows.iterrows()
    )

    # Summary row: the three identifying columns are left empty.
    means = name_rows.iloc[:, 3:].mean(numeric_only=True).round(3)
    output_lines.append(
        "\t\t\t" + "\t".join(f"{means[column]}" for column in metric_columns)
    )
    output_lines.append("")

# Render the report once, then show it and persist it next to the data folder.
report_text = "\n".join(output_lines)
print(report_text)

# Save to file
output_file = f"{folder}_energy_summary.txt"
with open(output_file, "w") as handle:
    handle.write(report_text)
print(f"Results saved to {output_file}")