# Create Query Performance Summary CSV

This notebook reads the raw, pipe-delimited query execution data and writes a summary CSV containing the average CPU and RAM usage for each query.

In [20]:
# Import pandas for data processing
import pandas as pd

In [21]:
# Where the raw timing log lives and where the summary CSV will be written.
# NOTE(review): these are absolute, machine-specific paths — consider making
# them relative to a configurable data directory.
input_file = '/home/buiduckhanh/fomat_data/duckDB/tpc-ds/1st_query_execution_times.csv'
output_file = '/home/buiduckhanh/fomat_data/query_performance_summary.csv'

# Load the pipe-separated log; skipinitialspace skips spaces that follow
# each delimiter.
df = pd.read_csv(
    input_file,
    sep='|',
    skipinitialspace=True,
)

# Header names can still carry surrounding whitespace, so trim each one.
df.columns = [column_name.strip() for column_name in df.columns]

In [22]:
# Normalise the two usage columns to floats rounded to 2 decimals.
# The raw values use a decimal comma, which is replaced by a dot before the
# cast.  Any 'S' characters are also removed from the RAM column
# (presumably a stray marker in the raw log — TODO confirm).
#
# The columns are cast to str first so this cell is safe to re-run: once a
# column is already float (after a first run, or because read_csv parsed it
# as numeric), calling the `.str` accessor on it directly raises
# AttributeError.  regex=False makes the replacements plain literal ones.
df['cpu_used(%)'] = (
    df['cpu_used(%)']
    .astype(str)
    .str.replace(',', '.', regex=False)
    .astype(float)
    .round(2)
)
df['ram_used(gb)'] = (
    df['ram_used(gb)']
    .astype(str)
    .str.replace('S', '', regex=False)
    .str.replace(',', '.', regex=False)
    .astype(float)
    .round(2)
)

# Keep only rows where BOTH CPU and RAM usage are strictly positive; a row
# with a zero (or missing) value in either column is dropped.
df_filtered = df[(df['cpu_used(%)'] > 0) & (df['ram_used(gb)'] > 0)]

print(f"Original rows: {len(df)}, Filtered rows: {len(df_filtered)}")

Original rows: 147, Filtered rows: 126


In [None]:
# Average CPU and RAM usage per query_id, rounded to 2 decimals.
# Named aggregation fixes the output column names at the point of
# aggregation, so there is no separate positional rename whose list length
# has to match the aggregated columns (a mismatch there raises
# "ValueError: Length mismatch").
query_summary = (
    df_filtered
    .groupby('query_id')
    .agg(
        avg_cpu_percent=('cpu_used(%)', 'mean'),
        avg_ram_gb=('ram_used(gb)', 'mean'),
    )
    # query_id is still the index here, so only the two averages are rounded
    .round(2)
    # Turn query_id back into a regular column; it becomes the first column,
    # giving the order query_id, avg_cpu_percent, avg_ram_gb
    .reset_index()
)

# Preview the summary
query_summary.head()


ValueError: Length mismatch: Expected axis has 2 elements, new values have 3 elements

In [17]:
# Write the per-query averages to disk; index=False leaves the row index out
# of the file.
query_summary.to_csv(output_file, index=False)

# Report where the file went and how many queries it contains.
total_queries = len(query_summary)
print(f"Summary saved to {output_file}")
print(f"Total queries: {total_queries}")

Summary saved to /home/buiduckhanh/fomat_data/query_performance_summary.csv
Total queries: 99
