In [106]:
import os
import pandas as pd
import json
import re

In [107]:
def form_logger_df(power_data_logger_file: "str | os.PathLike"):
    """Load the power data logger file into an all-float DataFrame.

    The file is read with ``pd.read_csv`` using runs of whitespace as the
    separator. Only the columns ``timestamp``, ``voltage_V``, ``current_A``
    and ``temp_C_ema`` are kept; any other column is discarded.

    Args:
        power_data_logger_file: Path of the logger file to read.

    Returns:
        A new DataFrame with exactly the four columns listed above, each
        cast to ``float``.

    Raises:
        KeyError: If one of the required columns is missing from the file.
        ValueError: If a value in a required column cannot be cast to float.
    """
    logger_columns = ['timestamp', 'voltage_V', 'current_A', 'temp_C_ema']

    raw_df = pd.read_csv(power_data_logger_file, sep=r'\s+')

    # Select and cast in one expression: ``astype`` returns a fresh frame, so
    # nothing is ever assigned onto a column-subset of ``raw_df`` (the pattern
    # that can trigger pandas' SettingWithCopyWarning).
    return raw_df[logger_columns].astype(float)

def form_command_df(command_feedback_file: str):
    """Parse a command feedback log into one DataFrame row per image result.

    Every non-blank line of the file is decoded as a JSON object. Its
    ``output`` string (optional; missing or ``null`` is treated as empty) is
    scanned for records of the form::

        Image: <file>, Model Resolution: <W>x<H>, Framework: <NAME>,
        Set FPS: <fps>, Inference time: <t> seconds, Predicted class: <n>,
        Average CPU Usage: <p>%, Start Time: <t0>, End Time: <t1>

    (all on one line, separated by ", "). Each record found becomes one row,
    combined with the ``client_id``, ``command`` and ``error`` values of the
    JSON object. The model name is the first whitespace-delimited token that
    follows ``run_model.py`` in ``command`` (``None`` when absent).

    Side effect: sets the global pandas option ``display.float_format`` to
    three decimal places.

    Args:
        command_feedback_file: Path of the JSON-lines feedback file.

    Returns:
        A DataFrame with the columns ``client_id``, ``command``, ``model``,
        ``start_time``, ``end_time``, ``error``, ``image_file``,
        ``resolution_width``, ``resolution_height``, ``framework``, ``fps``,
        ``inference_time``, ``cpu_usage`` and ``predicted_class``. The columns
        are present even when no record was found.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a JSON object lacks ``command``, or lacks ``client_id``
            or ``error`` while its output contains at least one record.
        ValueError: If a captured numeric field cannot be converted.
    """
    # Pattern for a single per-image result record inside the 'output' text
    output_pattern = re.compile(
        r"Image: (?P<image_file>[^,]+), "
        r"Model Resolution: (?P<resolution_width>\d+)x(?P<resolution_height>\d+), "
        r"Framework: (?P<framework>[A-Z]+), "
        r"Set FPS: (?P<fps>[\d.]+), "
        r"Inference time: (?P<inference_time>[\d.]+) seconds, "
        r"Predicted class: (?P<predicted_class>\d+), "
        r"Average CPU Usage: (?P<cpu_usage>[\d.]+)%, "
        r"Start Time: (?P<start_time>[\d.]+), "
        r"End Time: (?P<end_time>[\d.]+)"
    )

    model_pattern = re.compile(r'run_model\.py\s+([^\s]+)')

    # Fixed schema so an input without any record still yields usable columns
    columns = [
        "client_id", "command", "model", "start_time", "end_time", "error",
        "image_file", "resolution_width", "resolution_height", "framework",
        "fps", "inference_time", "cpu_usage", "predicted_class",
    ]
    command_list = []

    with open(command_feedback_file, 'r') as file:
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not JSON documents
                continue

            command_data = json.loads(line)

            # 'output' may be missing or null; both mean "nothing to parse"
            output_data = command_data.get('output') or ''

            model_match = model_pattern.search(command_data['command'])
            model_name = model_match.group(1) if model_match else None

            # Named groups keep field extraction independent of group order
            for match in output_pattern.finditer(output_data):
                command_list.append({
                    "client_id": command_data["client_id"],
                    "command": command_data["command"],
                    "model": model_name,
                    "start_time": float(match.group("start_time")),
                    "end_time": float(match.group("end_time")),
                    "error": command_data["error"],
                    "image_file": match.group("image_file"),
                    "resolution_width": int(match.group("resolution_width")),
                    "resolution_height": int(match.group("resolution_height")),
                    "framework": match.group("framework"),
                    "fps": float(match.group("fps")),
                    "inference_time": float(match.group("inference_time")),
                    "cpu_usage": float(match.group("cpu_usage")),
                    "predicted_class": int(match.group("predicted_class")),
                })

    # Kept from the original implementation: global display setting
    pd.options.display.float_format = '{:.3f}'.format

    return pd.DataFrame(command_list, columns=columns)

# Function to attach averaged logger readings (and derived power) to each command row
def merge_and_aggregate(command_df, logger_df, buffer=0.005):
    """Attach mean voltage, current, temperature and power to each command row.

    For every row of ``command_df`` the logger samples whose ``timestamp``
    lies in ``[start_time - buffer, end_time + buffer]`` (both ends inclusive)
    are averaged. Rows without any matching sample get NaN.

    Args:
        command_df: DataFrame with at least ``start_time``, ``end_time``,
            ``client_id``, ``command`` and ``error`` columns. It is not
            modified; the result is built on a copy.
        logger_df: DataFrame with ``timestamp``, ``voltage_V``, ``current_A``
            and ``temp_C_ema`` columns.
        buffer: Margin added on both sides of the time window, in the same
            unit as the timestamps. Defaults to 0.005 (the value that used to
            be hard-coded).

    Returns:
        A new DataFrame: the rows of ``command_df`` without ``client_id``,
        ``command`` and ``error``, plus ``avg_voltage_V``, ``avg_current_A``,
        ``avg_temp_C`` and ``power_W`` (``avg_voltage_V * avg_current_A``).

    Raises:
        KeyError: If a required column is missing from either frame.
    """
    # NaN (not None) keeps the new columns float64 even when no row matches
    missing = float('nan')
    timestamps = logger_df['timestamp']

    voltage_agg = []
    current_agg = []
    temp_agg = []

    for _, row in command_df.iterrows():
        # Widen the window by `buffer` to capture logger samples taken just
        # outside the exact start/end times
        in_window = (
            (timestamps >= row['start_time'] - buffer)
            & (timestamps <= row['end_time'] + buffer)
        )
        subset = logger_df[in_window]

        if subset.empty:
            voltage_agg.append(missing)
            current_agg.append(missing)
            temp_agg.append(missing)
        else:
            voltage_agg.append(subset['voltage_V'].mean())
            current_agg.append(subset['current_A'].mean())
            temp_agg.append(subset['temp_C_ema'].mean())

    # Work on a copy so the caller's DataFrame is left untouched
    result = command_df.copy()
    result['avg_voltage_V'] = voltage_agg
    result['avg_current_A'] = current_agg
    result['avg_temp_C'] = temp_agg

    # Power derived from the averaged voltage and current
    result['power_W'] = result['avg_voltage_V'] * result['avg_current_A']

    # Remove columns that are not needed downstream
    return result.drop(columns=['client_id', 'command', 'error'])

In [108]:
# Hailo run: parse the command feedback log and the power logger file, then
# attach the averaged logger readings to every parsed command row.
command_df_hailo = form_command_df(command_feedback_file='command_feedback_hailo.txt')
logger_df_hailo = form_logger_df(power_data_logger_file='data_logger_hailo.txt')
hailo_df = merge_and_aggregate(
    command_df=command_df_hailo,
    logger_df=logger_df_hailo,
)
# Optional export (disabled):
# hailo_df.to_csv('hailo_df.csv', index=False)

In [109]:
# PyTorch run: parse the command feedback log and the power logger file, then
# attach the averaged logger readings to every parsed command row.
command_df_pt = form_command_df(command_feedback_file="command_feedback_pt.txt")
logger_df_pt = form_logger_df(power_data_logger_file='data_logger_pt.txt')
pt_df = merge_and_aggregate(
    command_df=command_df_pt,
    logger_df=logger_df_pt,
)
# Optional export (disabled):
# pt_df.to_csv('pt_df.csv', index=False)

In [110]:
# Stack the PyTorch rows above the Hailo rows and write the combined table to
# CSV (the row index is not written).
merged_df = pd.concat(objs=[pt_df, hailo_df], axis='index')
merged_df.to_csv(path_or_buf='updated_merged_dataset.csv', index=False)

In [91]:
import pandas as pd
import matplotlib.pyplot as plt

In [112]:
def aggregate_power_by_fps(data):
    """Average ``power_W`` for every (fps, model) combination.

    Args:
        data: DataFrame with ``fps``, ``model`` and ``power_W`` columns.

    Returns:
        DataFrame with the columns ``fps``, ``model`` and ``mean_power``,
        one row per (fps, model) group.
    """
    per_fps_and_model = data.groupby(['fps', 'model'])
    mean_power_spec = pd.NamedAgg(column='power_W', aggfunc='mean')
    # reset_index turns the group keys back into ordinary columns
    return per_fps_and_model.agg(mean_power=mean_power_spec).reset_index()

# Mean power per (fps, model) for each framework: PyTorch first, then Hailo
pytorch_agg, hailo_agg = map(aggregate_power_by_fps, (pt_df, hailo_df))

In [113]:
# NOTE(review): this recomputes the same two aggregates as the previous cell;
# one of the two copies can probably be dropped.

# Mean power per (fps, model), PyTorch run
pytorch_agg = aggregate_power_by_fps(data=pt_df)

# Mean power per (fps, model), Hailo run
hailo_agg = aggregate_power_by_fps(data=hailo_df)

# Function to plot mean power against FPS (one line per model) and save it as PNG
def plot_and_save_power_vs_fps(agg_data, framework_name, fps_range, plot_title, file_name):
    """Plot mean power versus FPS, one line per model, and save it as a PNG.

    Args:
        agg_data: DataFrame with ``fps``, ``model`` and ``mean_power`` columns.
        framework_name: Accepted for interface compatibility; not used by the
            plot.
        fps_range: Indexable pair ``(min_fps, max_fps)``; rows whose ``fps``
            lies outside this inclusive range are left out.
        plot_title: Title drawn above the plot.
        file_name: Destination path of the PNG file.

    Returns:
        None. The figure is written to ``file_name`` and then closed.
    """
    # Filter the data for the given FPS range (both bounds inclusive)
    in_range = (agg_data['fps'] >= fps_range[0]) & (agg_data['fps'] <= fps_range[1])
    filtered_data = agg_data[in_range]

    # Explicit Figure/Axes objects instead of pyplot's implicit current figure
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # One line per model, in order of first appearance
        for model in filtered_data['model'].unique():
            model_data = filtered_data[filtered_data['model'] == model]
            ax.plot(model_data['fps'], model_data['mean_power'], marker='o', label=model)

        ax.set_xlabel('FPS (Frames per Second)')
        ax.set_ylabel('Power Consumption (W)')
        ax.set_title(plot_title)
        ax.legend(title='Model')
        ax.grid(True)

        # Save the plot as a PNG file (300 dpi for high resolution)
        fig.savefig(file_name, format='png', dpi=300)
    finally:
        # Always release the figure, even when plotting or saving fails
        plt.close(fig)

# Inclusive FPS window (10 to 700) used for both power plots
fps_range_3 = (10, 700)

# PyTorch: power vs FPS, written to a PNG file
plot_and_save_power_vs_fps(
    agg_data=pytorch_agg,
    framework_name='PyTorch',
    fps_range=fps_range_3,
    plot_title='Power Consumption vs FPS (10 to 700) for PyTorch',
    file_name='pytorch_power_10_to_700.png',
)

# Hailo: power vs FPS, written to a PNG file
plot_and_save_power_vs_fps(
    agg_data=hailo_agg,
    framework_name='Hailo',
    fps_range=fps_range_3,
    plot_title='Power Consumption vs FPS (10 to 700) for Hailo',
    file_name='hailo_power_10_to_700.png',
)

In [114]:
def aggregate_cpu_usage_by_fps(data):
    """Average ``cpu_usage`` for every (fps, model) combination.

    Args:
        data: DataFrame with ``fps``, ``model`` and ``cpu_usage`` columns.

    Returns:
        DataFrame with the columns ``fps``, ``model`` and ``mean_cpu_usage``,
        one row per (fps, model) group.
    """
    per_fps_and_model = data.groupby(['fps', 'model'])
    mean_cpu_spec = pd.NamedAgg(column='cpu_usage', aggfunc='mean')
    # reset_index turns the group keys back into ordinary columns
    return per_fps_and_model.agg(mean_cpu_usage=mean_cpu_spec).reset_index()

# Mean CPU usage per (fps, model) for each framework: PyTorch first, then Hailo
pytorch_cpu_agg, hailo_cpu_agg = map(aggregate_cpu_usage_by_fps, (pt_df, hailo_df))

# Function to plot mean CPU usage against FPS (one line per model) and save it as PNG
def plot_and_save_cpu_usage_vs_fps(agg_data, framework_name, fps_range, plot_title, file_name):
    """Plot mean CPU usage versus FPS, one line per model, and save it as a PNG.

    Args:
        agg_data: DataFrame with ``fps``, ``model`` and ``mean_cpu_usage``
            columns.
        framework_name: Accepted for interface compatibility; not used by the
            plot.
        fps_range: Indexable pair ``(min_fps, max_fps)``; rows whose ``fps``
            lies outside this inclusive range are left out.
        plot_title: Title drawn above the plot.
        file_name: Destination path of the PNG file.

    Returns:
        None. The figure is written to ``file_name`` and then closed.
    """
    # Filter the data for the given FPS range (both bounds inclusive)
    in_range = (agg_data['fps'] >= fps_range[0]) & (agg_data['fps'] <= fps_range[1])
    filtered_data = agg_data[in_range]

    # Explicit Figure/Axes objects instead of pyplot's implicit current figure
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # One line per model, in order of first appearance
        for model in filtered_data['model'].unique():
            model_data = filtered_data[filtered_data['model'] == model]
            ax.plot(model_data['fps'], model_data['mean_cpu_usage'], marker='o', label=model)

        ax.set_xlabel('FPS (Frames per Second)')
        ax.set_ylabel('CPU Usage (%)')
        ax.set_title(plot_title)
        ax.legend(title='Model')
        ax.grid(True)

        # Save the plot as a PNG file (300 dpi for high resolution)
        fig.savefig(file_name, format='png', dpi=300)
    finally:
        # Always release the figure, even when plotting or saving fails
        plt.close(fig)

# Inclusive FPS window (10 to 700) used for both CPU usage plots
fps_range_3 = (10, 700)

# PyTorch: CPU usage vs FPS, written to a PNG file
plot_and_save_cpu_usage_vs_fps(
    agg_data=pytorch_cpu_agg,
    framework_name='PyTorch',
    fps_range=fps_range_3,
    plot_title='CPU Usage vs FPS (10 to 700) for PyTorch',
    file_name='pytorch_cpu_10_to_700.png',
)

# Hailo: CPU usage vs FPS, written to a PNG file
plot_and_save_cpu_usage_vs_fps(
    agg_data=hailo_cpu_agg,
    framework_name='Hailo',
    fps_range=fps_range_3,
    plot_title='CPU Usage vs FPS (10 to 700) for Hailo',
    file_name='hailo_cpu_10_to_700.png',
)
