In [250]:
# read each csv file under "work_output_graphs" into a dataframe 
import os
import pandas as pd

# read CSV files in directory and return dataframe
def read_csv_files(directory):
    """Load every ``.csv`` file in ``directory`` into a single DataFrame.

    The run timestamp is taken from each file name: the second
    underscore-separated field, up to its first dot, is parsed with the
    format ``%Y%m%d%H%M%S`` and converted to integer epoch seconds. That
    value is stored in a leading ``timestamp`` column on every row read from
    the file.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A DataFrame with the rows of all CSV files stacked under a fresh
        0..n-1 index, or an empty DataFrame when no CSV file is found.

    Raises:
        IndexError: If a CSV file name contains no underscore.
        ValueError: If the extracted text does not match ``%Y%m%d%H%M%S``.
    """
    frames = []
    # Sorted so that the row order of the result does not depend on the
    # arbitrary order in which os.listdir returns directory entries.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".csv"):
            continue

        # e.g. "<prefix>_20240101120000.csv" -> "20240101120000"
        timestamp = filename.split('_')[1].split('.')[0]
        epoch_ts = int(pd.to_datetime(timestamp, format='%Y%m%d%H%M%S').timestamp())

        df = pd.read_csv(os.path.join(directory, filename))
        # Broadcast the scalar down a new first column; allow_duplicates keeps
        # a CSV that already has a 'timestamp' column from raising here.
        df.insert(0, 'timestamp', epoch_ts, allow_duplicates=True)
        frames.append(df)

    print(f"Number of CSV files read at path {directory}: {len(frames)}")

    if not frames:
        return pd.DataFrame()
    # One concat at the end instead of re-copying the accumulated frame
    # for every file.
    return pd.concat(frames, ignore_index=True)


In [None]:
# Both result directories are resolved relative to the current working directory.
print(os.getcwd())
PATH = os.path.join(os.getcwd(), "csmaca_traditional_data_out")
Q_PATH = os.path.join(os.getcwd(), "quantum_1kruns_2.31_5.07_7.02")

# Load each result set and order its rows by the 'priority' column.
trad_df = read_csv_files(PATH).sort_values(by='priority')
print(trad_df)
q_df = read_csv_files(Q_PATH).sort_values(by='priority')
print(q_df)

# Row counts of the two result sets, side by side.
print(f"Length of traditional dataframe: {len(trad_df)} vs Length of quantum dataframe: {len(q_df)}")
# Total work time of each type of worker as a function
def total_work_time(df, priority):
    """Return the sum of 'total_work_time' over rows whose 'priority' equals ``priority``."""
    matches = df['priority'] == priority
    return df.loc[matches, 'total_work_time'].sum()

# Average work time of each type of worker as a function
def avg_work_time(df, priority):
    """Return the mean 'total_work_time' for one priority, rounded to 4 decimals."""
    selected = df.loc[df['priority'] == priority, 'total_work_time']
    return round(selected.mean(), 4)

# Median work time of each type of worker as a function
def median_work_time(df, priority):
    """Return the median 'total_work_time' over rows whose 'priority' equals ``priority``."""
    selected = df.loc[df['priority'] == priority, 'total_work_time']
    return selected.median()

# number of workers of each type with 0 work time
def num_workers_no_work(df, priority):
    """Count the rows of the given priority whose 'total_work_time' is exactly 0."""
    is_priority = df['priority'] == priority
    is_idle = df['total_work_time'] == 0
    # Summing the boolean mask counts the True entries; int() keeps the
    # return type a plain Python int.
    return int((is_priority & is_idle).sum())

def avg_conn_prob(df, priority):
    """Return the mean 'connection_probability' for one priority, rounded to 4 decimals."""
    selected = df.loc[df['priority'] == priority, 'connection_probability']
    return round(selected.mean(), 4)

def print_worker_stats(df, total_work_time, avg_work_time, median_work_time, num_workers_no_work):
    """Print per-priority summary statistics for one result frame.

    For each of the priorities 'high', 'normal' and 'low' this prints the
    total, average and median work time, the number of workers with zero
    work time, and the average connection probability together with the mean
    of the 'backoff_time' column.

    Args:
        df: Frame with 'priority' and 'backoff_time' columns, plus whatever
            columns the supplied callables read.
        total_work_time: Callable ``(df, priority) -> value``.
        avg_work_time: Callable ``(df, priority) -> value``.
        median_work_time: Callable ``(df, priority) -> value``.
        num_workers_no_work: Callable ``(df, priority) -> value``.

    Note:
        Unlike the other statistics, the connection probability is not
        injected: it comes from the module-level ``avg_conn_prob`` function.
    """
    priorities = ('high', 'normal', 'low')

    # The three work-time reports share one sentence shape.
    work_time_reports = (
        ("Total", total_work_time),
        ("Average", avg_work_time),
        ("Median", median_work_time),
    )
    for label, stat in work_time_reports:
        for priority in priorities:
            print(f"{label} work time for {priority} priority workers: {stat(df, priority)} seconds")

    for priority in priorities:
        print(f"Number of {priority} priority workers with 0 work time: {num_workers_no_work(df, priority)}")

    for priority in priorities:
        print(f"Average connection probability for {priority} priority workers: {avg_conn_prob(df, priority)}, backoff times: {df[df['priority'] == priority]['backoff_time'].mean()}")

# Same report for the traditional run, then for the quantum run.
print_worker_stats(
    trad_df,
    total_work_time=total_work_time,
    avg_work_time=avg_work_time,
    median_work_time=median_work_time,
    num_workers_no_work=num_workers_no_work,
)
print_worker_stats(
    q_df,
    total_work_time=total_work_time,
    avg_work_time=avg_work_time,
    median_work_time=median_work_time,
    num_workers_no_work=num_workers_no_work,
)


In [None]:
# Re-order the traditional-run rows so the newest timestamp comes first.
trad_df = trad_df.sort_values(by='timestamp', ascending=False)

# The first five rows are therefore the five most recent ones.
latest_5_timestamps = trad_df.iloc[:5]

print("The latest 5 timestamps in the DataFrame are:")
print(latest_5_timestamps['timestamp'])
# Storage type of the timestamp column.
print(trad_df['timestamp'].dtype)
# Show every row that shares the newest timestamp (rows are displayed, not removed).
print(trad_df.loc[trad_df['timestamp'] == latest_5_timestamps['timestamp'].iloc[0]])

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_worker_stats(df1, df2, labels, save_path='stats_by_priority_comparison.png'):
    """Draw a 3x2 grid of grouped bar charts comparing two result frames by priority.

    Each panel shows one metric for the priorities 'high', 'normal' and
    'low', with the ``df1`` bar and the ``df2`` bar side by side. The figure
    is written to ``save_path`` at 300 dpi and then shown.

    Args:
        df1: First result frame. Must provide the columns 'priority',
            'total_work_time', 'connection_attempts',
            'connection_probability' and 'backoff_count'.
        df2: Second result frame with the same columns.
        labels: Two legend labels, ``labels[0]`` for ``df1`` and
            ``labels[1]`` for ``df2``.
        save_path: Output image path; defaults to the file name this
            function has always written.
    """
    priorities = ['high', 'normal', 'low']
    # Tick text shown under the bars; rows tagged 'normal' are displayed as 'medium'.
    tick_labels = ['high', 'medium', 'low']

    def column_sum(column):
        """Build a metric that sums ``column`` over the rows of one priority."""
        return lambda df, priority: df[df['priority'] == priority][column].sum()

    def column_mean(column):
        """Build a metric that averages ``column`` over the rows of one priority."""
        return lambda df, priority: df[df['priority'] == priority][column].mean()

    # (grid position, metric(df, priority), panel title, y-axis label)
    panels = [
        ((0, 0), total_work_time, 'Total Connected Time by Priority', 'Total Connected Time (seconds)'),
        ((0, 1), num_workers_no_work, 'Number of Workers with 0 Connected Time by Priority', 'Number of Workers'),
        ((1, 0), avg_work_time, 'Average Connected Time by Priority', 'Average Connected Time (seconds)'),
        ((1, 1), column_sum('connection_attempts'), 'Connection Attempts by Priority', 'Connection Attempts'),
        ((2, 0), column_mean('connection_probability'), 'Connection Probability by Priority', 'Connection Probability'),
        ((2, 1), column_sum('backoff_count'), 'Backoff Count by Priority', 'Backoff Count'),
    ]

    fig, axs = plt.subplots(3, 2, figsize=(8, 8))

    bar_width = 0.35
    index = np.arange(len(priorities))

    # tab20 alternates two shades per hue: even entries colour the df1 bars,
    # the following odd entries colour the df2 bars.
    colors = list(plt.cm.tab20.colors)
    df1_colors = [colors[0], colors[2], colors[4]]
    df2_colors = [colors[1], colors[3], colors[5]]

    for (row, col), metric, title, ylabel in panels:
        ax = axs[row, col]
        ax.bar(index, [metric(df1, p) for p in priorities], bar_width, label=labels[0], color=df1_colors)
        ax.bar(index + bar_width, [metric(df2, p) for p in priorities], bar_width, label=labels[1], color=df2_colors)
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        # Centre each tick between the two bars of its group.
        ax.set_xticks(index + bar_width / 2)
        ax.set_xticklabels(tick_labels)
        ax.legend()
        ax.grid()

    # Adjust spacing between subplots
    plt.tight_layout()
    # Save the plot
    plt.savefig(save_path, dpi=300)
    # Show the plot
    plt.show()

# Example usage
plot_worker_stats(df1=trad_df, df2=q_df, labels=['Traditional', 'Quantum'])


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
from math import pi

def normalize_data(data):
    """Min-max scale ``data`` to the range 0 to 1.

    A single minimum and maximum are taken over the whole input, so every
    element is scaled by the same amount regardless of its row or column.

    Args:
        data: Numeric array.

    Returns:
        ``(data - min) / (max - min)``. When every element is equal the
        range is zero; an array of zeros with the same shape is returned
        instead of dividing by zero.
    """
    min_val = np.min(data)
    span = np.max(data) - min_val
    if span == 0:
        # Constant input: 0/0 would yield NaN and a RuntimeWarning.
        return np.zeros_like(data, dtype=float)
    return (data - min_val) / span

def plot_radar_chart(df1, df2, labels):
    """Draw one radar chart per priority comparing two result frames.

    Five axes are plotted for each of the priorities 'high', 'normal' and
    'low': total connected time, number of clients with zero connected time,
    the summed 'backoff_count' column (shown as 'Connection Rejected
    Count'), summed connection attempts, and mean connection probability.
    The first four are min-max normalised; connection probability is plotted
    raw. The figure is saved to 'radar_chart_by_priority_comparison.png' and
    then shown.

    Args:
        df1: First result frame. Must provide the columns 'priority',
            'total_work_time', 'backoff_count', 'connection_attempts' and
            'connection_probability'.
        df2: Second result frame with the same columns.
        labels: Two legend labels, ``labels[0]`` for ``df1`` and
            ``labels[1]`` for ``df2``.
    """
    # Axes that are normalised, followed by the one that is plotted raw.
    categories = ['Total Connected Time', 'Clients with 0 Connected Time', 'Connection Rejected Count', 'Connection Attempts']
    axis_labels = categories + ['Connection Probability']
    num_vars = len(axis_labels)
    num_scaled = len(categories)

    priorities = ['high', 'normal', 'low']

    def collect(df):
        """Return one row of the five raw metrics per priority, in axis order."""
        rows = []
        for priority in priorities:
            subset = df[df['priority'] == priority]
            rows.append([
                total_work_time(df, priority),
                num_workers_no_work(df, priority),
                subset['backoff_count'].sum(),
                subset['connection_attempts'].sum(),
                subset['connection_probability'].mean(),  # Raw value
            ])
        return np.array(rows)

    data1 = collect(df1)
    data2 = collect(df2)

    # Normalise the first four metrics of both frames together so the two
    # polygons share one scale.
    # NOTE(review): normalize_data uses a single global min/max, so metrics
    # with very different magnitudes (seconds vs counts) are scaled jointly
    # rather than per axis - confirm this is intended.
    combined_data = np.concatenate((data1[:, :num_scaled], data2[:, :num_scaled]), axis=0)
    normalized_data = normalize_data(combined_data)
    data1_normalized = np.hstack((normalized_data[:len(data1)], data1[:, num_scaled:]))
    data2_normalized = np.hstack((normalized_data[len(data1):], data2[:, num_scaled:]))

    # One evenly spaced angle per axis, repeating the first to close the
    # polygon. Identical for every subplot, so computed once.
    angles = [n / float(num_vars) * 2 * pi for n in range(num_vars)]
    angles += angles[:1]

    # Create subplots for each priority level
    fig, axs = plt.subplots(3, 1, figsize=(10, 18), subplot_kw=dict(polar=True))

    # Both frames are drawn in the same colour per priority; they are told
    # apart by line style (dashed vs solid).
    colors = plt.get_cmap('tab20')
    priority_colors = {
        'high': (colors(0), colors(0)),
        'normal': (colors(2), colors(2)),
        'low': (colors(4), colors(4))
    }

    for i, priority in enumerate(priorities):
        ax = axs[i]
        color1, color2 = priority_colors[priority]

        # Repeat the first value at the end to close the polygon.
        values1 = data1_normalized[i].tolist()
        values2 = data2_normalized[i].tolist()
        values1 += values1[:1]
        values2 += values2[:1]

        # Put the first axis at the top and go clockwise.
        ax.set_theta_offset(pi / 2)
        ax.set_theta_direction(-1)

        # One spoke per variable, labelled with its name.
        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(axis_labels, fontsize=12)

        # df1: dashed outline with a translucent fill.
        ax.plot(angles, values1, linewidth=2, linestyle='dashed', label=labels[0], color=color1)
        ax.fill(angles, values1, alpha=0.25, color=color1)

        # df2: solid outline, translucent fill, plus a wider half-transparent
        # stroke on top to make its border stand out.
        ax.plot(angles, values2, linewidth=2, linestyle='solid', label=labels[1], color=color2)
        ax.fill(angles, values2, alpha=0.25, color=color2)
        ax.plot(angles, values2, linewidth=4, linestyle='solid', color=color2, alpha=0.5)

        # 'normal' rows are presented under the heading 'Medium'.
        title = 'Medium Priority Clients' if priority == 'normal' else f'{priority.capitalize()} Priority Clients'
        ax.set_title(title, size=15, color='black', y=1.05)
        ax.legend(loc='upper right', bbox_to_anchor=(1.1, 1.1))

    # Adjust layout
    plt.tight_layout()
    # Save the plot
    plt.savefig('radar_chart_by_priority_comparison.png', dpi=300, bbox_inches='tight')
    # Show the plot
    plt.show()

# Example usage
plot_radar_chart(df1=trad_df, df2=q_df, labels=['TNN', 'QNN'])

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.patches as mpatches
import matplotlib.lines as mlines

def plot_combined_box_plot_by_priority(df1, df2, df1_name, df2_name):
    """Draw paired box plots of 'total_work_time' per priority for two frames.

    For each of the priorities 'high', 'normal' and 'low' the ``df1`` box is
    drawn slightly left of the tick and the ``df2`` box slightly right. Each
    median is annotated with its value. The input frames are not modified.

    Args:
        df1: First result frame; needs 'priority' and 'total_work_time'.
        df2: Second result frame with the same columns.
        df1_name: Name of the first data source (kept for interface
            compatibility; no legend is drawn).
        df2_name: Name of the second data source (see ``df1_name``).
    """
    priority_order = ['high', 'normal', 'low']

    # tab20 alternates two shades per hue: even entries colour the df1
    # boxes, the following odd entries colour the df2 boxes.
    tab20_colors = plt.get_cmap('tab20').colors
    df1_colors = {'high': tab20_colors[0], 'normal': tab20_colors[2], 'low': tab20_colors[4]}
    df2_colors = {'high': tab20_colors[1], 'normal': tab20_colors[3], 'low': tab20_colors[5]}

    fig, ax = plt.subplots(figsize=(8, 6))

    def draw_box(values, position, color):
        """Draw one coloured box at ``position`` and label its median."""
        bp = ax.boxplot(values, positions=[position], widths=0.4, patch_artist=True)
        for box in bp['boxes']:
            box.set_facecolor(color)
            box.set_alpha(0.7)
        for whisker in bp['whiskers']:
            whisker.set_color(color)
        for cap in bp['caps']:
            cap.set_color(color)
        for median in bp['medians']:
            median.set_color('black')
            # The median artist is a horizontal line; either y value is the median.
            median_value = median.get_ydata()[0]
            ax.text(position, median_value, f'{median_value:.2f}', ha='center', va='bottom', color='black')

    for i, priority in enumerate(priority_order):
        # Select straight from each frame rather than tagging the caller's
        # frames with a 'source' column and re-filtering a concatenation.
        df1_data = df1.loc[df1['priority'] == priority, 'total_work_time']
        df2_data = df2.loc[df2['priority'] == priority, 'total_work_time']
        draw_box(df1_data, i - 0.2, df1_colors[priority])
        draw_box(df2_data, i + 0.2, df2_colors[priority])

    # Set the x-axis labels and title
    ax.set_xlabel('Priority')
    ax.set_ylabel('Total Connected Time (seconds)')
    ax.set_title('Box Plot of Connected Time by Priority (Traditional vs Quantum Networks)')
    # boxplot() moves the ticks to the box positions; put them back on the
    # integer group centres.
    ax.set_xticks(np.arange(len(priority_order)))
    ax.set_xticklabels(priority_order)

    plt.show()

# Example usage
plot_combined_box_plot_by_priority(
    df1=trad_df,
    df2=q_df,
    df1_name="Traditional network",
    df2_name="Quantum network",
)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def plot_bar_plot_by_priority(df1, df2, df1_name, df2_name):
    """Draw grouped bars of the mean 'total_work_time' per priority for two frames.

    Args:
        df1: First result frame; needs 'priority' and 'total_work_time'.
        df2: Second result frame with the same columns.
        df1_name: Legend name of the first data source.
        df2_name: Legend name of the second data source.
    """
    priority_order = ['high', 'normal', 'low']

    # Mean work time per priority, in display order. reindex yields NaN for
    # a priority that is absent from a frame instead of failing on lookup.
    df1_means = df1.groupby('priority')['total_work_time'].mean().reindex(priority_order)
    df2_means = df2.groupby('priority')['total_work_time'].mean().reindex(priority_order)

    fig, ax = plt.subplots(figsize=(15, 6))

    bar_width = 0.35
    index = np.arange(len(priority_order))

    # Per priority: [df1 colour, df2 colour], taken as adjacent tab20 entries.
    tab20_colors = plt.get_cmap('tab20').colors
    color_mapping = {
        'high': [tab20_colors[0], tab20_colors[1]],
        'normal': [tab20_colors[2], tab20_colors[3]],
        'low': [tab20_colors[4], tab20_colors[5]]
    }

    for i, priority in enumerate(priority_order):
        # Only the first group carries legend labels; the empty label keeps
        # the remaining bars out of the legend.
        ax.bar(index[i], df1_means[priority], bar_width, label=f'{df1_name} ({priority})' if i == 0 else "", color=color_mapping[priority][0])
        ax.bar(index[i] + bar_width, df2_means[priority], bar_width, label=f'{df2_name} ({priority})' if i == 0 else "", color=color_mapping[priority][1])

    # Set the x-axis labels and title
    ax.set_xlabel('Priority')
    ax.set_ylabel('Average Connected Time (seconds)')
    ax.set_title('Average Connected Time by Priority')
    # Centre each tick between the two bars of its group.
    ax.set_xticks(index + bar_width / 2)
    ax.set_xticklabels(priority_order)
    ax.legend()

    plt.show()

# Example usage
plot_bar_plot_by_priority(
    df1=trad_df,
    df2=q_df,
    df1_name="Traditional network",
    df2_name="Quantum network",
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

def plot_violin_plot_by_priority(df1, df2, df1_name, df2_name):
    """Draw split seaborn violins of 'total_work_time' per priority for two frames.

    The input frames are not modified: the 'source' column used as the hue
    is added to copies.

    Args:
        df1: First result frame; needs 'priority' and 'total_work_time'.
        df2: Second result frame with the same columns.
        df1_name: Hue label for rows coming from ``df1``.
        df2_name: Hue label for rows coming from ``df2``.
    """
    fig, ax = plt.subplots(figsize=(15, 6))

    # assign() returns a new frame, so the callers' frames do not gain a
    # 'source' column as a side effect of plotting.
    combined_df = pd.concat([
        df1.assign(source=df1_name),
        df2.assign(source=df2_name),
    ])

    # Define the order of the categories
    priority_order = ['high', 'normal', 'low']

    sns.violinplot(x='priority', y='total_work_time', hue='source', data=combined_df, split=True, ax=ax, palette='tab20', order=priority_order)

    ax.set_title('Violin Plot of Work Time by Priority')
    ax.set_ylabel('Total Work Time (seconds)')
    plt.show()

# Example usage
plot_violin_plot_by_priority(
    df1=trad_df,
    df2=q_df,
    df1_name="traditional network",
    df2_name="quantum network",
)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def plot_violin_plot_by_priority(df1, df2, df1_name, df2_name):
    """Draw overlaid matplotlib violins of 'total_work_time' per priority.

    For each of the priorities 'high', 'normal' and 'low' the ``df1`` and
    ``df2`` violins are drawn at the same x position, half transparent, so
    the two distributions overlap. The input frames are not modified.

    NOTE(review): an earlier cell defines a seaborn-based function with this
    same name; running this cell replaces that definition.

    Args:
        df1: First result frame; needs 'priority' and 'total_work_time'.
        df2: Second result frame with the same columns.
        df1_name: Legend name of the first data source.
        df2_name: Legend name of the second data source.
    """
    fig, ax = plt.subplots(figsize=(15, 6))

    priority_order = ['high', 'normal', 'low']

    # Per priority: [df1 colour, df2 colour], taken as adjacent tab20 entries.
    tab20_colors = plt.get_cmap('tab20').colors
    color_mapping = {
        'high': [tab20_colors[0], tab20_colors[1]],
        'normal': [tab20_colors[2], tab20_colors[3]],
        'low': [tab20_colors[4], tab20_colors[5]]
    }

    def draw_violin(values, position, color):
        """Draw one translucent violin and return its body artists."""
        parts = ax.violinplot(values, positions=[position], widths=0.4, showmeans=False, showmedians=False)
        for body in parts['bodies']:
            body.set_facecolor(color)
            body.set_edgecolor('black')
            body.set_alpha(0.5)  # Set transparency
        return parts['bodies']

    legend_handles = None
    for i, priority in enumerate(priority_order):
        # Select straight from each frame rather than tagging the caller's
        # frames with a 'source' column and re-filtering a concatenation.
        df1_data = df1.loc[df1['priority'] == priority, 'total_work_time']
        df2_data = df2.loc[df2['priority'] == priority, 'total_work_time']

        bodies1 = draw_violin(df1_data, i, color_mapping[priority][0])
        bodies2 = draw_violin(df2_data, i, color_mapping[priority][1])

        if legend_handles is None:
            # Remember the first pair of violin bodies as legend swatches.
            legend_handles = [bodies1[0], bodies2[0]]

    # Set the x-axis labels and title
    ax.set_xlabel('Priority')
    ax.set_ylabel('Total Work Time (seconds)')
    ax.set_title('Violin Plot of Work Time by Priority')
    ax.set_xticks(np.arange(len(priority_order)))
    ax.set_xticklabels(priority_order)
    # Pass explicit handles: a labels-only legend() call pairs the names with
    # the first artists on the axes, which need not be the two violin bodies.
    ax.legend(legend_handles, [df1_name, df2_name], loc='upper right')

    plt.show()

# Example usage
plot_violin_plot_by_priority(
    df1=trad_df,
    df2=q_df,
    df1_name="traditional network",
    df2_name="quantum network",
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

def plot_combined_violin_plot_by_priority(df1, df2, df1_name, df2_name):
    """Draw split violins of 'connection_probability' per priority for two frames.

    Each priority/source mean is marked with a dashed horizontal line and
    labelled with its value. The figure is saved to
    'connection_probability_violin_graphs.png' and then shown. The input
    frames are not modified: the 'source' column used as the hue is added to
    copies.

    Args:
        df1: First result frame; needs 'priority' and
            'connection_probability'.
        df2: Second result frame with the same columns.
        df1_name: Hue and legend label for rows coming from ``df1``.
        df2_name: Hue and legend label for rows coming from ``df2``.
    """
    # assign() returns a new frame, so the callers' frames do not gain a
    # 'source' column as a side effect of plotting.
    combined_df = pd.concat([
        df1.assign(source=df1_name),
        df2.assign(source=df2_name),
    ])

    # Define the order of the categories
    priority_order = ['high', 'normal', 'low']

    # Create the figure and axis
    fig, ax = plt.subplots(figsize=(7, 6))  # Adjusted figure size to be narrower

    # Plot the violin plot
    sns.violinplot(x='priority', y='connection_probability', hue='source', data=combined_df, order=priority_order, palette='tab20', split=True, ax=ax)

    # Mark and label the mean of every priority/source pair.
    tab20_colors = plt.get_cmap('tab20').colors
    sources = [(df1_name, tab20_colors[0]), (df2_name, tab20_colors[1])]
    for i, priority in enumerate(priority_order):
        for j, (source, color) in enumerate(sources):
            in_group = (combined_df['priority'] == priority) & (combined_df['source'] == source)
            mean_val = combined_df.loc[in_group, 'connection_probability'].mean()
            offset = -0.2 if j == 0 else 0.2  # Offset for placing the label on the corresponding side
            ax.text(i + offset, mean_val, f'{mean_val:.2f}', ha='center', va='bottom', color='black', fontsize=10, weight=None, rotation=30)
            ax.hlines(mean_val, i - 0.2, i + 0.2, colors=color, linestyles='dashed', linewidth=1)

    # Set the x-axis labels and title
    ax.set_xlabel('Priority')
    ax.set_ylabel('Connection Probability')
    ax.set_title('Connection Probability by Priority')
    ax.grid(True)

    # Capitalize x-tick labels
    ax.set_xticklabels([label.get_text().capitalize() for label in ax.get_xticklabels()])

    # Custom legend: one swatch per source plus the dashed mean marker.
    legend_handles = [
        plt.Line2D([0], [0], color=tab20_colors[0], lw=4, label=f'{df1_name}'),
        plt.Line2D([0], [0], color=tab20_colors[1], lw=4, label=f'{df2_name}'),
        plt.Line2D([0], [0], color='black', lw=2, linestyle='dashed', label='Average')
    ]
    ax.legend(handles=legend_handles, title='Source', loc='upper right')

    plt.savefig('connection_probability_violin_graphs.png')

    plt.show()

# Example usage
plot_combined_violin_plot_by_priority(
    df1=trad_df,
    df2=q_df,
    df1_name="Traditional Network",
    df2_name="Quantum Network",
)

In [None]:
import numpy as np

# Reference rows; each row holds three values.
expected = np.array([
    np.array([0.3, 0.2, 0.05]),
    np.array([0.4, 0.2, 0.1]),
    np.array([0.4, 0.2, 0.1]),
    np.array([0.2, 0.2, 0.2]),
    np.array([0.6, 0.3, 0.1]),
    
])
# Measured rows; there is one row fewer than in `expected`.
observed = np.array([
    np.array([0.35, 0.20, 0.14]),
    np.array([0.44, 0.18, 0.11]),
    np.array([0.18, 0.27, 0.23]),
    np.array([0.45, 0.22, 0.18]),
])

# zip() pairs rows by position and stops at the shorter array, so the last
# `expected` row (which has no observed counterpart) is skipped without
# relying on a hand-tuned loop bound.
for i, (expected_row, observed_row) in enumerate(zip(expected, observed)):
    print(f"Euclidean distance between expected and observed values for row {i}: {np.linalg.norm(expected_row - observed_row)}")
    print(f"Cosine similarity between expected and observed values for row {i}: {np.dot(expected_row, observed_row) / (np.linalg.norm(expected_row) * np.linalg.norm(observed_row))}")

tnn = [1.06, 3.8, 6.1]  # Network 1 output
qnn = [1.2, 2.6, 5.0]  # Network 2 output
# calculate euclidean distance and cosine similarity between tnn and qnn
print(f"Euclidean distance between tnn and qnn: {np.linalg.norm(np.array(tnn) - np.array(qnn))}")
print(f"Cosine similarity between tnn and qnn: {np.dot(tnn, qnn) / (np.linalg.norm(tnn) * np.linalg.norm(qnn))}")
