In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os as os

In [None]:
# Folder that holds the input CSV files (the plotting function also writes
# its PNG output into this folder).
source_folder = 'graph_data'

# os.listdir returns entries in arbitrary order; sorting makes the order in
# which files are processed (and plots appear) reproducible across runs.
file_names = sorted(
    entry for entry in os.listdir(source_folder) if entry.endswith('.csv')
)


In [None]:

def df_plot(file_name, name, bins=15):
    """Plot an error histogram for one CSV with per-bin averages overlaid.

    Reads ``file_name`` from the module-level ``source_folder``, draws a
    histogram of the ``sum_of_errors`` column on the left axis and, on a twin
    right axis, the mean of ``continuous_disconnected_points`` within each
    histogram bin. The figure is saved as ``<name>.png`` inside
    ``source_folder`` (replacing any existing file), shown, and then closed.

    Parameters
    ----------
    file_name : str
        Name of the CSV file inside ``source_folder``.
    name : str
        Plot title and base name (without extension) of the saved PNG.
    bins : int, default 15
        Number of equal-width bins shared by the histogram and the averages.
    """
    df = pd.read_csv(os.path.join(source_folder, file_name))

    # Compute the bin edges once and give the same array to both pd.cut and
    # sns.histplot. With an integer bin count pd.cut widens the data range
    # slightly, so its intervals would not coincide with the drawn bars.
    errors = df["sum_of_errors"].dropna().to_numpy()
    bin_edges = np.histogram_bin_edges(errors, bins=bins)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

    # include_lowest=True keeps the minimum value inside the first interval
    # instead of leaving it unassigned.
    bin_labels = pd.cut(df["sum_of_errors"], bins=bin_edges, include_lowest=True)

    # observed=False keeps empty bins (mean is NaN) so the result has one
    # entry per bin and lines up with bin_centers.
    average_points = df.groupby(bin_labels, observed=False)[
        "continuous_disconnected_points"
    ].mean()

    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Histogram on the primary (left) axis
    sns.histplot(
        df["sum_of_errors"],
        bins=bin_edges,
        kde=False,
        color="skyblue",
        ax=ax1,
        label="Histogram",
    )
    ax1.set_xlabel("Sum of Errors")
    ax1.set_ylabel("Frequency", color="skyblue")
    ax1.tick_params(axis="y", labelcolor="skyblue")

    # Per-bin averages on the secondary (right) axis, one marker per bin centre
    ax2 = ax1.twinx()
    ax2.plot(
        bin_centers,
        average_points.to_numpy(),
        marker="o",
        color="red",
        label="Avg Continuous Disconnected",
    )
    ax2.set_ylabel("Average Disconnected Points", color="black")
    ax2.tick_params(axis="y", labelcolor="black")

    # Merge the legend entries of both axes into a single legend
    handles1, labels1 = ax1.get_legend_handles_labels()
    handles2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(handles1 + handles2, labels1 + labels2, loc="upper left")

    # Grid and title are set on ax2 explicitly rather than through pyplot's
    # implicit "current axes".
    ax2.grid(axis="y", linestyle="--", alpha=0.7)
    ax2.set_title(name)
    fig.tight_layout()

    # Save the plot, replacing any previous file with the same name
    fig.savefig(os.path.join(source_folder, name + ".png"))

    # Show the plot, then release the figure so repeated calls in a loop do
    # not accumulate open figures.
    plt.show()
    plt.close(fig)

In [None]:
# Render and save one plot per CSV file, named after the file without its extension.
for f in file_names:
    # splitext removes only the final extension, so a name with extra dots
    # (e.g. "city.v2.csv") keeps its full stem instead of being cut at the
    # first dot, which could make two files share one output name.
    df_plot(file_name=f, name=os.path.splitext(f)[0])