In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os as os

In [None]:
# Folder that is scanned for the CSV files plotted in the cells below.
source_folder = 'graph_data'

# os.listdir() returns entries in arbitrary order; sorting keeps the order of
# the generated figures stable between runs and machines.
file_names = sorted(
    entry for entry in os.listdir(source_folder) if entry.endswith('.csv')
)


In [None]:
def erros_vs_discon_points(file_name, name):
    """Overlay disconnected-point counts on a histogram of error sums.

    Reads ``file_name`` from the module-level ``source_folder`` and shows one
    figure with two y-axes: a 10-bin histogram of ``sum_of_errors`` on the
    left axis and a line of ``continuous_disconnected_points`` plotted against
    ``sum_of_errors`` on the right axis.

    Args:
        file_name: Name of the CSV file inside ``source_folder``.
        name: Text appended to the figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    errors_col = "sum_of_errors"
    points_col = "continuous_disconnected_points"

    raw = pd.read_csv("/".join([source_folder, file_name]))
    # Order rows from the largest error sum to the smallest so the line on the
    # right axis is drawn in one sweep along x instead of zig-zagging.
    ranked = raw.sort_values(by=errors_col, ascending=False).reset_index(drop=True)

    fig, hist_ax = plt.subplots(figsize=(12, 6))

    # Left axis: distribution of the error sums.
    sns.histplot(
        ranked[errors_col],
        bins=10,
        color="gray",
        alpha=0.5,
        ax=hist_ax,
        label="Histogram of Errors",
    )
    hist_ax.set_xlabel("Sum of Errors (Sorted Highest to Lowest)")
    hist_ax.set_ylabel("Frequency of Errors", color="gray")
    hist_ax.tick_params(axis="y", labelcolor="gray")

    # Right axis: shares x with the histogram, carries the disconnected points.
    points_ax = hist_ax.twinx()
    points_ax.plot(
        ranked[errors_col],
        ranked[points_col],
        marker="o",
        label="Continuous Disconnected Points",
        color="blue",
    )
    points_ax.set_ylabel("Disconnected Points", color="black")
    points_ax.tick_params(axis="y", labelcolor="black")

    # One legend listing the artists of both axes.
    hist_handles, hist_labels = hist_ax.get_legend_handles_labels()
    line_handles, line_labels = points_ax.get_legend_handles_labels()
    points_ax.legend(
        hist_handles + line_handles,
        hist_labels + line_labels,
        loc="upper left",
    )

    # Title and horizontal grid go on the twin axis, which is the current
    # axes at this point.
    points_ax.set_title("Disconnected Points vs. Histogram of Sum of Errors with " + name)
    points_ax.grid(axis="y", linestyle="--", alpha=0.5)

    fig.tight_layout()
    plt.show()

In [None]:
# Draw the histogram/line overlay for every CSV listed in `file_names`.
for f in file_names:
    # splitext() removes only the final extension, so "run.v2.csv" is titled
    # "run.v2" instead of being cut at the first dot.
    erros_vs_discon_points(f, os.path.splitext(f)[0])

In [None]:
# new way


# Pass the CSV path as `file_path`; `name` is appended to the plot title.
def scatterplot(file_path, name):
    """Show a scatter plot with a regression line for one CSV file.

    Plots ``continuous_disconnected_points`` (y) against ``sum_of_errors`` (x)
    with blue points and a red regression line.

    Args:
        file_path: Path of the CSV file to read.
        name: Text appended to the figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    records = pd.read_csv(file_path)

    _, ax = plt.subplots(figsize=(12, 6))
    sns.regplot(
        data=records,
        x="sum_of_errors",
        y="continuous_disconnected_points",
        scatter_kws={'color': 'blue'},
        line_kws={'color': 'red'},
        ax=ax,
    )
    # regplot labels the axes with the raw column names; replace them with
    # readable text.
    ax.set_xlabel("Sum of Errors")
    ax.set_ylabel("Continuous Disconnected Points")
    ax.set_title("Sum of Errors vs. Continuous Disconnected Points with " + name)
    plt.show()

# Draw the regression scatter plot for every CSV listed in `file_names`.
for f in file_names:
    # splitext() removes only the final extension, so "run.v2.csv" is titled
    # "run.v2" instead of being cut at the first dot.
    scatterplot(os.path.join(source_folder, f), os.path.splitext(f)[0])

In [None]:


def heatmap(file_path, name):
    """Show an annotated correlation heatmap for the columns of a CSV file.

    Only numeric and boolean columns take part in the correlation. Other
    columns (for example text) are left out up front, because
    ``DataFrame.corr()`` in pandas 2.x no longer drops them silently and
    raises instead.

    Args:
        file_path: Path of the CSV file to read.
        name: Text appended to the figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    df = pd.read_csv(file_path)

    # Restrict to columns corr() can handle on every pandas version; for an
    # all-numeric file this is the same frame as `df`.
    numeric_df = df.select_dtypes(include=["number", "bool"])
    corr_matrix = numeric_df.corr()

    # Plot the heatmap on an explicit axes rather than the implicit current one.
    _, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(
        corr_matrix,
        annot=True,
        cmap="coolwarm",
        fmt=".2f",
        linewidths=0.5,
        ax=ax,
    )
    ax.set_title("Correlation Heatmap of Variables with " + name)
    plt.show()

# Draw the correlation heatmap for every CSV listed in `file_names`.
for f in file_names:
    # splitext() removes only the final extension, so "run.v2.csv" is titled
    # "run.v2" instead of being cut at the first dot.
    heatmap(os.path.join(source_folder, f), os.path.splitext(f)[0])