To test: bigger range of edge values; 1 central node; different params: density, ...; different graph families; how algorithms behave for different terminals (maybe one of them gives more consistant results than other?)

USE TO PLOT DATA FROM DATABASE GRAPHS

In [11]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Folder paths
folder_path = './results/GraphInstances/B'
output_folder = os.path.join(folder_path, 'plots')

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# List to accumulate DataFrames
data_frames = []

# Iterate over all CSV files in the folder and accumulate data
for filename in os.listdir(folder_path):
    if filename.endswith('.results'):
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path)
        print(f"Processing file: {filename}")
        data_frames.append(df)

# Combine all DataFrames into one
if data_frames:
    combined_df = pd.concat(data_frames, ignore_index=True)
else:
    print("No files to process.")
    exit()
    
# Function to create and save plots
def plot_data(df, x_col, y_cols, title, x_label, y_label, output_file, log_scale=False):
    plt.figure(figsize=(12, 6))
    for y in y_cols:
        sns.lineplot(data=df, x=x_col, y=y, label=y, ci=None)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend()
    if log_scale:
        plt.yscale('log')  # Apply log scale to the y-axis
    plt.savefig(output_file)
    plt.close()

# Plot Costs by Number of Terminals
plot_data(
    combined_df,
    x_col="NumberOfTerminals",
    y_cols=["TakahashiMatsuyamaCost", "KouMarkowskyBermanCost"],
    title="Costs vs. Number of Terminals (All Files)",
    x_label="Number of Terminals",
    y_label="Cost",
    output_file=os.path.join(output_folder, "all_terminals_cost.png")
)

# Plot Times by Number of Terminals
plot_data(
    combined_df,
    x_col="NumberOfTerminals",
    y_cols=["TakahashiMatsuyamaTime", "KouMarkowskyBermanTime"],
    title="Times vs. Number of Terminals (All Files)",
    x_label="Number of Terminals",
    y_label="Time",
    output_file=os.path.join(output_folder, "all_terminals_time.png")
)

# Plot Costs by Number of Nodes
plot_data(
    combined_df,
    x_col="NumberOfNodes",
    y_cols=["TakahashiMatsuyamaCost", "KouMarkowskyBermanCost"],
    title="Costs vs. Number of Nodes (All Files)",
    x_label="Number of Nodes",
    y_label="Cost",
    output_file=os.path.join(output_folder, "all_nodes_cost.png")
)

# Plot Times by Number of Nodes
plot_data(
    combined_df,
    x_col="NumberOfNodes",
    y_cols=["TakahashiMatsuyamaTime", "KouMarkowskyBermanTime"],
    title="Times vs. Number of Nodes (All Files)",
    x_label="Number of Nodes",
    y_label="Time",
    output_file=os.path.join(output_folder, "all_nodes_time.png")
)

# Additional Plots: TMduration* and KMBduration*
tm_duration_columns = [col for col in combined_df.columns if col.startswith("TMduration")]
kmb_duration_columns = [col for col in combined_df.columns if col.startswith("KMBduration")]

# Plot TMduration* metrics
plot_data(
    combined_df,
    x_col="NumberOfNodes",
    y_cols=tm_duration_columns,
    title="TM Durations vs. Number of Nodes (All Files)",
    x_label="Number of Nodes",
    y_label="Duration",
    output_file=os.path.join(output_folder, "all_nodes_tm_durations.png")
)

# Plot KMBduration* metrics
plot_data(
    combined_df,
    x_col="NumberOfNodes",
    y_cols=kmb_duration_columns,
    title="KMB Durations vs. Number of Nodes (All Files)",
    x_label="Number of Nodes",
    y_label="Duration",
    output_file=os.path.join(output_folder, "all_nodes_kmb_durations.png")
)

# Plot KMBduration* metrics with log scale
plot_data(
    combined_df,
    x_col="NumberOfNodes",
    y_cols=kmb_duration_columns,
    title="KMB Durations vs. Number of Nodes (All Files)",
    x_label="Number of Nodes",
    y_label="Duration",
    output_file=os.path.join(output_folder, "all_nodes_kmb_durations_log_scale.png"),
    log_scale=True  # Enable log scale
)

combined_df["KMBTime_Without_Step1"] = combined_df["KouMarkowskyBermanTime"] - combined_df["KMBdurationStep1"]

# Plot KMBTime, TMTime, Step1 duration, and the difference
plot_data(
    combined_df,
    x_col="NumberOfNodes",
    y_cols=["KouMarkowskyBermanTime", "TakahashiMatsuyamaTime", "KMBdurationStep1", "KMBTime_Without_Step1"],
    title="Comparison of Total Times and KMB Step 1 Durations",
    x_label="Number of Nodes",
    y_label="Time",
    output_file=os.path.join(output_folder, "kmb_vs_tm_time_breakdown.png")
)

print("All plots have been generated.")

Processing file: b04.stp.results
Processing file: b02.stp.results
Processing file: b14.stp.results
Processing file: b09.stp.results
Processing file: b05.stp.results
Processing file: b08.stp.results
Processing file: b15.stp.results
Processing file: b10.stp.results
Processing file: b12.stp.results
Processing file: b17.stp.results
Processing file: b11.stp.results
Processing file: b07.stp.results
Processing file: b18.stp.results
Processing file: b03.stp.results
Processing file: b06.stp.results
Processing file: b13.stp.results
Processing file: b01.stp.results
Processing file: b16.stp.results



The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.lineplot(data=df, x=x_col, y=y, label=y, ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.lineplot(data=df, x=x_col, y=y, label=y, ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.lineplot(data=df, x=x_col, y=y, label=y, ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.lineplot(data=df, x=x_col, y=y, label=y, ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.lineplot(data=df, x=x_col, y=y, label=y, ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.lineplot(data=df, x=x_col, y=y, label=y, ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.lineplot(data=df, x=x_col, y=y, label=y, ci=None)

The `ci` parameter is deprecated. Use `errorbar=None` for the same e

All plots have been generated.


In [6]:
combined_df

Unnamed: 0,NumberOfTerminals,NumberOfNodes,NumberOfEdges,DreyfusWagnerCost,DreyfusWagnerTime,TakahashiMatsuyamaCost,TakahashiMatsuyamaTime,TMdurationNotNecessary,TMdurationInit,TMdurationPrepareForNextIteration,TMdurationMainLoop,KouMarkowskyBermanCost,KouMarkowskyBermanTime,KMBdurationNotNecessary,KMBdurationStep1,KMBdurationStep2,KMBdurationStep3,KMBdurationStep4,KMBdurationStep5
0,9,50,100,0,0,86,2692,431,8,606,1385,62,4337,66,3749,69,187,81,83
1,13,50,63,0,0,89,1886,333,7,418,923,90,3121,77,2448,89,224,88,99
2,25,100,125,0,0,236,6882,597,5,1554,3864,237,18642,141,16662,270,946,167,178
3,38,75,94,0,0,220,9374,495,6,2700,4994,226,22441,161,18400,494,2453,188,197
4,13,50,100,0,0,62,3507,423,5,925,1861,62,7393,76,6674,105,247,101,102
5,19,75,94,0,0,114,3486,434,6,875,1747,104,8509,94,7246,220,550,117,113
6,50,100,125,0,0,336,16645,730,15,4706,9343,325,41461,209,33380,858,5396,263,293
7,13,75,150,0,0,110,5322,651,10,1286,2914,98,13091,83,12290,111,253,108,130
8,38,75,150,0,0,188,12812,697,8,4871,6109,174,52101,157,47889,731,2367,183,199
9,25,100,200,0,0,133,11701,847,5,3903,5979,134,37358,133,35253,273,984,177,187


###USE IT FOR GENERATED GRAPHS###

In [4]:
cimport os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Folder paths
folder_path = './results'
# folder_path = './results/GraphInstances/B'
output_folder = os.path.join(folder_path, 'plots')

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Function to create and save plots for costs and times
def plot_data(df, file_name):
    # Plot Costs by Number of Terminals
    plt.figure(figsize=(12, 6))
    for y in ["TakahashiMatsuyamaCost", "KouMarkowskyBermanCost"]:
        sns.lineplot(data=df, x="NumberOfTerminals", y=y, label=y)
    plt.title(f"Costs vs. Number of Terminals ({file_name})")
    plt.xlabel("Number of Terminals")
    plt.ylabel("Cost")
    plt.legend()
    output_file = os.path.join(output_folder, f"{file_name}_terminals_cost.png")
    plt.savefig(output_file)
    plt.close()

    # Plot Times by Number of Terminals
    plt.figure(figsize=(12, 6))
    for y in ["TakahashiMatsuyamaTime", "KouMarkowskyBermanTime"]:
        sns.lineplot(data=df, x="NumberOfTerminals", y=y, label=y)
    plt.title(f"Times vs. Number of Terminals ({file_name})")
    plt.xlabel("Number of Terminals")
    plt.ylabel("Time")
    plt.legend()
    output_file = os.path.join(output_folder, f"{file_name}_terminals_time.png")
    plt.savefig(output_file)
    plt.close()

    # Plot Costs by Number of Nodes
    plt.figure(figsize=(12, 6))
    for y in ["TakahashiMatsuyamaCost", "KouMarkowskyBermanCost"]:
        sns.lineplot(data=df, x="NumberOfNodes", y=y, label=y)
    plt.title(f"Costs vs. Number of Nodes ({file_name})")
    plt.xlabel("Number of Nodes")
    plt.ylabel("Cost")
    plt.legend()
    output_file = os.path.join(output_folder, f"{file_name}_nodes_cost.png")
    plt.savefig(output_file)
    plt.close()

    # Plot Times by Number of Nodes
    plt.figure(figsize=(12, 6))
    for y in ["TakahashiMatsuyamaTime", "KouMarkowskyBermanTime"]:
        sns.lineplot(data=df, x="NumberOfNodes", y=y, label=y)
    plt.title(f"Times vs. Number of Nodes ({file_name})")
    plt.xlabel("Number of Nodes")
    plt.ylabel("Time")
    plt.legend()
    output_file = os.path.join(output_folder, f"{file_name}_nodes_time.png")
    plt.savefig(output_file)
    plt.close()

# Iterate over all CSV files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.results'c):
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path)
        print(f"Processing file: {filename}")
        plot_data(df, filename.split('.')[0])  # Pass filename without extension


Processing file: b04.stp.results
Processing file: b02.stp.results
Processing file: b14.stp.results
Processing file: b09.stp.results
Processing file: b05.stp.results
Processing file: b08.stp.results
Processing file: b15.stp.results
Processing file: b10.stp.results
Processing file: b12.stp.results
Processing file: b17.stp.results
Processing file: b11.stp.results
Processing file: b07.stp.results
Processing file: b18.stp.results
Processing file: b03.stp.results
Processing file: b06.stp.results
Processing file: b13.stp.results
Processing file: b01.stp.results
Processing file: b16.stp.results


In [13]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL for the testset B page
base_url = "https://steinlib.zib.de/"
testset_url = "https://steinlib.zib.de/showset.php?B"
output_folder = "./steinlib_data"

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Fetch the webpage
response = requests.get(testset_url)
soup = BeautifulSoup(response.content, "html.parser")

# Extract table rows
table_rows = soup.find_all("tr")[1:]  # Skip the header row

# Initialize a list to store the data
opt_values = []

for row in table_rows:
    cols = row.find_all("td")
    if len(cols) < 5:
        continue

    name = cols[0].text.strip()
    opt_value = cols[4].text.strip()

    # Check if the name is a valid instance (e.g., b01, b02)
    if name.startswith("b"):
        opt_values.append({"Name": name, "Opt": opt_value})

        # Check if there is a link in the first column
        link_tag = cols[0].find("a")
        if link_tag:
            link = link_tag["href"]
            file_url = base_url + link
            file_response = requests.get(file_url)

            # Save the file locally
            with open(os.path.join(output_folder, f"{name}.gr"), "wb") as f:
                f.write(file_response.content)
        else:
            print(f"Warning: No download link found for {name}")

# Save the optimal values to a DataFrame
opt_df = pd.DataFrame(opt_values)

# Save to CSV for later use
opt_df.to_csv(os.path.join(output_folder, "optimal_values.csv"), index=False)

print(f"Downloaded {len(opt_values)} files and saved optimal values to 'optimal_values.csv'")


Downloaded 18 files and saved optimal values to 'optimal_values.csv'


In [14]:
import requests

# URL for the testset B page
testset_url = "https://steinlib.zib.de/showset.php?B"

# Fetch the webpage
response = requests.get(testset_url)

# Check if the request was successful
if response.status_code == 200:
    # Print the raw HTML content
    print(response.text)
else:
    print(f"Failed to fetch the page, status code: {response.status_code}")


<html lang="en">
<head><title>Testset B</title></head>
<body bgcolor="#DDDDDD" text="#000000" link="#0000FF" alink="#FF0000" vlink="#800080"
><p>
<!-- <img src="http://www.zib.de/global/images/zib_logo1.gif" -->

<a href="http://www.zib.de/index.en.html">
   <img src="images/zib_logo1.gif" 
   alt="Goto ZIB" border=0 align=right width=86 height=118>
</a>
<a href="http://www.winforms.phil.tu-bs.de/winforms">
   <img src="images/tulggrbg.gif" 
   alt="Goto TU-Braunschweig" border=0 align=right width=92 height=106> 
</a>
<a href="http://www.mathematik.tu-darmstadt.de/ags/ag7">
   <img src="images/tu-da2.gif" 
   alt="Goto TU-Darmstadt" border=0 align=right width=93 height=110> 
</a>
</p>  
<h1>Testset B</h1>
<!-- $Id: set_B.php,v 1.1 2001/05/14 23:48:19 thor Exp $ -->
<p>These instances are random generated sparse graphs with edge
weights between 1 and 10.</p>
<p>The were introduced in 
<a href="biblio.php?Bea84"><cite>Bea84</cite></a>
and were generated following a scheme outlined in   


In [15]:
import requests
from bs4 import BeautifulSoup

# URL for the testset B page
testset_url = "https://steinlib.zib.de/showset.php?B"

# Fetch the webpage
response = requests.get(testset_url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content with BeautifulSoup
    soup = BeautifulSoup(response.text, "html.parser")

    # Find all table rows (skip the header row)
    table_rows = soup.find_all("tr")[1:]

    # Extract the "Opt" value (the last value in each row)
    opt_values = []
    for row in table_rows:
        # Find all table data cells (td)
        cols = row.find_all("td")
        if len(cols) > 5:  # Ensure the row has enough columns
            # Extract the value from the last column
            opt_value = cols[-1].text.strip().replace('\xa0', '')  # Clean up any non-breaking spaces
            opt_values.append(opt_value)
    
    # Print the "Opt" values
    for i, opt in enumerate(opt_values):
        print(f"Opt value for instance {i + 1}: {opt}")
else:
    print(f"Failed to fetch the page, status code: {response.status_code}")


Opt value for instance 1: 82
Opt value for instance 2: 83
Opt value for instance 3: 138
Opt value for instance 4: 59
Opt value for instance 5: 61
Opt value for instance 6: 122
Opt value for instance 7: 111
Opt value for instance 8: 104
Opt value for instance 9: 220
Opt value for instance 10: 86
Opt value for instance 11: 88
Opt value for instance 12: 174
Opt value for instance 13: 165
Opt value for instance 14: 235
Opt value for instance 15: 318
Opt value for instance 16: 127
Opt value for instance 17: 131
Opt value for instance 18: 218


In [42]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def process_and_plot(testset, results_folder="./results/GraphInstances", base_url="https://steinlib.zib.de/showset.php?", output_folder="./plots"):
    """
    Process .results files for a given testset, fetch "Opt" values from the website,
    and generate plots including the "Opt" values.

    Parameters:
        testset (str): The testset identifier (e.g., 'B', 'C').
        results_folder (str): Path to the folder containing the .results files.
        base_url (str): Base URL for fetching "Opt" values from the website.
        output_folder (str): Path to save the generated plots.
    """
    # Prepare URLs and paths
    testset_url = base_url + testset
    folder_path = os.path.join(results_folder, testset)
    output_folder = os.path.join(output_folder, testset)

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Fetch the webpage to get "Opt" values
    response = requests.get(testset_url)
    opt_values = {}
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        table_rows = soup.find_all("tr")[1:]  # Skip header row
        for row in table_rows:
            cols = row.find_all("td")
            if len(cols) > 5:
                instance_name = cols[0].text.strip()
                opt_value = cols[-1].text.strip().replace('\xa0', '')  # Clean up non-breaking spaces
                opt_values[instance_name] = float(opt_value) if opt_value.isdigit() else None
    else:
        print(f"Failed to fetch 'Opt' values for testset {testset}, status code: {response.status_code}")
        return

    # Read all .results files in the folder
    data_frames = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.results'):
            file_path = os.path.join(folder_path, filename)
            df = pd.read_csv(file_path)
            instance_name = filename.split('.')[0]  # Extract instance name (e.g., "b01" from "b01.results")
            if instance_name in opt_values:
                df["Opt"] = opt_values[instance_name]  # Add the "Opt" value to the DataFrame

            # Convert all time columns from microseconds to seconds
            time_columns = [col for col in df.columns if "Time" in col or "duration" in col]
            for col in time_columns:
                df[col] = df[col] / 1_000_000  # Convert microseconds to seconds
            data_frames.append(df)

    # Combine all DataFrames
    if not data_frames:
        print(f"No .results files found in {folder_path}.")
        return
    combined_df = pd.concat(data_frames, ignore_index=True)

    # Function to create and save plots
    def plot_data(df, x_col, y_cols, title, x_label, y_label, output_file, opt_col=None, log_scale=False):
        plt.figure(figsize=(12, 6))
        for y in y_cols:
            sns.lineplot(data=df, x=x_col, y=y, label=y, errorbar=None)
        if opt_col and opt_col in df.columns:
            sns.lineplot(data=df, x=x_col, y=opt_col, label="Opt", linestyle="--", color="red", errorbar=None)
        plt.title(title)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.legend()
        if log_scale:
            plt.yscale('log')  # Apply log scale to the y-axis
        plt.savefig(output_file)
        plt.close()

    # Generate plots
    # Plot Costs by Number of Terminals
    plot_data(
        combined_df,
        x_col="NumberOfTerminals",
        y_cols=["TakahashiMatsuyamaCost", "KouMarkowskyBermanCost"],
        title=f"Costs vs. Number of Terminals ({testset})",
        x_label="Number of Terminals",
        y_label="Cost",
        output_file=os.path.join(output_folder, "terminals_cost.png"),
        opt_col="Opt"
    )

    # Plot Times by Number of Terminals
    plot_data(
        combined_df,
        x_col="NumberOfTerminals",
        y_cols=["TakahashiMatsuyamaTime", "KouMarkowskyBermanTime"],
        title=f"Times vs. Number of Terminals ({testset})",
        x_label="Number of Terminals",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "terminals_time.png")
    )

    # Plot Costs by Number Of Nodes
    plot_data(
        combined_df,
        x_col="NumberOfNodes",
        y_cols=["TakahashiMatsuyamaCost", "KouMarkowskyBermanCost"],
        title=f"Costs vs. Number of Nodes ({testset})",
        x_label="Number of Nodes",
        y_label="Cost",
        output_file=os.path.join(output_folder, "nodes_cost.png"),
        opt_col="Opt"
    )

    # Plot Times by Number Of Nodes
    plot_data(
        combined_df,
        x_col="NumberOfNodes",
        y_cols=["TakahashiMatsuyamaTime", "KouMarkowskyBermanTime"],
        title=f"Times vs. Number of Nodes ({testset})",
        x_label="Number of Nodes",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "nodes_time.png")
    )

    # Plot Costs by Number Of Edges
    plot_data(
        combined_df,
        x_col="NumberOfEdges",
        y_cols=["TakahashiMatsuyamaCost", "KouMarkowskyBermanCost"],
        title=f"Costs vs. Number of Nodes ({testset})",
        x_label="Number of Edges",
        y_label="Cost",
        output_file=os.path.join(output_folder, "edges_cost.png"),
        opt_col="Opt"
    )

    # Plot Times by Number Of Edges
    plot_data(
        combined_df,
        x_col="NumberOfEdges",
        y_cols=["TakahashiMatsuyamaTime", "KouMarkowskyBermanTime"],
        title=f"Times vs. Number of Nodes ({testset})",
        x_label="Number of Edges",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "edges_time.png")
    )

    # Additional plots for TMduration* and KMBduration*
    tm_duration_columns = [col for col in combined_df.columns if col.startswith("TMduration")]
    kmb_duration_columns = [col for col in combined_df.columns if col.startswith("KMBduration")]
    
    # Inner Times by Number Of Terminals
    plot_data(
        combined_df,
        x_col="NumberOfTerminals",
        y_cols=tm_duration_columns,
        title=f"TM Durations vs. Number of Terminals ({testset})",
        x_label="Number of Terminals",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "terminals_tm_durations.png")
    )

    plot_data(
        combined_df,
        x_col="NumberOfTerminals",
        y_cols=kmb_duration_columns,
        title=f"KMB Durations vs. Number of Terminals ({testset})",
        x_label="Number of Terminals",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "terminals_kmb_durations.png"),
    )
    
    plot_data(
        combined_df,
        x_col="NumberOfTerminals",
        y_cols=kmb_duration_columns,
        title=f"KMB Durations vs. Number of Terminals ({testset})",
        x_label="Number of Terminals",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "terminals_kmb_durations_log_scale.png"),
        log_scale=True
    )

    # Inner Times by Number Of Nodes
    plot_data(
        combined_df,
        x_col="NumberOfNodes",
        y_cols=tm_duration_columns,
        title=f"TM Durations vs. Number of Nodes ({testset})",
        x_label="Number of Nodes",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "nodes_tm_durations.png")
    )
    
    plot_data(
        combined_df,
        x_col="NumberOfNodes",
        y_cols=kmb_duration_columns,
        title=f"KMB Durations vs. Number of Nodes ({testset})",
        x_label="Number of Nodes",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "nodes_kmb_durations.png"),
    )
    
    plot_data(
        combined_df,
        x_col="NumberOfNodes",
        y_cols=kmb_duration_columns,
        title=f"KMB Durations vs. Number of Nodes ({testset})",
        x_label="Number of Nodes",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "nodes_kmb_durations_log_scale.png"),
        log_scale=True
    )

    # Inner Times by Number Of Edges
    plot_data(
        combined_df,
        x_col="NumberOfEdges",
        y_cols=tm_duration_columns,
        title=f"TM Durations vs. Number of Nodes ({testset})",
        x_label="Number of Edges",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "edges_tm_durations.png")
    )
    
    plot_data(
        combined_df,
        x_col="NumberOfEdges",
        y_cols=kmb_duration_columns,
        title=f"KMB Durations vs. Number of Nodes ({testset})",
        x_label="Number of Edges",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "edges_kmb_durations.png"),
    )
    
    plot_data(
        combined_df,
        x_col="NumberOfEdges",
        y_cols=kmb_duration_columns,
        title=f"KMB Durations vs. Number of Nodes ({testset})",
        x_label="Number of Edges",
        y_label="Time (seconds)",
        output_file=os.path.join(output_folder, "edges_kmb_durations_log_scale.png"),
        log_scale=True
    )


    print(f"All plots for testset {testset} have been generated in {output_folder}.")

In [43]:
# Example usage
process_and_plot("B")

All plots for testset B have been generated in ./plots/B.


In [44]:
process_and_plot("C")

All plots for testset C have been generated in ./plots/C.


In [36]:
combined_df

Unnamed: 0,NumberOfTerminals,NumberOfNodes,NumberOfEdges,DreyfusWagnerCost,DreyfusWagnerTime,TakahashiMatsuyamaCost,TakahashiMatsuyamaTime,TMdurationNotNecessary,TMdurationInit,TMdurationPrepareForNextIteration,TMdurationMainLoop,KouMarkowskyBermanCost,KouMarkowskyBermanTime,KMBdurationNotNecessary,KMBdurationStep1,KMBdurationStep2,KMBdurationStep3,KMBdurationStep4,KMBdurationStep5,KMBTime_Without_Step1
0,9,50,100,0,0,86,2692,431,8,606,1385,62,4337,66,3749,69,187,81,83,588
1,13,50,63,0,0,89,1886,333,7,418,923,90,3121,77,2448,89,224,88,99,673
2,25,100,125,0,0,236,6882,597,5,1554,3864,237,18642,141,16662,270,946,167,178,1980
3,38,75,94,0,0,220,9374,495,6,2700,4994,226,22441,161,18400,494,2453,188,197,4041
4,13,50,100,0,0,62,3507,423,5,925,1861,62,7393,76,6674,105,247,101,102,719
5,19,75,94,0,0,114,3486,434,6,875,1747,104,8509,94,7246,220,550,117,113,1263
6,50,100,125,0,0,336,16645,730,15,4706,9343,325,41461,209,33380,858,5396,263,293,8081
7,13,75,150,0,0,110,5322,651,10,1286,2914,98,13091,83,12290,111,253,108,130,801
8,38,75,150,0,0,188,12812,697,8,4871,6109,174,52101,157,47889,731,2367,183,199,4212
9,25,100,200,0,0,133,11701,847,5,3903,5979,134,37358,133,35253,273,984,177,187,2105
