In [3]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [4]:
# Load the raw benchmark results; keep the untouched frame so this cell can be
# re-derived without hidden in-place mutations.
benchmark_raw = pd.read_csv('15-benchmark.csv')

# Short display labels for the algorithm names as they appear in the CSV.
algorithm_labels = {
    'BreadthFirstSearch': 'BFS',
    'DepthFirstSearch': 'DFS',
    'AStar': 'A*',
    'BidiBestFirstSearch': 'Bidi A*',
}

benchmark = (
    benchmark_raw
    # Rename first so the converted column keeps its original position.
    .rename(columns={'Duration (µs)': 'Duration (s)'})
    # Convert microseconds to seconds.
    .assign(**{'Duration (s)': lambda df: df['Duration (s)'] / 1e6})
    # Only values in the 'Algorithm' column are relabelled.
    .replace({'Algorithm': algorithm_labels})
)

# A path length of 0 is treated as "no path found". The flag goes in column
# position 1. `allow_duplicates` is deliberately left at its default so an
# already existing 'Path Present' column raises instead of being duplicated.
benchmark.insert(1, 'Path Present', benchmark['Path Length'] != 0)

benchmark.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Algorithm            1000 non-null   object 
 1   Path Present         1000 non-null   bool   
 2   Path Length          1000 non-null   int64  
 3   Path Weight          1000 non-null   float64
 4   Duration (s)         1000 non-null   float64
 5   Visited Vertices     1000 non-null   int64  
 6   Average Degree       1000 non-null   float64
 7   Average Path Degree  1000 non-null   float64
dtypes: bool(1), float64(4), int64(2), object(1)
memory usage: 55.8+ KB


In [5]:
# Subset of runs in which a path was found ('Path Present' is True).
has_path = benchmark['Path Present']
benchmark_path = benchmark.loc[has_path]
benchmark_path.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Algorithm            1000 non-null   object 
 1   Path Present         1000 non-null   bool   
 2   Path Length          1000 non-null   int64  
 3   Path Weight          1000 non-null   float64
 4   Duration (s)         1000 non-null   float64
 5   Visited Vertices     1000 non-null   int64  
 6   Average Degree       1000 non-null   float64
 7   Average Path Degree  1000 non-null   float64
dtypes: bool(1), float64(4), int64(2), object(1)
memory usage: 55.8+ KB


In [6]:
# Subset of runs in which no path was found ('Path Present' is False).
# `~` negates the boolean mask; comparing a mask to False with `==` is
# unidiomatic (PEP 8 / E712).
benchmark_nopath = benchmark[~benchmark['Path Present']]
benchmark_nopath.info()

<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Algorithm            0 non-null      object 
 1   Path Present         0 non-null      bool   
 2   Path Length          0 non-null      int64  
 3   Path Weight          0 non-null      float64
 4   Duration (s)         0 non-null      float64
 5   Visited Vertices     0 non-null      int64  
 6   Average Degree       0 non-null      float64
 7   Average Path Degree  0 non-null      float64
dtypes: bool(1), float64(4), int64(2), object(1)
memory usage: 0.0+ bytes


# Duration of algorithms where a path is present vs not present

In [None]:
# Duration per algorithm, split by whether a path was found
# (hue order False, True maps to red, green).
sns.barplot(
    data=benchmark,
    x='Algorithm',
    y='Duration (s)',
    hue='Path Present',
    hue_order=[False, True],
    palette=['red', 'green'],
)

# Visit counts of algorithms where a path is present vs not present

In [None]:
# Visited-vertex count per algorithm, split by whether a path was found
# (hue order False, True maps to red, green).
sns.barplot(
    data=benchmark,
    x='Algorithm',
    y='Visited Vertices',
    hue='Path Present',
    hue_order=[False, True],
    palette=['red', 'green'],
)

# Relationship between algorithm and path length

In [None]:
# Path length per algorithm, over all runs.
# NOTE(review): runs without a path have 'Path Length' == 0 and are included
# here; confirm whether this should use only the runs that found a path.
sns.barplot(
    data=benchmark,
    x='Algorithm',
    y='Path Length',
)

# Relationship between visit count and duration

In [None]:
# One point per run: visited vertices against duration, coloured by algorithm.
sns.scatterplot(
    data=benchmark,
    x='Visited Vertices',
    y='Duration (s)',
    hue='Algorithm',
)

# Relationship between path length and visit count

In [None]:
# Visited vertices as a function of path length, one line per algorithm,
# with a 95% confidence band. Only runs that found a path are plotted.
sns.lineplot(
    data=benchmark_path,
    x='Path Length',
    y='Visited Vertices',
    hue='Algorithm',
    errorbar=('ci', 95),
)

# Relationship between path length and duration

In [None]:
# Duration as a function of path length, one line per algorithm,
# with a 95% confidence band. Only runs that found a path are plotted.
sns.lineplot(
    data=benchmark_path,
    x='Path Length',
    y='Duration (s)',
    hue='Algorithm',
    errorbar=('ci', 95),
)

# Relationship between average path degree and visit count

In [None]:
# One point per run: average path degree against visited vertices,
# coloured by algorithm.
sns.scatterplot(
    data=benchmark,
    x='Average Path Degree',
    y='Visited Vertices',
    hue='Algorithm',
)

# Relationship between path length and average path degree

In [None]:
# Average path degree as a function of path length.
# Uses only the runs that found a path, like the other path-length line plots:
# runs without a path carry the sentinel 'Path Length' == 0 and would otherwise
# add a spurious point at x = 0.
sns.lineplot(
    data=benchmark_path,
    x='Path Length',
    y='Average Path Degree',
)

# Distribution of durations for each algorithm

In [None]:
# Histogram of run durations per algorithm, restricted to runs below the cutoff.
# NOTE(review): 'Duration (s)' is in seconds after the µs conversion, so a
# cutoff of 1500 means 25 minutes; confirm it was not meant for the raw values.
below_cutoff = benchmark['Duration (s)'] < 1500
sns.histplot(
    data=benchmark[below_cutoff],
    x='Duration (s)',
    hue='Algorithm',
)

# Distribution of visited vertices for each algorithm

In [None]:
# Histogram of visited-vertex counts, one colour per algorithm.
sns.histplot(
    data=benchmark,
    x='Visited Vertices',
    hue='Algorithm',
)

# Correlation Heatmap

In [None]:
# Pearson correlation between the numeric (and boolean) columns, drawn as an
# annotated heatmap.
pearson_corr = benchmark.corr(numeric_only=True, method='pearson')
sns.heatmap(pearson_corr, annot=True)

In [11]:
# Spearman (rank) correlation between the numeric (and boolean) columns,
# shown as a table.
benchmark.corr(numeric_only=True, method='spearman')

Unnamed: 0,Path Present,Path Length,Path Weight,Duration (s),Visited Vertices,Average Degree,Average Path Degree
Path Present,,,,,,,
Path Length,,1.0,1.0,0.80965,0.823911,,0.087173
Path Weight,,1.0,1.0,0.80965,0.823911,,0.087173
Duration (s),,0.80965,0.80965,1.0,0.996146,,0.068337
Visited Vertices,,0.823911,0.823911,0.996146,1.0,,0.062163
Average Degree,,,,,,,
Average Path Degree,,0.087173,0.087173,0.068337,0.062163,,1.0
