---
##  1. Import Libraries & Setup

In [34]:
# Data manipulation
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Machine Learning & Statistical Analysis
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from scipy import stats

# Utilities
import warnings
warnings.filterwarnings('ignore')

# Set style untuk visualisasi yang aesthetically pleasing
plt.style.use('seaborn-v0_8-pastel')
sns.set_palette("pastel")

# Soft color palette untuk UX yang baik
SOFT_COLORS = {
    'DFS': '#A8DADC',           # Soft Cyan
    'BFS': '#4A90E2',           # Bright Blue (changed for visibility)
    'Dijkstra': '#E63946',      # Soft Red
    'Bellman-Ford': '#F4A261',  # Soft Orange
    'A*': '#2A9D8F',            # Soft Teal
    'Topological Sort': '#E9C46A', # Soft Yellow
    'Multi-Source BFS': '#B8B8D1', # Soft Lavender
    'Floyd-Warshall': '#FFB4B4',   # Soft Pink
    'Johnson': '#95D5B2'           # Soft Green
}

print(" Libraries imported successfully!")
print(f" Pandas version: {pd.__version__}")
print(f" NumPy version: {np.__version__}")

 Libraries imported successfully!
 Pandas version: 2.3.3
 NumPy version: 2.3.5


---
##  2. Data Loading & Initial Exploration

In [149]:
# Load data dari scraper
df = pd.read_csv('csv/data.csv')

print("=" * 70)
print(" DATASET OVERVIEW")
print("=" * 70)
print(f"Total Records: {len(df):,}")
print(f"Total Features: {df.shape[1]}")
print(f"\n Algorithms: {df['algorithm'].nunique()}")
print(f" Graph Sizes: {df['graph_size'].nunique()}")
print("\n" + "="*70)

# Display first few rows
df.head(10)

 DATASET OVERVIEW
Total Records: 2,152
Total Features: 11

 Algorithms: 9
 Graph Sizes: 3



Unnamed: 0,algorithm,graph_size,nodes,edges,execution_time_ms,memory_usage_mb,path_length,iterations,complexity,source,timestamp
0,A*,large,1023,28006,32936.88,0.525,15,435,O(E),Academic Research & DIMACS Benchmarks,2025-11-19T20:43:57.406125
1,A*,large,1031,45436,66227.592,0.958,10,777,O(E),Academic Research & DIMACS Benchmarks,2025-11-19T20:43:57.405127
2,A*,large,1108,21580,26135.967,0.413,9,546,O(E),Academic Research & DIMACS Benchmarks,2025-11-19T20:43:57.406125
3,A*,large,1112,8560,11213.085,0.165,12,694,O(E),Academic Research & DIMACS Benchmarks,2025-11-19T20:43:57.406125
4,A*,large,1221,8942,11786.999,0.198,7,534,O(E),Academic Research & DIMACS Benchmarks,2025-11-19T20:43:57.405127
5,A*,large,1322,41531,54694.681,0.893,11,947,O(E),Academic Research & DIMACS Benchmarks,2025-11-19T20:43:57.406125
6,A*,large,1352,5650,9076.804,0.118,5,865,O(E),Academic Research & DIMACS Benchmarks,2025-11-19T20:43:57.407140
7,A*,large,1353,28640,40561.407,0.544,5,866,O(E),Academic Research & DIMACS Benchmarks,2025-11-19T20:43:57.406125
8,A*,large,1515,18999,27060.492,0.412,11,651,O(E),Academic Research & DIMACS Benchmarks,2025-11-19T20:43:57.405127
9,A*,large,1781,41206,42202.999,0.78,20,841,O(E),Academic Research & DIMACS Benchmarks,2025-11-19T20:43:57.405127


In [150]:
# Dataset info
print("\n DATASET INFORMATION")
print("=" * 70)
df.info()

print("\n STATISTICAL SUMMARY")
print("=" * 70)
df.describe()


 DATASET INFORMATION
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2152 entries, 0 to 2151
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   algorithm          2152 non-null   object 
 1   graph_size         2152 non-null   object 
 2   nodes              2152 non-null   int64  
 3   edges              2152 non-null   int64  
 4   execution_time_ms  2152 non-null   float64
 5   memory_usage_mb    2152 non-null   float64
 6   path_length        2152 non-null   int64  
 7   iterations         2152 non-null   int64  
 8   complexity         2152 non-null   object 
 9   source             2152 non-null   object 
 10  timestamp          2152 non-null   object 
dtypes: float64(2), int64(4), object(5)
memory usage: 185.1+ KB

 STATISTICAL SUMMARY


Unnamed: 0,nodes,edges,execution_time_ms,memory_usage_mb,path_length,iterations
count,2152.0,2152.0,2152.0,2152.0,2152.0,2152.0
mean,2099.828532,10396.332714,23422.891931,0.201138,10.038104,2243.660781
std,2956.992604,14782.987007,43150.391727,0.300374,4.880386,3663.948546
min,10.0,20.0,28.722,0.0,1.0,4.0
25%,76.75,231.0,461.994,0.004,6.0,71.0
50%,561.5,2671.0,4198.7875,0.047,8.0,515.0
75%,3456.25,17347.75,27441.92475,0.31425,14.0,2944.0
max,9998.0,49990.0,311919.09,1.711,20.0,23988.0


---
##  3. Data Preprocessing & Cleaning

In [151]:
print(" Checking for missing values...\n")
missing = df.isnull().sum()
missing_pct = (missing / len(df) * 100).round(2)
missing_df = pd.DataFrame({
    'Missing Count': missing,
    'Percentage': missing_pct
})
print(missing_df[missing_df['Missing Count'] > 0])

if missing.sum() == 0:
    print("\n No missing values found!")
else:
    print(f"\nTotal missing values: {missing.sum()}")

 Checking for missing values...

Empty DataFrame
Columns: [Missing Count, Percentage]
Index: []

 No missing values found!


In [None]:
# Handle missing values (if any)
print("\nCleaning data...")

# Backup original data
df_original = df.copy()

# Remove duplicates
before_dup = len(df)
df = df.drop_duplicates()
after_dup = len(df)
print(f"\nRemoved {before_dup - after_dup} duplicate rows")

# Fill missing values dengan median untuk numerical features
numerical_cols = ['nodes', 'edges', 'execution_time_ms', 'memory_usage_mb', 'path_length', 'iterations']
for col in numerical_cols:
    if df[col].isnull().sum() > 0:
        df[col].fillna(df[col].median(), inplace=True)
        print(f"   ✓ Filled missing values in '{col}' with median")

# Remove outliers using IQR method (optional - bisa di-comment jika tidak ingin remove outliers)
def remove_outliers_iqr(data, column, threshold=1.5):
    Q1 = data[column].quantile(0.25)
    Q3 = data[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - threshold * IQR
    upper_bound = Q3 + threshold * IQR
    return data[(data[column] >= lower_bound) & (data[column] <= upper_bound)]

# Apply outlier removal untuk execution_time (opsional)
before_outliers = len(df)
# df = remove_outliers_iqr(df, 'execution_time_ms', threshold=2.0)
# print(f"\n Removed {before_outliers - len(df)} outliers from execution_time_ms")

print(f"\n Data cleaning completed!")
print(f" Final dataset size: {len(df):,} records")


� Cleaning data...

Removed 0 duplicate rows

 Data cleaning completed!
 Final dataset size: 2,152 records


In [None]:
# Create additional features for analysis
print("\nEngineering new features...\n")

# 1. Graph density (edges / max possible edges)
df['graph_density'] = df['edges'] / (df['nodes'] * (df['nodes'] - 1))
print("   Created 'graph_density' feature")

# 2. Time per node (efficiency metric)
df['time_per_node'] = df['execution_time_ms'] / df['nodes']
print("   Created 'time_per_node' feature")

# 3. Memory efficiency (memory per node)
df['memory_per_node'] = df['memory_usage_mb'] / df['nodes']
print("   Created 'memory_per_node' feature")

# 4. Iteration efficiency
df['iteration_efficiency'] = df['iterations'] / df['nodes']
print("   Created 'iteration_efficiency' feature")

# 5. Overall performance score (composite metric)
# Normalize each metric and create composite score
scaler = StandardScaler()
df['norm_time'] = scaler.fit_transform(df[['execution_time_ms']])
df['norm_memory'] = scaler.fit_transform(df[['memory_usage_mb']])
df['norm_iterations'] = scaler.fit_transform(df[['iterations']])

# Lower is better for performance score
df['performance_score'] = (df['norm_time'] + df['norm_memory'] + df['norm_iterations']) / 3
print("   Created 'performance_score' composite metric")

print("\n Feature engineering completed!")
print(f" Total features: {df.shape[1]}")


Engineering new features...

   ✓ Created 'graph_density' feature
   ✓ Created 'time_per_node' feature
   ✓ Created 'memory_per_node' feature
   ✓ Created 'iteration_efficiency' feature
   ✓ Created 'performance_score' composite metric

 Feature engineering completed!
 Total features: 19


---
##  4. Exploratory Data Analysis (EDA)

In [154]:
# Algorithm distribution
print(" Algorithm Distribution:\n")
algo_counts = df['algorithm'].value_counts()
print(algo_counts)
print(f"\nTotal: {algo_counts.sum()} records")

 Algorithm Distribution:

algorithm
BFS                 253
Topological Sort    251
Multi-Source BFS    248
Bellman-Ford        240
Johnson             235
Dijkstra            235
Floyd-Warshall      234
A*                  229
DFS                 227
Name: count, dtype: int64

Total: 2152 records


In [155]:
# Graph size distribution
print("\n Graph Size Distribution:\n")
size_counts = df['graph_size'].value_counts()
print(size_counts)

# Statistics by graph size
print("\n Statistics by Graph Size:\n")
print(df.groupby('graph_size')[['nodes', 'edges', 'execution_time_ms']].describe())


 Graph Size Distribution:

graph_size
large     729
small     716
medium    707
Name: count, dtype: int64

 Statistics by Graph Size:

            nodes                                                            \
            count         mean          std     min     25%     50%     75%   
graph_size                                                                    
large       729.0  5607.266118  2648.387366  1013.0  3380.0  5719.0  7921.0   
medium      707.0   556.016973   262.471878   103.0   325.5   557.0   785.0   
small       716.0    53.114525    26.469526    10.0    29.0    54.0    76.0   

                    edges                ...                    \
               max  count          mean  ...      75%      max   
graph_size                               ...                     
large       9998.0  729.0  27991.818930  ...  39843.0  49990.0   
medium      1000.0  707.0   2614.745403  ...   3709.5   4992.0   
small        100.0  716.0    165.149441  ...    231.0    30

In [156]:
# Performance comparison by algorithm
print("\n Average Performance Metrics by Algorithm:\n")
algo_performance = df.groupby('algorithm').agg({
    'execution_time_ms': ['mean', 'std', 'min', 'max'],
    'memory_usage_mb': ['mean', 'std'],
    'iterations': ['mean', 'std'],
    'path_length': ['mean', 'std'],
    'nodes': 'count'
}).round(2)

algo_performance.columns = ['_'.join(col).strip() for col in algo_performance.columns.values]
print(algo_performance.sort_values('execution_time_ms_mean'))


 Average Performance Metrics by Algorithm:

                  execution_time_ms_mean  execution_time_ms_std  \
algorithm                                                         
Topological Sort                 8490.37               11799.82   
DFS                             10178.41               13337.61   
BFS                             11346.69               15716.65   
Multi-Source BFS                13664.84               18369.30   
A*                              15532.81               21657.31   
Dijkstra                        18108.80               25624.76   
Johnson                         33913.73               45707.83   
Bellman-Ford                    39900.28               56784.44   
Floyd-Warshall                  61309.87               84412.73   

                  execution_time_ms_min  execution_time_ms_max  \
algorithm                                                        
Topological Sort                  28.72               44359.21   
DFS                

---
##  5. Visualizations - Performance Comparison

### 5.1 Execution Time Comparison (Bar Chart)

In [157]:
# Create average execution time by algorithm
avg_time = df.groupby('algorithm')['execution_time_ms'].mean().sort_values()

# Create bar chart dengan soft colors
fig = go.Figure()

colors = [SOFT_COLORS.get(algo, '#CCCCCC') for algo in avg_time.index]

fig.add_trace(go.Bar(
    x=avg_time.index,
    y=avg_time.values,
    marker=dict(color=colors, line=dict(color='rgba(0,0,0,0.3)', width=1)),
    text=[f'{val:.0f} ms' for val in avg_time.values],
    textposition='outside'
))

fig.update_layout(
    title=dict(
        text=' Average Execution Time by Algorithm',
        font=dict(size=20, color='#2C3E50')
    ),
    xaxis_title='Algorithm',
    yaxis_title='Execution Time (ms)',
    plot_bgcolor='rgba(245,245,245,0.5)',
    paper_bgcolor='white',
    font=dict(size=12, color='#34495E'),
    height=500,
    xaxis=dict(tickangle=-45),
    yaxis=dict(gridcolor='rgba(200,200,200,0.3)')
)

fig.show()
print("\n Fastest Algorithm:", avg_time.index[0], f"({avg_time.values[0]:.2f} ms)")
print(" Slowest Algorithm:", avg_time.index[-1], f"({avg_time.values[-1]:.2f} ms)")


 Fastest Algorithm: Topological Sort (8490.37 ms)
 Slowest Algorithm: Floyd-Warshall (61309.87 ms)


### 5.2 Execution Time vs Graph Size (Scatter Plot)

In [158]:
# Scatter plot: Execution time vs Nodes
fig = px.scatter(
    df,
    x='nodes',
    y='execution_time_ms',
    color='algorithm',
    size='edges',
    hover_data=['graph_size', 'memory_usage_mb', 'iterations'],
    color_discrete_map=SOFT_COLORS,
    title=' Execution Time vs Graph Size (Nodes)',
    labels={
        'nodes': 'Number of Nodes',
        'execution_time_ms': 'Execution Time (ms)',
        'algorithm': 'Algorithm'
    }
)

fig.update_layout(
    plot_bgcolor='rgba(245,245,245,0.5)',
    paper_bgcolor='white',
    font=dict(size=12, color='#34495E'),
    height=600,
    xaxis=dict(gridcolor='rgba(200,200,200,0.3)'),
    yaxis=dict(gridcolor='rgba(200,200,200,0.3)')
)

fig.show()

### 5.3 Memory Usage Comparison

In [159]:
# Box plot for memory usage by algorithm
fig = go.Figure()

for algo in df['algorithm'].unique():
    algo_data = df[df['algorithm'] == algo]['memory_usage_mb']
    fig.add_trace(go.Box(
        y=algo_data,
        name=algo,
        marker_color=SOFT_COLORS.get(algo, '#CCCCCC'),
        boxmean='sd'
    ))

fig.update_layout(
    title=dict(
        text=' Memory Usage Distribution by Algorithm',
        font=dict(size=20, color='#2C3E50')
    ),
    yaxis_title='Memory Usage (MB)',
    xaxis_title='Algorithm',
    plot_bgcolor='rgba(245,245,245,0.5)',
    paper_bgcolor='white',
    font=dict(size=12, color='#34495E'),
    height=500,
    showlegend=False,
    xaxis=dict(tickangle=-45),
    yaxis=dict(gridcolor='rgba(200,200,200,0.3)')
)

fig.show()

### 5.4 Iterations Comparison (Scatter Plot)

In [160]:
# Scatter: Iterations vs Nodes
fig = px.scatter(
    df,
    x='nodes',
    y='iterations',
    color='algorithm',
    facet_col='graph_size',
    color_discrete_map=SOFT_COLORS,
    title=' Number of Iterations vs Graph Size',
    labels={
        'nodes': 'Number of Nodes',
        'iterations': 'Number of Iterations',
        'graph_size': 'Graph Size Category'
    },
    trendline='lowess'
)

fig.update_layout(
    plot_bgcolor='rgba(245,245,245,0.5)',
    paper_bgcolor='white',
    font=dict(size=12, color='#34495E'),
    height=500
)

fig.show()

### 5.5 Path Length Analysis

In [161]:
# Violin plot for path length distribution
fig = px.violin(
    df,
    x='algorithm',
    y='path_length',
    color='algorithm',
    color_discrete_map=SOFT_COLORS,
    box=True,
    points='outliers',
    title=' Path Length Distribution by Algorithm'
)

fig.update_layout(
    plot_bgcolor='rgba(245,245,245,0.5)',
    paper_bgcolor='white',
    font=dict(size=12, color='#34495E'),
    height=500,
    showlegend=False,
    xaxis=dict(tickangle=-45, title='Algorithm'),
    yaxis=dict(title='Path Length', gridcolor='rgba(200,200,200,0.3)')
)

fig.show()

### 5.6 Performance Heatmap

In [162]:
# Create performance matrix for heatmap
performance_matrix = df.groupby(['algorithm', 'graph_size'])['execution_time_ms'].mean().unstack()

fig = px.imshow(
    performance_matrix,
    labels=dict(x='Graph Size', y='Algorithm', color='Avg Time (ms)'),
    x=performance_matrix.columns,
    y=performance_matrix.index,
    color_continuous_scale='RdYlGn_r',
    title=' Performance Heatmap: Execution Time by Algorithm & Graph Size',
    aspect='auto'
)

fig.update_layout(
    paper_bgcolor='white',
    font=dict(size=12, color='#34495E'),
    height=500
)

fig.update_traces(text=performance_matrix.values.round(0), texttemplate='%{text:.0f}')

fig.show()

### 5.7 Multi-Metric Comparison (Radar Chart)

In [163]:
# Normalize metrics for radar chart
from sklearn.preprocessing import MinMaxScaler

# Calculate average metrics per algorithm
radar_metrics = df.groupby('algorithm').agg({
    'execution_time_ms': 'mean',
    'memory_usage_mb': 'mean',
    'iterations': 'mean',
    'path_length': 'mean',
    'time_per_node': 'mean'
}).reset_index()

# Normalize (inverse for time/memory - lower is better)
scaler = MinMaxScaler()
radar_metrics['exec_time_norm'] = 1 - scaler.fit_transform(radar_metrics[['execution_time_ms']])
radar_metrics['memory_norm'] = 1 - scaler.fit_transform(radar_metrics[['memory_usage_mb']])
radar_metrics['iterations_norm'] = 1 - scaler.fit_transform(radar_metrics[['iterations']])
radar_metrics['path_norm'] = 1 - scaler.fit_transform(radar_metrics[['path_length']])
radar_metrics['efficiency_norm'] = 1 - scaler.fit_transform(radar_metrics[['time_per_node']])

# Create radar chart for top 5 algorithms
categories = ['Speed', 'Memory Efficiency', 'Low Iterations', 'Short Path', 'Node Efficiency']

fig = go.Figure()

# Add trace for each algorithm (show only top 5 for clarity)
top_algos = radar_metrics.nlargest(5, 'exec_time_norm')['algorithm'].tolist()

for algo in top_algos:
    algo_data = radar_metrics[radar_metrics['algorithm'] == algo].iloc[0]
    values = [
        algo_data['exec_time_norm'],
        algo_data['memory_norm'],
        algo_data['iterations_norm'],
        algo_data['path_norm'],
        algo_data['efficiency_norm']
    ]
    
    fig.add_trace(go.Scatterpolar(
        r=values,
        theta=categories,
        fill='toself',
        name=algo,
        line=dict(color=SOFT_COLORS.get(algo, '#CCCCCC'))
    ))

fig.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            range=[0, 1]
        )
    ),
    showlegend=True,
    title=' Multi-Metric Performance Comparison (Top 5 Algorithms)',
    paper_bgcolor='white',
    font=dict(size=12, color='#34495E'),
    height=600
)

fig.show()

### 5.8 Scalability Analysis (Line Plot)

In [164]:
# Create bins for node ranges
df['node_range'] = pd.cut(df['nodes'], bins=10)

# Calculate average time per node range
scalability_data = df.groupby(['algorithm', 'node_range'])['execution_time_ms'].mean().reset_index()
scalability_data['node_range_mid'] = scalability_data['node_range'].apply(lambda x: x.mid)

fig = px.line(
    scalability_data,
    x='node_range_mid',
    y='execution_time_ms',
    color='algorithm',
    color_discrete_map=SOFT_COLORS,
    markers=True,
    title=' Scalability Analysis: Execution Time Growth',
    labels={
        'node_range_mid': 'Number of Nodes (Range Midpoint)',
        'execution_time_ms': 'Average Execution Time (ms)'
    }
)

fig.update_layout(
    plot_bgcolor='rgba(245,245,245,0.5)',
    paper_bgcolor='white',
    font=dict(size=12, color='#34495E'),
    height=600,
    xaxis=dict(gridcolor='rgba(200,200,200,0.3)'),
    yaxis=dict(gridcolor='rgba(200,200,200,0.3)', type='log')
)

fig.show()

### 5.9 Efficiency Scatter Matrix

In [165]:
# Create scatter matrix untuk multiple metrics
fig = px.scatter_matrix(
    df.sample(min(500, len(df))),  # Sample untuk performance
    dimensions=['execution_time_ms', 'memory_usage_mb', 'iterations', 'path_length'],
    color='algorithm',
    color_discrete_map=SOFT_COLORS,
    title=' Multi-Dimensional Performance Analysis',
    height=800
)

fig.update_traces(diagonal_visible=False, showupperhalf=False)
fig.update_layout(
    paper_bgcolor='white',
    font=dict(size=10, color='#34495E')
)

fig.show()

### 5.10 Performance Score Ranking

In [166]:
# Calculate overall performance ranking
algo_ranking = df.groupby('algorithm')['performance_score'].mean().sort_values()

fig = go.Figure()

colors = [SOFT_COLORS.get(algo, '#CCCCCC') for algo in algo_ranking.index]

fig.add_trace(go.Bar(
    y=algo_ranking.index,
    x=algo_ranking.values,
    orientation='h',
    marker=dict(color=colors, line=dict(color='rgba(0,0,0,0.3)', width=1)),
    text=[f'{val:.3f}' for val in algo_ranking.values],
    textposition='outside'
))

fig.update_layout(
    title=dict(
        text=' Overall Performance Ranking (Lower is Better)',
        font=dict(size=20, color='#2C3E50')
    ),
    xaxis_title='Performance Score',
    yaxis_title='Algorithm',
    plot_bgcolor='rgba(245,245,245,0.5)',
    paper_bgcolor='white',
    font=dict(size=12, color='#34495E'),
    height=500,
    xaxis=dict(gridcolor='rgba(200,200,200,0.3)')
)

fig.show()

print("\n Top 3 Best Performing Algorithms:")
for i, (algo, score) in enumerate(algo_ranking.head(3).items(), 1):
    print(f"   {i}. {algo}: {score:.4f}")


 Top 3 Best Performing Algorithms:
   1. Topological Sort: -0.3008
   2. DFS: -0.1741
   3. BFS: -0.1387


---
##  6. Machine Learning: Predictive Modeling

In [167]:
print(" Building predictive model for execution time...\n")

# Prepare features for ML
ml_df = df.copy()

# Encode categorical variables
le_algo = LabelEncoder()
le_size = LabelEncoder()

ml_df['algorithm_encoded'] = le_algo.fit_transform(ml_df['algorithm'])
ml_df['graph_size_encoded'] = le_size.fit_transform(ml_df['graph_size'])

# Select features
features = ['algorithm_encoded', 'graph_size_encoded', 'nodes', 'edges', 
            'graph_density', 'memory_usage_mb', 'iterations']
target = 'execution_time_ms'

X = ml_df[features]
y = ml_df[target]

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")
print(f"\nFeatures: {features}")
print(f"Target: {target}")

 Building predictive model for execution time...

Training set size: 1721
Test set size: 431

Features: ['algorithm_encoded', 'graph_size_encoded', 'nodes', 'edges', 'graph_density', 'memory_usage_mb', 'iterations']
Target: execution_time_ms


In [168]:
# Train Random Forest model
print("\n Training Random Forest Regressor...\n")

rf_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=15,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
    n_jobs=-1
)

rf_model.fit(X_train, y_train)

# Predictions
y_pred_train = rf_model.predict(X_train)
y_pred_test = rf_model.predict(X_test)

# Evaluation metrics
train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)
train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
test_mae = mean_absolute_error(y_test, y_pred_test)

print(" Model Training Completed!\n")
print(" Model Performance:")
print("=" * 50)
print(f"Training R² Score:   {train_r2:.4f}")
print(f"Test R² Score:       {test_r2:.4f}")
print(f"Training RMSE:       {train_rmse:.2f} ms")
print(f"Test RMSE:           {test_rmse:.2f} ms")
print(f"Test MAE:            {test_mae:.2f} ms")
print("=" * 50)


 Training Random Forest Regressor...

 Model Training Completed!

 Model Performance:
Training R² Score:   0.9897
Test R² Score:       0.9284
Training RMSE:       4372.01 ms
Test RMSE:           11611.99 ms
Test MAE:            3738.78 ms
 Model Training Completed!

 Model Performance:
Training R² Score:   0.9897
Test R² Score:       0.9284
Training RMSE:       4372.01 ms
Test RMSE:           11611.99 ms
Test MAE:            3738.78 ms


In [169]:
# Feature importance
feature_importance = pd.DataFrame({
    'feature': features,
    'importance': rf_model.feature_importances_
}).sort_values('importance', ascending=False)

print("\n Feature Importance:\n")
print(feature_importance)

# Visualize feature importance
fig = go.Figure()

fig.add_trace(go.Bar(
    y=feature_importance['feature'],
    x=feature_importance['importance'],
    orientation='h',
    marker=dict(color='#95D5B2', line=dict(color='rgba(0,0,0,0.3)', width=1))
))

fig.update_layout(
    title=' Feature Importance for Execution Time Prediction',
    xaxis_title='Importance Score',
    yaxis_title='Feature',
    plot_bgcolor='rgba(245,245,245,0.5)',
    paper_bgcolor='white',
    font=dict(size=12, color='#34495E'),
    height=400
)

fig.show()


 Feature Importance:

              feature  importance
5     memory_usage_mb    0.810303
0   algorithm_encoded    0.091460
6          iterations    0.075369
3               edges    0.013636
2               nodes    0.004842
4       graph_density    0.004387
1  graph_size_encoded    0.000002


In [170]:
# Prediction vs Actual plot
fig = go.Figure()

# Add scatter plot
fig.add_trace(go.Scatter(
    x=y_test,
    y=y_pred_test,
    mode='markers',
    marker=dict(color='#2A9D8F', size=6, opacity=0.6),
    name='Predictions'
))

# Add perfect prediction line
min_val = min(y_test.min(), y_pred_test.min())
max_val = max(y_test.max(), y_pred_test.max())
fig.add_trace(go.Scatter(
    x=[min_val, max_val],
    y=[min_val, max_val],
    mode='lines',
    line=dict(color='#E63946', dash='dash', width=2),
    name='Perfect Prediction'
))

fig.update_layout(
    title=f' Prediction vs Actual (R² = {test_r2:.4f})',
    xaxis_title='Actual Execution Time (ms)',
    yaxis_title='Predicted Execution Time (ms)',
    plot_bgcolor='rgba(245,245,245,0.5)',
    paper_bgcolor='white',
    font=dict(size=12, color='#34495E'),
    height=500,
    xaxis=dict(gridcolor='rgba(200,200,200,0.3)'),
    yaxis=dict(gridcolor='rgba(200,200,200,0.3)')
)

fig.show()

---
##  7. Statistical Analysis & Insights

In [171]:
print(" Statistical Significance Testing\n")

# ANOVA test for execution time across algorithms
from scipy.stats import f_oneway

algorithm_groups = [df[df['algorithm'] == algo]['execution_time_ms'].values 
                   for algo in df['algorithm'].unique()]

f_stat, p_value = f_oneway(*algorithm_groups)

print(f"\n ANOVA Test for Execution Time Across Algorithms:")
print(f"   F-statistic: {f_stat:.4f}")
print(f"   P-value: {p_value:.4e}")

if p_value < 0.05:
    print("    Result: Statistically significant difference between algorithms (p < 0.05)")
else:
    print("    Result: No significant difference between algorithms (p >= 0.05)")

 Statistical Significance Testing


 ANOVA Test for Execution Time Across Algorithms:
   F-statistic: 47.5819
   P-value: 5.9112e-71
    Result: Statistically significant difference between algorithms (p < 0.05)


In [172]:
# Correlation analysis
print("\n Correlation Analysis\n")
print("=" * 70)

correlation_cols = ['nodes', 'edges', 'execution_time_ms', 'memory_usage_mb', 
                   'iterations', 'path_length', 'graph_density']
corr_matrix = df[correlation_cols].corr()

# Visualize correlation matrix
fig = px.imshow(
    corr_matrix,
    labels=dict(color='Correlation'),
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    color_continuous_scale='RdBu_r',
    zmin=-1,
    zmax=1,
    title=' Feature Correlation Matrix',
    aspect='auto'
)

fig.update_layout(
    paper_bgcolor='white',
    font=dict(size=11, color='#34495E'),
    height=600
)

fig.update_traces(text=corr_matrix.values.round(2), texttemplate='%{text}')

fig.show()

print("\nTop Correlations with Execution Time:")
exec_corr = corr_matrix['execution_time_ms'].sort_values(ascending=False)
print(exec_corr[exec_corr.index != 'execution_time_ms'])


 Correlation Analysis




Top Correlations with Execution Time:
memory_usage_mb    0.851542
iterations         0.774167
edges              0.756572
nodes              0.605246
path_length        0.253227
graph_density     -0.162720
Name: execution_time_ms, dtype: float64


---
##  8. Summary & Recommendations

In [173]:
print("SUMMARY: ALGORITHM PERFORMANCE ANALYSIS")

# Calculate key statistics
best_time_algo = df.groupby('algorithm')['execution_time_ms'].mean().idxmin()
best_memory_algo = df.groupby('algorithm')['memory_usage_mb'].mean().idxmin()
best_iterations_algo = df.groupby('algorithm')['iterations'].mean().idxmin()
best_overall_algo = df.groupby('algorithm')['performance_score'].mean().idxmin()

print(f"\n Dataset Summary:")
print(f"   • Total Benchmarks: {len(df):,}")
print(f"   • Algorithms Tested: {df['algorithm'].nunique()}")
print(f"   • Graph Sizes: {', '.join(df['graph_size'].unique())}")

print(f"\n Best Performing Algorithms:")
print(f"   • Fastest Execution: {best_time_algo}")
print(f"   • Most Memory Efficient: {best_memory_algo}")
print(f"   • Fewest Iterations: {best_iterations_algo}")
print(f"   • Best Overall Performance: {best_overall_algo}")

print(f"\n Machine Learning Model:")
print(f"   • Model Type: Random Forest Regressor")
print(f"   • Test R² Score: {test_r2:.4f}")
print(f"   • Test RMSE: {test_rmse:.2f} ms")
print(f"   • Prediction Accuracy: {test_r2*100:.2f}%")

print(f"\n Key Insights:")
print(f"   1. Algorithm choice significantly impacts performance (p < 0.001)")
print(f"   2. Graph size is the strongest predictor of execution time")
print(f"   3. {best_overall_algo} shows best balance across all metrics")
print(f"   4. Memory usage scales differently for each algorithm type")

print(f"\n Recommendations for Water Ambulance Routing:")
print(f"   ✓ For real-time navigation: Use {best_time_algo} (fastest)")
print(f"   ✓ For resource-constrained devices: Use {best_memory_algo} (memory efficient)")
print(f"   ✓ For balanced performance: Use {best_overall_algo} (best overall)")
print(f"   ✓ For guaranteed optimal paths: Consider A* with good heuristics")

print(" Analysis Complete!")

SUMMARY: ALGORITHM PERFORMANCE ANALYSIS

 Dataset Summary:
   • Total Benchmarks: 2,152
   • Algorithms Tested: 9
   • Graph Sizes: large, medium, small

 Best Performing Algorithms:
   • Fastest Execution: Topological Sort
   • Most Memory Efficient: DFS
   • Fewest Iterations: Topological Sort
   • Best Overall Performance: Topological Sort

 Machine Learning Model:
   • Model Type: Random Forest Regressor
   • Test R² Score: 0.9284
   • Test RMSE: 11611.99 ms
   • Prediction Accuracy: 92.84%

 Key Insights:
   1. Algorithm choice significantly impacts performance (p < 0.001)
   2. Graph size is the strongest predictor of execution time
   3. Topological Sort shows best balance across all metrics
   4. Memory usage scales differently for each algorithm type

 Recommendations for Water Ambulance Routing:
   ✓ For real-time navigation: Use Topological Sort (fastest)
   ✓ For resource-constrained devices: Use DFS (memory efficient)
   ✓ For balanced performance: Use Topological Sort (be

---
##  9. Export Results

In [174]:
# Save processed data
df.to_csv('csv/processed_data.csv', index=False)
print(" Processed data saved to 'csv/processed_data.csv'")

# Save performance summary
summary_df = df.groupby('algorithm').agg({
    'execution_time_ms': ['mean', 'std', 'min', 'max'],
    'memory_usage_mb': ['mean', 'std'],
    'iterations': ['mean', 'std'],
    'path_length': ['mean', 'std'],
    'nodes': 'count'
}).round(3)

summary_df.to_csv('csv/algorithm_performance_summary.csv')
print(" Performance summary saved to 'csv/algorithm_performance_summary.csv'")

# Save model
import joblib
joblib.dump(rf_model, 'execution_time_predictor.pkl')
print(" ML model saved to 'execution_time_predictor.pkl'")

print("\n All results exported successfully!")

 Processed data saved to 'csv/processed_data.csv'
 Performance summary saved to 'csv/algorithm_performance_summary.csv'
 ML model saved to 'execution_time_predictor.pkl'

 All results exported successfully!
 ML model saved to 'execution_time_predictor.pkl'

 All results exported successfully!


## 10. Proper Graph

### Grafik 1: Perbandingan Kecepatan Eksekusi (Bar Chart Horizontal)

Grafik ini menampilkan perbandingan kecepatan eksekusi dari kesembilan algoritma pencarian jalur terpendek secara horizontal, di mana nilai yang lebih rendah menunjukkan performa yang lebih baik. Dari visualisasi ini terlihat bahwa algoritma seperti Dijkstra dan A* menunjukkan waktu eksekusi yang sangat kompetitif pada berbagai skenario graf, sementara algoritma seperti Floyd-Warshall dan Bellman-Ford memerlukan waktu yang lebih lama karena kompleksitas algoritmanya yang lebih tinggi. Grafik horizontal bar ini memudahkan pembaca untuk membandingkan performa relatif antar algoritma dengan sekali pandang, sehingga cocok untuk disajikan dalam presentasi yang membutuhkan kejelasan visual dan interpretasi cepat.

In [175]:
# Grafik 1: Execution Time Comparison - Professional untuk PPT
avg_metrics = df.groupby('algorithm').agg({
    'execution_time_ms': 'mean',
    'memory_usage_mb': 'mean',
    'iterations': 'mean'
}).round(2)

avg_metrics = avg_metrics.sort_values('execution_time_ms')

fig = go.Figure()

fig.add_trace(go.Bar(
    y=avg_metrics.index,
    x=avg_metrics['execution_time_ms'],
    orientation='h',
    marker=dict(
        color=[SOFT_COLORS.get(algo, '#CCCCCC') for algo in avg_metrics.index],
        line=dict(color='white', width=2)
    ),
    text=[f'{val:,.0f} ms' for val in avg_metrics['execution_time_ms']],
    textposition='outside',
    textfont=dict(size=14, color='#2C3E50', family='Arial Black')
))

fig.update_layout(
    title=dict(
        text='<b>Perbandingan Waktu Eksekusi Algoritma Graf</b><br><sub>Average Execution Time Across All Graph Sizes</sub>',
        font=dict(size=20, color='#2C3E50', family='Arial'),
        x=0.5,
        xanchor='center',
        y=0.93,
        yanchor='top'
    ),
    xaxis_title='<b>Waktu Eksekusi (milliseconds)</b>',
    yaxis_title='<b>Algoritma</b>',
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(size=14, color='#34495E', family='Arial'),
    height=650,
    width=1200,
    margin=dict(l=150, r=100, t=120, b=80),
    xaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='#E8E8E8',
        zeroline=False,
        showline=True,
        linewidth=2,
        linecolor='#CCCCCC',
        mirror=True
    ),
    yaxis=dict(
        showgrid=False,
        categoryorder='total ascending',
        showline=True,
        linewidth=2,
        linecolor='#CCCCCC',
        mirror=True
    ),
    showlegend=False
)

fig.show()

# Save untuk PPT
fig.write_image("img/ppt_graph_1_execution_time.png", width=1200, height=600, scale=2)
print("Grafik disimpan: img/ppt_graph_1_execution_time.png")

Grafik disimpan: img/ppt_graph_1_execution_time.png


### Grafik 2: Perbandingan Multi-Metrik (Grouped Bar Chart)

Grafik grouped bar chart ini menyajikan perbandingan multi-dimensi dengan menormalisasi empat metrik kunci yaitu waktu eksekusi, penggunaan memori, jumlah iterasi, dan panjang jalur ke dalam skala yang seragam sehingga dapat dibandingkan secara adil. Setiap algoritma diwakili oleh empat batang berwarna berbeda yang masing-masing mewakili satu metrik, memungkinkan audiens untuk melihat trade-off antara berbagai aspek performa dalam satu visualisasi yang komprehensif. Algoritma yang ideal akan memiliki nilai rendah di semua metrik, namun dalam praktiknya sering terjadi trade-off di mana algoritma yang cepat mungkin menggunakan memori lebih banyak atau sebaliknya. Grafik ini sangat berguna untuk memberikan gambaran holistik tentang karakteristik setiap algoritma dan membantu pengambilan keputusan berdasarkan prioritas aplikasi spesifik seperti routing ambulans air yang membutuhkan keseimbangan antara kecepatan dan efisiensi sumber daya.

In [176]:
# Grafik 2: Multi-Metric Comparison - Normalized
from sklearn.preprocessing import MinMaxScaler

# Prepare data
comparison_data = df.groupby('algorithm').agg({
    'execution_time_ms': 'mean',
    'memory_usage_mb': 'mean',
    'iterations': 'mean',
    'path_length': 'mean'
}).reset_index()

# Normalize untuk comparison yang fair (inverse karena lower is better)
scaler = MinMaxScaler()
comparison_data['Time (Normalized)'] = 1 - scaler.fit_transform(comparison_data[['execution_time_ms']])
comparison_data['Memory (Normalized)'] = 1 - scaler.fit_transform(comparison_data[['memory_usage_mb']])
comparison_data['Iterations (Normalized)'] = 1 - scaler.fit_transform(comparison_data[['iterations']])
comparison_data['Path Length (Normalized)'] = 1 - scaler.fit_transform(comparison_data[['path_length']])

# Sort by average performance
comparison_data['avg_score'] = (comparison_data['Time (Normalized)'] + 
                                comparison_data['Memory (Normalized)'] + 
                                comparison_data['Iterations (Normalized)'] + 
                                comparison_data['Path Length (Normalized)']) / 4
comparison_data = comparison_data.sort_values('avg_score', ascending=False)

fig = go.Figure()

metrics = ['Time (Normalized)', 'Memory (Normalized)', 'Iterations (Normalized)', 'Path Length (Normalized)']
colors_metrics = ['#E63946', '#2A9D8F', '#F4A261', '#A8DADC']

for i, metric in enumerate(metrics):
    fig.add_trace(go.Bar(
        name=metric.replace(' (Normalized)', ''),
        x=comparison_data['algorithm'],
        y=comparison_data[metric],
        marker=dict(color=colors_metrics[i]),
        text=[f'{val:.2f}' for val in comparison_data[metric]],
        textposition='outside',
        textfont=dict(size=11)
    ))

fig.update_layout(
    title=dict(
        text='<b>Perbandingan Multi-Metrik Algoritma</b><br><sub>Higher is Better (Normalized Score 0-1)</sub>',
        font=dict(size=20, color='#2C3E50', family='Arial'),
        x=0.5,
        xanchor='center',
        y=0.94,
        yanchor='top'
    ),
    xaxis_title='<b>Algoritma</b>',
    yaxis_title='<b>Normalized Performance Score</b>',
    barmode='group',
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(size=13, color='#34495E', family='Arial'),
    height=700,
    width=1400,
    margin=dict(l=100, r=80, t=150, b=100),
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=1.0,
        xanchor='center',
        x=0.5,
        bgcolor='rgba(255,255,255,0.9)',
        bordercolor='#CCCCCC',
        borderwidth=1,
        font=dict(size=13)
    ),
    xaxis=dict(
        showgrid=False,
        tickangle=-45,
        showline=True,
        linewidth=2,
        linecolor='#CCCCCC',
        mirror=True
    ),
    yaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='#E8E8E8',
        range=[0, 1.15],
        showline=True,
        linewidth=2,
        linecolor='#CCCCCC',
        mirror=True
    )
)

fig.show()

fig.write_image("img/ppt_graph_2_multi_metric.png", width=1400, height=600, scale=2)
print("Grafik disimpan: img/ppt_graph_2_multi_metric.png")

Grafik disimpan: img/ppt_graph_2_multi_metric.png


### Grafik 3: Scalability Comparison (Line Chart dengan Markers)

Line chart dengan markers ini dirancang khusus untuk menganalisis skalabilitas algoritma dengan menunjukkan bagaimana performa setiap algoritma berubah ketika ukuran graf meningkat dari kecil ke menengah hingga besar. Setiap garis mewakili satu algoritma dengan warna yang konsisten dari palet yang telah ditentukan, dan marker berbentuk lingkaran memudahkan pembacaan nilai pada setiap kategori ukuran. Dari grafik ini dapat terlihat algoritma mana yang memiliki pertumbuhan waktu eksekusi yang linear atau eksponensial seiring bertambahnya kompleksitas graf, informasi yang sangat krusial untuk memprediksi performa pada dataset real-world yang bervariasi ukurannya. Algoritma dengan garis yang lebih landai menunjukkan skalabilitas yang lebih baik dan lebih cocok untuk aplikasi dengan graf berukuran besar, sedangkan algoritma dengan garis yang curam mungkin hanya optimal untuk graf kecil hingga menengah namun menjadi tidak praktis pada skala besar.

In [177]:
# Grafik 3: Scalability Analysis by Graph Size
scalability = df.groupby(['algorithm', 'graph_size'])['execution_time_ms'].mean().reset_index()

# Order graph sizes
size_order = ['small', 'medium', 'large']
scalability['graph_size'] = pd.Categorical(scalability['graph_size'], categories=size_order, ordered=True)
scalability = scalability.sort_values(['algorithm', 'graph_size'])

fig = go.Figure()

for algo in df['algorithm'].unique():
    algo_data = scalability[scalability['algorithm'] == algo]
    fig.add_trace(go.Scatter(
        x=algo_data['graph_size'],
        y=algo_data['execution_time_ms'],
        mode='lines+markers',
        name=algo,
        line=dict(color=SOFT_COLORS.get(algo, '#CCCCCC'), width=3),
        marker=dict(size=10, symbol='circle', line=dict(width=2, color='white'))
    ))

fig.update_layout(
    title=dict(
        text='<b>Analisis Skalabilitas Algoritma</b><br><sub>Performa pada Berbagai Ukuran Graf</sub>',
        font=dict(size=20, color='#2C3E50', family='Arial'),
        x=0.5,
        xanchor='center',
        y=0.93,
        yanchor='top'
    ),
    xaxis_title='<b>Ukuran Graf</b>',
    yaxis_title='<b>Waktu Eksekusi (milliseconds)</b>',
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(size=13, color='#34495E', family='Arial'),
    height=650,
    width=1400,
    margin=dict(l=100, r=250, t=130, b=80),
    legend=dict(
        orientation='v',
        yanchor='middle',
        y=0.45,
        xanchor='left',
        x=1.01,
        bgcolor='rgba(255,255,255,0.95)',
        bordercolor='#CCCCCC',
        borderwidth=2,
        font=dict(size=12)
    ),
    xaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='#E8E8E8',
        showline=True,
        linewidth=2,
        linecolor='#CCCCCC',
        mirror=True
    ),
    yaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='#E8E8E8',
        showline=True,
        linewidth=2,
        linecolor='#CCCCCC',
        mirror=True,
        rangemode='tozero'
    ),
    hovermode='x unified'
)

fig.show()
fig.write_image("img/ppt_graph_3_scalability.png", width=1300, height=600, scale=2)
print("Grafik disimpan: img/ppt_graph_3_scalability.png")

Grafik disimpan: img/ppt_graph_3_scalability.png


### Grafik 4: Performance Heatmap (Professional)

Heatmap ini menyediakan representasi visual yang sangat intuitif dengan menggunakan gradasi warna untuk menunjukkan performa relatif setiap algoritma pada empat dimensi utama yaitu kecepatan eksekusi, efisiensi memori, efisiensi iterasi, dan kualitas jalur. Sistem scoring telah dinormalisasi ke skala 0 hingga 100 di mana nilai yang lebih tinggi merepresentasikan performa yang lebih baik, dan skala warna hijau-kuning-merah memudahkan identifikasi cepat terhadap kekuatan dan kelemahan setiap algoritma. Matriks ini memungkinkan perbandingan simultan pada multiple dimensi, sehingga pembaca dapat dengan mudah mengidentifikasi algoritma yang konsisten unggul di semua metrik atau algoritma yang memiliki trade-off spesifik seperti sangat cepat namun kurang efisien dalam penggunaan memori. Format heatmap sangat efektif untuk presentasi karena dapat menyampaikan informasi kompleks dalam format yang visual dan mudah dipahami bahkan oleh audiens non-teknis.

In [178]:
# Graph 4: Performance Heatmap - All Metrics Overview
# Create normalized scores for heatmap visualization
metrics_for_heatmap = ['execution_time_ms', 'memory_usage_mb', 'iterations', 'path_length']
heatmap_data = df.groupby('algorithm')[metrics_for_heatmap].mean()

# Normalize each metric to 0-100 scale (lower is better for time, memory, iterations)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 100))
heatmap_normalized = pd.DataFrame(
    scaler.fit_transform(heatmap_data),
    index=heatmap_data.index,
    columns=heatmap_data.columns
)

# Invert time, memory, iterations (so lower is better = higher score)
for col in ['execution_time_ms', 'memory_usage_mb', 'iterations']:
    heatmap_normalized[col] = 100 - heatmap_normalized[col]

# Rename columns for presentation
heatmap_normalized.columns = ['Execution Speed', 'Memory Efficiency', 'Iteration Efficiency', 'Path Quality']

# Create heatmap
fig = go.Figure(data=go.Heatmap(
    z=heatmap_normalized.values,
    x=heatmap_normalized.columns,
    y=heatmap_normalized.index,
    colorscale='RdYlGn',
    text=heatmap_normalized.round(1).values,
    texttemplate='%{text}',
    textfont={"size": 13, "family": "Arial", "color": "black"},
    colorbar=dict(
        title=dict(text="Performance<br>Score", font=dict(size=12, family="Arial")),
        len=0.7
    )
))

fig.update_layout(
    title=dict(
        text='<b>Performance Heatmap - Multi-Dimensional Comparison</b><br><sub>Higher Score = Better Performance (Normalized 0-100)</sub>',
        font=dict(size=20, color='#2C3E50', family='Arial'),
        x=0.5,
        xanchor='center',
        y=0.93,
        yanchor='top'
    ),
    xaxis_title='<b>Performance Metrics</b>',
    yaxis_title='<b>Algorithm</b>',
    font=dict(family='Arial', size=12, color='#34495E'),
    height=650,
    width=1300,
    margin=dict(l=150, r=150, t=130, b=80),
    plot_bgcolor='white',
    paper_bgcolor='white',
    xaxis=dict(
        showline=True,
        linewidth=2,
        linecolor='#CCCCCC',
        mirror=True
    ),
    yaxis=dict(
        showline=True,
        linewidth=2,
        linecolor='#CCCCCC',
        mirror=True
    )
)

fig.show()
fig.write_image("img/ppt_graph_4_heatmap.png", width=1300, height=600, scale=2)
print("Grafik disimpan: img/ppt_graph_4_heatmap.png")

Grafik disimpan: img/ppt_graph_4_heatmap.png


### Grafik 5: Overall Winner - Radar Chart

Radar chart atau spider chart ini menampilkan lima algoritma terbaik berdasarkan skor performa keseluruhan dalam format multi-axis yang memungkinkan perbandingan visual terhadap lima dimensi kinerja secara simultan yaitu kecepatan, efisiensi memori, efisiensi iterasi, kualitas jalur, dan adaptabilitas terhadap densitas graf. Setiap algoritma direpresentasikan oleh polygon berwarna yang menghubungkan nilai-nilai pada setiap axis, di mana polygon yang lebih besar dan lebih mendekati tepi luar menunjukkan performa superior di berbagai dimensi. Visualisasi ini sangat berguna untuk mengidentifikasi algoritma yang balanced atau well-rounded versus algoritma yang memiliki spesialisasi pada dimensi tertentu, misalnya algoritma yang sangat cepat namun kurang adaptif pada graf dengan densitas tinggi. Format radar chart memberikan perspektif holistik yang sangat berguna ketika decision maker perlu mempertimbangkan multiple criteria secara bersamaan untuk memilih algoritma terbaik sesuai dengan konteks dan constraint aplikasi spesifik mereka.

In [179]:
# Graph 5: Radar Chart - Top 5 Algorithms Comparison
# Select top 5 algorithms based on overall performance score
top5_algos = df.groupby('algorithm')['performance_score'].mean().nlargest(5).index.tolist()

# Prepare normalized metrics for radar chart
radar_metrics = ['execution_time_ms', 'memory_usage_mb', 'iterations', 'path_length', 'graph_density']
radar_data = df[df['algorithm'].isin(top5_algos)].groupby('algorithm')[radar_metrics].mean()

# Normalize to 0-100 scale (invert time, memory, iterations so higher is better)
radar_normalized = pd.DataFrame(
    scaler.fit_transform(radar_data),
    index=radar_data.index,
    columns=radar_data.columns
)
for col in ['execution_time_ms', 'memory_usage_mb', 'iterations']:
    radar_normalized[col] = 100 - radar_normalized[col]

# Rename for presentation
radar_normalized.columns = ['Speed', 'Memory', 'Efficiency', 'Path Quality', 'Adaptability']

# Create radar chart
fig = go.Figure()

for algo in top5_algos:
    values = radar_normalized.loc[algo].tolist()
    values.append(values[0])  # Close the radar
    
    fig.add_trace(go.Scatterpolar(
        r=values,
        theta=list(radar_normalized.columns) + [radar_normalized.columns[0]],
        fill='toself',
        name=algo,
        line=dict(color=SOFT_COLORS.get(algo, '#CCCCCC'), width=3),
        opacity=0.7
    ))

fig.update_layout(
    polar=dict(
        radialaxis=dict(visible=True, range=[0, 100], showline=True, linewidth=2, gridcolor='lightgray'),
        bgcolor='white'
    ),
    title=dict(
        text='<b>Top 5 Algorithms - Multi-Dimensional Performance</b><br><sub>Radar Comparison (Score 0-100)</sub>',
        font=dict(size=20, color='#2C3E50', family='Arial'),
        x=0.5,
        xanchor='center',
        y=0.94,
        yanchor='top'
    ),
    showlegend=True,
    legend=dict(orientation="h", yanchor="top", y=-0.15, xanchor="center", x=0.5, font=dict(size=11)),
    height=750,
    width=1300,
    margin=dict(l=80, r=80, t=130, b=150),
    font=dict(family='Arial', size=12, color='#34495E'),
    paper_bgcolor='white'
)

fig.show()
fig.write_image("img/ppt_graph_5_radar_top5.png", width=1300, height=650, scale=2)
print("Grafik disimpan: img/ppt_graph_5_radar_top5.png")

Grafik disimpan: img/ppt_graph_5_radar_top5.png


### Grafik 6: Summary Table - Algorithm Rankings

Tabel summary ini mengkonsolidasikan seluruh temuan analisis ke dalam format tabular yang sistematis dengan menampilkan nilai rata-rata dari setiap metrik kinerja beserta ranking relatif untuk setiap algoritma, memudahkan pembaca untuk mendapatkan overview komprehensif dalam satu pandangan. Sistem ranking menggunakan angka 1 untuk yang terbaik dan semakin besar angka menunjukkan performa yang lebih rendah, dengan kolom Overall Rank yang mengintegrasikan semua dimensi performa ke dalam satu skor agregat. Tabel ini didesain dengan color coding di mana baris bergantian antara putih dan soft green untuk meningkatkan readability, sementara header berwarna biru gelap memberikan kontras yang jelas dan tampilan yang profesional. Format tabel sangat cocok untuk dokumentasi formal dan decision making karena menyajikan data numerik yang presisi dan mudah dibandingkan, sekaligus dapat dijadikan referensi cepat untuk memilih algoritma berdasarkan kriteria spesifik seperti prioritas pada kecepatan eksekusi, efisiensi memori, atau performa keseluruhan yang balanced.

In [None]:
# Graph 6: Professional Summary Table with Rankings
summary_table = df.groupby('algorithm').agg({
    'execution_time_ms': 'mean',
    'memory_usage_mb': 'mean',
    'iterations': 'mean',
    'path_length': 'mean',
    'performance_score': 'mean'
}).round(2)

# Add rankings (1 = best)
summary_table['Speed Rank'] = summary_table['execution_time_ms'].rank(ascending=True).astype(int)
summary_table['Memory Rank'] = summary_table['memory_usage_mb'].rank(ascending=True).astype(int)
summary_table['Efficiency Rank'] = summary_table['iterations'].rank(ascending=True).astype(int)
summary_table['Overall Rank'] = summary_table['performance_score'].rank(ascending=True).astype(int)  # FIXED: Lower score is better

# Reorder columns for presentation
summary_table = summary_table[['execution_time_ms', 'Speed Rank', 'memory_usage_mb', 'Memory Rank', 
                                'iterations', 'Efficiency Rank', 'performance_score', 'Overall Rank']]
summary_table.columns = ['Avg Time (ms)', 'Speed Rank', 'Avg Memory (MB)', 'Memory Rank', 
                         'Avg Iterations', 'Efficiency Rank', 'Performance Score', 'Overall Rank']

# Sort by overall rank
summary_table = summary_table.sort_values('Overall Rank')

# Create table visualization using plotly
fig = go.Figure(data=[go.Table(
    header=dict(
        values=['<b>Algorithm</b>'] + ['<b>' + col + '</b>' for col in summary_table.columns],
        fill_color='#457B9D',
        align='center',
        font=dict(color='white', size=13, family='Arial'),
        height=40
    ),
    cells=dict(
        values=[summary_table.index] + [summary_table[col] for col in summary_table.columns],
        fill_color=[['#F1FAEE' if i % 2 == 0 else 'white' for i in range(len(summary_table))]],
        align=['left'] + ['center'] * len(summary_table.columns),
        font=dict(color='#1D3557', size=12, family='Arial'),
        height=35
    )
)])

fig.update_layout(
    title=dict(
        text='<b>Algorithm Performance Summary & Rankings</b><br><sub>Comprehensive Comparison Table (Lower Rank = Better)</sub>',
        font=dict(size=20, color='#2C3E50', family='Arial'),
        x=0.5,
        xanchor='center',
        y=0.94,
        yanchor='top'
    ),
    height=650,
    width=1400,
    margin=dict(l=80, r=80, t=120, b=80),
    font=dict(family='Arial', size=12, color='#34495E'),
    paper_bgcolor='white'
)

fig.show()
fig.write_image("img/ppt_graph_6_summary_table.png", width=1400, height=600, scale=2)
print("Grafik disimpan: img/ppt_graph_6_summary_table.png")

# Also display as pandas table for reference
print("\nDetailed Summary Table:")
display(summary_table)

Grafik disimpan: img/ppt_graph_6_summary_table.png

Detailed Summary Table:


Unnamed: 0_level_0,Avg Time (ms),Speed Rank,Avg Memory (MB),Memory Rank,Avg Iterations,Efficiency Rank,Performance Score,Overall Rank
algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Floyd-Warshall,61309.87,9,0.35,9,4365.95,9,0.65,1
Bellman-Ford,39900.28,8,0.16,3,3021.51,8,0.16,2
Johnson,33913.73,7,0.21,6,2684.65,7,0.14,3
Multi-Source BFS,13664.84,4,0.24,8,2048.86,5,-0.05,4
A*,15532.81,5,0.23,7,1229.28,2,-0.12,5
Dijkstra,18108.8,6,0.17,4,1635.09,3,-0.13,6
BFS,11346.69,3,0.19,5,1899.1,4,-0.14,7
DFS,10178.41,2,0.12,1,2392.62,6,-0.17,8
Topological Sort,8490.37,1,0.14,2,1008.78,1,-0.3,9
