In [None]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Folder holding one JSON result file per trained model
results_folder = './results'

# One flat record (dict) per model result file
models_data = []

# Read every JSON file in the folder. os.listdir returns entries in arbitrary
# order, so sort them to get the same row (and plot) order on every run.
for filename in sorted(os.listdir(results_folder)):
    if not filename.endswith('.json'):
        continue
    filepath = os.path.join(results_folder, filename)
    # Explicit encoding so parsing does not depend on the platform's locale default
    with open(filepath, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # Keep only the fields needed for the comparison, flattened to one level
    models_data.append({
        'model_name': data['model_name'],
        'accuracy': data['accuracy'],
        'weighted_avg_f1_score': data['classification_report']['weighted avg']['f1-score'],
        # Converted to float seconds here, exactly once. Do NOT pass the resulting
        # column through pd.to_timedelta again: bare numbers are interpreted as
        # nanoseconds, which would shrink every value by a factor of 1e9.
        'total_training_time': pd.to_timedelta(data['total_training_time']).total_seconds(),
    })

# One row per model; 'total_training_time' is already expressed in seconds
comparison_df = pd.DataFrame(models_data)

# Displaying the comparison
print(comparison_df)

In [None]:
# Rebuild the comparison table from the collected per-model records
comparison_df = pd.DataFrame(models_data)

# Show the whole table
print(comparison_df)

# Row of the model whose accuracy is the highest
top_accuracy_label = comparison_df['accuracy'].idxmax()
best_model_accuracy = comparison_df.loc[top_accuracy_label]
print(f'\nBest model based on accuracy: \n{best_model_accuracy}')

# Row of the model whose weighted average F1-score is the highest
top_f1_label = comparison_df['weighted_avg_f1_score'].idxmax()
best_model_f1 = comparison_df.loc[top_f1_label]
print(f'\nBest model based on weighted average F1-score:\n{best_model_f1}')

In [None]:
# Axis limits used by the plots: the observed range of each metric,
# widened by a small fixed margin on both sides.
accuracy_values = comparison_df['accuracy']
f1_values = comparison_df['weighted_avg_f1_score']
time_values = comparison_df['total_training_time']

min_accuracy, max_accuracy = accuracy_values.min() - 0.001, accuracy_values.max() + 0.001
min_f1, max_f1 = f1_values.min() - 0.001, f1_values.max() + 0.001
min_time, max_time = time_values.min() - 0.1, time_values.max() + 0.1

In [None]:
# Horizontal bar charts, one figure per quality metric.
# Each entry: (column to plot, figure title, x-axis label, (x-min, x-max))
bar_specs = [
    ('accuracy', 'Model Accuracy Comparison', 'Accuracy', (min_accuracy, max_accuracy)),
    ('weighted_avg_f1_score', 'Model Weighted Average F1-Score Comparison', 'Weighted Avg F1-Score', (min_f1, max_f1)),
]

for column, title, xlabel, (x_low, x_high) in bar_specs:
    plt.figure(figsize=(14, 7))
    sns.barplot(data=comparison_df, x=column, y='model_name', palette='viridis', hue='model_name', legend=False)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Model Name')
    # Restrict the x-axis to the precomputed range for this metric
    plt.xlim(x_low, x_high)

    # Write each value at x = value; the row's position in the frame is used as the y coordinate
    for row_pos, metric_value in enumerate(comparison_df[column]):
        plt.text(metric_value, row_pos, f'{metric_value:.4f}')

    plt.show()

In [None]:
# Horizontal bar chart of the training time of every model
plt.figure(figsize=(14, 7))
time_ax = sns.barplot(data=comparison_df, x='total_training_time', y='model_name', hue='model_name', legend=False)
time_ax.set_title('Model Training Time Comparison')
time_ax.set_xlabel('Training Time (s)')
time_ax.set_ylabel('Model Name')
# x-range: the precomputed time limits padded by 0.9 on each side
time_ax.set_xlim(min_time - 0.9, max_time + 0.9)

# Label each bar with its value; the row's position in the frame is used as the y coordinate
for row_pos, seconds in enumerate(comparison_df['total_training_time']):
    time_ax.text(seconds, row_pos, f'{seconds:.2f}s')

plt.show()

In [None]:
# Scatter plots, one figure per metric, with one point per model.
# Exactly one figure is opened per plot: opening a figure and then immediately
# opening another leaves the first one empty, yet it is still rendered by the
# next plt.show().
# Each entry: (column to plot, figure title, x-axis label, (x-min, x-max))
scatter_specs = [
    ('accuracy', 'Model Accuracy Comparison (Scatter Plot)', 'Accuracy', (min_accuracy, max_accuracy)),
    ('weighted_avg_f1_score', 'Model Weighted Average F1-Score Comparison (Scatter Plot)', 'Weighted Avg F1-Score', (min_f1, max_f1)),
    ('total_training_time', 'Model Training Time Comparison (Scatter Plot)', 'Training Time (s)', (min_time, max_time)),
]

for column, title, xlabel, (x_low, x_high) in scatter_specs:
    plt.figure(figsize=(14, 7))
    sns.scatterplot(data=comparison_df, x=column, y='model_name', hue='model_name', palette='viridis', s=100, legend=False)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Model Name')
    # Restrict the x-axis to the precomputed range for this metric
    plt.xlim(x_low, x_high)

    # Applied to every figure so all three plots share the same layout treatment
    plt.tight_layout()
    plt.show()