In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## BBC Dataset

In [None]:
# Evaluation scores for the BBC dataset, copied from the results table.
# Each list is aligned by position with the 'Model' list.
data = {
    'Model': ['LDA', 'NMF', 'mpnet_v1', 'mpnet_v2', 'fasttext', 'Llama-8B', 'Gemini pro'],
    'Purity': [0.72, 0.70, 0.88, 0.87, 0.87, 0.93, 0.96],
    'NMI': [0.59, 0.45, 0.66, 0.58, 0.74, 0.83, 0.89],
    'F1-score': [0.66, 0.70, 0.88, 0.86, 0.86, 0.93, 0.96],
    'Accuracy': [0.72, 0.70, 0.87, 0.85, 0.87, 0.93, 0.96]
}

# Convert to DataFrame so each metric can be pulled out as a column
df = pd.DataFrame(data)
metrics = ['Purity', 'NMI', 'F1-score', 'Accuracy']

# Set up the figure and axes
fig, ax = plt.subplots(figsize=(12, 6))

# Grouped-bar geometry: one group per model, one bar per metric.
num_models = len(data['Model'])
num_metrics = len(metrics)
group_width = 0.8  # Share of each unit-wide slot filled by bars; the rest is the gap between models
bar_width = group_width / num_metrics  # 0.2 for four metrics; stays non-overlapping if metrics are added
x = np.arange(num_models)  # Position of the first bar in each group

# Draw one bar series per metric, each shifted right by one bar width
# (bars are fully opaque and use matplotlib's default colour cycle)
for i, metric in enumerate(metrics):
    ax.bar(x + i * bar_width, df[metric], width=bar_width, label=metric)

# Labeling
ax.set_xlabel("Model")
ax.set_ylabel("Scores")
ax.set_title("Model Performance Metrics in BBC Dataset")
# Put each tick midway between the first and last bar of its group
ax.set_xticks(x + bar_width * (num_metrics - 1) / 2)
ax.set_xticklabels(data['Model'])
ax.legend(title="Metrics")

plt.show()


## Newsgroups Dataset

In [None]:
# Evaluation scores for the newsgroups dataset, copied from the results table.
# Each list is aligned by position with the 'Model' list.
data = {
    'Model': ['LDA', 'NMF', 'mpnet_v1', 'mpnet_v2', 'fasttext', 'Llama-8B', 'Gemini pro'],
    'Purity': [0.55, 0.37, 0.67, 0.68, 0.60, 0.71, 0.79],
    'NMI': [0.38, 0.16, 0.38, 0.38, 0.34, 0.50, 0.67],
    'F1-score': [0.51, 0.48, 0.65, 0.66, 0.60, 0.68, 0.79],
    'Accuracy': [0.56, 0.37, 0.66, 0.68, 0.60, 0.68, 0.79]
}

# Convert to DataFrame so each metric can be pulled out as a column
df = pd.DataFrame(data)
metrics = ['Purity', 'NMI', 'F1-score', 'Accuracy']

# Set up the figure and axes
fig, ax = plt.subplots(figsize=(12, 6))

# Grouped-bar geometry: one group per model, one bar per metric.
num_models = len(data['Model'])
num_metrics = len(metrics)
group_width = 0.8  # Share of each unit-wide slot filled by bars; the rest is the gap between models
bar_width = group_width / num_metrics  # 0.2 for four metrics; stays non-overlapping if metrics are added
x = np.arange(num_models)  # Position of the first bar in each group

# Draw one bar series per metric, each shifted right by one bar width
# (bars are fully opaque and use matplotlib's default colour cycle)
for i, metric in enumerate(metrics):
    ax.bar(x + i * bar_width, df[metric], width=bar_width, label=metric)

# Labeling
ax.set_xlabel("Model")
ax.set_ylabel("Scores")
ax.set_title("Model Performance Metrics in newsgroups Dataset")
# Put each tick midway between the first and last bar of its group
ax.set_xticks(x + bar_width * (num_metrics - 1) / 2)
ax.set_xticklabels(data['Model'])
ax.legend(title="Metrics")

plt.show()