In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ast

In [None]:
# Load the evaluation sheet from the working directory. Later cells read the
# 'Process', 'Cluster', 'Found Wikipedia articles' and the three rating
# columns from this frame.
evaluation = pd.read_csv('evaluation.csv')
# Preview the first rows to confirm the file parsed as expected.
evaluation.head()

In [None]:
# Keep a single row (the first one encountered) per (Process, Cluster) pair so
# that each cluster contributes once per process to the article statistics.
# NOTE(review): presumably rows sharing Process and Cluster also share the same
# 'Found Wikipedia articles' value; confirm, since the later rows are dropped.
evaluation_unique = evaluation.drop_duplicates(subset=['Process', 'Cluster'])

# Function to count articles
def count_articles(x):
    """Return how many articles one 'Found Wikipedia articles' cell lists.

    The cell is first parsed as a Python literal (e.g. ``"{'A', 'B'}"`` or
    ``"['A', 'B']"``). If it is not a literal collection, it is treated as a
    comma-separated list optionally wrapped in curly braces (e.g. ``"{A, B}"``).

    Args:
        x: Raw cell value. Normally a string; an already-parsed collection is
            counted directly, and any other non-string value (such as the
            float NaN pandas uses for a missing cell) counts as zero articles.

    Returns:
        int: Number of articles found in the cell. A set literal counts
        distinct entries, because parsing it builds a real ``set``.
    """
    collection_types = (set, frozenset, list, tuple, dict)

    if isinstance(x, collection_types):
        return len(x)
    if not isinstance(x, str):
        # Missing cells are float NaN (or None): they can be neither parsed
        # nor .strip()-ed, so report zero articles instead of raising.
        return 0

    try:
        articles = ast.literal_eval(x)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a valid Python literal (e.g. unquoted names); split it manually.
        articles = None

    # Only trust len() on real collections: a lone quoted title parses to a
    # str, whose length would be a character count, not an article count.
    if isinstance(articles, collection_types):
        return len(articles)

    return len([a for a in x.strip('{}').split(',') if a.strip()])

# Assign n_articles
# evaluation_unique was derived from `evaluation` by filtering, so adding a
# column to it in place can raise SettingWithCopyWarning. `.assign` builds a
# new frame instead, which also keeps the cell idempotent on re-run.
evaluation_unique = evaluation_unique.assign(
    n_articles=evaluation_unique['Found Wikipedia articles'].apply(count_articles)
)

# Group by Process: mean and total number of found articles per process.
article_stats_per_method = evaluation_unique.groupby('Process')['n_articles'].agg(['mean', 'sum'])

print(article_stats_per_method)

##  TEXT Method

In [None]:
# Restrict the evaluation sheet to the rows of the TEXT process.
evaluation_text = evaluation[evaluation['Process'] == 'TEXT']

# Rating columns to visualise, one panel each. The strings are used as column
# keys of the frame, so they must stay spelled exactly like its headers.
columns_to_plot = [
    'Themnerelevanz(1-5)',
    'Relevanz zu Wikipedia Verlinkung (1-5)',
    'Zusammenfasung (1-5)',
]

sns.set(style='whitegrid')

# One row of three panels, one boxplot per rating column.
fig, axes = plt.subplots(1, 3, figsize=(12, 6))
for ax, col in zip(axes, columns_to_plot):
    sns.boxplot(y=evaluation_text[col], ax=ax)
    ax.set_title(col)
    # Fixed limits keep the whole 1-5 scale visible with a small margin.
    ax.set_ylim(0.5, 5.5)

fig.suptitle('Boxplots for TEXT Process Evaluation (1–5 Scale)', fontsize=16)
# Reserve the top 5 % of the canvas for the figure-level title.
fig.tight_layout(rect=[0, 0, 1, 0.95])
fig.savefig('text_process_boxplots.png', dpi=300)
plt.show()

In [None]:
# Restrict the evaluation sheet to the rows of the TEXT process.
evaluation_text = evaluation[evaluation['Process'] == 'TEXT']

# Rating columns to visualise, one panel each. The strings are used as column
# keys of the frame, so they must stay spelled exactly like its headers.
columns_to_plot = [
    'Themnerelevanz(1-5)',
    'Relevanz zu Wikipedia Verlinkung (1-5)',
    'Zusammenfasung (1-5)',
]

sns.set(style='whitegrid')

# One row of three panels, one swarm plot per rating column, so every
# individual rating is drawn as its own point.
fig, axes = plt.subplots(1, 3, figsize=(12, 6))
for ax, col in zip(axes, columns_to_plot):
    sns.swarmplot(y=evaluation_text[col], ax=ax)
    ax.set_title(col)
    # Fixed limits keep the whole 1-5 scale visible with a small margin.
    ax.set_ylim(0.5, 5.5)

fig.suptitle('Swarm Plots for TEXT Process Evaluation (1–5 Scale)', fontsize=16)
# Reserve the top 5 % of the canvas for the figure-level title.
fig.tight_layout(rect=[0, 0, 1, 0.95])
fig.savefig('text_process_swarmplots.png', dpi=300)
plt.show()

## NET Method

In [None]:
# Restrict the evaluation sheet to the rows of the NET process. The subset is
# named after its process so it cannot be mistaken for the TEXT subset.
evaluation_net = evaluation[evaluation['Process'] == 'NET']

# Rating columns to visualise, one panel each. The strings are used as column
# keys of the frame, so they must stay spelled exactly like its headers.
columns_to_plot = [
    'Themnerelevanz(1-5)',
    'Relevanz zu Wikipedia Verlinkung (1-5)',
    'Zusammenfasung (1-5)',
]

sns.set(style='whitegrid')

# One row of three panels, one boxplot per rating column.
fig, axes = plt.subplots(1, 3, figsize=(12, 6))
for ax, col in zip(axes, columns_to_plot):
    sns.boxplot(y=evaluation_net[col], ax=ax)
    ax.set_title(col)
    # Fixed limits keep the whole 1-5 scale visible with a small margin.
    ax.set_ylim(0.5, 5.5)

fig.suptitle('Boxplots for NET Process Evaluation (1–5 Scale)', fontsize=16)
# Reserve the top 5 % of the canvas for the figure-level title.
fig.tight_layout(rect=[0, 0, 1, 0.95])
fig.savefig('net_process_boxplots.png', dpi=300)
plt.show()

In [None]:
# Restrict the evaluation sheet to the rows of the NET process. The subset is
# named after its process so it cannot be mistaken for the TEXT subset.
evaluation_net = evaluation[evaluation['Process'] == 'NET']

# Rating columns to visualise, one panel each. The strings are used as column
# keys of the frame, so they must stay spelled exactly like its headers.
columns_to_plot = [
    'Themnerelevanz(1-5)',
    'Relevanz zu Wikipedia Verlinkung (1-5)',
    'Zusammenfasung (1-5)',
]

sns.set(style='whitegrid')

# One row of three panels, one swarm plot per rating column, so every
# individual rating is drawn as its own point.
fig, axes = plt.subplots(1, 3, figsize=(12, 6))
for ax, col in zip(axes, columns_to_plot):
    sns.swarmplot(y=evaluation_net[col], ax=ax)
    ax.set_title(col)
    # Fixed limits keep the whole 1-5 scale visible with a small margin.
    ax.set_ylim(0.5, 5.5)

fig.suptitle('Swarm Plots for NET Process Evaluation (1–5 Scale)', fontsize=16)
# Reserve the top 5 % of the canvas for the figure-level title.
fig.tight_layout(rect=[0, 0, 1, 0.95])
fig.savefig('net_process_swarmplots.png', dpi=300)
plt.show()

## SUM Method

In [None]:
# Restrict the evaluation sheet to the rows of the SUM process. The subset is
# named after its process so it cannot be mistaken for the TEXT subset.
evaluation_sum = evaluation[evaluation['Process'] == 'SUM']

# Rating columns to visualise, one panel each. The strings are used as column
# keys of the frame, so they must stay spelled exactly like its headers.
columns_to_plot = [
    'Themnerelevanz(1-5)',
    'Relevanz zu Wikipedia Verlinkung (1-5)',
    'Zusammenfasung (1-5)',
]

sns.set(style='whitegrid')

# One row of three panels, one boxplot per rating column.
fig, axes = plt.subplots(1, 3, figsize=(12, 6))
for ax, col in zip(axes, columns_to_plot):
    sns.boxplot(y=evaluation_sum[col], ax=ax)
    ax.set_title(col)
    # Fixed limits keep the whole 1-5 scale visible with a small margin.
    ax.set_ylim(0.5, 5.5)

fig.suptitle('Boxplots for SUM Process Evaluation (1–5 Scale)', fontsize=16)
# Reserve the top 5 % of the canvas for the figure-level title.
fig.tight_layout(rect=[0, 0, 1, 0.95])
fig.savefig('sum_process_boxplots.png', dpi=300)
plt.show()

In [None]:
# Restrict the evaluation sheet to the rows of the SUM process. The subset is
# named after its process so it cannot be mistaken for the TEXT subset.
evaluation_sum = evaluation[evaluation['Process'] == 'SUM']

# Rating columns to visualise, one panel each. The strings are used as column
# keys of the frame, so they must stay spelled exactly like its headers.
columns_to_plot = [
    'Themnerelevanz(1-5)',
    'Relevanz zu Wikipedia Verlinkung (1-5)',
    'Zusammenfasung (1-5)',
]

sns.set(style='whitegrid')

# One row of three panels, one swarm plot per rating column, so every
# individual rating is drawn as its own point.
fig, axes = plt.subplots(1, 3, figsize=(12, 6))
for ax, col in zip(axes, columns_to_plot):
    sns.swarmplot(y=evaluation_sum[col], ax=ax)
    ax.set_title(col)
    # Fixed limits keep the whole 1-5 scale visible with a small margin.
    ax.set_ylim(0.5, 5.5)

fig.suptitle('Swarm Plots for SUM Process Evaluation (1–5 Scale)', fontsize=16)
# Reserve the top 5 % of the canvas for the figure-level title.
fig.tight_layout(rect=[0, 0, 1, 0.95])
fig.savefig('sum_process_swarmplots.png', dpi=300)
plt.show()