In [1]:
import pandas as pd
import numpy as np

# Transcribe the results table into a DataFrame. Only the numeric columns are
# captured: Present, Absent and Diff for each metric, one entry per dimension.

dimensions = [
    "General Description",
    "Brevity Style",
    "Language Formality",
    "Narrative Voice",
    "Vocabulary Range",
    "Punctuation Style",
    "Controversy Handling",
    "Community Role",
    "Content Triggers",
    "Reaction Patterns",
    "Message Effectiveness",
    "Opinion Expression",
    "Emotional Expression",
    "Cognitive Patterns",
    "Social Orientation",
    "Conflict Approach",
    "Value Signals",
    "Identity Projection",
    "Belief Expression",
    "Stress Indicators",
    "Adaptability Signs",
    "Authenticity Markers",
]

# Numeric values per dimension; every list is positionally aligned with `dimensions`.
#
# NOTE(review): two Diff entries do not equal Present - Absent beyond rounding:
#   * style_diff[18] ("Belief Expression") is -0.225, but 6.251 - 6.521 = -0.270
#   * rougeL_diff[16] ("Value Signals") is -0.005, but 0.145 - 0.152 = -0.007
# Verify these rows against the original table before relying on them.

# --- Cosine similarity ---
cosine_sim_present = [
    0.767, 0.768, 0.771, 0.767, 0.766, 0.771, 0.764, 0.762, 0.766, 0.766, 0.767,
    0.766, 0.767, 0.765, 0.769, 0.764, 0.766, 0.763, 0.764, 0.767, 0.766, 0.768,
]
cosine_sim_absent = [
    0.766, 0.766, 0.765, 0.766, 0.766, 0.765, 0.767, 0.767, 0.766, 0.766, 0.766,
    0.766, 0.766, 0.767, 0.766, 0.766, 0.766, 0.767, 0.766, 0.766, 0.766, 0.766,
]
cosine_sim_diff = [
    0.001, 0.002, 0.006, 0.001, 0.000, 0.006, -0.003, -0.005, 0.000, 0.000, 0.001,
    0.000, 0.001, -0.002, 0.003, -0.002, 0.000, -0.004, -0.002, 0.001, 0.000, 0.002,
]

# --- ROUGE-1 ---
rouge1_present = [
    0.213, 0.218, 0.214, 0.207, 0.209, 0.220, 0.210, 0.205, 0.210, 0.206, 0.214,
    0.212, 0.212, 0.211, 0.210, 0.211, 0.205, 0.211, 0.207, 0.210, 0.214, 0.211,
]
rouge1_absent = [
    0.210, 0.209, 0.210, 0.211, 0.211, 0.209, 0.211, 0.211, 0.211, 0.211, 0.210,
    0.210, 0.211, 0.210, 0.211, 0.210, 0.212, 0.210, 0.210, 0.210, 0.210, 0.210,
]
rouge1_diff = [
    0.003, 0.009, 0.004, -0.004, -0.002, 0.011, -0.001, -0.006, -0.001, -0.005, 0.004,
    0.002, 0.001, 0.001, -0.001, 0.001, -0.006, 0.001, -0.003, 0.000, 0.004, 0.001,
]

# --- ROUGE-2 ---
rouge2_present = [
    0.059, 0.059, 0.056, 0.055, 0.055, 0.060, 0.053, 0.056, 0.055, 0.053, 0.058,
    0.057, 0.053, 0.055, 0.058, 0.055, 0.055, 0.056, 0.054, 0.054, 0.055, 0.056,
]
rouge2_absent = [
    0.055, 0.055, 0.056, 0.056, 0.056, 0.055, 0.056, 0.056, 0.056, 0.056, 0.055,
    0.055, 0.056, 0.056, 0.056, 0.056, 0.056, 0.056, 0.056, 0.056, 0.056, 0.056,
]
rouge2_diff = [
    0.004, 0.004, 0.000, -0.001, -0.001, 0.005, -0.003, 0.000, -0.001, -0.003, 0.003,
    0.002, -0.003, -0.001, 0.002, -0.001, -0.001, 0.000, -0.002, -0.002, -0.001, 0.000,
]

# --- ROUGE-L ---
rougeL_present = [
    0.154, 0.162, 0.153, 0.149, 0.147, 0.157, 0.147, 0.147, 0.150, 0.150, 0.154,
    0.152, 0.152, 0.150, 0.150, 0.151, 0.145, 0.151, 0.146, 0.151, 0.151, 0.151,
]
rougeL_absent = [
    0.150, 0.148, 0.150, 0.151, 0.151, 0.149, 0.151, 0.151, 0.150, 0.150, 0.150,
    0.150, 0.150, 0.150, 0.150, 0.150, 0.152, 0.150, 0.150, 0.150, 0.150, 0.150,
]
rougeL_diff = [
    0.004, 0.014, 0.003, -0.002, -0.004, 0.008, -0.004, -0.004, 0.000, 0.000, 0.004,
    0.002, 0.002, 0.000, 0.000, 0.001, -0.005, 0.001, -0.004, 0.001, 0.001, 0.001,
]

# --- Auth score ---
auth_present = [
    6.627, 6.401, 6.518, 6.611, 6.567, 6.634, 6.511, 6.521, 6.441, 6.509, 6.552,
    6.566, 6.611, 6.480, 6.548, 6.545, 6.488, 6.519, 6.503, 6.427, 6.433, 6.738,
]
auth_absent = [
    6.514, 6.565, 6.542, 6.524, 6.530, 6.497, 6.550, 6.542, 6.544, 6.546, 6.536,
    6.529, 6.522, 6.545, 6.533, 6.532, 6.549, 6.541, 6.549, 6.544, 6.545, 6.518,
]
auth_diff = [
    0.113, -0.164, -0.024, 0.087, 0.037, 0.137, -0.039, -0.021, -0.103, -0.037, 0.016,
    0.037, 0.089, -0.065, 0.015, 0.013, -0.061, -0.022, -0.046, -0.117, -0.112, 0.220,
]

# --- Style score ---
style_present = [
    6.516, 6.456, 6.695, 6.588, 6.605, 6.996, 6.448, 6.434, 6.302, 6.443, 6.498,
    6.457, 6.490, 6.444, 6.460, 6.446, 6.330, 6.479, 6.251, 6.316, 6.313, 6.457,
]
style_absent = [
    6.455, 6.472, 6.417, 6.431, 6.450, 6.376, 6.476, 6.478, 6.490, 6.475, 6.455,
    6.471, 6.458, 6.474, 6.469, 6.473, 6.478, 6.463, 6.521, 6.474, 6.475, 6.466,
]
style_diff = [
    0.061, -0.016, 0.278, 0.157, 0.155, 0.620, -0.028, -0.044, -0.188, -0.032, 0.043,
    -0.014, 0.032, -0.030, -0.009, -0.027, -0.148, 0.016, -0.225, -0.158, -0.162, -0.009,
]

# Assemble the DataFrame. The pair order below fixes the column order:
# the 'Dimension' label first, then Present/Absent/Diff for each metric.
_table_columns = [
    ('Dimension', dimensions),
    ('Cosine_Present', cosine_sim_present),
    ('Cosine_Absent', cosine_sim_absent),
    ('Cosine_Diff', cosine_sim_diff),
    ('ROUGE1_Present', rouge1_present),
    ('ROUGE1_Absent', rouge1_absent),
    ('ROUGE1_Diff', rouge1_diff),
    ('ROUGE2_Present', rouge2_present),
    ('ROUGE2_Absent', rouge2_absent),
    ('ROUGE2_Diff', rouge2_diff),
    ('ROUGEL_Present', rougeL_present),
    ('ROUGEL_Absent', rougeL_absent),
    ('ROUGEL_Diff', rougeL_diff),
    ('Auth_Present', auth_present),
    ('Auth_Absent', auth_absent),
    ('Auth_Diff', auth_diff),
    ('Style_Present', style_present),
    ('Style_Absent', style_absent),
    ('Style_Diff', style_diff),
]
df = pd.DataFrame(dict(_table_columns))

# Everything after the leading 'Dimension' label column is numeric.
numeric_columns = list(df.columns[1:])

# Variance of each numeric column across the dimensions
# (pandas' .var() default, i.e. the sample variance with ddof=1).
variances = df.loc[:, numeric_columns].var()

# Unweighted mean of the per-column variances.
avg_variance = variances.mean()

# Report the per-column figures, then the overall average.
print("Varianz für jede Spalte:")
for col in variances.index:
    var = variances[col]
    print(f"{col}: {var:.6f}")

print("\nDurchschnittliche Varianz über alle Spalten: {:.6f}".format(avg_variance))

# Group the numeric columns by condition, using the tag embedded in each label.
def _columns_tagged(tag):
    """Return the names in ``numeric_columns`` whose label contains ``tag``."""
    return [label for label in numeric_columns if tag in label]


present_columns = _columns_tagged('Present')
absent_columns = _columns_tagged('Absent')
diff_columns = _columns_tagged('Diff')

# Mean of the per-column variances within the Present, Absent and Diff groups.
avg_var_present, avg_var_absent, avg_var_diff = (
    df[group].var().mean()
    for group in (present_columns, absent_columns, diff_columns)
)

print("\nDurchschnittliche Varianz für 'Present'-Spalten: {:.6f}".format(avg_var_present))
print("Durchschnittliche Varianz für 'Absent'-Spalten: {:.6f}".format(avg_var_absent))
print("Durchschnittliche Varianz für 'Diff'-Spalten: {:.6f}".format(avg_var_diff))

# Mean per-column variance within each metric family
# (its Present, Absent and Diff columns taken together).
metrics = ['Cosine', 'ROUGE1', 'ROUGE2', 'ROUGEL', 'Auth', 'Style']
for metric in metrics:
    # A column belongs to the family when the metric name occurs in its label.
    metric_cols = list(filter(lambda label: metric in label, numeric_columns))
    avg_var_metric = df.loc[:, metric_cols].var().mean()
    print(f"\nDurchschnittliche Varianz für {metric}-Spalten: {avg_var_metric:.6f}")

# Pooled variance: every numeric cell of the table treated as one flat sample.
#
# ddof=1 is passed explicitly so this figure uses the same sample-variance
# estimator as the pandas .var() calls elsewhere in this cell. np.var defaults
# to ddof=0 (population variance), which would silently mix two estimators
# in the same report.
#
# NOTE: the columns sit on very different scales (cosine values near 0.77,
# Auth/Style scores near 6.5, Diff columns near 0), so this number mostly
# reflects differences between columns rather than spread within any one metric.
all_values = df[numeric_columns].to_numpy().ravel()
total_variance = np.var(all_values, ddof=1)
print("\nGesamtvarianz (Varianz aller numerischen Werte): {:.6f}".format(total_variance))

Varianz für jede Spalte:
Cosine_Present: 0.000005
Cosine_Absent: 0.000000
Cosine_Diff: 0.000007
ROUGE1_Present: 0.000014
ROUGE1_Absent: 0.000001
ROUGE1_Diff: 0.000019
ROUGE2_Present: 0.000004
ROUGE2_Absent: 0.000000
ROUGE2_Diff: 0.000006
ROUGEL_Present: 0.000014
ROUGEL_Absent: 0.000001
ROUGEL_Diff: 0.000019
Auth_Present: 0.006222
Auth_Absent: 0.000221
Auth_Diff: 0.008424
Style_Present: 0.024336
Style_Absent: 0.000788
Style_Diff: 0.032329

Durchschnittliche Varianz über alle Spalten: 0.004023

Durchschnittliche Varianz für 'Present'-Spalten: 0.005099
Durchschnittliche Varianz für 'Absent'-Spalten: 0.000168
Durchschnittliche Varianz für 'Diff'-Spalten: 0.006801

Durchschnittliche Varianz für Cosine-Spalten: 0.000004

Durchschnittliche Varianz für ROUGE1-Spalten: 0.000011

Durchschnittliche Varianz für ROUGE2-Spalten: 0.000003

Durchschnittliche Varianz für ROUGEL-Spalten: 0.000011

Durchschnittliche Varianz für Auth-Spalten: 0.004956

Durchschnittliche Varianz für Style-Spalten: 0.019151