In [17]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [None]:
# Load the feature table. The 13 columns feature_0 .. feature_12 are the model
# inputs, 'party' is the target label and 'relevant' is the flag used to pick
# the training and test subsets.
df = pd.read_csv('final-features-rgb.csv')
X_full = df[[f'feature_{i}' for i in range(13)]]
y_full, relevant_full = df['party'], df['relevant']

In [None]:
# Pick the rows used for training and testing via the 'relevant' column.
# - Different values: train on one 'relevant' group and test on the other.
# - Same value: only that group is used and it is divided by a stratified
#   80/20 split (fixed random_state so the split is repeatable).
train_relevant = 0
test_relevant = 0

if train_relevant != test_relevant:
    X_train, y_train = (X_full[relevant_full == train_relevant],
                        y_full[relevant_full == train_relevant])
    X_test, y_test = (X_full[relevant_full == test_relevant],
                      y_full[relevant_full == test_relevant])
else:
    mask = (relevant_full == train_relevant)
    X_selected, y_selected = X_full[mask], y_full[mask]
    X_train, X_test, y_train, y_test = train_test_split(
        X_selected,
        y_selected,
        test_size=0.2,
        stratify=y_selected,
        random_state=42,
    )

In [None]:
def extract_metrics(report):
    """Parse the plain-text output of ``classification_report`` into a dict.

    Each per-class row of the report ends in four numeric columns
    (precision, recall, f1-score, support). The row is parsed from the right,
    so everything in front of those four columns is taken as the class label.
    This keeps labels that contain whitespace (e.g. ``"new party"``), which
    would otherwise be dropped because the second token is not a number.

    The header, blank lines, the ``accuracy`` row and the averaged summary
    rows (``micro avg``, ``macro avg``, ``weighted avg``, ``samples avg``) are
    not included in the result, so only per-class rows are returned.

    Note: calling ``classification_report(..., output_dict=True)`` avoids text
    parsing altogether and is the more robust option where the caller can be
    changed.

    Parameters
    ----------
    report : str
        Text report as returned by ``classification_report``.

    Returns
    -------
    dict
        ``{label: {'Precision': float, 'Recall': float,
        'F1-Score': float, 'Support': int}}`` for every per-class row.
        Rows whose numeric columns cannot be parsed are skipped.
    """
    # Summary rows look like class rows with a two-word label; keep them out
    # so the result contains classes only.
    summary_labels = {"micro avg", "macro avg", "weighted avg", "samples avg"}

    metrics = {}
    for line in report.split("\n"):
        parts = line.split()
        if len(parts) < 5:
            # Header, blank line or the 3-token 'accuracy' row.
            continue

        # Anchor on the right: the last four tokens are the numeric columns,
        # whatever precedes them is the (possibly multi-word) label.
        *label_parts, precision, recall, f1_score, support = parts
        label = " ".join(label_parts)
        if label in summary_labels:
            continue

        try:
            metrics[label] = {
                'Precision': float(precision),
                'Recall': float(recall),
                'F1-Score': float(f1_score),
                'Support': int(support)
            }
        except ValueError:
            # Not a metrics row after all; ignore it.
            continue
    return metrics

In [None]:
# 1. Random Forest
# Trained on the unscaled features; class_weight='balanced' is passed so that
# classes are weighted (see scikit-learn docs), and random_state fixes the seed.
rf = RandomForestClassifier(random_state=42, class_weight='balanced').fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Keep both the printable text report and its parsed per-class metrics.
report_rf = classification_report(y_test, y_pred_rf)
metrics_rf = extract_metrics(report_rf)
print("=== Random Forest ===", report_rf, sep="\n")

=== Random Forest ===
              precision    recall  f1-score   support

         afd       0.59      0.35      0.44       172
         cdu       0.35      0.53      0.43       525
         csu       0.36      0.42      0.39       498
         fdp       0.60      0.22      0.33       161
      gruene       0.50      0.24      0.33       211
       linke       0.48      0.48      0.48       406
         spd       0.32      0.23      0.27       256

    accuracy                           0.40      2229
   macro avg       0.46      0.36      0.38      2229
weighted avg       0.42      0.40      0.39      2229



In [None]:
# 2. Support Vector Machine (SVM)
# Standardise the features first: the scaler is fitted on the training split
# only and then applied to both splits, so no test statistics are used.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# RBF-kernel SVM with class_weight='balanced', trained on the scaled features.
svm = SVC(kernel='rbf', random_state=42, class_weight='balanced').fit(X_train_scaled, y_train)
y_pred_svm = svm.predict(X_test_scaled)

# Keep both the printable text report and its parsed per-class metrics.
report_svm = classification_report(y_test, y_pred_svm)
metrics_svm = extract_metrics(report_svm)
print("=== Support Vector Machine (SVM) ===", report_svm, sep="\n")

=== Support Vector Machine (SVM) ===
              precision    recall  f1-score   support

         afd       0.58      0.69      0.63       239
         cdu       0.53      0.70      0.60       613
         csu       0.69      0.66      0.67       857
         fdp       0.93      0.86      0.89       680
      gruene       0.83      0.68      0.75       321
       linke       0.67      0.50      0.57       440
         spd       0.61      0.64      0.62       307

    accuracy                           0.69      3457
   macro avg       0.69      0.67      0.68      3457
weighted avg       0.70      0.69      0.69      3457



In [None]:
# 3. MLP Classifier
# Two hidden layers (128 and 64 units) with ReLU activation. This cell reuses
# X_train_scaled / X_test_scaled produced in the SVM cell, so that cell has to
# be executed first.
mlp = MLPClassifier(
    hidden_layer_sizes=(128, 64),
    activation='relu',
    alpha=0.0001,
    learning_rate_init=0.001,
    max_iter=1000,
    random_state=42,
).fit(X_train_scaled, y_train)
y_pred_mlp = mlp.predict(X_test_scaled)

# Keep both the printable text report and its parsed per-class metrics.
report_mlp = classification_report(y_test, y_pred_mlp)
metrics_mlp = extract_metrics(report_mlp)
print("=== MLP Classifier ===", report_mlp, sep="\n")

=== MLP Classifier ===
              precision    recall  f1-score   support

         afd       0.72      0.62      0.66       239
         cdu       0.55      0.68      0.61       613
         csu       0.65      0.69      0.67       857
         fdp       0.91      0.86      0.89       680
      gruene       0.78      0.67      0.72       321
       linke       0.66      0.55      0.60       440
         spd       0.61      0.61      0.61       307

    accuracy                           0.69      3457
   macro avg       0.70      0.67      0.68      3457
weighted avg       0.70      0.69      0.69      3457



In [None]:
# Build a per-class F1 comparison of the three models. The class labels are
# taken from the Random Forest metrics; a label missing from the SVM or MLP
# metrics is filled with 0.
metrics_all = {
    label: {
        'Random Forest': rf_scores['F1-Score'],
        'SVM': metrics_svm.get(label, {}).get('F1-Score', 0),
        'MLP': metrics_mlp.get(label, {}).get('F1-Score', 0)
    }
    for label, rf_scores in metrics_rf.items()
}

# Rows = class labels, columns = models.
metrics_df = pd.DataFrame(metrics_all).T

# Extra column: mean F1 of the three models for each class.
metrics_df["Average"] = metrics_df.mean(axis=1)

# Extra row: mean of every column over all classes.
average_row = metrics_df.mean(numeric_only=True).rename("Average")
metrics_df = pd.concat([metrics_df, average_row.to_frame().T])

# Sort best-to-worst by the per-row average. The "Average" summary row takes
# part in the sort, so it is listed among the class rows.
metrics_df = metrics_df.sort_values(by="Average", ascending=False)

# Output
print("=== Modellvergleich ===")
print(metrics_df.round(2))

=== Modellvergleich ===
         Random Forest   SVM   MLP  Average
fdp               0.92  0.89  0.89     0.90
gruene            0.80  0.75  0.72     0.76
Average           0.74  0.68  0.68     0.70
csu               0.72  0.67  0.67     0.69
afd               0.71  0.63  0.66     0.67
spd               0.70  0.62  0.61     0.64
cdu               0.65  0.60  0.61     0.62
linke             0.69  0.57  0.60     0.62
