# 1) Laden der Daten und trainierten Modelle

In [None]:
import pickle
import pandas as pd

In [None]:
# Read the test features (X_test) and the matching test labels (y_test)
# from their parquet files in one pass.
X_test, y_test = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        "../../data/day_3/X_test.parquet",
        "../../data/day_3/y_test.parquet",
    )
)

In [None]:
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``.

    NOTE: unpickling can execute arbitrary code, so this must only be used
    on trusted model files.
    """
    with open(path, "rb") as model_file:
        return pickle.load(model_file)


# Each classifier is loaded in two variants: the "base" model and its
# "tuned" counterpart.
base_rfc = _load_pickle("../../models/random_forest_model.pkl")
tuned_rfc = _load_pickle("../../models/tuned_rfc_model.pkl")

base_knn = _load_pickle("../../models/KNN_model.pkl")
tuned_knn = _load_pickle("../../models/tuned_knn_model.pkl")

base_svc = _load_pickle("../../models/SVC_model.pkl")
tuned_svc = _load_pickle("../../models/tuned_svc_model.pkl")

In [None]:
# All loaded classifiers, keyed by the name that is used in printed output
# and in plot titles further down.
MODELS = {
    "base_rfc": base_rfc,
    "tuned_rfc": tuned_rfc,
    "base_knn": base_knn,
    "tuned_knn": tuned_knn,
    "base_svc": base_svc,
    "tuned_svc": tuned_svc,
}

# 2) Vorhersage der Testdaten

In [None]:
def predict(model, X_test):
    """Return the predictions of ``model`` for the samples in ``X_test``.

    Thin wrapper that delegates to ``model.predict``.
    """
    predicted = model.predict(X_test)
    return predicted

In [None]:
# Collect the test-set predictions of every model under the model's name.
predictions = {}
for model_name, model in MODELS.items():
    print(f"Predicting with {model_name}...")
    predictions[model_name] = y_pred = predict(model, X_test)

### Abspeichern der Vorhersagen

In [None]:
# One column of predictions per model, plus the true labels taken from the
# 'Weather Type' column of y_test as a final 'groundtruth' column.
predictions_df = pd.DataFrame(predictions).assign(
    groundtruth=y_test['Weather Type'].values
)
predictions_df

# 3) Visualisieren der Vorhersagen

## 3.1) Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

def create_confusion_matrix(model_name, y_true, y_pred):
    """Plot the confusion matrix of one model as an annotated heatmap.

    Args:
        model_name: Name of the model, shown in the plot title.
        y_true: True labels.
        y_pred: Labels predicted by the model.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    matrix = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(10, 7))
    # The heatmap is drawn on the current axes, which it also returns; the
    # title and axis labels are attached to those same axes.
    ax = sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues')
    ax.set_title(f'Confusion Matrix des Modells {model_name}')
    ax.set_xlabel('Vorhergesagt')
    ax.set_ylabel('Tatsächlich')
    plt.show()

In [None]:
# Draw one confusion matrix per model from the stored predictions.
for model_name in predictions:
    y_pred = predictions[model_name]
    create_confusion_matrix(model_name, y_test, y_pred)

## 3.2) Precision Recall curve

In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt

def plot_precision_recall_curve(model_name, model, target_classes):
    """Plot one-vs-rest precision-recall curves of a model on the test data.

    Reads the notebook-level ``X_test`` and ``y_test``. The figure is
    displayed with ``plt.show()``; nothing is returned.

    Args:
        model_name: Name of the model, shown in the plot title.
        model: Fitted classifier that provides ``predict_proba``.
        target_classes: Class names used in the legend. Position ``i`` is
            assumed to correspond to the integer label ``i`` in ``y_test``
            and to column ``i`` of the ``predict_proba`` output.
    """
    # Binarize the true labels: one indicator column per class.
    y_test_bin = label_binarize(y_test, classes=range(len(target_classes)))
    y_score = model.predict_proba(X_test)

    # Plot PR curve for each class
    plt.figure(figsize=(10, 7))
    colors = ['blue', 'red', 'green', 'orange']
    # Iterate over the classes that were actually passed in rather than a
    # hard-coded range(4), which raised IndexError for fewer than four
    # classes. zip() stops at the shorter sequence, so at most len(colors)
    # curves are drawn, as in plot_roc_auc_curve.
    for i, (class_name, color) in enumerate(zip(target_classes, colors)):
        precision, recall, _ = precision_recall_curve(y_test_bin[:, i], y_score[:, i])
        ap = average_precision_score(y_test_bin[:, i], y_score[:, i])
        plt.plot(recall, precision, color=color, lw=2,
                 label=f"{class_name} (AP = {ap:.2f})")

    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"Precision-Recall Curve (One-vs-Rest) for model {model_name}")
    plt.legend(loc="lower left")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

In [None]:
# Load the pickled encoders object from the day_3 data directory
# (presumably a dict of fitted label encoders keyed by column name — confirm).
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open('../../data/day_3/label_encoders.pkl', 'rb') as f:
    encoders_loaded = pickle.load(f)

# Display the classes stored on the "Weather Type" encoder.
encoders_loaded["Weather Type"].classes_

In [None]:
# Plot the precision-recall curves of every model. A failure for one model
# is reported and does not stop the remaining models from being plotted.
for model_name, trained_model in MODELS.items():
    try:
        plot_precision_recall_curve(
            model_name,
            trained_model,
            list(encoders_loaded['Weather Type'].classes_),
        )
    except Exception as exc:
        print(f"Error plotting precision-recall curve for {model_name}: {exc}")

## 3.3) ROC AUC Curve

In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt

def plot_roc_auc_curve(model_name, model, target_classes):
    """Plot one-vs-rest ROC curves of a model on the test data.

    Reads the notebook-level ``X_test`` and ``y_test``. The figure is
    displayed with ``plt.show()``; nothing is returned.

    Args:
        model_name: Name of the model, shown in the plot title.
        model: Fitted classifier that provides ``predict_proba``.
        target_classes: Class names used in the legend. Position ``i`` is
            assumed to correspond to the integer label ``i`` in ``y_test``
            and to column ``i`` of the ``predict_proba`` output.
    """
    # One indicator column per class for the true labels.
    n_classes = len(target_classes)
    y_test_bin = label_binarize(y_test, classes=range(n_classes))
    y_score = model.predict_proba(X_test)

    plt.figure(figsize=(10, 7))
    palette = ['blue', 'red', 'green', 'orange']
    # zip() stops at the shorter sequence, so at most len(palette) curves
    # are drawn.
    for class_idx, (class_name, line_color) in enumerate(zip(target_classes, palette)):
        fpr, tpr, _ = roc_curve(y_test_bin[:, class_idx], y_score[:, class_idx])
        roc_auc = auc(fpr, tpr)
        curve_label = f"{class_name} (AUC = {roc_auc:.2f})"
        plt.plot(fpr, tpr, color=line_color, lw=2, label=curve_label)

    # Dashed diagonal: reference line of a random classifier.
    plt.plot([0, 1], [0, 1], 'k--', lw=2)

    # Axis labels, title, legend and layout.
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title(f"ROC Curve (One-vs-Rest) for model {model_name}")
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [None]:
# Plot the ROC curves of every model. A failure for one model is reported
# and does not stop the remaining models from being plotted.
for model_name, trained_model in MODELS.items():
    try:
        plot_roc_auc_curve(model_name, trained_model, list(encoders_loaded['Weather Type'].classes_))
    except Exception as e:
        # The message names the ROC curve; it previously said
        # "precision-recall curve", copied from the PR loop.
        print(f"Error plotting ROC curve for {model_name}: {e}")

## 3.4) ROC vs. PR

### ROC:
zeigt, wie gut das Modell zwischen den Klassen unterscheiden kann.

Weniger aussagekräftig bei unausgewogener Klassenverteilung, da die False Positive Rate (FPR) bei sehr vielen True Negatives (TN) künstlich niedrig ausfallen kann.

Ein Modell, das zufällig rät, ergibt eine diagonale Linie von (0,0) bis (1,1).

Die AUC (Area Under Curve – Fläche unter der Kurve) dient als zusammenfassender Einzelwert:

1,0 = perfekter Klassifikator

0,5 = nicht besser als Zufall

### Precision-Recall-Kurve
konzentriert sich ausschließlich auf die positive Klasse.

Sie ist besonders nützlich, wenn die positive Klasse selten ist (z. B. bei Betrugserkennung oder Krankheitsdiagnosen).
--> Besser bei unausgewogener Klassenverteilung

Ein Modell, das zufällig rät, ergibt eine horizontale Linie auf Höhe der Häufigkeit der positiven Klasse.

Die Average Precision (AP) ist die Fläche unter dieser Kurve.

Wann was verwenden?

| Situation                                      | ROC-Kurve ✅ | Precision-Recall-Kurve ✅ |
|-----------------------------------------------|--------------|----------------------------|
| **Ausgewogene Klassenverteilung**             | ✅ Gut geeignet | ✅ Gut geeignet           |
| **Unausgewogene Klassenverteilung**           | ❌ Irreführend möglich | ✅ Besser geeignet      |
| Ziel: **Allgemeine Modellqualität bewerten**  | ✅ Ja         | ❌ Weniger aussagekräftig |
| Ziel: **Leistung bei positiver Klasse bewerten** | ❌ Ungeeignet | ✅ Sehr gut geeignet     |
| Fokus auf **True Positive Rate (Empfindlichkeit)** | ✅ Ja         | ✅ Ja                     |
| Fokus auf **Präzision bei positiven Vorhersagen** | ❌ Nicht direkt | ✅ Optimal                |
| Zusammenfassender Kennwert                    | AUC (Area Under Curve) | AP (Average Precision)   |


# 4) Berechnen der Performance der Vorhersagen

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of every model's predictions against the test labels.
for prediction in predictions_df.columns:
    if prediction == "groundtruth":
        # This column holds the true labels, not a model's output.
        continue
    print(f"Classification Accuracy in % for {prediction}:")
    print(
        str(round(100 * accuracy_score(y_test, predictions_df[prediction]), 3))
        + "%"
    )

In [None]:
from sklearn.metrics import precision_score

# Macro-averaged precision of every model's predictions.
for prediction in predictions_df.columns:
    if prediction == "groundtruth":
        # This column holds the true labels, not a model's output.
        continue
    print(f"Classification Precision (Macro) in % for {prediction}:")
    print(
        str(
            round(
                100 * precision_score(y_test, predictions_df[prediction], average="macro"),
                3,
            )
        )
        + "%"
    )

In [None]:
from sklearn.metrics import f1_score

# Macro-averaged F1 score of every model's predictions.
for prediction in predictions_df.columns:
    # Skip the column that holds the true labels.
    if prediction != "groundtruth":
        # The label names the F1 score; it previously said "Precision",
        # copied from the precision cell.
        print(f"Classification F1-Score (Macro) in % for {prediction}:")
        print(str(round(100 * f1_score(y_test, predictions_df[prediction], average="macro"), 3)) + "%")

In [None]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 report for every model, with the classes of
# the "Weather Type" encoder used as row names.
for prediction in predictions_df.columns:
    if prediction == "groundtruth":
        # This column holds the true labels, not a model's output.
        continue
    print(f"Classification Report for {prediction}:")
    print(
        classification_report(
            y_test,
            predictions_df[prediction],
            target_names=encoders_loaded["Weather Type"].classes_,
        )
    )

In [None]:
def predict_probas(model, X_test):
    """Return the output of ``model.predict_proba`` for ``X_test``.

    Thin wrapper that delegates to ``model.predict_proba``.
    """
    probabilities = model.predict_proba(X_test)
    return probabilities

In [None]:
from sklearn.metrics import roc_auc_score

# One-vs-rest ROC AUC of every model, computed from its predict_proba output.
# Note that y_pred holds the predict_proba output here, not predicted labels.
for model_name in MODELS:
    model = MODELS[model_name]
    print(f"Predicting with {model_name}...")
    y_pred = predict_probas(model, X_test)
    print(y_pred, y_test, sep="\n")
    roc_auc = roc_auc_score(y_test, y_pred, multi_class='ovr')
    print("ROC AUC Score:", roc_auc)

# 5) Berechnen der Performance mit Kreuzvalidierung

In [None]:
from sklearn.model_selection import StratifiedKFold, cross_val_score
import numpy as np

# Stratified 5-fold cross-validation of base_rfc, scored with macro F1.
# NOTE(review): the folds are drawn from the test data, and cross_val_score
# fits the estimator anew on each training fold — confirm this is intended.
cv = StratifiedKFold(shuffle=True, n_splits=5, random_state=42)
cv_scores = cross_val_score(
    base_rfc,
    X_test,
    np.ravel(y_test),
    scoring="f1_macro",
    cv=cv,
)
print("F1-Score (5-fold CV):", cv_scores)
print("Durchschnittlicher F1 des base_rfc:", cv_scores.mean())


In [None]:
# Stratified 5-fold cross-validation of tuned_svc, scored with macro F1.
# NOTE(review): the folds are drawn from the test data, and cross_val_score
# fits the estimator anew on each training fold — confirm this is intended.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(tuned_svc, X_test, np.ravel(y_test), cv=cv, scoring="f1_macro")
print("F1-Score (5-fold CV):", cv_scores)
# The label names the evaluated estimator; it previously said "base_svc".
print("Durchschnittlicher F1 des tuned_svc:", np.mean(cv_scores))