# Telco Customer Churn - Model Training mit MLflow

In [1]:
# warnings unterdr√ºcken
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from time import time

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             confusion_matrix, roc_curve, roc_auc_score,
                             precision_recall_curve, average_precision_score)

import mlflow
from mlflow.models import infer_signature
from mlflow.tracking import MlflowClient

# 1) Datenvorverarbeitung

In [3]:
# Load the pre-split train / test / validation CSV files
train_data, test_data, val_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../data/day_3/telco-customer-churn/train.csv',
        '../../data/day_3/telco-customer-churn/test.csv',
        '../../data/day_3/telco-customer-churn/validation.csv',
    )
)

print(f"Train: {train_data.shape}, Test: {test_data.shape}, Val: {val_data.shape}")

Train: (4225, 52), Test: (1409, 52), Val: (1409, 52)


In [4]:
# Check the class balance of the target in the training split
churn_counts = train_data['Churn'].value_counts()
churn_rate_pct = train_data['Churn'].mean() * 100

print("Churn Verteilung (Train):")
print(churn_counts)
print(f"\nChurn Rate: {churn_rate_pct:.1f}%")

Churn Verteilung (Train):
Churn
0    3104
1    1121
Name: count, dtype: int64

Churn Rate: 26.5%


In [5]:
# Remove the target, churn-derived columns, the identifier and location columns
# from the feature matrix; only columns actually present in train are dropped.
drop_cols = [
    col
    for col in (
        'Customer ID', 'Churn', 'Churn Category', 'Churn Reason', 'Customer Status',
        'City', 'State', 'Country', 'Zip Code', 'Lat Long', 'Latitude', 'Longitude',
    )
    if col in train_data.columns
]

# Same column filter for every split; the target is always the 'Churn' column
X_train, X_test, X_val = (
    split.drop(columns=drop_cols) for split in (train_data, test_data, val_data)
)
y_train, y_test, y_val = (
    split['Churn'] for split in (train_data, test_data, val_data)
)

print(f"Features: {X_train.shape[1]}")

Features: 40


In [6]:
# One-hot encode the categorical columns and impute missing values.
cat_cols = X_train.select_dtypes(include=['object']).columns.tolist()
print(f"Kategorische Spalten: {len(cat_cols)}")

# Category levels are learned from the training split only. Encoding each split
# independently with drop_first=True drops a *different* baseline level whenever
# a split does not contain the first training level; after column alignment the
# affected rows would silently be encoded as the wrong category.
train_levels = {col: X_train[col].astype('category').cat.categories for col in cat_cols}


def _encode_like_train(frame):
    """One-hot encode `frame` using the category levels observed in X_train."""
    typed = frame.copy()
    for col, levels in train_levels.items():
        # Levels not seen in training become NaN and therefore encode as all zeros
        typed[col] = pd.Categorical(typed[col], categories=levels)
    return pd.get_dummies(typed, columns=cat_cols, drop_first=True)


X_train_enc = _encode_like_train(X_train)

# reindex (instead of inserting columns in place) enforces the training column
# set and order, filling any column a split is missing with 0
X_test_enc = _encode_like_train(X_test).reindex(columns=X_train_enc.columns, fill_value=0)
X_val_enc = _encode_like_train(X_val).reindex(columns=X_train_enc.columns, fill_value=0)

# Impute every split with the training medians, computed once up front so the
# statistics never depend on test/validation data
train_medians = X_train_enc.median()
X_train_enc = X_train_enc.fillna(train_medians)
X_test_enc = X_test_enc.fillna(train_medians)
X_val_enc = X_val_enc.fillna(train_medians)

print(f"Nach Encoding: {X_train_enc.shape[1]} Features")

Kategorische Spalten: 6
Nach Encoding: 45 Features


In [7]:
# Standardize the features; the scaler statistics come from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train_enc).transform(X_train_enc)
X_test_scaled = scaler.transform(X_test_enc)
X_val_scaled = scaler.transform(X_val_enc)

# Wrap the scaled arrays in DataFrames again so the column names are kept for MLflow
feature_names = X_train_enc.columns
X_train_scaled_df, X_test_scaled_df, X_val_scaled_df = (
    pd.DataFrame(scaled_array, columns=feature_names)
    for scaled_array in (X_train_scaled, X_test_scaled, X_val_scaled)
)

In [8]:
# Targets as flat 1-D numpy arrays
y_train_flat, y_test_flat, y_val_flat = (
    np.ravel(target.values) for target in (y_train, y_test, y_val)
)

# 2) Model Training mit MLflow

In [9]:
# Point MLflow at the tracking server and select the experiment
# (the captured output shows MLflow creating it when it does not exist yet)
TRACKING_URI = "http://mlflow_server:5000"
EXPERIMENT_NAME = "Telco Churn Classification"

mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

2026/01/29 13:36:38 INFO mlflow.tracking.fluent: Experiment with name 'Telco Churn Classification' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1769693798493, experiment_id='1', last_update_time=1769693798493, lifecycle_stage='active', name='Telco Churn Classification', tags={}>

## Definieren der Modelle

In [10]:
# Baseline estimators with fixed hyperparameters
SEED = 42  # shared random_state for every estimator that accepts one

log_reg = LogisticRegression(random_state=SEED, max_iter=1000)

rfc = RandomForestClassifier(
    random_state=SEED, n_estimators=100, max_depth=10, min_samples_split=2
)

gbc = GradientBoostingClassifier(random_state=SEED, n_estimators=100, max_depth=5)

knn = KNeighborsClassifier(weights='distance', n_neighbors=5)

# probability=True is required because the evaluation calls predict_proba
svc = SVC(random_state=SEED, probability=True, kernel='rbf', C=1.0)

In [11]:
# Hyperparameter searches; all three share the same cross-validation settings
# and select the candidate with the best recall
grid_search_settings = dict(cv=5, n_jobs=-1, verbose=1, scoring='recall')

log_reg_param_grid = {
    'C': [0.1, 1.0, 10.0],
    'penalty': ['l2'],
    'solver': ['lbfgs', 'liblinear'],
}
rfc_param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [5, 10, 15],
    'min_samples_leaf': [1, 2, 4],
}
gbc_param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.05, 0.1, 0.2],
}

grid_log_reg = GridSearchCV(
    LogisticRegression(max_iter=1000, random_state=42),
    param_grid=log_reg_param_grid,
    **grid_search_settings,
)

grid_rfc = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid=rfc_param_grid,
    **grid_search_settings,
)

grid_gbc = GridSearchCV(
    GradientBoostingClassifier(random_state=42),
    param_grid=gbc_param_grid,
    **grid_search_settings,
)

## Helper Funktionen

In [12]:
def plot_roc_curve(model_name, y_true, y_prob):
    """Draw the ROC curve for one model, save it as a PNG and return its path.

    Args:
        model_name: Label used in the plot title and (spaces replaced by
            underscores) in the file name.
        y_true: True binary labels.
        y_prob: Scores for the positive class.

    Returns:
        Relative path of the PNG written to the current working directory.
    """
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_prob)
    auc_value = roc_auc_score(y_true, y_prob)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(false_pos_rate, true_pos_rate, color='blue', lw=2,
            label=f'ROC (AUC = {auc_value:.3f})')
    # Diagonal reference line between (0, 0) and (1, 1)
    ax.plot([0, 1], [0, 1], color='gray', linestyle='--')
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'ROC Curve - {model_name}')
    ax.legend(loc='lower right')
    ax.grid(True, alpha=0.3)

    file_stem = model_name.replace(' ', '_')
    plot_path = f"roc_curve_{file_stem}.png"
    fig.savefig(plot_path)
    plt.close(fig)
    return plot_path

In [13]:
def plot_confusion_matrix(model_name, y_true, y_pred):
    """Draw the confusion matrix as a heatmap, save it as a PNG and return its path.

    Args:
        model_name: Label used in the plot title and (spaces replaced by
            underscores) in the file name.
        y_true: True binary labels.
        y_pred: Predicted labels.

    Returns:
        Relative path of the PNG written to the current working directory.
    """
    class_labels = ['No Churn', 'Churn']
    matrix = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_labels,
                yticklabels=class_labels,
                ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title(f'Confusion Matrix - {model_name}')

    file_stem = model_name.replace(' ', '_')
    plot_path = f"confusion_matrix_{file_stem}.png"
    fig.savefig(plot_path)
    plt.close(fig)
    return plot_path

In [14]:
def plot_precision_recall_curve(model_name, y_true, y_prob):
    """Draw the precision-recall curve, save it as a PNG and return its path.

    Args:
        model_name: Label used in the plot title and (spaces replaced by
            underscores) in the file name.
        y_true: True binary labels.
        y_prob: Scores for the positive class.

    Returns:
        Relative path of the PNG written to the current working directory.
    """
    precision_values, recall_values, _ = precision_recall_curve(y_true, y_prob)
    avg_precision = average_precision_score(y_true, y_prob)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(recall_values, precision_values, color='blue', lw=2,
            label=f'PR (AP = {avg_precision:.3f})')
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title(f'Precision-Recall Curve - {model_name}')
    ax.legend(loc='lower left')
    ax.grid(True, alpha=0.3)

    file_stem = model_name.replace(' ', '_')
    plot_path = f"pr_curve_{file_stem}.png"
    fig.savefig(plot_path)
    plt.close(fig)
    return plot_path

## MLflow Training Funktion

In [15]:
def train_evaluate_log_model(
    model_obj,
    model_name,
    X_train_data,
    y_train_data,
    X_test_data,
    y_test_data,
    feature_columns,
    registered_model_name,
    needs_scaling=True
):
    """
    Train, evaluate and log one model inside a single MLflow run.

    The run records the fit duration, the hyperparameters, the feature
    importances (when the estimator exposes them), the registered model, the
    evaluation metrics and three diagnostic plots. Recall is the metric of
    interest because the goal is to catch churners, so a recall-weighted score
    ``(2 * recall + precision) / 3`` is logged alongside the standard metrics.

    Args:
        model_obj: Unfitted estimator or ``GridSearchCV``. For a grid search
            the refitted ``best_estimator_`` is logged and evaluated.
        model_name: MLflow run name; also used in plot titles and file names.
        X_train_data: Training features.
        y_train_data: Training labels.
        X_test_data: Features the metrics are computed on.
        y_test_data: Labels the metrics are computed on.
        feature_columns: Feature names matching ``feature_importances_``, or
            ``None`` to skip logging feature importances.
        registered_model_name: Name under which the model is registered.
        needs_scaling: Currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        dict with the keys ``model_name``, ``recall`` and ``weighted_score``.
    """
    with mlflow.start_run(run_name=model_name):
        print(f"\n--- Starting MLflow run for: {model_name} ---")

        is_grid_search = isinstance(model_obj, GridSearchCV)

        # training
        print(f"  Training {model_name}...")
        start_time = time()
        model_obj.fit(X_train_data, y_train_data)
        duration = time() - start_time

        if is_grid_search:
            print(f"  Tuning time: {duration:.2f}s")
            print(f"  Best params: {model_obj.best_params_}")
            mlflow.log_metric("tuning_time_seconds", duration)
            mlflow.log_params(model_obj.best_params_)
            trained_model = model_obj.best_estimator_
        else:
            print(f"  Training time: {duration:.2f}s")
            mlflow.log_metric("training_time_seconds", duration)
            mlflow.log_params(model_obj.get_params())
            trained_model = model_obj

        # Log feature importances when the estimator provides them. Pairing
        # names and values with zip avoids an IndexError when the name list is
        # longer than the importance vector.
        importances = getattr(trained_model, 'feature_importances_', None)
        if importances is not None and feature_columns is not None:
            feat_imp = {
                str(name): float(value)
                for name, value in zip(feature_columns, importances)
            }
            mlflow.log_dict(feat_imp, "feature_importances.json")
            print("  Feature importances logged.")

        # Log and register the model
        mlflow.sklearn.log_model(
            trained_model,
            "model",
            input_example=X_train_data.head(1) if hasattr(X_train_data, 'head') else X_train_data[:1],
            registered_model_name=registered_model_name
        )
        print(f"  Model registered as '{registered_model_name}'")

        # evaluation
        print(f"  Evaluating {model_name}...")
        y_pred = trained_model.predict(X_test_data)
        if hasattr(trained_model, 'predict_proba'):
            y_prob = trained_model.predict_proba(X_test_data)[:, 1]
        else:
            # Estimators without probability output (e.g. SVC with
            # probability=False) still provide a ranking score, which is all
            # the ROC / PR computations need.
            y_prob = trained_model.decision_function(X_test_data)

        accuracy = accuracy_score(y_test_data, y_pred)
        precision = precision_score(y_test_data, y_pred, zero_division=0)
        recall = recall_score(y_test_data, y_pred, zero_division=0)
        f1 = f1_score(y_test_data, y_pred, zero_division=0)
        auc = roc_auc_score(y_test_data, y_prob)

        # weighted score (2x recall + 1x precision) / 3
        weighted_score = (2 * recall + precision) / 3

        # One batched call instead of six separate requests to the tracking server
        mlflow.log_metrics({
            "test_accuracy": accuracy,
            "test_precision": precision,
            "test_recall": recall,
            "test_f1_score": f1,
            "test_roc_auc": auc,
            "test_weighted_score": weighted_score,
        })

        print(f"  Accuracy:  {accuracy:.4f}")
        print(f"  Precision: {precision:.4f}")
        print(f"  Recall:    {recall:.4f} <-- wichtigste Metrik!")
        print(f"  F1-Score:  {f1:.4f}")
        print(f"  ROC-AUC:   {auc:.4f}")
        print(f"  Weighted:  {weighted_score:.4f}")

        # Create, upload and delete each plot. The finally clause removes the
        # temporary PNG even when the upload fails, so no files are left behind.
        plot_jobs = (
            (plot_confusion_matrix, y_pred),
            (plot_roc_curve, y_prob),
            (plot_precision_recall_curve, y_prob),
        )
        for make_plot, plot_values in plot_jobs:
            plot_path = make_plot(model_name, y_test_data, plot_values)
            try:
                mlflow.log_artifact(plot_path)
            finally:
                os.remove(plot_path)

        print("  Plots logged.")
        print(f"--- Finished MLflow run for: {model_name} ---")

        return {
            'model_name': model_name,
            'recall': recall,
            'weighted_score': weighted_score
        }

## MLflow Runs ausführen

In [16]:
# registered model names
registered_model_names = {
    "LogisticRegression": "ChurnClassifier_LogisticRegression",
    "RandomForest": "ChurnClassifier_RandomForest",
    "GradientBoosting": "ChurnClassifier_GradientBoosting",
    "KNN": "ChurnClassifier_KNN",
    "SVC": "ChurnClassifier_SVC"
}

In [17]:
# Accumulates the summary dict of every run for the comparison further down
all_results = list()

In [18]:
# Baseline logistic regression on the standardized features
result = train_evaluate_log_model(
    model_obj=log_reg,
    model_name="Base LogisticRegression",
    X_train_data=X_train_scaled_df,
    y_train_data=y_train_flat,
    X_test_data=X_test_scaled_df,
    y_test_data=y_test_flat,
    feature_columns=list(X_train_enc.columns),
    registered_model_name=registered_model_names["LogisticRegression"],
)
all_results.append(result)


--- Starting MLflow run for: Base LogisticRegression ---
  Training Base LogisticRegression...
  Training time: 0.02s


Successfully registered model 'ChurnClassifier_LogisticRegression'.
2026/01/29 13:36:44 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ChurnClassifier_LogisticRegression, version 1
Created version '1' of model 'ChurnClassifier_LogisticRegression'.


  Model registered as 'ChurnClassifier_LogisticRegression'
  Evaluating Base LogisticRegression...
  Accuracy:  0.9780
  Precision: 0.9648
  Recall:    0.9519 <-- wichtigste Metrik!
  F1-Score:  0.9583
  ROC-AUC:   0.9974
  Weighted:  0.9562
  Plots logged.
--- Finished MLflow run for: Base LogisticRegression ---
üèÉ View run Base LogisticRegression at: http://mlflow_server:5000/#/experiments/1/runs/dac2e05196b14a05b3a9eaa35d74c8af
üß™ View experiment at: http://mlflow_server:5000/#/experiments/1


In [19]:
# Baseline random forest (needs no scaling, so the unscaled encoded features are used)
result = train_evaluate_log_model(
    model_obj=rfc,
    model_name="Base RandomForest",
    X_train_data=X_train_enc,
    y_train_data=y_train_flat,
    X_test_data=X_test_enc,
    y_test_data=y_test_flat,
    feature_columns=list(X_train_enc.columns),
    registered_model_name=registered_model_names["RandomForest"],
)
all_results.append(result)


--- Starting MLflow run for: Base RandomForest ---
  Training Base RandomForest...
  Training time: 0.34s
  Feature importances logged.


Successfully registered model 'ChurnClassifier_RandomForest'.
2026/01/29 13:36:48 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ChurnClassifier_RandomForest, version 1
Created version '1' of model 'ChurnClassifier_RandomForest'.


  Model registered as 'ChurnClassifier_RandomForest'
  Evaluating Base RandomForest...
  Accuracy:  0.9773
  Precision: 0.9886
  Recall:    0.9251 <-- wichtigste Metrik!
  F1-Score:  0.9558
  ROC-AUC:   0.9966
  Weighted:  0.9463
  Plots logged.
--- Finished MLflow run for: Base RandomForest ---
üèÉ View run Base RandomForest at: http://mlflow_server:5000/#/experiments/1/runs/ba4edf26623c41c495d19d020f155731
üß™ View experiment at: http://mlflow_server:5000/#/experiments/1


In [20]:
# Baseline gradient boosting on the unscaled encoded features
result = train_evaluate_log_model(
    model_obj=gbc,
    model_name="Base GradientBoosting",
    X_train_data=X_train_enc,
    y_train_data=y_train_flat,
    X_test_data=X_test_enc,
    y_test_data=y_test_flat,
    feature_columns=list(X_train_enc.columns),
    registered_model_name=registered_model_names["GradientBoosting"],
)
all_results.append(result)


--- Starting MLflow run for: Base GradientBoosting ---
  Training Base GradientBoosting...
  Training time: 1.86s
  Feature importances logged.


Successfully registered model 'ChurnClassifier_GradientBoosting'.
2026/01/29 13:36:54 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ChurnClassifier_GradientBoosting, version 1
Created version '1' of model 'ChurnClassifier_GradientBoosting'.


  Model registered as 'ChurnClassifier_GradientBoosting'
  Evaluating Base GradientBoosting...
  Accuracy:  0.9808
  Precision: 0.9780
  Recall:    0.9492 <-- wichtigste Metrik!
  F1-Score:  0.9634
  ROC-AUC:   0.9986
  Weighted:  0.9588
  Plots logged.
--- Finished MLflow run for: Base GradientBoosting ---
üèÉ View run Base GradientBoosting at: http://mlflow_server:5000/#/experiments/1/runs/26db7eab0bb24e1a82da0d100febe317
üß™ View experiment at: http://mlflow_server:5000/#/experiments/1


In [21]:
# Baseline k-nearest-neighbours on the standardized features
# (feature_columns=None: no feature importances are logged for this run)
result = train_evaluate_log_model(
    model_obj=knn,
    model_name="Base KNN",
    X_train_data=X_train_scaled_df,
    y_train_data=y_train_flat,
    X_test_data=X_test_scaled_df,
    y_test_data=y_test_flat,
    feature_columns=None,
    registered_model_name=registered_model_names["KNN"],
)
all_results.append(result)


--- Starting MLflow run for: Base KNN ---
  Training Base KNN...
  Training time: 0.00s


Successfully registered model 'ChurnClassifier_KNN'.
2026/01/29 13:36:57 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ChurnClassifier_KNN, version 1
Created version '1' of model 'ChurnClassifier_KNN'.


  Model registered as 'ChurnClassifier_KNN'
  Evaluating Base KNN...
  Accuracy:  0.9099
  Precision: 0.8241
  Recall:    0.8396 <-- wichtigste Metrik!
  F1-Score:  0.8318
  ROC-AUC:   0.9647
  Weighted:  0.8344
  Plots logged.
--- Finished MLflow run for: Base KNN ---
üèÉ View run Base KNN at: http://mlflow_server:5000/#/experiments/1/runs/eea0063dda804533868e7df15228b00e
üß™ View experiment at: http://mlflow_server:5000/#/experiments/1


In [22]:
# Baseline RBF support vector classifier on the standardized features
# (feature_columns=None: no feature importances are logged for this run)
result = train_evaluate_log_model(
    model_obj=svc,
    model_name="Base SVC",
    X_train_data=X_train_scaled_df,
    y_train_data=y_train_flat,
    X_test_data=X_test_scaled_df,
    y_test_data=y_test_flat,
    feature_columns=None,
    registered_model_name=registered_model_names["SVC"],
)
all_results.append(result)


--- Starting MLflow run for: Base SVC ---
  Training Base SVC...
  Training time: 0.45s


Successfully registered model 'ChurnClassifier_SVC'.
2026/01/29 13:37:01 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ChurnClassifier_SVC, version 1
Created version '1' of model 'ChurnClassifier_SVC'.


  Model registered as 'ChurnClassifier_SVC'
  Evaluating Base SVC...
  Accuracy:  0.9801
  Precision: 0.9727
  Recall:    0.9519 <-- wichtigste Metrik!
  F1-Score:  0.9622
  ROC-AUC:   0.9971
  Weighted:  0.9588
  Plots logged.
--- Finished MLflow run for: Base SVC ---
üèÉ View run Base SVC at: http://mlflow_server:5000/#/experiments/1/runs/26b734cb819e4416bc3e8b675f58ce7f
üß™ View experiment at: http://mlflow_server:5000/#/experiments/1


In [23]:
# Grid-searched logistic regression; registered under the same name as the baseline
result = train_evaluate_log_model(
    model_obj=grid_log_reg,
    model_name="Tuned LogisticRegression (GridSearch)",
    X_train_data=X_train_scaled_df,
    y_train_data=y_train_flat,
    X_test_data=X_test_scaled_df,
    y_test_data=y_test_flat,
    feature_columns=list(X_train_enc.columns),
    registered_model_name=registered_model_names["LogisticRegression"],
)
all_results.append(result)


--- Starting MLflow run for: Tuned LogisticRegression (GridSearch) ---
  Training Tuned LogisticRegression (GridSearch)...
Fitting 5 folds for each of 6 candidates, totalling 30 fits
  Tuning time: 2.80s
  Best params: {'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}


Registered model 'ChurnClassifier_LogisticRegression' already exists. Creating a new version of this model...
2026/01/29 13:37:07 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ChurnClassifier_LogisticRegression, version 2
Created version '2' of model 'ChurnClassifier_LogisticRegression'.


  Model registered as 'ChurnClassifier_LogisticRegression'
  Evaluating Tuned LogisticRegression (GridSearch)...
  Accuracy:  0.9815
  Precision: 0.9703
  Recall:    0.9599 <-- wichtigste Metrik!
  F1-Score:  0.9651
  ROC-AUC:   0.9978
  Weighted:  0.9634
  Plots logged.
--- Finished MLflow run for: Tuned LogisticRegression (GridSearch) ---
üèÉ View run Tuned LogisticRegression (GridSearch) at: http://mlflow_server:5000/#/experiments/1/runs/f224d74a4f874abab2aa6ce44f855665
üß™ View experiment at: http://mlflow_server:5000/#/experiments/1


In [24]:
# Grid-searched random forest; registered under the same name as the baseline
result = train_evaluate_log_model(
    model_obj=grid_rfc,
    model_name="Tuned RandomForest (GridSearch)",
    X_train_data=X_train_enc,
    y_train_data=y_train_flat,
    X_test_data=X_test_enc,
    y_test_data=y_test_flat,
    feature_columns=list(X_train_enc.columns),
    registered_model_name=registered_model_names["RandomForest"],
)
all_results.append(result)


--- Starting MLflow run for: Tuned RandomForest (GridSearch) ---
  Training Tuned RandomForest (GridSearch)...
Fitting 5 folds for each of 27 candidates, totalling 135 fits
  Tuning time: 6.75s
  Best params: {'max_depth': 15, 'min_samples_leaf': 1, 'n_estimators': 50}
  Feature importances logged.


Registered model 'ChurnClassifier_RandomForest' already exists. Creating a new version of this model...
2026/01/29 13:37:18 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ChurnClassifier_RandomForest, version 2
Created version '2' of model 'ChurnClassifier_RandomForest'.


  Model registered as 'ChurnClassifier_RandomForest'
  Evaluating Tuned RandomForest (GridSearch)...
  Accuracy:  0.9780
  Precision: 0.9858
  Recall:    0.9305 <-- wichtigste Metrik!
  F1-Score:  0.9574
  ROC-AUC:   0.9974
  Weighted:  0.9489
  Plots logged.
--- Finished MLflow run for: Tuned RandomForest (GridSearch) ---
üèÉ View run Tuned RandomForest (GridSearch) at: http://mlflow_server:5000/#/experiments/1/runs/05b18a80a29844b593a31b1ab02749de
üß™ View experiment at: http://mlflow_server:5000/#/experiments/1


In [25]:
# Grid-searched gradient boosting; registered under the same name as the baseline
result = train_evaluate_log_model(
    model_obj=grid_gbc,
    model_name="Tuned GradientBoosting (GridSearch)",
    X_train_data=X_train_enc,
    y_train_data=y_train_flat,
    X_test_data=X_test_enc,
    y_test_data=y_test_flat,
    feature_columns=list(X_train_enc.columns),
    registered_model_name=registered_model_names["GradientBoosting"],
)
all_results.append(result)


--- Starting MLflow run for: Tuned GradientBoosting (GridSearch) ---
  Training Tuned GradientBoosting (GridSearch)...
Fitting 5 folds for each of 18 candidates, totalling 90 fits
  Tuning time: 18.13s
  Best params: {'learning_rate': 0.2, 'max_depth': 5, 'n_estimators': 50}
  Feature importances logged.


Registered model 'ChurnClassifier_GradientBoosting' already exists. Creating a new version of this model...
2026/01/29 13:37:40 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ChurnClassifier_GradientBoosting, version 2
Created version '2' of model 'ChurnClassifier_GradientBoosting'.


  Model registered as 'ChurnClassifier_GradientBoosting'
  Evaluating Tuned GradientBoosting (GridSearch)...
  Accuracy:  0.9823
  Precision: 0.9781
  Recall:    0.9545 <-- wichtigste Metrik!
  F1-Score:  0.9662
  ROC-AUC:   0.9987
  Weighted:  0.9624
  Plots logged.
--- Finished MLflow run for: Tuned GradientBoosting (GridSearch) ---
üèÉ View run Tuned GradientBoosting (GridSearch) at: http://mlflow_server:5000/#/experiments/1/runs/0bdd45332cdf4420ae45aa105ea3420f
üß™ View experiment at: http://mlflow_server:5000/#/experiments/1


In [26]:
# Banner marking the end of the training runs
banner_rule = "=" * 50
print(f"\n{banner_rule}")
print("All MLflow runs complete!")
print(banner_rule)


All MLflow runs complete!


## Ergebnisse vergleichen

In [27]:
# Collect the per-run summaries in one frame, best weighted score first
results_df = pd.DataFrame(all_results).sort_values('weighted_score', ascending=False)
print("\nModell-Ranking (nach Weighted Score = 2x Recall + 1x Precision):")
results_df


Modell-Ranking (nach Weighted Score = 2x Recall + 1x Precision):


Unnamed: 0,model_name,recall,weighted_score
5,Tuned LogisticRegression (GridSearch),0.959893,0.963352
7,Tuned GradientBoosting (GridSearch),0.954545,0.962391
4,Base SVC,0.951872,0.958807
2,Base GradientBoosting,0.949198,0.958786
0,Base LogisticRegression,0.951872,0.956171
6,Tuned RandomForest (GridSearch),0.930481,0.948933
1,Base RandomForest,0.925134,0.94628
3,Base KNN,0.839572,0.83443


## Champion Modell bestimmen

In [28]:
# Promote the best registered ChurnClassifier version to the global "champion" alias.
client = MlflowClient()

# The weighted score (2x recall + precision) is the champion metric
CHAMPION_METRIC = "test_weighted_score"
GLOBAL_CHAMPION_ALIAS = "champion"

print("--- MLflow Champion Promotion ---")
print(f"Champion wird basierend auf '{CHAMPION_METRIC}' ausgew√§hlt")
print("(Weighted Score = 2x Recall + 1x Precision, weil Recall wichtiger ist!)")

try:
    registered_models = client.search_registered_models()
    if not registered_models:
        print("Keine registrierten Modelle gefunden.")
    else:
        print("\n--- Collecting All Model Versions ---")
        all_versions = []
        # model name -> version currently carrying the champion alias. Tracked
        # separately from all_versions so an alias sitting on a version WITHOUT
        # the champion metric is still cleared; otherwise two models could end
        # up tagged as champion at the same time.
        alias_holders = {}

        for rm in registered_models:
            if not rm.name.startswith("ChurnClassifier"):
                continue

            print(f"  Processing: {rm.name}")

            # NOTE(review): the model-level alias map is consulted in addition
            # to the per-version aliases because version search results may not
            # carry aliases on every backend - confirm against the MLflow
            # version in use.
            model_aliases = getattr(rm, "aliases", None)
            if isinstance(model_aliases, dict) and GLOBAL_CHAMPION_ALIAS in model_aliases:
                alias_holders[rm.name] = model_aliases[GLOBAL_CHAMPION_ALIAS]

            model_versions = client.search_model_versions(f"name='{rm.name}'")

            for mv in model_versions:
                if GLOBAL_CHAMPION_ALIAS in (mv.aliases or []):
                    alias_holders[rm.name] = mv.version

                try:
                    run = client.get_run(mv.run_id)
                    metric_value = run.data.metrics.get(CHAMPION_METRIC)

                    if metric_value is not None:
                        all_versions.append({
                            "registered_model_name": rm.name,
                            "version": mv.version,
                            "run_id": mv.run_id,
                            "metric_value": metric_value,
                            "aliases": mv.aliases
                        })
                        print(f"    Version {mv.version}: {CHAMPION_METRIC}={metric_value:.4f}")
                except Exception as e:
                    # Best effort: a version whose run cannot be read is skipped
                    print(f"    Error: {e}")

        if all_versions:
            # Pick the version with the highest champion metric
            best = max(all_versions, key=lambda x: x['metric_value'])
            print("\n--- Champion gefunden ---")
            print(f"Model: {best['registered_model_name']}")
            print(f"Version: {best['version']}")
            print(f"{CHAMPION_METRIC}: {best['metric_value']:.4f}")

            # Remove the alias from every model that currently holds it ...
            for holder_name, holder_version in alias_holders.items():
                client.delete_registered_model_alias(
                    name=holder_name,
                    alias=GLOBAL_CHAMPION_ALIAS
                )
                print(f"  Removed '{GLOBAL_CHAMPION_ALIAS}' from {holder_name} v{holder_version}")

            # ... then point it at the new champion
            client.set_registered_model_alias(
                name=best['registered_model_name'],
                alias=GLOBAL_CHAMPION_ALIAS,
                version=best['version']
            )
            print(f"\n‚úì Champion alias gesetzt auf {best['registered_model_name']} Version {best['version']}")

except Exception as e:
    print(f"Error: {e}")

print("\n--- Champion Promotion abgeschlossen ---")

--- MLflow Champion Promotion ---
Champion wird basierend auf 'test_weighted_score' ausgew√§hlt
(Weighted Score = 2x Recall + 1x Precision, weil Recall wichtiger ist!)

--- Collecting All Model Versions ---
  Processing: ChurnClassifier_GradientBoosting
    Version 2: test_weighted_score=0.9624
    Version 1: test_weighted_score=0.9588
  Processing: ChurnClassifier_KNN
    Version 1: test_weighted_score=0.8344
  Processing: ChurnClassifier_LogisticRegression
    Version 2: test_weighted_score=0.9634
    Version 1: test_weighted_score=0.9562
  Processing: ChurnClassifier_RandomForest
    Version 2: test_weighted_score=0.9489
    Version 1: test_weighted_score=0.9463
  Processing: ChurnClassifier_SVC
    Version 1: test_weighted_score=0.9588

--- Champion gefunden ---
Model: ChurnClassifier_LogisticRegression
Version: 2
test_weighted_score: 0.9634

‚úì Champion alias gesetzt auf ChurnClassifier_LogisticRegression Version 2

--- Champion Promotion abgeschlossen ---


In [29]:
# Closing pointer to the MLflow UI
closing_message = "\nFertig! Check MLflow UI f√ºr alle Ergebnisse."
print(closing_message)


Fertig! Check MLflow UI f√ºr alle Ergebnisse.
