In [2]:
import mlflow
import mlflow.sklearn

mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

In [None]:
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef
)
from sklearn.preprocessing import LabelEncoder
import mlflow
import mlflow.sklearn
import pandas as pd
import numpy as np

# Load data
df = pd.read_csv(r'C:\Users\hadis\OneDrive\My One Drive (Hadiseh)\H Documents\Datascience-Seminar\churn\Dataset\Databel - Data.csv')

# Select features and target
column_selection = df[['Churn Label', 'Account Length (in months)', 'Local Calls', 'Local Mins', 'Intl Calls', 'Intl Mins',
                       'Intl Active', 'Intl Plan', 'Extra International Charges', 'Customer Service Calls', 'Avg Monthly GB Download',
                       'Unlimited Data Plan', 'Extra Data Charges', 'Gender', 'Age', 'Under 30', 'Senior', 'Group',
                       'Number of Customers in Group', 'Device Protection & Online Backup', 'Contract Type', 'Payment Method',
                       'Monthly Charge', 'Total Charges', 'Churn Category', 'Churn Reason']]

features = column_selection.drop('Churn Label', axis=1)
X = pd.get_dummies(features, drop_first=True)

# Encode the target variable (Churn Label) 
y = df['Churn Label']
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Define the models to compare
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Extra Trees Classifier": ExtraTreesClassifier(),
    "AdaBoost": AdaBoostClassifier(estimator=DecisionTreeClassifier(), n_estimators=50, random_state=42),
    "XGBoost": xgb.XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=6, random_state=42),
    "CatBoost": CatBoostClassifier(iterations=100, learning_rate=0.1, depth=6, random_seed=42, verbose=False),
    "SVM": SVC(probability=True),
    "KNN": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB()
}

# Loop through the models and log each one in MLflow
for model_name, model in models.items():
    with mlflow.start_run(run_name=model_name):
        # Train the model
        model.fit(X_train, y_train)

        # Make predictions
        predictions = model.predict(X_test)
        probabilities = model.predict_proba(X_test)[:, 1] if hasattr(model, 'predict_proba') else None

        # Evaluate the model
        accuracy = accuracy_score(y_test, predictions)
        precision = precision_score(y_test, predictions, zero_division=0)
        recall = recall_score(y_test, predictions, zero_division=0)
        f1 = f1_score(y_test, predictions)
        mcc = matthews_corrcoef(y_test, predictions)
        auc = roc_auc_score(y_test, probabilities) if probabilities is not None else None

        # Log parameters and metrics with MLflow
        mlflow.log_param("model_name", model_name)
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)
        mlflow.log_metric("mcc", mcc)
        if auc is not None:
            mlflow.log_metric("AUC", auc)

        # Log confusion matrix as an artifact
        confusion_mat = confusion_matrix(y_test, predictions)
        np.save(f"confusion_matrix_{model_name}.npy", confusion_mat)
        mlflow.log_artifact(f"confusion_matrix_{model_name}.npy")

        # Log the model
        input_example = X_train.iloc[0].values.reshape(1, -1)  
        mlflow.sklearn.log_model(model, f"{model_name}_model", input_example=input_example)

        # Print evaluation results
        print(f"Results for {model_name}:")
        print(f"Accuracy: {accuracy:.2f}")
        print(f"Precision: {precision:.2f}")
        print(f"Recall: {recall:.2f}")
        print(f"F1 Score: {f1:.2f}")
        print(f"MCC: {mcc:.2f}")
        if auc is not None:
            print(f"AUC: {auc:.2f}")
        print("Classification Report:")
        print(classification_report(y_test, predictions))


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]



Results for Logistic Regression:
Accuracy: 0.87
Precision: 0.79
Recall: 0.68
F1 Score: 0.73
MCC: 0.65
AUC: 0.91
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.93      0.91      2447
           1       0.79      0.68      0.73       897

    accuracy                           0.87      3344
   macro avg       0.84      0.81      0.82      3344
weighted avg       0.86      0.87      0.86      3344

🏃 View run Logistic Regression at: http://127.0.0.1:5000/#/experiments/0/runs/01a1f08bf6354c7fb9bd789b0ed5cef8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]



Results for Decision Tree:
Accuracy: 0.97
Precision: 0.94
Recall: 0.95
F1 Score: 0.94
MCC: 0.92
AUC: 0.96
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      2447
           1       0.94      0.95      0.94       897

    accuracy                           0.97      3344
   macro avg       0.96      0.96      0.96      3344
weighted avg       0.97      0.97      0.97      3344

🏃 View run Decision Tree at: http://127.0.0.1:5000/#/experiments/0/runs/cddb43f05df1430eaa3f928c7dc3578b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]



Results for Random Forest:
Accuracy: 0.99
Precision: 0.99
Recall: 0.96
F1 Score: 0.97
MCC: 0.96
AUC: 0.99
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      2447
           1       0.99      0.96      0.97       897

    accuracy                           0.99      3344
   macro avg       0.99      0.98      0.98      3344
weighted avg       0.99      0.99      0.99      3344

🏃 View run Random Forest at: http://127.0.0.1:5000/#/experiments/0/runs/cbd0a2a6f07f4fd89a4c7b6e47206b09
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]



Results for Extra Trees Classifier:
Accuracy: 0.99
Precision: 1.00
Recall: 0.95
F1 Score: 0.97
MCC: 0.97
AUC: 0.99
Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      2447
           1       1.00      0.95      0.97       897

    accuracy                           0.99      3344
   macro avg       0.99      0.98      0.98      3344
weighted avg       0.99      0.99      0.99      3344

🏃 View run Extra Trees Classifier at: http://127.0.0.1:5000/#/experiments/0/runs/6d48afbfac584cc8a55c99eb505a7caa
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]



Results for AdaBoost:
Accuracy: 0.97
Precision: 0.93
Recall: 0.95
F1 Score: 0.94
MCC: 0.92
AUC: 0.96
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      2447
           1       0.93      0.95      0.94       897

    accuracy                           0.97      3344
   macro avg       0.96      0.96      0.96      3344
weighted avg       0.97      0.97      0.97      3344

🏃 View run AdaBoost at: http://127.0.0.1:5000/#/experiments/0/runs/01e40534c9bb42a7b754152f39b5ddcb
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Results for XGBoost:
Accuracy: 0.99
Precision: 0.99
Recall: 0.96
F1 Score: 0.97
MCC: 0.96
AUC: 0.99
Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      2447
           1       0.99      0.96      0.97       897

    accuracy                           0.99      3344
   macro avg       0.99      0.98      0.98      3344
weighted avg       0.99      0.99      0.99      3344

🏃 View run XGBoost at: http://127.0.0.1:5000/#/experiments/0/runs/5344c6ddb26242e08859626716a93b1a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Results for CatBoost:
Accuracy: 0.99
Precision: 1.00
Recall: 0.95
F1 Score: 0.97
MCC: 0.97
AUC: 0.99
Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      2447
           1       1.00      0.95      0.97       897

    accuracy                           0.99      3344
   macro avg       0.99      0.98      0.98      3344
weighted avg       0.99      0.99      0.99      3344

🏃 View run CatBoost at: http://127.0.0.1:5000/#/experiments/0/runs/92b318412efa489cbbbb45634cee4abd
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results for SVM:
Accuracy: 0.73
Precision: 0.00
Recall: 0.00
F1 Score: 0.00
MCC: 0.00
AUC: 0.81
Classification Report:
              precision    recall  f1-score   support

           0       0.73      1.00      0.85      2447
           1       0.00      0.00      0.00       897

    accuracy                           0.73      3344
   macro avg       0.37      0.50      0.42      3344
weighted avg       0.54      0.73      0.62      3344

🏃 View run SVM at: http://127.0.0.1:5000/#/experiments/0/runs/291169db04ba453194868e25b6cf340e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]



Results for KNN:
Accuracy: 0.75
Precision: 0.56
Recall: 0.41
F1 Score: 0.47
MCC: 0.32
AUC: 0.72
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.88      0.84      2447
           1       0.56      0.41      0.47       897

    accuracy                           0.75      3344
   macro avg       0.68      0.64      0.66      3344
weighted avg       0.74      0.75      0.74      3344

🏃 View run KNN at: http://127.0.0.1:5000/#/experiments/0/runs/91a2e4679f044f02a519ef6e2403409e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Results for Naive Bayes:
Accuracy: 0.98
Precision: 1.00
Recall: 0.94
F1 Score: 0.96
MCC: 0.95
AUC: 0.99
Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      2447
           1       1.00      0.94      0.96       897

    accuracy                           0.98      3344
   macro avg       0.99      0.97      0.98      3344
weighted avg       0.98      0.98      0.98      3344

🏃 View run Naive Bayes at: http://127.0.0.1:5000/#/experiments/0/runs/4364e9fa12504156b8576d714f9d86ee
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0


