In [1]:
from src.setup.mlflow_setup import init_mlflow

MLflow initialization module imported.


In [5]:
import os
import mlflow
import mlflow.sklearn
import numpy as np
from sklearn.model_selection import train_test_split
from mlflow.models.signature import infer_signature

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
import pandas as pd

In [6]:
init_mlflow()

In [8]:
df = pd.read_csv('../data/raw/train.csv')

In [9]:
df.head()

Unnamed: 0,state,account_length,area_code,international_plan,voice_mail_plan,number_vmail_messages,total_day_minutes,total_day_calls,total_day_charge,total_eve_minutes,total_eve_calls,total_eve_charge,total_night_minutes,total_night_calls,total_night_charge,total_intl_minutes,total_intl_calls,total_intl_charge,number_customer_service_calls,churn
0,OH,107,area_code_415,no,yes,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1,no
1,NJ,137,area_code_415,no,no,0,243.4,114,41.38,121.2,110,10.3,162.6,104,7.32,12.2,5,3.29,0,no
2,OH,84,area_code_408,yes,no,0,299.4,71,50.9,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78,2,no
3,OK,75,area_code_415,yes,no,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73,3,no
4,MA,121,area_code_510,no,yes,24,218.2,88,37.09,348.5,108,29.62,212.6,118,9.57,7.5,7,2.03,3,no


In [10]:
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(drop='first', handle_unknown='ignore')

In [11]:
X = df.drop(['churn'], axis=1)

In [12]:
y = df['churn']

In [13]:
y = y.map({'no': 0, 'yes': 1})

In [14]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

In [20]:
from sklearn.metrics import classification_report, accuracy_score, f1_score

In [16]:
numeric = df.select_dtypes(include='number').columns

In [17]:
categorical = df.select_dtypes(exclude='number').columns

In [19]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numeric),
        ('cat', categorical_transformer, categorical.drop('churn'))
    ])

pipeline_weighted = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', LogisticRegression(class_weight='balanced', solver='liblinear'))])

pipeline_weighted.fit(X_train, y_train)


y_pred = pipeline_weighted.predict(X_test)

cm = classification_report(y_test, y_pred)
print(cm)

              precision    recall  f1-score   support

           0       0.95      0.78      0.85      1191
           1       0.38      0.76      0.51       212

    accuracy                           0.77      1403
   macro avg       0.66      0.77      0.68      1403
weighted avg       0.86      0.77      0.80      1403



In [23]:
mlflow.set_experiment("churn_classification")      # –∏–º—è —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞

with mlflow.start_run():

    # --- 1. –õ–æ–≥–∏—Ä—É–µ–º –ø–∞—Ä–∞–º–µ—Ç—Ä—ã –º–æ–¥–µ–ª–∏ ---
    mlflow.log_param("model", "LogisticRegression")
    mlflow.log_param("solver", "liblinear")
    mlflow.log_param("class_weight", "balanced")

    # --- 2. –î–µ–ª–∞–µ–º –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–µ ---
    y_pred = pipeline_weighted.predict(X_test)

    # --- 3. –ú–µ—Ç—Ä–∏–∫–∏ ---
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("f1_score", f1)

    # –º–æ–∂–Ω–æ –ª–æ–≥–∏—Ä–æ–≤–∞—Ç—å —Ç–µ–∫—Å—Ç–æ–≤—ã–π classification_report –∫–∞–∫ –∞—Ä—Ç–µ—Ñ–∞–∫—Ç
    report = classification_report(y_test, y_pred)
    with open("cls_report.txt", "w") as f:
        f.write(report)
    mlflow.log_artifact("cls_report.txt")

    # --- 4. –õ–æ–≥–∏—Ä—É–µ–º —Å–∞–º PIPELINE ---
    mlflow.sklearn.log_model(
        sk_model=pipeline_weighted,
        artifact_path="model",
        registered_model_name="ChurnPipeline"  # –æ–ø—Ü–∏–æ–Ω–∞–ª—å–Ω–æ
    )

    print("Model logged to MLflow!")

Successfully registered model 'ChurnPipeline'.
2025/11/16 19:36:37 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ChurnPipeline, version 1
Created version '1' of model 'ChurnPipeline'.


Model logged to MLflow!
üèÉ View run serious-colt-244 at: http://94.228.117.198:5000/#/experiments/1/runs/1a404b7bb9bf4600b77bb79e18241ee5
üß™ View experiment at: http://94.228.117.198:5000/#/experiments/1
