In [None]:
#!pip install pandas 
#!pip install pandas sklearn
#!pip install -r requirements.txt  
#!pip install scikit-learn
#!pip install mlflow
#!pip install dataloader
#!pip install data-loader


In [None]:
#!pip install scikit-learn pandas mlflow xgboost


Collecting xgboost
  Using cached xgboost-3.0.1-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.1-py3-none-win_amd64.whl (150.0 MB)
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.5/150.0 MB 173.0 kB/s eta 0:14:24
   ---------------------------------------- 0.5/150.0 MB 173.

In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score 
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient    
from mlflow.exceptions import MlflowException
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier


In [2]:
mlflow.set_tracking_uri("http://localhost:5000")


In [None]:
#run this in another terminal to see the UI
#python -m mlflow ui

In [3]:
exp_id=mlflow.create_experiment(name="bank churn")
exp_id

'626756895273908832'

In [10]:
data = pd.read_csv("Churn_Modelling.csv")


In [11]:
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [12]:
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)
data['Gender'] = LabelEncoder().fit_transform(data['Gender'])
data = pd.get_dummies(data, columns=['Geography'], drop_first=True)

In [13]:
# Features and target
X = data.drop('Exited', axis=1)
y = data['Exited']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
# Model training
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [15]:
# Predictions
y_pred = model.predict(X_test)

In [16]:
accuracy = accuracy_score(y_test, y_pred)


In [17]:
with mlflow.start_run():
    mlflow.log_param("model", "LogisticRegression")
    mlflow.log_param("max_iter", 1000)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.sklearn.log_model(model, "model")



🏃 View run dapper-mouse-545 at: http://localhost:5000/#/experiments/0/runs/00d47f898a784d809ee731dd3970ca15
🧪 View experiment at: http://localhost:5000/#/experiments/0


In [19]:

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_preds)

with mlflow.start_run():
    mlflow.log_param("model", "RandomForest")
    mlflow.log_param("n_estimators", 100)
    mlflow.log_metric("accuracy", rf_accuracy)
    mlflow.sklearn.log_model(rf_model, "model")




🏃 View run caring-rook-329 at: http://localhost:5000/#/experiments/0/runs/6784a8ae2f384ba7b365912576351355
🧪 View experiment at: http://localhost:5000/#/experiments/0


In [20]:
xgb_model = XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)
xgb_preds = xgb_model.predict(X_test)
xgb_accuracy = accuracy_score(y_test, xgb_preds)

with mlflow.start_run():
    mlflow.log_param("model", "XGBoost")
    mlflow.log_param("n_estimators", 100)
    mlflow.log_metric("accuracy", xgb_accuracy)
    mlflow.sklearn.log_model(xgb_model, "model")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🏃 View run useful-ox-412 at: http://localhost:5000/#/experiments/0/runs/bb5703164caa4a05ad994c629c92f945
🧪 View experiment at: http://localhost:5000/#/experiments/0


In [21]:
with mlflow.start_run():
    mlflow.log_param("model", "RandomForest")
    mlflow.log_param("n_estimators", 100)
    mlflow.log_metric("accuracy", rf_accuracy)
    
    # Register the model
    mlflow.sklearn.log_model(
        rf_model,
        artifact_path="model",
        registered_model_name="RandomForestClassifier"
    )


Successfully registered model 'RandomForestClassifier'.
2025/05/18 21:47:10 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForestClassifier, version 1
Created version '1' of model 'RandomForestClassifier'.


🏃 View run brawny-ray-161 at: http://localhost:5000/#/experiments/0/runs/62b3094a245146bfb1e0ef2892018c13
🧪 View experiment at: http://localhost:5000/#/experiments/0


In [22]:
with mlflow.start_run():
    mlflow.log_param("model", "XGBoost")
    mlflow.log_param("n_estimators", 100)
    mlflow.log_metric("accuracy", xgb_accuracy)
    
    mlflow.sklearn.log_model(
        xgb_model,
        artifact_path="model",
        registered_model_name="XGBoostClassifier"
    )


Successfully registered model 'XGBoostClassifier'.
2025/05/18 21:47:35 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGBoostClassifier, version 1


🏃 View run indecisive-steed-920 at: http://localhost:5000/#/experiments/0/runs/dffe123dcf1d4a3781e208863d36ba7e
🧪 View experiment at: http://localhost:5000/#/experiments/0


Created version '1' of model 'XGBoostClassifier'.
