In [1]:
!pip install mlflow
!pip install pyngrok
!pip install streamlit

Collecting mlflow
  Downloading mlflow-2.21.3-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.21.3 (from mlflow)
  Downloading mlflow_skinny-2.21.3-py3-none-any.whl.metadata (31 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.21.3->mlflow)
  Downloading databricks_sdk-0.50.0-py3-none-any.whl.metadata (38 kB)
Collecting fastapi<1 (from mlflow-skinny==2.21.3->mlflow)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn<1 (from mlflow-skinny==2.21.3->mlflow)
  Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 k

In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
import mlflow
import mlflow.sklearn

data=pd.read_csv(r'/content/train.csv')
data=data.drop('Loan_ID',axis=1)
data['Dependents']=data['Dependents'].replace('3+','3')
data['Credit_History']=data['Credit_History'].astype('str')
data['Loan_Amount_Term']=data['Loan_Amount_Term'].astype('str')

# filling the null values
from sklearn.impute import SimpleImputer
for i in data.columns:
  if data[i].dtypes=='object':
    imputer=SimpleImputer(strategy='most_frequent')
    data[i]=imputer.fit_transform(data[[i]]).ravel()
  else:
    imputer=SimpleImputer(strategy='mean')
    data[i]=imputer.fit_transform(data[[i]]).ravel()

    #.ravel() converts the output back to a 1D array to fit into the column properly

# Encoding and normalization
cat=data.select_dtypes(include='object').columns
num=data.select_dtypes(exclude='object').columns
for i in data:
  if i in cat:
    unique=data[i].unique()
    k=0
    d={}
    for j in unique:
      d[j]=k
      k+=1
    data[i]=data[i].map(d)
  elif i in num:
    mean=data[i].mean()
    sd=data[i].std()
    new=[]
    for j in data[i]:
      z=(j-mean)/sd
      new.append(z)
    data[i]=new


X=data.drop('Loan_Status',axis=1)
y=data['Loan_Status']
X_train,X_test,y_train,y_test=train_test_split(X,y,
                                               test_size=0.2,
                                               random_state=42)

# Start MLflow
k=mlflow.set_experiment('/mlops/Loan_status')

def train_tree(criterion, max_depth, min_samples_split):
  crt={0:'gini',1:'entropy'}
  # initiate mlflow
  with mlflow.start_run(experiment_id=k.experiment_id,run_name='Classification',description='Decision Tree Classifier'):
    # Calling the model
    dt=DecisionTreeClassifier(criterion=crt[criterion],max_depth=max_depth,min_samples_split=min_samples_split)
    dt.fit(X_train,y_train)

    # prediction
    y_pred=dt.predict(X_test)

    accuracy=accuracy_score(y_test,y_pred)
    precision=precision_score(y_test,y_pred)
    recall=recall_score(y_test,y_pred)
    f1=f1_score(y_test,y_pred)

    print('accuracy',accuracy)
    print('precision',precision)
    print('recall',recall)
    print('f1',f1)

    #log metrices ,parameters
    mlflow.log_param('criterion',crt[criterion])
    mlflow.log_param('max_depth',max_depth)
    mlflow.log_param('min_samples_split',min_samples_split)

    mlflow.log_metric('accuracy',accuracy)
    mlflow.log_metric('precision',precision)
    mlflow.log_metric('recall',recall)
    mlflow.log_metric('f1',f1)

    mlflow.sklearn.log_model(dt,'model',registered_model_name='DecisionTreeClassifier')



In [10]:
#Version 1
criterion=eval(input('enter 0 for gini,1 for entropy:'))
max_depth=eval(input('enter the maximum depth of the tree:'))
min_samples_split=eval(input('enter the minimum number of samples required to split an internal node:'))
train_tree(criterion,max_depth,min_samples_split)

enter 0 for gini,1 for entropy:1
enter the maximum depth of the tree:5
enter the minimum number of samples required to split an internal node:3
accuracy 0.7723577235772358
precision 0.8571428571428571
recall 0.4186046511627907
f1 0.5625


Successfully registered model 'DecisionTreeClassifier'.
Created version '1' of model 'DecisionTreeClassifier'.


In [11]:
#Version 2
criterion=eval(input('enter 0 for gini,1 for entropy:'))
max_depth=eval(input('enter the maximum depth of the tree:'))
min_samples_split=eval(input('enter the minimum number of samples required to split an internal node:'))
train_tree(criterion,max_depth,min_samples_split)

enter 0 for gini,1 for entropy:1
enter the maximum depth of the tree:3
enter the minimum number of samples required to split an internal node:3
accuracy 0.7642276422764228
precision 0.8181818181818182
recall 0.4186046511627907
f1 0.5538461538461539


Registered model 'DecisionTreeClassifier' already exists. Creating a new version of this model...
Created version '2' of model 'DecisionTreeClassifier'.


##**Creating a Tunnel**


In [12]:
from pyngrok import ngrok
ngrok.kill()
auth_token='2qVdKwzryIkN214CK81f6q1byfO_7bN1ouSyzNqxQf2tKM2Fi'
ngrok.set_auth_token(auth_token)

ngrok_tunnel=ngrok.connect(addr='5000',proto='http')
print('Tracking Uri:',ngrok_tunnel.public_url)

Tracking Uri: https://5b5d-35-243-135-152.ngrok-free.app


In [13]:
!mlflow ui

[2025-04-23 04:17:43 +0000] [2888] [INFO] Starting gunicorn 23.0.0
[2025-04-23 04:17:43 +0000] [2888] [INFO] Listening at: http://127.0.0.1:5000 (2888)
[2025-04-23 04:17:43 +0000] [2888] [INFO] Using worker: sync
[2025-04-23 04:17:43 +0000] [2889] [INFO] Booting worker with pid: 2889
[2025-04-23 04:17:43 +0000] [2890] [INFO] Booting worker with pid: 2890
[2025-04-23 04:17:43 +0000] [2891] [INFO] Booting worker with pid: 2891
[2025-04-23 04:17:43 +0000] [2892] [INFO] Booting worker with pid: 2892
[2025-04-23 04:27:14 +0000] [2888] [INFO] Handling signal: int

Aborted!
[2025-04-23 04:27:14 +0000] [2890] [INFO] Worker exiting (pid: 2890)
[2025-04-23 04:27:14 +0000] [2891] [INFO] Worker exiting (pid: 2891)
[2025-04-23 04:27:14 +0000] [2889] [INFO] Worker exiting (pid: 2889)
[2025-04-23 04:27:14 +0000] [2892] [INFO] Worker exiting (pid: 2892)
[2025-04-23 04:27:16 +0000] [2888] [INFO] Shutting down: Master
