#**Installing the packages**

In [48]:
!pip install mlflow
!pip install pyngrok
!pip install streamlit



#**Data Preprocessing & Training**

In [62]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
import mlflow
import mlflow.sklearn

data=pd.read_csv(r'/content/train.csv')
data=data.drop('Loan_ID',axis=1)
data['Dependents']=data['Dependents'].replace('3+','3')
data['Credit_History']=data['Credit_History'].astype('str')
data['Loan_Amount_Term']=data['Loan_Amount_Term'].astype('str')

# filling the null values
from sklearn.impute import SimpleImputer
for i in data.columns:
  if data[i].dtypes=='object':
    imputer=SimpleImputer(strategy='most_frequent')
    data[i]=imputer.fit_transform(data[[i]]).ravel()
  else:
    imputer=SimpleImputer(strategy='mean')
    data[i]=imputer.fit_transform(data[[i]]).ravel()

    #.ravel() converts the output back to a 1D array to fit into the column properly

# Encoding and normalization
cat=data.select_dtypes(include='object').columns
num=data.select_dtypes(exclude='object').columns
for i in data:
  if i in cat:
    unique=data[i].unique()
    k=0
    d={}
    for j in unique:
      d[j]=k
      k+=1
    data[i]=data[i].map(d)
  elif i in num:
    mean=data[i].mean()
    sd=data[i].std()
    new=[]
    for j in data[i]:
      z=(j-mean)/sd
      new.append(z)
    data[i]=new


X=data.drop('Loan_Status',axis=1)
y=data['Loan_Status']
X_train,X_test,y_train,y_test=train_test_split(X,y,
                                               test_size=0.2,
                                               random_state=42)

# Start MLflow
k=mlflow.set_experiment('/mlops/Loan_status_2')

def train_tree(criterion, max_depth, min_samples_split):
  crt={0:'gini',1:'entropy'}
  # initiate mlflow
  with mlflow.start_run(experiment_id=k.experiment_id,run_name='Classification',description='Decision Tree Classifier'):
    # Calling the model
    dt=DecisionTreeClassifier(criterion=crt[criterion],max_depth=max_depth,min_samples_split=min_samples_split)
    dt.fit(X_train,y_train)

    # prediction
    y_pred=dt.predict(X_test)

    accuracy=accuracy_score(y_test,y_pred)
    precision=precision_score(y_test,y_pred)
    recall=recall_score(y_test,y_pred)
    f1=f1_score(y_test,y_pred)

    print('accuracy',accuracy)
    print('precision',precision)
    print('recall',recall)
    print('f1',f1)

    #log metrices ,parameters/
    mlflow.log_param('criterion',crt[criterion])
    mlflow.log_param('max_depth',max_depth)
    mlflow.log_param('min_samples_split',min_samples_split)

    mlflow.log_metric('accuracy',accuracy)
    mlflow.log_metric('precision',precision)
    mlflow.log_metric('recall',recall)
    mlflow.log_metric('f1',f1)

    mlflow.sklearn.log_model(dt,'model',registered_model_name='DecisionTreeClassifier')



In [63]:
#Version 1 -Parameter Testing
criterion=eval(input('enter 0 for gini,1 for entropy:'))
max_depth=eval(input('enter the maximum depth of the tree:'))
min_samples_split=eval(input('enter the minimum number of samples required to split an internal node:'))
train_tree(criterion,max_depth,min_samples_split)

enter 0 for gini,1 for entropy:0
enter the maximum depth of the tree:1
enter the minimum number of samples required to split an internal node:2
accuracy 0.7723577235772358
precision 0.7586206896551724
recall 0.5116279069767442
f1 0.6111111111111112


Registered model 'DecisionTreeClassifier' already exists. Creating a new version of this model...
Created version '21' of model 'DecisionTreeClassifier'.


In [64]:
#Version 2 -Parameter Testing
criterion=eval(input('enter 0 for gini,1 for entropy:'))
max_depth=eval(input('enter the maximum depth of the tree:'))
min_samples_split=eval(input('enter the minimum number of samples required to split an internal node:'))
train_tree(criterion,max_depth,min_samples_split)

enter 0 for gini,1 for entropy:1
enter the maximum depth of the tree:3
enter the minimum number of samples required to split an internal node:4
accuracy 0.7642276422764228
precision 0.8181818181818182
recall 0.4186046511627907
f1 0.5538461538461539


Registered model 'DecisionTreeClassifier' already exists. Creating a new version of this model...
Created version '22' of model 'DecisionTreeClassifier'.


In [65]:
#Version 3 -Parameter Testing
criterion=eval(input('enter 0 for gini,1 for entropy:'))
max_depth=eval(input('enter the maximum depth of the tree:'))
min_samples_split=eval(input('enter the minimum number of samples required to split an internal node:'))
train_tree(criterion,max_depth,min_samples_split)

enter 0 for gini,1 for entropy:1
enter the maximum depth of the tree:5
enter the minimum number of samples required to split an internal node:3
accuracy 0.7723577235772358
precision 0.8571428571428571
recall 0.4186046511627907
f1 0.5625


Registered model 'DecisionTreeClassifier' already exists. Creating a new version of this model...
Created version '23' of model 'DecisionTreeClassifier'.


In [66]:
#Version 4 -Parameter Testing
criterion=eval(input('enter 0 for gini,1 for entropy:'))
max_depth=eval(input('enter the maximum depth of the tree:'))
min_samples_split=eval(input('enter the minimum number of samples required to split an internal node:'))
train_tree(criterion,max_depth,min_samples_split)

enter 0 for gini,1 for entropy:0
enter the maximum depth of the tree:4
enter the minimum number of samples required to split an internal node:3
accuracy 0.7804878048780488
precision 0.8636363636363636
recall 0.4418604651162791
f1 0.5846153846153846


Registered model 'DecisionTreeClassifier' already exists. Creating a new version of this model...
Created version '24' of model 'DecisionTreeClassifier'.


##**Creating a Tunnel**


In [67]:
from pyngrok import ngrok
ngrok.kill()
auth_token='2qVdKwzryIkN214CK81f6q1byfO_7bN1ouSyzNqxQf2tKM2Fi'
ngrok.set_auth_token(auth_token)

ngrok_tunnel=ngrok.connect(addr='5000',proto='http')
print('Tracking Uri:',ngrok_tunnel.public_url)

Tracking Uri: https://22d9-34-169-140-66.ngrok-free.app


In [68]:
!mlflow ui

[2025-05-15 10:42:30 +0000] [31704] [INFO] Starting gunicorn 23.0.0
[2025-05-15 10:42:30 +0000] [31704] [INFO] Listening at: http://127.0.0.1:5000 (31704)
[2025-05-15 10:42:30 +0000] [31704] [INFO] Using worker: sync
[2025-05-15 10:42:30 +0000] [31705] [INFO] Booting worker with pid: 31705
[2025-05-15 10:42:30 +0000] [31706] [INFO] Booting worker with pid: 31706
[2025-05-15 10:42:30 +0000] [31707] [INFO] Booting worker with pid: 31707
[2025-05-15 10:42:30 +0000] [31708] [INFO] Booting worker with pid: 31708

Aborted!
[2025-05-15 10:43:39 +0000] [31704] [INFO] Handling signal: int
[2025-05-15 10:43:39 +0000] [31706] [INFO] Worker exiting (pid: 31706)
[2025-05-15 10:43:39 +0000] [31707] [INFO] Worker exiting (pid: 31707)
[2025-05-15 10:43:39 +0000] [31705] [INFO] Worker exiting (pid: 31705)
[2025-05-15 10:43:39 +0000] [31708] [INFO] Worker exiting (pid: 31708)
[2025-05-15 10:43:41 +0000] [31704] [INFO] Shutting down: Master


#############################################

#**Prediction on Test Dataset**

In [69]:
%%writefile app.py
import streamlit as st
import mlflow
import pandas as pd

# Set Page Configuration
st.set_page_config(page_title="Loan_Status", page_icon="🏦", layout="wide")

st.title('Loan_prediction using MLflow')
# Initialize session state for navigation
if "current_page" not in st.session_state:
    st.session_state.current_page = "Loan_Status" #'Emotion Analysis of a Sentence'


def navigate_to(page):
    st.session_state.current_page = page

# Sidebar Navigation
st.sidebar.title("🔹 Select The Input Type")
st.sidebar.button('🙍🏻Individual', on_click=navigate_to, args=('Individual',))
st.sidebar.button("📁 File (Multiple Inputs)", on_click=navigate_to, args=("File",))



if st.session_state.current_page =='Individual':
  st.subheader('Checking Loan Status For Individual Input')
  id=st.text_input('Enter the Loan ID')
  gender_map={'Male':0,'Female':1}
  gender_option=st.selectbox('Enter your Gender',options={'Male':0,'Female':1})
  gender=gender_map[gender_option]

  married_map={'Yes':0,'No':1}
  married_option=st.selectbox('Enter your Marital Status',options=['Yes','No'])
  married=married_map[married_option]

  dependents_map={'0':0,'1':1,'2':2,'3':3,'4':4,'5':5}
  dependents_option=st.selectbox('Enter the no.of Dependents',options=['0','1','2','3','4','5'])
  dependents=dependents_map[dependents_option]

  education_map={'Graduate':0,'Not Graduate':1}
  education_option=st.selectbox('Enter your Education',options=['Graduate','Not Graduate'])
  education=education_map[education_option]

  employed_map={'Employed':0,'Not Employed':1}
  employed_option=st.selectbox('Enter your Employment Status',options=['Employed','Not Employed'])
  employed=employed_map[employed_option]

  applicant_income=st.number_input('Enter your Income')
  coapplicant_income=st.number_input('Enter the Co-applicant Income')
  loan_amount=st.number_input('Enter the Loan Amount')
  loan_amount_term=st.number_input('Enter the Loan Amount Term in years')

  credit_history_map={'Yes':0,'No':1}
  credit_history_option=st.selectbox('Enter your Credit History',options=['Yes','No'])
  credit_history=credit_history_map[credit_history_option]

  property_area_map={'Urban':0,'Rural':1,'Semiurban':2}
  property_area_option=st.selectbox('Enter your Property Area',options=['Urban','Rural','Semiurban'])
  property_area=property_area_map[property_area_option]


  if st.button("Predict"):
        data = pd.DataFrame([[gender, married, dependents, education, employed,
                              applicant_income, coapplicant_income, loan_amount, loan_amount_term,credit_history,property_area]],
                            columns=['Gender', 'Married', 'Dependents', 'Education',
                                     'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome',
                                     'LoanAmount', 'Loan_Amount_Term','Credit_History','Property_Area'])


        logged_model = 'runs:/59eed1ff60d24906a321e7c0e370b14b/model'
        loaded_model = mlflow.pyfunc.load_model(logged_model)
        prediction = loaded_model.predict(data)

        li = 'Approved' if prediction[0] == 0 else 'Rejected'
        st.subheader("Prediction Result")
        if li=='Approved':
          st.success(f"For Loan ID **{id}**, the loan is **{li}**.")
        else:
          st.warning(f"For Loan ID **{id}**, the loan is **{li}**.")


elif st.session_state.current_page =='File':
  st.subheader('Checking Loan Status For Multiple Inputs')
  file=st.file_uploader('Upload a CSV or excel file',type=['csv','xlsx'])

  if file is not None:
    #Encoding the test datasets
    try:
          if file.name.endswith('.csv'):
              data = pd.read_csv(file)
          else:
              data = pd.read_excel(file)
          id = data['Loan_ID']
          data['Credit_History']=data['Credit_History'].astype('str')
          data['Loan_Amount_Term']=data['Loan_Amount_Term'].astype('str')
          data=data.drop(['Loan_ID'],axis=1)
          cat=data.select_dtypes(include='object').columns
          num=data.select_dtypes(exclude='object').columns
          for i in data.columns:
            if i in cat:
              unique=data[i].unique()
              k=0
              d={}
              for j in unique:
                d[j]=k
                k+=1
              data[i]=data[i].map(d)
            elif i in num:
              mean=data[i].mean()
              sd=data[i].std()
              x=[]
              for j in data[i]:
                z=(j-mean)/sd
                x.append(z)
              data[i]=x

          if st.button('Predict'):
            # Predict on a Pandas DataFrame.

            logged_model ='runs:/59eed1ff60d24906a321e7c0e370b14b/model'
            loaded_model = mlflow.pyfunc.load_model(logged_model)
            l=loaded_model.predict(data)
            li=['Approved' if l[i]==0 else 'Rejected' for i in range(len(l))]
            status=pd.DataFrame(li,columns=['Loan_Status'])
            result = pd.concat([id,status], axis=1)
            st.subheader("Prediction Results")
            st.dataframe(result)

    except Exception as e:
          st.error(f"An error occurred: {e}")

Overwriting app.py


In [70]:
from pyngrok import ngrok
outh_token='2qVdKwzryIkN214CK81f6q1byfO_7bN1ouSyzNqxQf2tKM2Fi' #we will not provide any token
ngrok.set_auth_token(outh_token)

#create the tunnel
ngrok_tunnel=ngrok.connect(addr='5000',proto='http')
print('Tracking uri:',ngrok_tunnel.public_url)


!streamlit run --server.port 5000 app.py

Tracking uri: https://1d4e-34-169-140-66.ngrok-free.app

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:5000[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:5000[0m
[34m  External URL: [0m[1mhttp://34.169.140.66:5000[0m
[0m
[34m  Stopping...[0m
[34m  Stopping...[0m
