### DEPLOY - Final Model

In [3]:
#Importing Libraries
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation/future warnings raised
# by the libraries used below - consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Read the loan records from the CSV file in the working directory,
# then show the resulting frame as the cell output.
df = pd.read_csv(filepath_or_buffer='loan_approved.csv')
df

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status (Approved)
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,LP002978,Female,No,0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
610,LP002979,Male,Yes,3+,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y
611,LP002983,Male,Yes,1,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,Y
612,LP002984,Male,Yes,2,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,Y


In [5]:
# Handling missing data: numeric columns are filled with their median, every
# other column with its most frequent value (mode).
# NOTE(review): the loop visits every column of df, including the identifier
# and the target column - confirm that imputing the target is intended.
for col in df.columns:
    if not df[col].isna().any():
        continue  # nothing to fill in this column

    if pd.api.types.is_numeric_dtype(df[col]):
        fill_value = df[col].median()
    else:
        # Checking "not numeric" instead of "dtype == 'object'" keeps text
        # columns stored as category/string dtypes on the mode branch.
        modes = df[col].mode()
        if modes.empty:
            continue  # column is entirely missing; there is no mode to use
        fill_value = modes[0]

    # Assign the filled column back. Calling fillna(inplace=True) on df[col]
    # acts on an intermediate object: pandas warns about it and, with
    # Copy-on-Write enabled, df itself is left unchanged.
    df[col] = df[col].fillna(fill_value)

In [5]:
# Count the remaining missing values in each column
df.isna().sum()

Loan_ID                   0
Gender                    0
Married                   0
Dependents                0
Education                 0
Self_Employed             0
ApplicantIncome           0
CoapplicantIncome         0
LoanAmount                0
Loan_Amount_Term          0
Credit_History            0
Property_Area             0
Loan_Status (Approved)    0
dtype: int64

In [7]:
# Loan_ID is a unique identifier and not useful for prediction, so drop it
df.drop(columns=['Loan_ID'], inplace=True)

In [9]:
# Feature selection: remove the loan term column, then display the frame
df.drop(columns=['Loan_Amount_Term'], inplace=True)
df

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Credit_History,Property_Area,Loan_Status (Approved)
0,Male,No,0,Graduate,No,5849,0.0,128.0,1.0,Urban,Y
1,Male,Yes,1,Graduate,No,4583,1508.0,128.0,1.0,Rural,N
2,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,1.0,Urban,Y
3,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,1.0,Urban,Y
4,Male,No,0,Graduate,No,6000,0.0,141.0,1.0,Urban,Y
...,...,...,...,...,...,...,...,...,...,...,...
609,Female,No,0,Graduate,No,2900,0.0,71.0,1.0,Rural,Y
610,Male,Yes,3+,Graduate,No,4106,0.0,40.0,1.0,Rural,Y
611,Male,Yes,1,Graduate,No,8072,240.0,253.0,1.0,Urban,Y
612,Male,Yes,2,Graduate,No,7583,0.0,187.0,1.0,Urban,Y


### Creating Pipeline

In [15]:
# Target variable. In the frame displayed above the labels are the strings
# 'Y' and 'N' (not 0/1). The classifier is fitted on these string labels, so
# the saved pipeline predicts 'Y'/'N' as well.
y = df['Loan_Status (Approved)']

# Features used in the model
X = df[['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
        'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Credit_History',
        'Property_Area']]

# Define column types
categorical = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area']
numerical = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Credit_History']

# Preprocessing: One-hot for categorical, StandardScaler for numeric.
# handle_unknown='ignore' lets the encoder accept categories at prediction
# time that were not present in the training split.
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numerical),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical)
])

# Create pipeline with preprocessor + model. The classifier is seeded so that
# re-running the notebook reproduces the same fitted model.
pipeline = Pipeline([
    ('preprocessing', preprocessor),
    ('classifier', LogisticRegression(solver='liblinear', random_state=42))
])

# Split data and train; 20% of the rows are held out from fitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
pipeline.fit(X_train, y_train)

# Report accuracy on the held-out rows so the saved model comes with a
# sanity check (the test split was previously created but never used).
print(f"Held-out accuracy: {pipeline.score(X_test, y_test):.3f}")

# Save the pipeline (preprocessing + classifier) as a single pickle file
with open('loan_approval_pipeline.pkl', 'wb') as file:
    pickle.dump(pipeline, file)

print("Loan pipeline trained and saved as loan_approval_pipeline.pkl")


Loan pipeline trained and saved as loan_approval_pipeline.pkl
