In [177]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

In [178]:
# Load the raw loan-approval records and print a schema summary.
# NOTE: every header except `loan_id` carries a leading space (e.g. ' education'),
# so the rest of the notebook indexes columns with that space included.
csv_path = "loan_approval_dataset.csv"
df = pd.read_csv(csv_path)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4269 entries, 0 to 4268
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   loan_id                    4269 non-null   int64 
 1    no_of_dependents          4269 non-null   int64 
 2    education                 4269 non-null   object
 3    self_employed             4269 non-null   object
 4    income_annum              4269 non-null   int64 
 5    loan_amount               4269 non-null   int64 
 6    loan_term                 4269 non-null   int64 
 7    cibil_score               4269 non-null   int64 
 8    residential_assets_value  4269 non-null   int64 
 9    commercial_assets_value   4269 non-null   int64 
 10   luxury_assets_value       4269 non-null   int64 
 11   bank_asset_value          4269 non-null   int64 
 12   loan_status               4269 non-null   object
dtypes: int64(10), object(3)
memory usage: 433.7+ KB


In [179]:
# Preview the raw education column (the column name starts with a space).
df.loc[:, ' education']

0            Graduate
1        Not Graduate
2            Graduate
3            Graduate
4        Not Graduate
            ...      
4264         Graduate
4265     Not Graduate
4266     Not Graduate
4267     Not Graduate
4268         Graduate
Name:  education, Length: 4269, dtype: object

In [180]:
# Turn the education strings into integer codes.
# LabelEncoder numbers the classes in sorted order, so with the two values shown
# above "Graduate" should become 0 and "Not Graduate" 1 (check label_encoder.classes_).
label_encoder = LabelEncoder()
education_codes = label_encoder.fit_transform(df[' education'])
df[' education'] = education_codes

In [181]:
# Preview the raw self-employment flag before it is encoded.
df.loc[:, ' self_employed']

0         No
1        Yes
2         No
3         No
4        Yes
        ... 
4264     Yes
4265     Yes
4266      No
4267      No
4268      No
Name:  self_employed, Length: 4269, dtype: object

In [182]:
# Encode the Yes/No self-employment flag as integer codes.
# A dedicated encoder is used here: refitting the shared `label_encoder` would
# overwrite the classes it learned for ' education', making that mapping
# impossible to inspect, invert, or reuse at inference time.
self_employed_encoder = LabelEncoder()
df[' self_employed'] = self_employed_encoder.fit_transform(df[' self_employed'])

In [183]:
# Preview the raw target column before its labels are normalised.
df.loc[:, ' loan_status']

0        Approved
1        Rejected
2        Rejected
3        Rejected
4        Rejected
          ...    
4264     Rejected
4265     Approved
4266     Rejected
4267     Approved
4268     Approved
Name:  loan_status, Length: 4269, dtype: object

In [184]:
# Normalise the target labels: trim surrounding whitespace and title-case them
# so values compare cleanly against 'Approved' / 'Rejected'.
status_raw = df[' loan_status']
df[' loan_status'] = status_raw.str.strip().str.title()

In [185]:
# Binary target: 1 = approved, 0 = rejected.
# Any label missing from the mapping becomes NaN in `y`.
status_to_target = {'Approved': 1, 'Rejected': 0}
y = df[' loan_status'].map(status_to_target)

In [186]:
# Drop incomplete rows. `y` was derived from `df` before this point, so it is
# re-selected on the surviving index; otherwise any dropped row would leave the
# features and the target with different lengths and break train_test_split.
# dropna() is a no-op on a complete frame, so no null-count guard is needed.
df = df.dropna()
y = y.loc[df.index]

In [187]:
# Build the feature matrix: discard the row identifier and the target, then
# one-hot encode any columns that are still non-numeric.
non_feature_columns = ['loan_id', ' loan_status']
X = pd.get_dummies(df.drop(columns=non_feature_columns), drop_first=True)

In [188]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [189]:
# Confirm both splits expose the same feature columns.
# Label text is kept verbatim so it matches the recorded output below.
for label, frame in (("X_tarin columns:", X_train), ("X_test columns", X_test)):
    print(label, frame.columns)

X_tarin columns: Index([' no_of_dependents', ' education', ' self_employed', ' income_annum',
       ' loan_amount', ' loan_term', ' cibil_score',
       ' residential_assets_value', ' commercial_assets_value',
       ' luxury_assets_value', ' bank_asset_value'],
      dtype='object')
X_test columns Index([' no_of_dependents', ' education', ' self_employed', ' income_annum',
       ' loan_amount', ' loan_term', ' cibil_score',
       ' residential_assets_value', ' commercial_assets_value',
       ' luxury_assets_value', ' bank_asset_value'],
      dtype='object')


In [190]:
# Report (rows, columns) for each split.
for label, frame in (("X_train shape:", X_train), ("X_test shape:", X_test)):
    print(label, frame.shape)


X_train shape: (3415, 11)
X_test shape: (854, 11)


In [191]:
# Standardise the features. The scaler learns its statistics from the training
# split only (fit_transform fits and transforms in one pass, so a separate
# fit() call beforehand would just be repeated work), and the test split is
# transformed with those same statistics to avoid leaking test information.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [192]:
# Scaling must not change the number of rows or columns; print the shapes to confirm.
for label, matrix in (("X_train shape:", X_train_scaled), ("X_test shape:", X_test_scaled)):
    print(label, matrix.shape)


X_train shape: (3415, 11)
X_test shape: (854, 11)


In [193]:
# Train a 100-tree random forest on the scaled training data; the seed makes
# the fitted forest reproducible.
forest_params = dict(n_estimators=100, random_state=42)
model = RandomForestClassifier(**forest_params)
model.fit(X_train_scaled, y_train)

In [194]:
# Mean accuracy on each split. A large gap between the two figures would
# indicate overfitting.
train_accuracy = model.score(X_train_scaled, y_train)
test_accuracy = model.score(X_test_scaled, y_test)
print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

Training Accuracy: 1.0
Test Accuracy: 0.977751756440281


In [195]:
# Persist the fitted model and its matching scaler; both are needed to score
# new applications consistently with training.
model_path = "loan_model.pkl"
scaler_path = "scaler.pkl"
joblib.dump(model, model_path)
joblib.dump(scaler, scaler_path)

['scaler.pkl']

In [196]:
import numpy as np

In [197]:
# Reload the persisted artefacts to confirm they round-trip from disk.
# joblib files are pickle-based: only load files from a trusted source.
model, scaler = (joblib.load(path) for path in ("loan_model.pkl", "scaler.pkl"))

In [198]:
# One hypothetical applicant, expressed with the same column names and order
# the scaler was fitted on. Naming the features documents what each number
# means and avoids scikit-learn's "X does not have valid feature names"
# warning that a bare positional array triggers on a DataFrame-fitted scaler.
# ' education' and ' self_employed' are the integer codes produced by the
# label encoders earlier in the notebook (presumably 1 = Not Graduate and
# 0 = No, given sorted class order; confirm via the encoders' classes_).
sample_input = pd.DataFrame([{
    ' no_of_dependents': 2,
    ' education': 1,
    ' self_employed': 0,
    ' income_annum': 500000,
    ' loan_amount': 200000,
    ' loan_term': 36,
    ' cibil_score': 750,
    ' residential_assets_value': 1000000,
    ' commercial_assets_value': 500000,
    ' luxury_assets_value': 200000,
    ' bank_asset_value': 300000,
}])
sample_scaled = scaler.transform(sample_input)



In [199]:
# Score the sample applicant; class 1 was mapped from 'Approved' when the
# target was built, anything else is reported as a rejection.
prediction = model.predict(sample_scaled)
if prediction[0] == 1:
    outcome = "Approved"
else:
    outcome = "Rejected"
print("Prediction:", outcome)

Prediction: Approved
