In [133]:
import pandas as pd   # pandas is a data manipulation library

In [134]:
# Load the loan dataset from the CSV file in the working directory.
df = pd.read_csv('loan_data.csv')

# Preview the first five rows (5 is the default for head()).
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
1,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
2,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
3,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
4,LP001013,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y


In [135]:
# Remove the Loan_ID column from the frame.
df = df.drop(columns=['Loan_ID'])

In [136]:
# Count the missing values in every column before imputation.
print("Null Values before handle it:")
df.isna().sum()

Null Values before handle it:


Gender                5
Married               0
Dependents            8
Education             0
Self_Employed        21
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            0
Loan_Amount_Term     11
Credit_History       30
Property_Area         0
Loan_Status           0
dtype: int64

In [137]:
# Fill the null values.
#
# The fill values are collected per column and applied with a single
# DataFrame.fillna call whose result is assigned back to `df`.  Calling
# `df[col].fillna(..., inplace=True)` mutates a temporary Series selected from
# the frame; pandas deprecates that chained in-place pattern and, with
# copy-on-write enabled, it leaves `df` unchanged.
fill_values = {
    # Text-valued columns: use the most frequent value (first mode).
    'Gender': df['Gender'].mode()[0],
    'Dependents': df['Dependents'].mode()[0],
    'Self_Employed': df['Self_Employed'].mode()[0],
    # Numeric columns: use the column mean.
    'Loan_Amount_Term': df['Loan_Amount_Term'].mean(),
    # NOTE(review): Credit_History looks like a 0/1 flag (only 1.0 is visible
    # in the preview) - a mean fill would then produce a fractional value;
    # confirm whether the mode is the better choice here.
    'Credit_History': df['Credit_History'].mean(),
}
df = df.fillna(value=fill_values)

In [138]:
# Count the missing values in every column again, after imputation.
print("Null Values after handle it:")
df.isna().sum()

Null Values after handle it:


Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [139]:
# Handling categorical features {Gender, Married, Education, Self_Employed, Property_Area}

categorical_cols = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area']

# One-hot encode the categorical features.
# The dtype is pinned to uint8 so the indicator columns are 0/1 integers on
# every pandas version: the default was uint8 before pandas 2.0 and is bool
# from 2.0 onwards.
df_encoded = pd.get_dummies(df[categorical_cols], dtype='uint8')
df_encoded


Unnamed: 0,Gender_Female,Gender_Male,Married_No,Married_Yes,Education_Graduate,Education_Not Graduate,Self_Employed_No,Self_Employed_Yes,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban
0,0,1,0,1,1,0,1,0,1,0,0
1,0,1,0,1,1,0,0,1,0,0,1
2,0,1,0,1,0,1,1,0,0,0,1
3,0,1,1,0,1,0,1,0,0,0,1
4,0,1,0,1,0,1,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
376,0,1,0,1,1,0,1,0,0,0,1
377,0,1,0,1,1,0,1,0,1,0,0
378,1,0,1,0,1,0,1,0,1,0,0
379,0,1,0,1,1,0,1,0,1,0,0


In [140]:
# Swap the raw categorical columns for their one-hot encoded counterparts:
# drop the originals, then append the encoded columns side by side.
df = pd.concat([df.drop(columns=categorical_cols), df_encoded], axis=1)
df.head()

Unnamed: 0,Dependents,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Loan_Status,Gender_Female,Gender_Male,Married_No,Married_Yes,Education_Graduate,Education_Not Graduate,Self_Employed_No,Self_Employed_Yes,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban
0,1,4583,1508.0,128.0,360.0,1.0,N,0,1,0,1,1,0,1,0,1,0,0
1,0,3000,0.0,66.0,360.0,1.0,Y,0,1,0,1,1,0,0,1,0,0,1
2,0,2583,2358.0,120.0,360.0,1.0,Y,0,1,0,1,0,1,1,0,0,0,1
3,0,6000,0.0,141.0,360.0,1.0,Y,0,1,1,0,1,0,1,0,0,0,1
4,0,2333,1516.0,95.0,360.0,1.0,Y,0,1,0,1,0,1,1,0,0,0,1


In [141]:
# Replace the '3+' label with '3' and parse the whole column as numbers.
# Replacing with the int 3 alone would leave an object column that mixes
# strings and an integer; pd.to_numeric gives the column a numeric dtype.
# The statement is also safe to re-run once the column is already numeric.
df['Dependents'] = pd.to_numeric(df['Dependents'].replace('3+', '3'))

In [142]:
# Standardize the numeric feature columns with scikit-learn's StandardScaler.
from sklearn.preprocessing import StandardScaler

numerical_cols = [
    'Dependents',
    'ApplicantIncome',
    'CoapplicantIncome',
    'LoanAmount',
    'Loan_Amount_Term',
]

scaler = StandardScaler()
# Learn the per-column statistics, then apply them to the same columns.
scaled_values = scaler.fit(df[numerical_cols]).transform(df[numerical_cols])
df[numerical_cols] = scaled_values
df.head()

Unnamed: 0,Dependents,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Loan_Status,Gender_Female,Gender_Male,Married_No,Married_Yes,Education_Graduate,Education_Not Graduate,Self_Employed_No,Self_Employed_Yes,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban
0,0.337948,0.707469,0.098695,0.812575,0.283647,1.0,N,0,1,0,1,1,0,1,0,1,0,0
1,-0.675895,-0.408932,-0.546371,-1.376596,0.283647,1.0,Y,0,1,0,1,1,0,0,1,0,0,1
2,-0.675895,-0.703019,0.462294,0.530102,0.283647,1.0,Y,0,1,0,1,0,1,1,0,0,0,1
3,-0.675895,1.706799,-0.546371,1.271595,0.283647,1.0,Y,0,1,1,0,1,0,1,0,0,0,1
4,-0.675895,-0.87933,0.102118,-0.352629,0.283647,1.0,Y,0,1,0,1,0,1,1,0,0,0,1
