In [36]:
# Import Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from warnings import filterwarnings
filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

Data Collection

In [None]:
# Read the loan dataset into a DataFrame (the path points at a Colab-style Drive mount)
DATA_PATH = '/content/drive/MyDrive/ML-Projects/Bank-Loan_Data.csv'
data = pd.read_csv(DATA_PATH)
data

In [None]:
# Preview 10 randomly drawn rows (no random_state is passed, so the rows differ between runs)
data.sample(10)

In [None]:
# (rows, columns) of the loaded dataset
data.shape

In [None]:
# Column names, dtypes and non-null counts of the loaded dataset
data.info()

In [None]:
# Descriptive statistics of the dataset
data.describe()

**Data Preprocessing**

Null Values

In [None]:
# Number of missing entries in each column
data.isna().sum()

In [None]:
# Share of missing cells in the whole frame, as a percentage
missing_cells = data.isna().sum().sum()
missing_cells / data.size * 100

In [72]:
# Keep only the rows that have no missing value in any column
data = data.dropna(axis=0, how='any')

In [None]:
# Re-check column dtypes and non-null counts after the incomplete rows were dropped
data.info()

In [None]:
# Renumber the rows 0..n-1 and discard the old index left behind by dropna
data = data.reset_index(drop=True)
data

In [73]:
# Independent (deep) snapshot of the frame taken before label encoding;
# the count plots read from it so they show the original category labels.
data1 = data.copy(deep=True)

Label Encoding

In [74]:
# Loan_Status
# Encode the target as Y -> 1, N -> 0.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on the selected Series: that chained in-place form
# updates a temporary object under pandas Copy-on-Write and leaves `data`
# untouched. astype(int) makes the target a real integer column, so it does not
# stay object dtype (which scikit-learn classifiers reject as an unknown label
# type) and any value other than Y/N fails loudly here rather than during fit.
data['Loan_Status'] = data['Loan_Status'].replace({'Y': 1, 'N': 0}).astype(int)

In [75]:
# Dependents
# Encode the open-ended '3+' bucket as 4 and make the whole column numeric.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on the selected Series, which does not modify `data`
# under pandas Copy-on-Write. pd.to_numeric then converts the remaining values
# (presumably digit strings such as '0', '1', '2' — confirm against the data)
# so the column is not left as a mix of strings and integers.
data['Dependents'] = pd.to_numeric(data['Dependents'].replace({'3+': '4'}))

# Last expression of the cell, so the encoded distribution is actually displayed
data['Dependents'].value_counts()

In [76]:
# Gender, Married, Self Employed, Property Area, Education
# Per-column mapping from category label to integer code.
category_codes = {
    "Gender": {"Male": 1, "Female": 0},
    "Married": {'Yes': 1, "No": 0},
    "Self_Employed": {'Yes': 1, "No": 0},
    "Property_Area": {'Urban': 2, 'Semiurban': 1, 'Rural': 0},
    'Education': {'Graduate': 1, "Not Graduate": 0},
}

# replace() returns a new frame (no inplace mutation). The explicit astype(int)
# does not rely on pandas silently downcasting the replaced object columns
# (that behaviour is deprecated) and raises immediately if a column still holds
# a label that is missing from the mapping.
data = data.replace(category_codes).astype({column: int for column in category_codes})

Data Visualization

In [None]:
# 2x3 grid of count plots: one categorical column per panel, bars split by Loan_Status.
# The plots read from data1, the copy that still holds the original labels.
categorical_columns = ['Gender', 'Married', 'Education',
                       'Self_Employed', 'Dependents', 'Property_Area']

fig, ax = plt.subplots(2, 3, figsize=(15,8))

# ax.flat walks the grid row by row, so the panels appear in the listed order
for panel, column in zip(ax.flat, categorical_columns):
    sns.countplot(data=data1, x=column, hue='Loan_Status', ax=panel)

plt.show()

# Observations:
# Most loans are taken by males, and the majority are approved
# Most loans are taken by married people, and the majority are approved
# Most loans are taken by graduates, and the majority are approved
# Most loans are taken by self-employed people, and the majority are approved
# Most loans are taken by people with 0 dependents, and the majority are approved
# Most loans are taken by people living in semi-urban areas, and the majority are approved
# NOTE(review): re-check the Self_Employed observation against its panel —
# confirm that self-employed applicants really are the larger group.

In [None]:
# Inspect the frame after the label-encoding cells
data

In [90]:
# Define Ip & Op
# Features: every column after the first one, minus the target.
# The target is dropped by name instead of assuming it is the last column, so
# the label cannot leak into X if the column order of the dataset changes.
# NOTE(review): the first column is still skipped by position, as before —
# presumably a row identifier; confirm against the dataset.
X = data.iloc[:, 1:].drop(columns='Loan_Status')

# Target: the loan approval label
Y = data['Loan_Status']

In [126]:
# Split the DS
# 10% of the rows are held out for testing; stratify=Y keeps the Loan_Status
# class ratio the same in both parts, and random_state fixes the shuffle.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    stratify=Y,
    random_state=30,
)

In [None]:
# Shapes of the four split parts, in the order X_train, X_test, Y_train, Y_test
print(*(part.shape for part in (X_train, X_test, Y_train, Y_test)))

In [132]:
# SVM Model Creation & Training

# Build the classifier from the directly imported SVC class rather than through
# `svm.SVC`: the name `svm` is rebound to the fitted model on the next line, so
# looking the class up on `svm` raised AttributeError whenever this cell was
# re-run. The variable keeps the name `svm` because later cells call
# svm.predict(...).
svm = SVC(kernel='linear')
svm.fit(X_train, Y_train)

In [128]:
# Model Performance & Evaluation

# Predictions of the fitted model on the training rows and on the held-out rows
Y_pred_train, Y_pred_test = svm.predict(X_train), svm.predict(X_test)

In [None]:
# A notebook cell only displays its last bare expression, so the training
# accuracy used to be computed and thrown away. Both scores are stored and
# printed so they can be compared side by side.
train_accuracy = accuracy_score(Y_train, Y_pred_train)   # Training Accuracy
test_accuracy = accuracy_score(Y_test, Y_pred_test)      # Testing Accuracy

print(f'Training accuracy: {train_accuracy:.4f}')
print(f'Testing accuracy : {test_accuracy:.4f}')

Predictive Model