Project Title: Loan Prediction Analysis Using Machine Learning Models.

Group Details: 
1. Insiya Saher - A - 27
2. Chaitanya Sudan - A - 41
3. Neha Sahu - B - 09
4. Georgina Frank - B - 11

## Important Libraries Imported

In [None]:
#Magic function to render the figure/plots in this notebook
%matplotlib inline 
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt

## Loading Dataset

In [None]:
# Read the loan dataset from 'data.csv' (path is relative to the notebook's
# working directory) into the DataFrame used by every later cell.
df = pd.read_csv('data.csv')

In [None]:
# Data before cleaning. head(614) requests up to 614 rows; 614 was chosen by
# the author so that all of the data is displayed rather than a short preview.
df.head(614)

In [None]:
# Descriptive statistics of the data, shown as this cell's output.
df.describe()

In [None]:
# Summary of the training data: info() prints a concise overview of the frame.
df.info()

## Cleaning and Preprocessing of Data

In [None]:
# Count of null/missing values in each column, before any imputation.
df.isna().sum()

In [None]:
# Continuous numeric columns: fill missing values with the column mean.
for col_name in ['LoanAmount', 'Loan_Amount_Term']:
    df[col_name] = df[col_name].fillna(df[col_name].mean())

# Categorical columns: fill missing values with the most frequent value (mode).
# Credit_History is handled here rather than with the mean: it is assumed to be
# a 0/1 flag (TODO confirm against the data), and a mean would insert a
# fractional value that corresponds to no real category.
for col_name in ['Credit_History', 'Gender', 'Married', 'Dependents', 'Self_Employed']:
    # mode() returns a Series of the most frequent value(s); [0] takes the first.
    df[col_name] = df[col_name].fillna(df[col_name].mode()[0])

In [None]:
# Re-count null values per column to check the result of the imputation above.
df.isna().sum()

## Data Visualization using Seaborn

### Non-numerical Data Visualization using Countplot

In [None]:
# Bar chart of the number of rows per Gender value.
sns.countplot(data=df, x='Gender')

In [None]:
# Bar chart of the number of rows per Married value.
sns.countplot(data=df, x='Married')

In [None]:
# Bar chart of the number of rows per Dependents value.
sns.countplot(data=df, x='Dependents')

In [None]:
# Bar chart of the number of rows per Education value.
sns.countplot(data=df, x='Education')

In [None]:
# Bar chart of the number of rows per Self_Employed value.
sns.countplot(data=df, x='Self_Employed')

In [None]:
# Bar chart of the number of rows per Property_Area value.
sns.countplot(data=df, x='Property_Area')

In [None]:
# Bar chart of the number of rows per Loan_Status value (the prediction target).
sns.countplot(data=df, x='Loan_Status')

### Numerical Data Visualization using Histplot

In [None]:
# Histogram of ApplicantIncome.
sns.histplot(data=df, x='ApplicantIncome')

In [None]:
# Histogram of CoapplicantIncome.
sns.histplot(data=df, x='CoapplicantIncome')

In [None]:
# Histogram of LoanAmount.
sns.histplot(data=df, x='LoanAmount')

In [None]:
# Histogram of Loan_Amount_Term.
sns.histplot(data=df, x='Loan_Amount_Term')

In [None]:
# Histogram of Credit_History.
sns.histplot(data=df, x='Credit_History')

## Creating Total Income Attribute

In [None]:
# Combined income per row: applicant income plus co-applicant income.
df['Total_Income'] = df['ApplicantIncome'].add(df['CoapplicantIncome'])
df.head(n=614)

##  Logarithm Conversion and Visualization

In [None]:
# Natural log of ApplicantIncome, stored as a new column and plotted.
# NOTE(review): no +1 offset is applied here (unlike CoapplicantIncome), so a
# zero income would produce -inf — confirm the column has no zeros.
df['ApplicantIncomeLog'] = df['ApplicantIncome'].pipe(np.log)
sns.histplot(data=df, x='ApplicantIncomeLog')

In [None]:
# Natural log of (CoapplicantIncome + 1); the +1 keeps a zero income finite
# (log(1) = 0) instead of -inf.
df['CoapplicantIncomeLog'] = df['CoapplicantIncome'].add(1).pipe(np.log)
sns.histplot(data=df, x='CoapplicantIncomeLog')

In [None]:
# Natural log of LoanAmount, stored as a new column and plotted.
df['LoanAmountLog'] = df['LoanAmount'].pipe(np.log)
sns.histplot(data=df, x='LoanAmountLog')

In [None]:
# Natural log of Loan_Amount_Term, stored as a new column and plotted.
df['Loan_Amount_TermLog'] = df['Loan_Amount_Term'].pipe(np.log)
sns.histplot(data=df, x='Loan_Amount_TermLog')

In [None]:
# Natural log of Total_Income, stored as a new column and plotted.
df['Total_IncomeLog'] = df['Total_Income'].pipe(np.log)
sns.histplot(data=df, x='Total_IncomeLog')

## Correlation Matrix

In [None]:
# Pairwise correlation of the numeric columns, drawn as an annotated heatmap.
# At this point df still contains string columns (label encoding happens in a
# later cell). Recent pandas versions raise on non-numeric columns in corr()
# instead of silently skipping them, so restrict to numeric dtypes explicitly.
correlation = df.select_dtypes(include='number').corr()
plt.figure(figsize=(15,10))
sns.heatmap(correlation,annot=True,cmap="BuPu")

In [None]:
# Display up to 614 rows of the frame, now including the derived columns.
df.head(614)

In [None]:
# Dropping unnecessary columns: the raw income/loan columns, Total_Income,
# the Loan_ID identifier and CoapplicantIncomeLog.
column = [
    'ApplicantIncome',
    'CoapplicantIncome',
    'LoanAmount',
    'Loan_Amount_Term',
    'Total_Income',
    'Loan_ID',
    'CoapplicantIncomeLog',
]
# `columns=` already targets the column axis, so no separate axis argument is needed.
df = df.drop(columns=column)

In [None]:
# Display up to 614 rows of the frame after the column drop.
df.head(614)

## Label Encoder

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace each listed categorical column (including the Loan_Status target)
# with integer codes. A single encoder object is re-fitted for every column,
# so after the loop it only holds the fit for the last column processed.
column = ['Gender','Married','Education','Self_Employed','Property_Area','Loan_Status','Dependents']
lableEncoder = LabelEncoder()
for col in column:
    encoded_values = lableEncoder.fit_transform(df[col])
    df[col] = encoded_values
df.head(n=614)

## Model Training and Testing

In [None]:
# Separate the target column from the feature columns.
target = 'Loan_Status'
X = df.drop(columns=[target])
Y = df[target]

In [None]:
from sklearn.model_selection import cross_val_score,train_test_split 
def classify(model, x, y, test_size=0.25, random_state=105, cv=5):
    """Fit ``model`` on a hold-out split and print two accuracy figures.

    The data is split once into train/test parts, the model is fitted on the
    training part, and its score on the test part is printed as a percentage.
    A k-fold cross-validation score over the full ``x``/``y`` (i.e. including
    the rows held out above) is then computed and its mean printed as a
    percentage.

    Parameters
    ----------
    model : estimator
        Object providing ``fit(x, y)`` and ``score(x, y)``. It is fitted in
        place on the training part of the split.
    x : feature matrix
    y : target values
    test_size : float, default 0.25
        Fraction of rows held out for the hold-out score.
    random_state : int, default 105
        Seed for the train/test split so the hold-out score is repeatable.
    cv : int, default 5
        Number of cross-validation folds.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )
    model.fit(x_train, y_train)
    print("Accuracy is ", model.score(x_test, y_test) * 100)
    score = cross_val_score(model, x, y, cv=cv)
    print("Cross validation is ,", np.mean(score) * 100)

In [None]:
# Logistic Regression: evaluate with the shared classify() helper.
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
classify(model=model, x=X, y=Y)

In [None]:
# Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier

# Seed the estimator so the printed scores are the same on every
# Restart & Run All; 105 is the same seed value classify() uses for its split.
model = DecisionTreeClassifier(random_state=105)
classify(model,X,Y)

In [None]:
# Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier

# Seed the estimator so the printed scores are the same on every
# Restart & Run All; 105 is the same seed value classify() uses for its split.
model = RandomForestClassifier(random_state=105)
classify(model,X,Y)

In [None]:
# Extra Trees Classifier
from sklearn.ensemble import ExtraTreesClassifier

# Seed the estimator so the printed scores are the same on every
# Restart & Run All; 105 is the same seed value classify() uses for its split.
model = ExtraTreesClassifier(random_state=105)
classify(model,X,Y)