## Read the dataset

In [None]:
import pandas as pd
import numpy as np
# The plotting API (figure, show, ...) lives in the pyplot submodule; the
# bare `matplotlib` package does not expose it, so alias pyplot as plt.
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
# Load the loan data from loan.csv (path relative to the working directory).
df = pd.read_csv("loan.csv")

## Exploratory Data Analysis

In [None]:
# Preview the first rows of the data (head() shows 5 by default).
df.head()

In [None]:
# Show the dtype pandas assigned to each column.
df.dtypes

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) of the columns.
df.describe()

In [None]:
# Frequency of each Gender value (value_counts() leaves out NaN by default).
df['Gender'].value_counts()

In [None]:
# Histogram of ApplicantIncome using 50 bins.
df['ApplicantIncome'].hist(bins=50)

In [None]:
# Count loans per (Credit_History, Loan_Status) pair and draw the counts as
# stacked bars, one bar per Credit_History value.
temp3 = pd.crosstab(index=df['Credit_History'], columns=df['Loan_Status'])
temp3.plot.bar(stacked=True, grid=False, color=['red', 'blue'])

In [None]:
# Mean of every numeric column within each Loan_Status group.
# numeric_only=True skips the columns that are still non-numeric at this
# point in the notebook (they are encoded or dropped only later); without it
# pandas >= 2.0 raises a TypeError when asked to average them.
temp4 = df.groupby("Loan_Status").mean(numeric_only=True)

In [None]:
# Display the per-Loan_Status means held in temp4.
temp4

#### Check for missing Values

In [None]:
# Number of missing values in each column. The vectorised form gives the same
# counts as summing x.isnull() column by column in a Python-level lambda.
df.isnull().sum()


## Data Preparation

#### Impute missing values

In [None]:
# Replace the missing entries of each listed column with that column's most
# frequent value (mode()[0]).
# The filled Series is assigned back to df instead of calling
# fillna(inplace=True) on df[col]: that chained in-place form works on an
# intermediate object, emits a FutureWarning on pandas 2.2 and leaves df
# unchanged once Copy-on-Write is enabled.
# NOTE(review): LoanAmount looks like a continuous amount; the mode is kept to
# preserve the original behaviour, but a median may be a better fill - confirm.
for col in ['Gender', 'Married', 'Dependents', 'Self_Employed',
            'LoanAmount', 'Loan_Amount_Term', 'Credit_History']:
    df[col] = df[col].fillna(df[col].mode()[0])

#### Transform every feature to numeric

In [None]:
from sklearn.preprocessing import LabelEncoder
# Columns (the target Loan_Status included) to convert to integer codes.
var_mod = ['Gender','Married','Dependents','Education','Self_Employed','Property_Area','Loan_Status']
# A single encoder object is reused: fit_transform refits it on every column,
# so once the loop ends `le` only holds the mapping of the last column in
# var_mod (Loan_Status).
le = LabelEncoder()
for i in var_mod:
    df[i] = le.fit_transform(df[i])
# Display the dtypes to check the result of the encoding.
df.dtypes 

#### Drop the ID column

In [None]:
# Discard the Loan_ID column so it is not part of the data used below.
df = df.drop(columns=["Loan_ID"])

In [None]:
# Display the prepared DataFrame.
df

#### Train and Validation Split

In [None]:
from sklearn.model_selection import train_test_split


In [None]:
# Hold out part of the rows for validation (train_test_split keeps its default
# test size of 25%).
# random_state pins the shuffle so reruns produce the same split and therefore
# comparable metrics; stratify keeps the Loan_Status class ratio the same in
# the training and validation parts.
df_train, df_val = train_test_split(
    df, random_state=42, stratify=df["Loan_Status"]
)
print(df_train.shape)
print(df_val.shape)

## Modelling

In [None]:
from sklearn.linear_model import LogisticRegression

# The features are fed to the model without scaling, so the solver's default
# budget of 100 iterations can run out before it converges
# (ConvergenceWarning); give it more room.
model = LogisticRegression(max_iter=1000)


In [None]:
# For each partition, separate the feature matrix (every column except
# Loan_Status) from the target vector (Loan_Status).
X_train, y_train = df_train.drop(columns="Loan_Status"), df_train["Loan_Status"]
X_val, y_val = df_val.drop(columns="Loan_Status"), df_val["Loan_Status"]

In [None]:
# Fit the model on the training features and labels.
model.fit(X_train, y_train)


## Evaluate

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix

In [None]:
# Predict Loan_Status for the validation rows.
predictions = model.predict(X_val)

# scikit-learn metrics are documented as metric(y_true, y_pred). Accuracy
# happens to give the same number either way, but the documented order is used
# so the call stays consistent with order-sensitive metrics.
accuracy = accuracy_score(y_val, predictions)
print(accuracy)

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are then the true classes and
# columns the predicted ones. Passing the arguments the other way round
# transposes the matrix, mixing up false positives and false negatives.
confusion_matrix(y_val, predictions)