In [26]:
# ------------------------------------------------------------------
# Build the Support Vector Classifier Model
# Predict the loan approval status based on 
# Gender, Marital Status, Credit History, Income and Loan Amount
# ------------------------------------------------------------------

# Import Libraries and read csv file
import pandas as pd
import numpy as np
loan=pd.read_csv('01Exercise1.csv')
loan

Unnamed: 0,gender,married,ch,income,loanamt,status
0,Male,No,1.0,5849,,Y
1,Male,Yes,1.0,4583,128.0,N
2,Male,Yes,1.0,3000,66.0,Y
3,Male,Yes,1.0,2583,120.0,Y
4,Male,No,1.0,6000,141.0,Y
...,...,...,...,...,...,...
609,Female,No,1.0,2900,71.0,Y
610,Male,Yes,1.0,4106,40.0,Y
611,Male,Yes,1.0,8072,253.0,Y
612,Male,Yes,1.0,7583,187.0,Y


In [27]:
#find out columns with missing values
pd.isnull(loan).sum()

gender     13
married     3
ch         50
income      0
loanamt    22
status      0
dtype: int64

In [28]:
# Replace Missing Values. Drop the rows.
loan['loanamt'].fillna(value=loan['loanamt'].mean(), inplace = True)
pd.isnull(loan).sum()

gender     13
married     3
ch         50
income      0
loanamt     0
status      0
dtype: int64

In [29]:
loan.drop('ch',axis=1,inplace=True)
loan.dropna(inplace=True)
loan

Unnamed: 0,gender,married,income,loanamt,status
0,Male,No,5849,146.412162,Y
1,Male,Yes,4583,128.000000,N
2,Male,Yes,3000,66.000000,Y
3,Male,Yes,2583,120.000000,Y
4,Male,No,6000,141.000000,Y
...,...,...,...,...,...
609,Female,No,2900,71.000000,Y
610,Male,Yes,4106,40.000000,Y
611,Male,Yes,8072,253.000000,Y
612,Male,Yes,7583,187.000000,Y


In [30]:
# Drop irrelevant columns based on business sense
loan.drop('gender',axis=1,inplace=True)
loan.dropna(inplace=True)
loan.drop('married',axis=1,inplace=True)
loan.dropna(inplace=True)
loan

Unnamed: 0,income,loanamt,status
0,5849,146.412162,Y
1,4583,128.000000,N
2,3000,66.000000,Y
3,2583,120.000000,Y
4,6000,141.000000,Y
...,...,...,...
609,2900,71.000000,Y
610,4106,40.000000,Y
611,8072,253.000000,Y
612,7583,187.000000,Y


In [31]:
# Create Dummy variables
status = pd.get_dummies(loan['status'],drop_first=True)

In [34]:
# Normalize the data (Income and Loan Amount) Using StandardScaler
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler() 
column_names_to_normalize = ['loanamt', 'income']
x = loan[column_names_to_normalize].values
loan_scaled = scaler.fit_transform(x)

In [40]:
# Create the X (Independent) and Y (Dependent) dataframes
X=pd.DataFrame(np.c_[loan['income']])
y= loan.iloc[:,2].values
x

array([[ 146.41216216, 5849.        ],
       [ 128.        , 4583.        ],
       [  66.        , 3000.        ],
       ...,
       [ 253.        , 8072.        ],
       [ 187.        , 7583.        ],
       [ 133.        , 4583.        ]])

In [42]:
# Split the X and Y dataset into training and testing set
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

In [50]:
# Import and build Support Vector Classifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
svm_clf = SVC()
svm_model = svm_clf.fit(X_train, y_train)

In [52]:
# Predict the outcome using Test data
svm_prediction = svm_clf.predict(X_test)
svm_prediction

array(['N', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y'],
 

In [49]:
# Build the conufsion matrix and get the accuracy/score
from sklearn.metrics import confusion_matrix
print("Accuracy {0:.2f}%".format(100*accuracy_score(svm_prediction, y_test)))
print(confusion_matrix(svm_prediction, y_test))

Accuracy 70.56%
[[  0   1]
 [ 52 127]]
