In [43]:
# ------------------------------------------------------------------
# Build the Support Vector Classifier Model
# Predict the loan approval status based on 
# Gender, Marital Status, Credit History, Income and Loan Amount
# ------------------------------------------------------------------

# Import Libraries and read csv file
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as sns 
from sklearn.model_selection import train_test_split 
from sklearn import metrics
%matplotlib inline

In [44]:
credit=pd.read_csv('01Exercise1.csv')
credit.head()


Unnamed: 0,gender,married,ch,income,loanamt,status
0,Male,No,1.0,5849,,Y
1,Male,Yes,1.0,4583,128.0,N
2,Male,Yes,1.0,3000,66.0,Y
3,Male,Yes,1.0,2583,120.0,Y
4,Male,No,1.0,6000,141.0,Y


In [45]:
#find out columns with missing values
credit.isnull().sum()

gender     13
married     3
ch         50
income      0
loanamt    22
status      0
dtype: int64

In [46]:
# Replace Missing Values. Drop the rows.
credit.dropna(inplace=True)

In [47]:
credit.isnull().sum()

gender     0
married    0
ch         0
income     0
loanamt    0
status     0
dtype: int64

In [48]:
# Drop irrelevant columns based on business sense


In [49]:
# Create Dummy variables
gender = pd.get_dummies(credit['gender'],drop_first=True)
married = pd.get_dummies(credit['married'],drop_first=True)
credit.drop(['gender','married'],axis=1,inplace=True)
credit = pd.concat([credit,gender,married],axis=1)

In [50]:
credit.replace({'status': 'Y'}, 1, inplace=True)
credit.replace({'status': 'N'}, 0, inplace=True)

In [51]:
# Normalize the data (Income and Loan Amount) Using StandardScaler
from sklearn.preprocessing import StandardScaler
scalar_ = StandardScaler()

credit['income'] = scalar_.fit_transform(credit[['income']])
credit['loanamt'] = scalar_.fit_transform(credit[['loanamt']])
credit

Unnamed: 0,ch,income,loanamt,status,Male,Yes
1,1.0,-0.128073,-0.194250,0,1,1
2,1.0,-0.392077,-0.971015,1,1,1
3,1.0,-0.461621,-0.294478,1,1,1
4,1.0,0.108246,-0.031380,1,1,0
5,1.0,0.011017,1.547205,1,1,1
...,...,...,...,...,...,...
609,1.0,-0.408754,-0.908372,1,0,0
610,1.0,-0.207624,-1.296754,1,1,1
611,1.0,0.453802,1.371807,1,1,1
612,1.0,0.372249,0.544929,1,1,1


In [52]:
# Create the X (Independent) and Y (Dependent) dataframes
X=credit.drop('status',axis=1)
X

Unnamed: 0,ch,income,loanamt,Male,Yes
1,1.0,-0.128073,-0.194250,1,1
2,1.0,-0.392077,-0.971015,1,1
3,1.0,-0.461621,-0.294478,1,1
4,1.0,0.108246,-0.031380,1,0
5,1.0,0.011017,1.547205,1,1
...,...,...,...,...,...
609,1.0,-0.408754,-0.908372,0,0
610,1.0,-0.207624,-1.296754,1,1
611,1.0,0.453802,1.371807,1,1
612,1.0,0.372249,0.544929,1,1


In [53]:
Y=credit['status']
Y

1      0
2      1
3      1
4      1
5      1
      ..
609    1
610    1
611    1
612    1
613    0
Name: status, Length: 529, dtype: int64

In [54]:
# Split the X and Y dataset into training and testing set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test=train_test_split(X,Y, test_size=0.30, random_state=101)

In [55]:
# Import and build Support Vector Classifier
from sklearn import svm
clf = svm.SVC(kernel='linear')
clf.fit(X_train, y_train)

SVC(kernel='linear')

In [56]:
# Predict the outcome using Test data
predicted = clf.predict(X_test)
predicted

array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 0], dtype=int64)

In [57]:
# Build the conufsion matrix and get the accuracy/score
print("Dogruluq:",metrics.accuracy_score(y_test, y_pred))

Dogruluq: 0.7484276729559748


In [58]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

In [59]:
cm2 = confusion_matrix(y_test,predicted)
cm2

array([[ 22,  32],
       [  3, 102]], dtype=int64)