In [83]:
# ------------------------------------------------------------------
# Build the Support Vector Classifier Model
# Predict the loan approval status based on 
# Gender, Marital Status, Credit History, Income and Loan Amount
# ------------------------------------------------------------------

# Import Libraries and read csv file
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import svm

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
df = pd.read_csv('01Exercise1.csv')
df.head()

Unnamed: 0,gender,married,ch,income,loanamt,status
0,Male,No,1.0,5849,,Y
1,Male,Yes,1.0,4583,128.0,N
2,Male,Yes,1.0,3000,66.0,Y
3,Male,Yes,1.0,2583,120.0,Y
4,Male,No,1.0,6000,141.0,Y


In [84]:
#find out columns with missing values
pd.isnull(df).sum()

gender     13
married     3
ch         50
income      0
loanamt    22
status      0
dtype: int64

In [85]:
# Replace Missing Values. Drop the rows.
df.fillna(df.mean(),inplace=True)
df.dropna(axis=0)

Unnamed: 0,gender,married,ch,income,loanamt,status
0,Male,No,1.0,5849,146.412162,Y
1,Male,Yes,1.0,4583,128.000000,N
2,Male,Yes,1.0,3000,66.000000,Y
3,Male,Yes,1.0,2583,120.000000,Y
4,Male,No,1.0,6000,141.000000,Y
...,...,...,...,...,...,...
609,Female,No,1.0,2900,71.000000,Y
610,Male,Yes,1.0,4106,40.000000,Y
611,Male,Yes,1.0,8072,253.000000,Y
612,Male,Yes,1.0,7583,187.000000,Y


In [86]:
# Drop irrelevant columns based on business sense
df=df.drop(['gender','married'],axis=1)

In [87]:
# Create Dummy variables
df=pd.get_dummies(df)
df.drop('status_N',axis=1, inplace=True)
df.head()

Unnamed: 0,ch,income,loanamt,status_Y
0,1.0,5849,146.412162,1
1,1.0,4583,128.0,0
2,1.0,3000,66.0,1
3,1.0,2583,120.0,1
4,1.0,6000,141.0,1


In [88]:
# Normalize the data (Income and Loan Amount) Using StandardScaler
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(df) 
array = scaler.transform(df)
array
np.set_printoptions(precision=3) 
array[0:5,:]

array([[ 0.452,  0.073,  0.   ,  0.675],
       [ 0.452, -0.134, -0.219, -1.483],
       [ 0.452, -0.394, -0.958,  0.675],
       [ 0.452, -0.462, -0.315,  0.675],
       [ 0.452,  0.098, -0.064,  0.675]])

In [77]:
# Create the X (Independent) and Y (Dependent) dataframes
X = array[:,0:3] 
y = array[:,3]
y=y.astype('int')

# Komentdeki hisse yuxaridaki komentdeki "dataframes" sozune gore array-i df-e cevirmek cehdim olub. 
#    Ancaq y dataframe-i data tipini Float kimi goturduyu ucun Model quranda error verirdi. Ona gore
#    array sheklinde saxlayib davam etdim. Shaig beyin verdiyi suala gelib cixdim: Rescale olmus 
#    array-den menali olan (0.1) seklinde df-e qayitmaq istedim, alinmadi.


# df = pd.DataFrame(array, columns=['ch','income','loanamt','approved'])
# X=df.drop('approved',axis=1)
# y=df.approved

In [91]:
# Split the X and Y dataset into training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [92]:
# Import and build Support Vector Classifier
from sklearn import svm
svm_model=svm.SVC()
svm_model.fit(X_train,y_train)

SVC()

In [93]:
# Predict the outcome using Test data
predict=svm_model.predict(X_test)

In [94]:
# Build the conufsion matrix and get the accuracy/score
acc=accuracy_score(y_test,predict)
acc

0.8292682926829268

In [95]:
matrix=confusion_matrix(y_test,predict)
matrix

array([[14, 19],
       [ 2, 88]], dtype=int64)