In [1]:
# Import the Libraries
import numpy as np
import pandas as pd
import random
from sklearn import linear_model, model_selection, metrics, svm, preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier

In [2]:
# Import the Data
data = pd.read_csv('./SAheart.data', sep=',', index_col=0)
data.head()

Unnamed: 0_level_0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
row.names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1
2,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1
3,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0
4,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1
5,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1


In [3]:
# Preprocess the Data
data['famhist_true'] = data['famhist'] == 'Present'
data['famhist_false'] = data['famhist'] == 'Absent'
data = data.drop(['famhist'], axis=1)
data.head()

Unnamed: 0_level_0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age,chd,famhist_true,famhist_false
row.names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,160,12.0,5.73,23.11,49,25.3,97.2,52,1,True,False
2,144,0.01,4.41,28.61,55,28.87,2.06,63,1,False,True
3,118,0.08,3.48,32.28,52,29.14,3.81,46,0,True,False
4,170,7.5,6.41,38.03,51,31.99,24.26,58,1,True,False
5,134,13.6,3.5,27.78,60,25.99,57.34,49,1,True,False


In [4]:
# Split the Data
np.random.seed(42)
random.seed(42)
train, test = model_selection.train_test_split(data)
x_train = train.loc[:, train.columns != 'chd']
y_train = train['chd']
x_test = test.loc[:, test.columns != 'chd']
y_test = test['chd']

In [5]:
## LR
# Fit the Model
LR_model = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial').fit(x_train, y_train)
# Predict Class
predict_class = LR_model.predict(x_test)
# Predict Probability
predict_prob = LR_model.predict_proba(x_test)
# Evaluate
predict_acc = LR_model.score(x_test, y_test)
print(predict_acc)

0.7413793103448276




In [6]:
## SVM
# Fit the Model
SVM_model = svm.SVC(probability=True)
SVM_model.fit(x_train, y_train)
# Predict Class
predict_class = SVM_model.predict(x_test)
# Predict Probability
predict_prob = SVM_model.predict_proba(x_test)
# Evaluate
predict_acc = accuracy_score(y_test, predict_class)
print(predict_acc)

0.646551724137931




In [7]:
## NN
# Fit the Model
NN_model = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
                    solver='sgd', verbose=10, tol=1e-4, random_state=1,
                    learning_rate_init=.1)
NN_model.fit(x_train, y_train)
# Evaluate
predict_acc = NN_model.score(x_test, y_test)
print(predict_acc)

Iteration 1, loss = 10.27302717
Iteration 2, loss = 0.72165847
Iteration 3, loss = 0.71698188
Iteration 4, loss = 0.73195543
Iteration 5, loss = 0.75215696
Iteration 6, loss = 0.77387214
Iteration 7, loss = 0.79915465
Iteration 8, loss = 0.82132905
Iteration 9, loss = 0.83807534
Iteration 10, loss = 0.85632412
0.6379310344827587


