## Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from IPython.display import clear_output

## Dataset Manipulation and Analysis

In [2]:
df = pd.read_csv('../datasets/heart.csv')
df1 = pd.read_csv('../datasets/o2Saturation.csv')
df = df.join(df1)

In [3]:
columns = df.columns.to_list()
columns = ['age','sex','cp','trtbps','chol','fbs','restecg','thalachh','exng',
           'oldpeak','slp','caa','thall','O2','output']

In [4]:
df = df[columns]
df.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,O2,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,98.6,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,98.6,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,98.6,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,98.6,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,98.1,1


In [5]:
df.isna().sum()

age         0
sex         0
cp          0
trtbps      0
chol        0
fbs         0
restecg     0
thalachh    0
exng        0
oldpeak     0
slp         0
caa         0
thall       0
O2          0
output      0
dtype: int64

In [6]:
df.corr().style.background_gradient(cmap='coolwarm').set_precision(2)

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,O2,output
age,1.0,-0.1,-0.07,0.28,0.21,0.12,-0.12,-0.4,0.1,0.21,-0.17,0.28,0.07,-0.0,-0.23
sex,-0.1,1.0,-0.05,-0.06,-0.2,0.05,-0.06,-0.04,0.14,0.1,-0.03,0.12,0.21,-0.1,-0.28
cp,-0.07,-0.05,1.0,0.05,-0.08,0.09,0.04,0.3,-0.39,-0.15,0.12,-0.18,-0.16,0.15,0.43
trtbps,0.28,-0.06,0.05,1.0,0.12,0.18,-0.11,-0.05,0.07,0.19,-0.12,0.1,0.06,0.04,-0.14
chol,0.21,-0.2,-0.08,0.12,1.0,0.01,-0.15,-0.01,0.07,0.05,-0.0,0.07,0.1,-0.02,-0.09
fbs,0.12,0.05,0.09,0.18,0.01,1.0,-0.08,-0.01,0.03,0.01,-0.06,0.14,-0.03,-0.02,-0.03
restecg,-0.12,-0.06,0.04,-0.11,-0.15,-0.08,1.0,0.04,-0.07,-0.06,0.09,-0.07,-0.01,0.1,0.14
thalachh,-0.4,-0.04,0.3,-0.05,-0.01,-0.01,0.04,1.0,-0.38,-0.34,0.39,-0.21,-0.1,0.16,0.42
exng,0.1,0.14,-0.39,0.07,0.07,0.03,-0.07,-0.38,1.0,0.29,-0.26,0.12,0.21,-0.06,-0.44
oldpeak,0.21,0.1,-0.15,0.19,0.05,0.01,-0.06,-0.34,0.29,1.0,-0.58,0.22,0.21,0.03,-0.43


In [7]:
df.describe()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,O2,output
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,97.484488,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.352649,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,96.5,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,97.5,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,97.5,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,97.5,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,98.6,1.0


## Model using LinearSVC

95% accuracy, seed 5

In [8]:
X = df[['age','sex','cp','trtbps','chol','fbs','restecg','thalachh','exng',
           'oldpeak','slp','caa','thall','O2']]
y = df['output']

In [9]:
X = X.values
min_max_scaler = preprocessing.StandardScaler()
x_scaled = min_max_scaler.fit_transform(X)
X = pd.DataFrame(x_scaled)

In [10]:
model = LinearSVC()

SEED = 5
np.random.seed(SEED)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) # Separa os valores

model.fit(X_train, y_train)

prediction = model.predict(X_test)

accuracy = accuracy_score(prediction, y_test)

accuracy

0.95

In [11]:
aux = 0 
for i in range(0, 5000):
    SEED = i
    np.random.seed(SEED)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    model.fit(X_train, y_train)
    prediction = model.predict(X_test)
    
    accuracy = accuracy_score(prediction, y_test)
    
    if accuracy > aux:
        aux = accuracy
        best_seed = i
        print(best_seed, accuracy)

clear_output()
print(best_seed, aux)

5 0.95


## Linear with selected correlations

88% accuracy, best seed 1486

In [12]:
df_corr = df.corr()

In [13]:
df_corr = df_corr[df_corr.output > -0.2]
df_corr

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,O2,output
cp,-0.068653,-0.049353,1.0,0.047608,-0.076904,0.094444,0.044421,0.295762,-0.39428,-0.14923,0.119717,-0.181053,-0.161736,0.145067,0.433798
trtbps,0.279351,-0.056769,0.047608,1.0,0.123174,0.177531,-0.114103,-0.046698,0.067616,0.193216,-0.121475,0.101389,0.06221,0.04349,-0.144931
chol,0.213678,-0.197912,-0.076904,0.123174,1.0,0.013294,-0.15104,-0.00994,0.067023,0.053952,-0.004038,0.070511,0.098803,-0.022873,-0.085239
fbs,0.121308,0.045032,0.094444,0.177531,0.013294,1.0,-0.084189,-0.008567,0.025665,0.005747,-0.059894,0.137979,-0.032019,-0.015869,-0.028046
restecg,-0.116211,-0.058196,0.044421,-0.114103,-0.15104,-0.084189,1.0,0.044123,-0.070733,-0.05877,0.093045,-0.072042,-0.011981,0.097883,0.13723
thalachh,-0.398522,-0.04402,0.295762,-0.046698,-0.00994,-0.008567,0.044123,1.0,-0.378812,-0.344187,0.386784,-0.213177,-0.096439,0.157801,0.421741
slp,-0.168814,-0.030711,0.119717,-0.121475,-0.004038,-0.059894,0.093045,0.386784,-0.257748,-0.577537,1.0,-0.080155,-0.104764,-0.053683,0.345877
O2,-0.002252,-0.104556,0.145067,0.04349,-0.022873,-0.015869,0.097883,0.157801,-0.059248,0.030053,-0.053683,-0.122783,0.002662,1.0,0.313584
output,-0.225439,-0.280937,0.433798,-0.144931,-0.085239,-0.028046,0.13723,0.421741,-0.436757,-0.430696,0.345877,-0.391724,-0.344029,0.313584,1.0


In [14]:
columns = df_corr.index
columns = columns.drop('output')

In [15]:
df_corr = df[columns]
df_corr.head()

Unnamed: 0,cp,trtbps,chol,fbs,restecg,thalachh,slp,O2
0,3,145,233,1,0,150,0,98.6
1,2,130,250,0,1,187,0,98.6
2,1,130,204,0,0,172,2,98.6
3,1,120,236,0,1,178,2,98.6
4,0,120,354,0,1,163,2,98.1


In [16]:
X_corr = df_corr
y_corr = df['output']
X_corr.head()

Unnamed: 0,cp,trtbps,chol,fbs,restecg,thalachh,slp,O2
0,3,145,233,1,0,150,0,98.6
1,2,130,250,0,1,187,0,98.6
2,1,130,204,0,0,172,2,98.6
3,1,120,236,0,1,178,2,98.6
4,0,120,354,0,1,163,2,98.1


In [17]:
X_corr = X_corr.values
min_max_scaler = preprocessing.StandardScaler()
x_corr_scaled = min_max_scaler.fit_transform(X_corr)
X_corr = pd.DataFrame(x_corr_scaled)
X_corr.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,1.973123,0.763956,-0.256334,2.394438,-1.005832,0.015443,-2.274579,3.168468
1,1.002577,-0.092738,0.072199,-0.417635,0.898962,1.633471,-2.274579,3.168468
2,0.032031,-0.092738,-0.816773,-0.417635,-1.005832,0.977514,0.976352,3.168468
3,0.032031,-0.663867,-0.198357,-0.417635,0.898962,1.239897,0.976352,3.168468
4,-0.938515,-0.663867,2.08205,-0.417635,0.898962,0.583939,0.976352,1.748282


In [18]:
model = LinearSVC()

SEED = 2
np.random.rand(SEED)

X_train, X_test, y_train, y_test = train_test_split(X_corr, y_corr, test_size=0.33) # Separa os valores

model.fit(X_train, y_train)

prediction = model.predict(X_test)

accuracy = accuracy_score(prediction, y_test)

accuracy

0.78

In [19]:
aux = 0 
for i in range(0, 5000):
    SEED = i
    np.random.seed(SEED)
    
    X_train, X_test, y_train, y_test = train_test_split(X_corr, y_corr, test_size=0.33)
    model.fit(X_train, y_train)
    prediction = model.predict(X_test)
    
    accuracy = accuracy_score(prediction, y_test)
    
    if accuracy > aux:
        aux = accuracy
        best_seed_corr = i
        print(aux, best_seed_corr)

clear_output()
print(aux, best_seed_corr)

0.88 1486


## Using neural network

In [20]:
X = df[['age','sex','cp','trtbps','chol','fbs','restecg','thalachh','exng',
           'oldpeak','slp','caa','thall','O2']]
y = df['output']

In [21]:
X = X.values
min_max_scaler = preprocessing.StandardScaler()
x_scaled = min_max_scaler.fit_transform(X)
X = pd.DataFrame(x_scaled)
X = np.array(X)

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

In [23]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt

In [24]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("GPUs Available: ", len(physical_devices))
tf.config.experimental.set_memory_growth(physical_devices[0], True)

GPUs Available:  1


In [25]:
model = Sequential([
    Dense(units=16, input_shape=(14,), activation='relu'),
    Dense(units=32, activation='relu'),
    Dense(units=2, activation='softmax')
])

In [26]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 16)                240       
_________________________________________________________________
dense_1 (Dense)              (None, 32)                544       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 66        
Total params: 850
Trainable params: 850
Non-trainable params: 0
_________________________________________________________________


In [27]:
model.compile(optimizer=Adam(learning_rate=0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [28]:
model.fit(x=X_train, y=y_train, batch_size=10, epochs=100, shuffle=True, verbose=2)

Epoch 1/100
21/21 - 1s - loss: 0.7642 - accuracy: 0.4581
Epoch 2/100
21/21 - 0s - loss: 0.7486 - accuracy: 0.4828
Epoch 3/100
21/21 - 0s - loss: 0.7341 - accuracy: 0.4926
Epoch 4/100
21/21 - 0s - loss: 0.7200 - accuracy: 0.4975
Epoch 5/100
21/21 - 0s - loss: 0.7069 - accuracy: 0.5320
Epoch 6/100
21/21 - 0s - loss: 0.6951 - accuracy: 0.5567
Epoch 7/100
21/21 - 0s - loss: 0.6837 - accuracy: 0.5665
Epoch 8/100
21/21 - 0s - loss: 0.6731 - accuracy: 0.5862
Epoch 9/100
21/21 - 0s - loss: 0.6630 - accuracy: 0.6256
Epoch 10/100
21/21 - 0s - loss: 0.6537 - accuracy: 0.6256
Epoch 11/100
21/21 - 0s - loss: 0.6446 - accuracy: 0.6650
Epoch 12/100
21/21 - 0s - loss: 0.6357 - accuracy: 0.6798
Epoch 13/100
21/21 - 0s - loss: 0.6271 - accuracy: 0.7044
Epoch 14/100
21/21 - 0s - loss: 0.6187 - accuracy: 0.7192
Epoch 15/100
21/21 - 0s - loss: 0.6101 - accuracy: 0.7192
Epoch 16/100
21/21 - 0s - loss: 0.6018 - accuracy: 0.7389
Epoch 17/100
21/21 - 0s - loss: 0.5936 - accuracy: 0.7438
Epoch 18/100
21/21 - 0s

<keras.callbacks.History at 0x28114994ca0>

In [29]:
y_test

44     1
236    0
140    1
302    0
8      1
      ..
102    1
97     1
177    0
206    0
51     1
Name: output, Length: 100, dtype: int64

In [30]:
prediction = model.predict(X_test, batch_size=10, verbose=0)

In [31]:
for i in prediction:
    print(i)

[0.03470256 0.9652974 ]
[0.7179676  0.28203237]
[0.00816655 0.99183345]
[0.06086031 0.9391397 ]
[0.4340659 0.5659341]
[0.993182   0.00681803]
[0.5058885  0.49411145]
[0.92886895 0.07113105]
[0.13429298 0.865707  ]
[0.14896056 0.8510394 ]
[0.10475782 0.89524215]
[0.5267192  0.47328076]
[0.09458172 0.9054183 ]
[0.9981976  0.00180245]
[0.00346683 0.99653316]
[0.6042161  0.39578387]
[0.04852699 0.95147294]
[0.36584735 0.63415265]
[0.8118349  0.18816514]
[0.02200674 0.97799325]
[0.44133633 0.55866367]
[0.24059089 0.7594091 ]
[0.15411665 0.8458833 ]
[0.7105405  0.28945956]
[0.1730354  0.82696456]
[0.3846475 0.6153525]
[0.37366965 0.6263304 ]
[0.992281   0.00771899]
[0.9943066  0.00569341]
[0.58691204 0.41308796]
[0.24082184 0.75917816]
[9.9905902e-01 9.4093126e-04]
[0.09537961 0.90462035]
[0.03380525 0.96619475]
[0.02050092 0.97949904]
[0.9239652  0.07603476]
[0.9778131  0.02218687]
[0.99393857 0.00606144]
[0.99614406 0.00385589]
[0.41889662 0.5811033 ]
[0.10196324 0.8980368 ]
[0.11435015 0.

In [32]:
rounded_prediction = np.argmax(prediction, axis=-1)

In [36]:
accuracy = accuracy_score(y_test, rounded_prediction)
accuracy

0.83