# Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.model_selection import train_test_split
import tensorflow as tf
sb.set()
%matplotlib inline

# Get Data

In [2]:
df = pd.read_csv('heart.csv')
df.head(5)

Unnamed: 0,row.names,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,1,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1
1,2,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1
2,3,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0
3,4,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1
4,5,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1


In [3]:
df.pop('row.names')
df.shape

(462, 10)

In [4]:
df.head(5)

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1
1,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1
2,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0
3,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1
4,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1


In [5]:
def handle_non_num_data(df):
    columns = df.columns.values

    for column in columns:
        text_digit_values = {}

        def convert_to_int(val):
            return text_digit_values[val]

        if df[column].dtype != np.int64 and df[column].dtype != np.float64:
            column_cont = df[column].values.tolist()
            unique_elem = set(column_cont)

            x = 0
            for unique in unique_elem:
                if unique not in text_digit_values:
                    text_digit_values[unique] = x
                    x+=1
            df[column] = list(map(convert_to_int, df[column]))

    return df
df = handle_non_num_data(df)
df.head(5)

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,160,12.0,5.73,23.11,1,49,25.3,97.2,52,1
1,144,0.01,4.41,28.61,0,55,28.87,2.06,63,1
2,118,0.08,3.48,32.28,1,52,29.14,3.81,46,0
3,170,7.5,6.41,38.03,1,51,31.99,24.26,58,1
4,134,13.6,3.5,27.78,1,60,25.99,57.34,49,1


# split Data

In [6]:
X=df.drop(['chd'],axis=1)
y=df['chd']
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=.45,random_state=120)
ytr= pd.DataFrame({'chd' : y_train})
yte= pd.DataFrame({'chd' : y_test})

# Create and Save as CSV File

In [7]:
trainDF = pd.concat([X_train, ytr], axis=1, join='inner')
trainDF.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
39,136,11.2,5.81,31.85,1,75,27.68,22.94,58,1
310,174,3.5,5.26,21.97,1,36,22.04,8.33,59,1
275,136,8.8,4.69,36.07,1,38,26.56,2.78,63,1
98,122,3.2,11.32,35.36,1,55,27.07,0.0,51,1
193,130,0.0,4.89,25.98,0,72,30.42,14.71,23,0


In [8]:
testDF = pd.concat([X_test, yte], axis=1, join='inner')
testDF.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
380,112,1.44,2.71,22.92,0,59,24.81,0.0,52,0
207,124,4.0,6.65,30.84,1,54,28.4,33.51,60,0
121,120,1.4,6.25,20.47,0,60,25.85,8.51,28,0
262,124,1.04,2.84,16.42,1,46,20.17,0.0,61,0
210,134,0.05,8.03,27.95,0,48,26.88,0.0,60,0


In [9]:
trainDF.shape

(254, 10)

In [10]:
testDF.shape

(208, 10)

In [11]:
trainDF.to_csv(r'train.csv', index=False, header=True)
testDF.to_csv(r'test.csv', index=False, header=True)

# Create, Fit, and Compile Model

In [12]:
print("--Make model--")
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(9,  kernel_regularizer=tf.keras.regularizers.L2(0.001), input_shape=(9,), activation='tanh'))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(1,  kernel_regularizer=tf.keras.regularizers.L2(0.001), activation='tanh'))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(2,  kernel_regularizer=tf.keras.regularizers.L2(0.001), activation='tanh'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.summary()

--Make model--
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 9)                 90        
                                                                 
 dropout (Dropout)           (None, 9)                 0         
                                                                 
 dense_1 (Dense)             (None, 1)                 10        
                                                                 
 dropout_1 (Dropout)         (None, 1)                 0         
                                                                 
 dense_2 (Dense)             (None, 2)                 4         
                                                                 
 dropout_2 (Dropout)         (None, 2)                 0         
                                                                 
 dense_3 (Dense)             (None, 1)   

In [13]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])
print("--Fit model--")
model.fit(X_train, y_train, epochs=12, batch_size=10, verbose=2)

--Fit model--
Epoch 1/12
26/26 - 1s - loss: 7.3834 - binary_accuracy: 0.5472 - 1s/epoch - 40ms/step
Epoch 2/12
26/26 - 0s - loss: 5.3026 - binary_accuracy: 0.5394 - 107ms/epoch - 4ms/step
Epoch 3/12
26/26 - 0s - loss: 3.0708 - binary_accuracy: 0.5039 - 106ms/epoch - 4ms/step
Epoch 4/12
26/26 - 0s - loss: 2.1949 - binary_accuracy: 0.5433 - 106ms/epoch - 4ms/step
Epoch 5/12
26/26 - 0s - loss: 1.5203 - binary_accuracy: 0.5551 - 106ms/epoch - 4ms/step
Epoch 6/12
26/26 - 0s - loss: 1.3982 - binary_accuracy: 0.5512 - 109ms/epoch - 4ms/step
Epoch 7/12
26/26 - 0s - loss: 1.5526 - binary_accuracy: 0.5433 - 109ms/epoch - 4ms/step
Epoch 8/12
26/26 - 0s - loss: 1.2602 - binary_accuracy: 0.5787 - 112ms/epoch - 4ms/step
Epoch 9/12
26/26 - 0s - loss: 0.9877 - binary_accuracy: 0.5906 - 101ms/epoch - 4ms/step
Epoch 10/12
26/26 - 0s - loss: 1.0491 - binary_accuracy: 0.5472 - 109ms/epoch - 4ms/step
Epoch 11/12
26/26 - 0s - loss: 0.7683 - binary_accuracy: 0.6575 - 106ms/epoch - 4ms/step
Epoch 12/12
26/26 

<keras.callbacks.History at 0x1f556516080>

In [14]:
print("--Evaluate model--")
model_loss1, model_acc1 = model.evaluate(X_train,  y_train, verbose=2)
model_loss2, model_acc2 = model.evaluate(X_test,  y_test, verbose=2)
print(f"Train / Test Accuracy: {model_acc1*100:.1f}% / {model_acc2*100:.1f}%")

--Evaluate model--
8/8 - 0s - loss: 0.6767 - binary_accuracy: 0.6260 - 135ms/epoch - 17ms/step
7/7 - 0s - loss: 0.6610 - binary_accuracy: 0.6875 - 31ms/epoch - 4ms/step
Train / Test Accuracy: 62.6% / 68.8%
