In [2]:
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
import time

In [4]:
csv_file = tf.keras.utils.get_file("heart.csv", "https://storage.googleapis.com/download.tensorflow.org/data/heart.csv")

df = pd.read_csv(csv_file)
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,reversible,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,normal,0


In [7]:
df.thal = pd.Categorical(df.thal)
df.thal = df.thal.cat.codes

In [8]:
train_dataset = df.sample(frac = 0.8, random_state = 0)
test_dataset = df.drop(train_dataset.index)

train_labels = train_dataset.pop("target")  # does this in place 
test_labels = test_dataset.pop("target")

In [9]:
def norm(x, train_stats):
    # scale data, x = dataset to scale, train_stats is description of dataset
    return(x - train_stats["mean"]) / train_stats["std"]

In [11]:
train_stats = train_dataset.describe().T

normed_train_data = norm(train_dataset, train_stats)

normed_train_data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
225,0.351009,-1.420082,0.89547,-0.11511,-0.934323,-0.389553,-1.035919,-0.778745,-0.668765,-0.391781,0.687374,-0.744473,-0.44413
152,-1.07386,0.701275,0.89547,0.54683,1.213225,-0.389553,0.985794,-0.087162,1.489116,-0.919976,0.687374,2.434865,1.113543
228,0.022193,-1.420082,0.89547,-0.225433,-0.780926,-0.389553,-0.025063,-0.821968,1.489116,0.840674,0.687374,0.315306,1.113543
201,-1.29307,0.701275,0.89547,-0.942535,1.098178,-0.389553,-1.035919,1.38245,-0.668765,0.136414,0.687374,-0.744473,-0.44413
52,-0.197018,-1.420082,0.89547,-0.11511,0.350371,-0.389553,0.985794,-0.260058,-0.668765,-0.567846,0.687374,-0.744473,-0.44413


In [13]:
test_stats = test_dataset.describe().T

normed_test_data = norm(test_dataset, test_stats)

In [15]:
model = keras.Sequential([
    # input shape is len of col of train data in 1d array form
    keras.layers.Dense(64, activation ="relu", input_shape = [len(normed_train_data.keys())]),
    keras.layers.Dense(64, activation = "relu"),
    keras.layers.Dense(1)
])

optimizer = keras.optimizers.RMSprop(0.001)
model.compile(loss = "mse", optimizer = optimizer,
             metrics = ["mse", "mae"])

In [16]:
history = model.fit(normed_train_data, train_labels, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [27]:
y_pred = model.predict(normed_test_data)

In [28]:
# y_pred = [x[0] for x in y_pred]

keras.losses.MAE(test_labels.values, y_pred)

<tf.Tensor: shape=(61,), dtype=float32, numpy=
array([0.60027325, 0.6386521 , 0.30599478, 0.3189914 , 0.29377082,
       0.4617992 , 0.37352636, 0.28545448, 0.5193871 , 0.4267507 ,
       0.41621056, 0.57659566, 0.31448594, 0.3102503 , 0.36038443,
       0.67665935, 0.78468776, 0.33418894, 0.6072789 , 0.4323941 ,
       0.81607646, 0.47127092, 0.43307233, 0.30169943, 0.409288  ,
       0.43157265, 0.36134693, 0.30971983, 0.3011577 , 0.32135937,
       0.281767  , 0.44444364, 0.29489473, 0.29056332, 0.3385855 ,
       0.46793053, 0.61990017, 0.35353422, 0.33451897, 0.35336122,
       0.31072053, 0.3656328 , 0.35109276, 0.33535376, 0.46225336,
       0.4721593 , 0.36538088, 0.3686503 , 0.49647138, 0.28912616,
       0.5095542 , 0.30722314, 0.5790323 , 0.28602386, 0.40134925,
       0.6278302 , 0.36732483, 0.66884536, 0.54064155, 0.3075116 ,
       0.39769238], dtype=float32)>

In [29]:
model.evaluate(normed_test_data, test_labels)



[0.16178062558174133, 0.16178062558174133, 0.29982826113700867]

**Batching and Prefetching**

In [34]:
model = keras.Sequential([
    # input shape is len of col of train data in 1d array form
    keras.layers.Dense(64, activation ="relu", input_shape = [len(normed_train_data.keys())]),
    keras.layers.Dense(64, activation = "relu"),
    keras.layers.Dense(1)
])

optimizer = keras.optimizers.RMSprop(0.001)
model.compile(loss = "mse", optimizer = optimizer,
             metrics = ["mse", "mae"])

In [35]:
# batching
dataset = tf.data.Dataset.from_tensor_slices(
    (normed_train_data.values, train_labels.values)
).batch(10)

history = model.fit(dataset, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [36]:
model = keras.Sequential([
    # input shape is len of col of train data in 1d array form
    keras.layers.Dense(64, activation ="relu", input_shape = [len(normed_train_data.keys())]),
    keras.layers.Dense(64, activation = "relu"),
    keras.layers.Dense(1)
])

optimizer = keras.optimizers.RMSprop(0.001)
model.compile(loss = "mse", optimizer = optimizer,
             metrics = ["mse", "mae"])

In [37]:
# prefetching
dataset = tf.data.Dataset.from_tensor_slices(
    (normed_train_data.values, train_labels.values)
).batch(10).prefetch(2)  # prefetches 2 batches at a time

history = model.fit(dataset, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

**Parallelizing data extraction**

In [38]:
# mostly used when needed data is stored remotely
tf.data.Dataset.interleave(
    dataset,
    num_parallel_calls=tf.data.experimental.AUTOTUNE  # auto decide optimal num threads to use
)

TypeError: interleave() missing 1 required positional argument: 'map_func'