In [34]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [35]:
ds = pd.read_csv("./Datasets/insurance.csv")

In [36]:
ds.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [37]:
ds_one_hot = pd.get_dummies(ds)

In [38]:
ds_one_hot.head(10)
# ds

Unnamed: 0,age,bmi,children,charges,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
0,19,27.9,0,16884.924,1,0,0,1,0,0,0,1
1,18,33.77,1,1725.5523,0,1,1,0,0,0,1,0
2,28,33.0,3,4449.462,0,1,1,0,0,0,1,0
3,33,22.705,0,21984.47061,0,1,1,0,0,1,0,0
4,32,28.88,0,3866.8552,0,1,1,0,0,1,0,0
5,31,25.74,0,3756.6216,1,0,1,0,0,0,1,0
6,46,33.44,1,8240.5896,1,0,1,0,0,0,1,0
7,37,27.74,3,7281.5056,1,0,1,0,0,1,0,0
8,37,29.83,2,6406.4107,0,1,1,0,1,0,0,0
9,60,25.84,0,28923.13692,1,0,1,0,0,1,0,0


In [39]:
ds.shape, ds_one_hot.shape


((1338, 7), (1338, 12))

In [40]:
x = ds_one_hot.drop("charges", axis=1)
y = ds_one_hot["charges"]


In [41]:
x.head()

Unnamed: 0,age,bmi,children,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
0,19,27.9,0,1,0,0,1,0,0,0,1
1,18,33.77,1,0,1,1,0,0,0,1,0
2,28,33.0,3,0,1,1,0,0,0,1,0
3,33,22.705,0,0,1,1,0,0,1,0,0
4,32,28.88,0,0,1,1,0,0,1,0,0


In [42]:
y.head()

0    16884.92400
1     1725.55230
2     4449.46200
3    21984.47061
4     3866.85520
Name: charges, dtype: float64

In [43]:
x_tr, x_ts, y_tr, y_ts = train_test_split(
    x, y, test_size=0.20, random_state=42)


In [44]:
x_tr.head()

Unnamed: 0,age,bmi,children,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
560,46,19.95,2,1,0,1,0,0,1,0,0
1285,47,24.32,0,1,0,1,0,1,0,0,0
1142,52,24.86,0,1,0,1,0,0,0,1,0
969,39,34.32,5,1,0,1,0,0,0,1,0
486,54,21.47,3,1,0,1,0,0,1,0,0


In [45]:
x_tr.head()

Unnamed: 0,age,bmi,children,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
560,46,19.95,2,1,0,1,0,0,1,0,0
1285,47,24.32,0,1,0,1,0,1,0,0,0
1142,52,24.86,0,1,0,1,0,0,0,1,0
969,39,34.32,5,1,0,1,0,0,0,1,0
486,54,21.47,3,1,0,1,0,0,1,0,0


In [46]:
y_tr.head()

560      9193.83850
1285     8534.67180
1142    27117.99378
969      8596.82780
486     12475.35130
Name: charges, dtype: float64

In [47]:
x_ts.head()

Unnamed: 0,age,bmi,children,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
764,45,25.175,2,1,0,1,0,1,0,0,0
887,36,30.02,0,1,0,1,0,0,1,0,0
890,64,26.885,0,1,0,0,1,0,1,0,0
1293,46,25.745,3,0,1,1,0,0,1,0,0
259,19,31.92,0,0,1,0,1,0,1,0,0


In [48]:
y_ts.head()

764      9095.06825
887      5272.17580
890     29330.98315
1293     9301.89355
259     33750.29180
Name: charges, dtype: float64

In [49]:
len(x_tr), len(x_ts), len(y_tr), len(y_ts), len(x), len(y)


(1070, 268, 1070, 268, 1338, 1338)

# 1. Model Set up


In [50]:
tf.random.set_seed(42)

In [51]:
insr_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(90, activation="relu"),
    tf.keras.layers.Dense(90, activation="relu"),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(1, activation="relu")
])


In [52]:
insr_model.compile(
    loss=tf.keras.losses.mae,
    optimizer=tf.optimizers.Adamax(learning_rate=0.01),
    metrics=tf.metrics.mean_absolute_error
)

In [53]:
insr_model.fit(tf.expand_dims(x_tr, axis=-1), y_tr, epochs=190, verbose=0)


<keras.callbacks.History at 0x1d6ec2fe320>

In [54]:
insr_model.evaluate(x_ts, y_ts)




[8543.3125, 8543.3125]

# Improving the model accuracy


In [55]:
tf.random.set_seed(42)

In [56]:
insr_model_impr = tf.keras.models.Sequential([
    tf.keras.layers.Dense(228, activation="relu"),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(156, activation="relu"),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(1, activation="relu"),
])


In [57]:
insr_model_impr.compile(
    loss=tf.losses.mae,
    optimizer=tf.optimizers.Adamax(learning_rate=0.002),
    metrics=tf.metrics.mean_absolute_error
)

In [58]:
insr_model_impr.fit(tf.expand_dims(x_tr, axis=-1), y_tr, epochs=70, verbose=0)


<keras.callbacks.History at 0x1d6ee420700>

In [59]:
insr_model_impr.evaluate(x_ts, y_ts)




[12968.3173828125, 12968.3173828125]