In [1]:
# Import required libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

In [2]:
# Load the insurance data set.
# The CSV is fetched directly from a public GitHub repository.
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv"
insurance = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows to confirm the load worked.
insurance.head(n=5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [None]:
# Print a concise overview of the frame: columns, non-null counts and dtypes
insurance.info()

In [None]:
# Summary statistics, transposed so that each row describes one column
insurance.describe().transpose()

In [None]:
# Explore pairwise correlation between the numeric columns and plot it as a heatmap.
# sex, smoker and region hold strings, so the frame is restricted to its numeric
# columns first: recent pandas releases raise on non-numeric data in corr()
# instead of silently skipping it.
insurance_corr = insurance.select_dtypes(include='number').corr()

# Compute the matrix once and reuse it for the plot; annot=True writes each
# coefficient into its cell.
dataplot = sns.heatmap(insurance_corr, cmap='YlGnBu', annot = True)

In [None]:
# 3x2 grid of scatter plots of charges: the left column plots against age,
# the right column against bmi; each row colours the points by a different feature.
fig, axes = plt.subplots(3, 2, dpi=100, figsize=(10, 8))

# x-axis feature -> hue feature for rows 0, 1, 2 of that column
hue_by_column = {
    'age': ['smoker', 'bmi', 'region'],
    'bmi': ['smoker', 'age', 'region'],
}

for col_idx, (x_feature, hue_features) in enumerate(hue_by_column.items()):
    for row_idx, hue_feature in enumerate(hue_features):
        sns.scatterplot(
            x=x_feature,
            y='charges',
            data=insurance,
            hue=hue_feature,
            ax=axes[row_idx][col_idx],
        )

plt.tight_layout()


In [None]:
# one-hot encode categorical features into arrays
#insurance_onehot_encoding = pd.get_dummies(insurance)
#insurance_onehot_encoding.head()

In [None]:
# one-hot encode categorical features, dropping the first level of each (drop_first=True)
#insurance_onehot_encoding_reduced = pd.get_dummies(insurance, drop_first = True,)
#insurance_onehot_encoding_reduced.head()

In [None]:
# Separate the label (dependent variable) from the features (independent variables).
# 'region' is left out of the features as well (considered not correlated).
y = insurance['charges']
X = insurance.drop(columns=['charges', 'region'])

In [None]:
# Preview the first five rows of the feature frame
X.head(n=5)

In [None]:
# Preview the first five label values
y.head(n=5)

In [None]:
# Split features and labels into training and test sets
from sklearn.model_selection import train_test_split

# Hold out 15% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
split_parts = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Data preparation: scaling/normalization and one-hot encoding

from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Create a column transformer.
# 'region' is intentionally not listed: it was dropped from X when the features
# were split off, and naming a column that X does not contain makes fitting fail.
# NOTE(review): `ct` is reassigned by the next cell before it is ever fitted,
# so this definition is effectively superseded by that one.
ct = make_column_transformer(

    (MinMaxScaler(),['bmi','age','children']), # rescale these columns to values between 0 and 1
    (OneHotEncoder(handle_unknown='ignore'),['sex','smoker']) # categories unseen during fit encode as all zeros

)


In [None]:
# Build the column transformer used for scaling/normalization and encoding
# (it is only constructed here, not yet fitted).

from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Columns grouped by the preprocessing step applied to them
numeric_columns = ['bmi','age','children']
categorical_columns = ['sex','smoker']

# Numeric columns are rescaled to values between 0 and 1
scaling_step = (MinMaxScaler(), numeric_columns)

# Categorical columns are one-hot encoded with the first category of each dropped
encoding_step = (
    OneHotEncoder(handle_unknown='ignore', drop = 'first'),
    categorical_columns,
)

ct = make_column_transformer(scaling_step, encoding_step)


In [None]:
# Fit the column transformer on the training data only: the test data must stay
# unseen, so it is not used when fitting the transformer.
ct.fit(X_train)

In [None]:
# Apply the fitted transformer (MinMaxScaler normalization + one-hot encoding)
# to the training set and to the test set.
X_train_normal, X_test_normal = ct.transform(X_train), ct.transform(X_test)

In [None]:
# Check which container type the column transformer returned
type(X_train_normal)

In [None]:
# Dimensions of the transformed training data
X_train_normal.shape

In [None]:
# Build, compile and train the neural network

# Fix the TensorFlow seed so that runs are repeatable.
tf.random.set_seed(42)

# 1. Create the model
# The input width is read from the transformed training data instead of being
# hard-coded, so the model keeps matching the column transformer's output if
# the feature set changes.
n_features = X_train_normal.shape[1]

insurance_model = tf.keras.Sequential([
    tf.keras.Input(shape=(n_features,)),
    # A Dense layer applies no activation unless one is given, and two stacked
    # linear layers collapse into a single linear map. ReLU on the hidden layer
    # lets the network learn non-linear relationships.
    tf.keras.layers.Dense(10, activation='relu', name = 'FirstLayer'),
    # Single linear output unit for the regression target.
    tf.keras.layers.Dense(1, name = 'SecondLayer'),
])

# 2. Compile the model
# Mean absolute error is both the optimised loss and the reported metric.
insurance_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    loss=tf.keras.losses.mae,
    metrics=['mae'])

# 3. Fit model
# Early stopping watches the training loss and halts once it has failed to
# improve by at least 0.01 for 20 consecutive epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.01, patience=20)

# `history` keeps the per-epoch metrics for the loss-curve plot further down.
history = insurance_model.fit(x = X_train_normal,  y = y_train, epochs=500, verbose=1,callbacks=[callback])

In [None]:
# Print a summary of the model
insurance_model.summary()

In [None]:
# Draw the model as a top-to-bottom graph with layer names and tensor shapes
plot_options = dict(
    show_shapes=True,
    show_layer_names=True,
    rankdir='TB',
    expand_nested=True,
    dpi=100,
)
tf.keras.utils.plot_model(insurance_model, **plot_options)


In [None]:
# Score the trained model on the held-out test set
insurance_model.evaluate(X_test_normal, y_test)

In [None]:
# Collect the per-epoch values recorded during training into a DataFrame
# (used for the loss / training curve) and preview the first five epochs.
df = pd.DataFrame(data=history.history)
df.head(n=5)

In [None]:
# Training loss per epoch, drawn through the explicit figure/axes interface
loss_fig, loss_ax = plt.subplots(dpi=100)
loss_ax.plot(df['loss'])
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('epochs')