In [None]:
# Install the third-party packages this notebook needs (run once per environment).
%pip install matplotlib numpy pandas tensorflow-cpu tensorflow-docs pydot-ng graphviz

In [None]:
# Import libraries. You may or may not use all of these.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow import keras
from keras import layers

import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

import os.path

In [None]:
# Import data
if not os.path.isfile('insurance.csv'):
	!wget 'https://cdn.freecodecamp.org/project-data/health-costs/insurance.csv'

dataset = pd.read_csv('insurance.csv')
dataset.tail()

In [None]:
# encode categorical data
#
# Every text (object-dtype) column is replaced by integer ids produced by a
# StringLookup layer whose vocabulary is that column's own unique values.
# Columns with any other dtype are left untouched.

for column in dataset.columns:
	if dataset[column].dtype != 'object':
		continue  # not a text column; nothing to encode
	encoder = layers.StringLookup(vocabulary=dataset[column].unique())
	encoded_ids = encoder(dataset[column]).numpy()
	dataset[column] = pd.Series(encoded_ids)

display(dataset.head())

In [None]:
# split dataset for training and testing
#
# The first 80% of the rows (in file order, no shuffling) become the training
# set and the remaining 20% the test set.
#
# Positional .iloc slices are used instead of np.split: calling np.split on a
# DataFrame goes through DataFrame.swapaxes, which pandas has deprecated.
# Each part is an explicit copy because the next cell pops the label column
# out of both frames in place; copying keeps that mutation away from `dataset`.
split_index = int(0.8 * len(dataset))
train_dataset = dataset.iloc[:split_index].copy()
test_dataset = dataset.iloc[split_index:].copy()

display(train_dataset.head())
display(test_dataset.head())

In [None]:
# pop off expenses column for labels
# pop() removes the column from each frame in place and returns it, so after
# this cell the frames hold only features and the *_labels Series hold the
# targets. Running the cell twice without re-creating the frames fails,
# because the column is already gone.
label_column = 'expenses'
train_labels = train_dataset.pop(label_column)
test_labels = test_dataset.pop(label_column)

In [None]:
# create normalizer
# The Normalization layer rescales each feature column to roughly zero mean
# and unit variance. adapt() computes those per-column statistics from the
# training features only. Putting features such as age and bmi on a common
# scale keeps any single column from dominating the gradient updates, which
# generally makes training faster and more stable.
train_features = np.array(train_dataset)
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(train_features)

In [None]:
# build and compile model
#
# A small fully connected regression network: the adapted normalizer, three
# hidden layers of 64 units, and a single-unit output that predicts expenses.
#
# The hidden layers use ReLU activations. A Dense layer without an activation
# is purely linear, and a stack of linear layers collapses into one linear
# map, so without the activations the extra layers add no modelling capacity.
# The output layer stays linear because the target is an unbounded value.

model = tf.keras.Sequential([
	normalizer,
	layers.Dense(64, activation='relu'),
	layers.Dense(64, activation='relu'),
	layers.Dense(64, activation='relu'),
	layers.Dense(1)
])

# Train directly on mean absolute error, the quantity the challenge checks.
# The metrics order matters: the test cell unpacks evaluate() as
# (loss, mae, mse).
model.compile(
	loss=keras.losses.MeanAbsoluteError(),
	optimizer=keras.optimizers.Adam(),
	metrics=['mae', 'mse']
)

In [None]:
# train model
# Fit on the training features and labels for 25 epochs.
model.fit(
	x=train_dataset,
	y=train_labels,
	epochs=25,
)

In [None]:
# see how we are doing
# Report the loss and the compiled metrics (mae, mse) on the held-out test rows.
model.evaluate(
	x=test_dataset,
	y=test_labels,
)

In [None]:
# RUN THIS CELL TO TEST YOUR MODEL. DO NOT MODIFY CONTENTS.
# Test model by checking how well the model generalizes using the test set.
# evaluate() returns the loss followed by the compiled metrics, so the model
# must have been compiled with exactly ['mae', 'mse'] for this unpacking to work.
loss, mae, mse = model.evaluate(test_dataset, test_labels, verbose=2)

print("Testing set Mean Abs Error: {:5.2f} expenses".format(mae))

# Pass/fail threshold for the challenge: mean absolute error below 3500.
if mae < 3500:
  print("You passed the challenge. Great job!")
else:
  print("The Mean Abs Error must be less than 3500. Keep trying.")

# Plot predictions.
# flatten() turns the model output into a 1-D array for plotting.
test_predictions = model.predict(test_dataset).flatten()

# Scatter of true vs. predicted expenses on equal axes; the line drawn from
# lims to lims is the y = x diagonal, where a perfect prediction would fall.
a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
plt.xlabel('True values (expenses)')
plt.ylabel('Predictions (expenses)')
lims = [0, 50000]
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims,lims)
