In [None]:
# Import libraries. You may or may not use all of these.
!pip install -q git+https://github.com/tensorflow/docs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

In [None]:
# Import data
!wget https://cdn.freecodecamp.org/project-data/health-costs/insurance.csv
dataset = pd.read_csv('insurance.csv')
dataset.tail()

In [None]:
# Encode the two binary text columns as 0/1.
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: that chained in-place form is deprecated in pandas 2.x and
# silently leaves `dataset` unchanged under copy-on-write.
dataset['sex'] = dataset['sex'].map({'male': 1, 'female': 0})
dataset['smoker'] = dataset['smoker'].map({'no': 0, 'yes': 1})

# One-hot encode 'region' (one indicator column per distinct value).
dataset = pd.get_dummies(dataset, columns=['region'])

# Cast every column to int so the indicator columns become 0/1.
# NOTE(review): astype(int) also truncates any fractional columns in the
# dataset -- confirm that this loss of precision is intended.
dataset = dataset.astype(int)
dataset.tail()

In [None]:
# Reproducible 80/20 split: sample 80% of the rows for training (fixed seed)
# and keep every remaining row for testing.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

# Separate the 'expenses' target from the input features of each split.
train_features = train_dataset.copy()
train_labels = train_features.pop('expenses')

test_features = test_dataset.copy()
test_labels = test_features.pop('expenses')

In [None]:
def norm_data(df):
  """Min-max scale every column of ``df`` using its own min and max.

  Args:
    df: DataFrame of numeric columns.

  Returns:
    A DataFrame of the same shape where each column is
    ``(value - column_min) / (column_max - column_min)``. A constant column
    (max == min) is mapped to 0 instead of NaN.
  """
  col_min = df.min()
  col_range = df.max() - col_min
  # A constant column has zero range; dividing by it would give 0/0 = NaN.
  col_range = col_range.where(col_range != 0, 1)
  return (df - col_min) / col_range

# norm_data scales a frame by that frame's own min/max, so it is only used for
# the training split.
train_features_norm = norm_data(train_features)

# Scale the test split with the TRAINING min/range. Normalizing it with its
# own statistics would put train and test on different scales, so the model
# would be evaluated on inputs that are shifted relative to what it was fit on.
train_min = train_features.min()
train_range = train_features.max() - train_min
train_range = train_range.where(train_range != 0, 1)  # avoid division by zero
test_features_norm = (test_features - train_min) / train_range

train_features_norm.head()


In [None]:
# Convert the pandas features/labels of both splits into NumPy arrays.
train_features_arr, test_features_arr, train_labels_arr, test_labels_arr = (
    np.array(frame)
    for frame in (train_features_norm, test_features_norm, train_labels, test_labels)
)

# Fully connected regressor: three ReLU hidden layers of decreasing width
# followed by a single linear output unit.
n_features = train_features_arr.shape[1]
hidden_layers = [
    tf.keras.layers.Dense(units, activation='relu') for units in (64, 32, 16)
]
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(n_features,)),
    *hidden_layers,
    tf.keras.layers.Dense(1),
])
model.summary()

In [None]:
# Train with Adam on mean absolute error; MAE and MSE are also tracked as
# metrics (in that order).
adam = tf.keras.optimizers.Adam(learning_rate=0.1)
model.compile(optimizer=adam, loss='mean_absolute_error', metrics=['mae', 'mse'])

In [None]:
# Fit for 100 epochs in mini-batches of 25, holding out 20% of the training
# arrays for validation.
model.fit(
    train_features_arr,
    train_labels_arr,
    epochs=100,
    batch_size=25,
    validation_split=0.2,
)

In [None]:
# RUN THIS CELL TO TEST YOUR MODEL. DO NOT MODIFY CONTENTS.
# Test model by checking how well the model generalizes using the test set.
# evaluate() is unpacked as (loss, mae, mse): the loss followed by the two
# metrics the model was compiled with.
loss, mae, mse = model.evaluate(test_features_arr, test_labels_arr, verbose=2)

print("Testing set Mean Abs Error: {:5.2f} expenses".format(mae))

# Pass/fail threshold of the challenge: test MAE must be below 3500.
if mae < 3500:
  print("You passed the challenge. Great job!")
else:
  print("The Mean Abs Error must be less than 3500. Keep trying.")

# flatten() collapses the model output into a 1-D array for plotting.
test_predictions = model.predict(test_features_arr).flatten()


# Equal aspect ratio so the y = x reference line is drawn at 45 degrees.
a = plt.axes(aspect='equal')

# Blue points: predicted value against the true value of each test row.
plt.scatter(test_labels, test_predictions, label='Predictions', color='blue')

# Red points: true values plotted against themselves (they lie on y = x).
plt.scatter(test_labels, test_labels, label='True values', color='red')

plt.xlabel('True values (expenses)')
plt.ylabel('Predictions (expenses)')
# Same fixed range on both axes.
lims = [0, 50000]
plt.xlim(lims)
plt.ylim(lims)

# Dashed diagonal: where a perfect prediction would fall.
_ = plt.plot(lims, lims, color='black', linestyle='--')

plt.legend()

plt.show()


In [None]:
# Predict expenses for a single hand-written sample.
# NOTE(review): the sample needs one value per column of train_features, in
# the same column order; these numbers look like placeholders -- confirm
# before trusting the prediction.
new_input = np.array([[5000, 25, 3, 1, 4, 100, 12, 200, 1]])

# The original cell called `normalize`, which is not defined anywhere in this
# notebook (NameError). Scale the sample with the training min/range instead,
# i.e. the same statistics norm_data(train_features) applied to the data the
# model was fit on.
feature_min = train_features.min().to_numpy()
feature_range = train_features.max().to_numpy() - feature_min
feature_range = np.where(feature_range == 0, 1, feature_range)  # avoid /0
new_input_normalized = (new_input - feature_min) / feature_range

prediction = model.predict(new_input_normalized)

print("Predicted value (expenses):", prediction[0])