In [None]:
# Import libraries. You may or may not use all of these.

!pip install -q git+https://github.com/tensorflow/docs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

try:
  %tensorflow_version 2.x
except Exception:
  pass

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

!wget https://cdn.freecodecamp.org/project-data/health-costs/insurance.csv
dataset = pd.read_csv('insurance.csv')
dataset.head()



In [None]:
# One-hot encode the categorical columns. dtype=float keeps the indicator
# columns numeric: pandas >= 2.0 emits bool indicators by default, and a frame
# that mixes bool and numeric columns converts to an object array, which Keras
# can fail to turn into a tensor when the frame is passed to fit/predict.
dataset = pd.get_dummies(dataset, columns=['sex', 'smoker', 'region'], dtype=float)

# Reproducible 80/20 split: sample the training rows with a fixed seed and use
# every remaining row as the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)

# Separate the regression target from the features (pop removes the column
# from the frame in place and returns it).
train_labels = train_dataset.pop('expenses')
test_labels = test_dataset.pop('expenses')

# Columns that get standardised; their statistics come from the training split
# only, one row per column after the transpose.
numeric_cols = ['age', 'bmi', 'children']
train_stats_features = train_dataset[numeric_cols].describe().transpose()

def norm_features(df, stats=None, cols=None):
    """Return a copy of ``df`` with the selected columns standardised.

    Each selected column is transformed to ``(value - mean) / std`` using the
    statistics found in ``stats``.

    Args:
        df: DataFrame holding the columns to scale. It is not modified.
        stats: Table indexed by column name with ``'mean'`` and ``'std'``
            columns. Defaults to the module-level ``train_stats_features``.
        cols: Iterable of column names to scale. Defaults to the module-level
            ``numeric_cols``.

    Returns:
        A new DataFrame; columns not listed in ``cols`` are left unchanged.
    """
    # Fall back to the notebook-level statistics so existing one-argument
    # calls keep working unchanged.
    if stats is None:
        stats = train_stats_features
    if cols is None:
        cols = numeric_cols

    scaled = df.copy()
    for col in cols:
        mean = stats.loc[col, 'mean']
        std = stats.loc[col, 'std']
        # A zero std would divide by zero; dividing by 1 leaves the centred
        # values as they are.
        if std == 0:
            std = 1
        scaled[col] = (scaled[col] - mean) / std
    return scaled
# Standardise the feature columns of both splits with the training statistics.
train_dataset, test_dataset = (
    norm_features(split) for split in (train_dataset, test_dataset)
)

# Summary statistics of the training labels; their mean and std define the
# label scaling used for both splits.
train_stats_labels = train_labels.describe()
label_mean, label_std = (train_stats_labels[key] for key in ('mean', 'std'))

# Scale the labels of both splits to (value - mean) / std.
train_labels_norm = train_labels.sub(label_mean).div(label_std)
test_labels_norm = test_labels.sub(label_mean).div(label_std)


In [None]:
def verificar_datos(train_dataset, test_dataset, train_labels_norm, test_labels_norm, train_stats_features, label_std):
    """Print a sanity report about the prepared data.

    The report lists NaN counts for the feature frames and the scaled labels,
    shows the feature statistics table, and states whether the standard
    deviations used for scaling are usable (strictly positive).

    Args:
        train_dataset: Training feature frame (must support ``isna().sum()``).
        test_dataset: Test feature frame (must support ``isna().sum()``).
        train_labels_norm: Scaled training labels.
        test_labels_norm: Scaled test labels.
        train_stats_features: Statistics table with a ``'std'`` column.
        label_std: Standard deviation used to scale the labels.

    Returns:
        None. The report is written to standard output.
    """
    print("🔎 Verificando datasets...\n")

    print("Train NaNs:\n", train_dataset.isna().sum())
    print("Test NaNs:\n", test_dataset.isna().sum())
    print("Train labels NaNs:", train_labels_norm.isna().sum())
    print("Test labels NaNs:", test_labels_norm.isna().sum())

    print("\n📊 Estadísticas de features:")
    print(train_stats_features)

    feature_std = train_stats_features['std']
    # `std > 0` is False for 0 and for NaN alike, so an undefined std is
    # flagged instead of being reported as healthy (NaN == 0 is False).
    if (feature_std > 0).all():
        print("\n✅ Todas las columnas tienen std > 0.")
    else:
        print("\n⚠️ Atención: Hay columnas con std = 0 o NaN (posible división por cero).")

    print("\nLabel std:", label_std)
    # Same reasoning as above: only a strictly positive std is usable.
    if label_std > 0:
        print("✅ Label std correcto.")
    else:
        print("⚠️ Atención: La desviación estándar de las etiquetas es 0 o NaN.")

# Run the sanity report on the prepared splits before training.
verificar_datos(
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    train_labels_norm=train_labels_norm,
    test_labels_norm=test_labels_norm,
    train_stats_features=train_stats_features,
    label_std=label_std,
)


In [None]:
def build_model():
    """Create and compile the regression network.

    The input width is the number of entries in the module-level
    ``train_dataset.keys()``. The network has two 64-unit ReLU layers followed
    by a single linear output unit, and is compiled with Adam (learning rate
    0.001), MSE loss, and MAE/MSE metrics.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    n_features = len(train_dataset.keys())

    stack = [keras.Input(shape=(n_features,))]
    stack.extend(layers.Dense(64, activation='relu') for _ in range(2))
    stack.append(layers.Dense(1))
    net = keras.Sequential(stack)

    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    net.compile(optimizer=adam, loss='mse', metrics=['mae', 'mse'])
    return net

model = build_model()

# Stop once validation MAE has not improved for 50 epochs and roll the model
# back to the weights of its best epoch. Without restore_best_weights the
# model would keep the weights from the last, non-improving epoch, and those
# are the weights the evaluation cell would then score.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_mae', patience=50, restore_best_weights=True
)
# Halve the learning rate after 20 epochs without validation-MAE improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_mae', factor=0.5, patience=20)

# The last 20% of the training rows are held out for validation, which is
# where the val_mae monitored above comes from.
history = model.fit(
    train_dataset, train_labels_norm,
    epochs=500, validation_split=0.2, verbose=1,
    callbacks=[tfdocs.modeling.EpochDots(), early_stop, reduce_lr]
)


In [None]:

# Predict in the scaled label space, then undo the label scaling to get
# predictions in the original expense units.
test_predictions_norm = model.predict(test_dataset).flatten()
test_predictions = (test_predictions_norm * label_std) + label_mean

# evaluate returns the loss followed by the compiled metrics (mae, mse), all
# measured on the scaled labels.
loss, mae_norm, mse = model.evaluate(test_dataset, test_labels_norm, verbose=2)

# The labels were scaled linearly, so absolute errors convert back to expense
# units by multiplying with label_std (the mean shift cancels out).
mae_real = mae_norm * label_std
print("Testing set Mean Abs Error: {:5.2f} expenses".format(mae_real))

if mae_real < 3500:
    print("You passed the challenge. Great job!")
else:
    print("The Mean Abs Error must be less than 3500. Keep trying.")

# Predicted vs. true expenses; points on the red diagonal are perfect predictions.
plt.figure(figsize=(6,6))
plt.scatter(test_labels, test_predictions)
plt.xlabel('True values (expenses)')
plt.ylabel('Predictions (expenses)')
# Keep at least the 0-50000 window, but widen it when a true or predicted
# value exceeds it; a fixed upper limit would silently clip those points.
upper_limit = max(50000, float(test_labels.max()), float(test_predictions.max()))
lims = [0, upper_limit]
plt.xlim(lims)
plt.ylim(lims)
plt.plot(lims, lims, color='red')
plt.show()
