In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# Read the insurance records from 'insurance.csv' (path is relative to the
# notebook's working directory)
df = pd.read_csv(filepath_or_buffer='insurance.csv')

# Preview the top five rows to see which columns the file provides
df.head(n=5)


In [None]:
# Encode the categorical columns as integer codes.
# Each column gets its own LabelEncoder: refitting one shared encoder would
# overwrite its learned classes, leaving only the last column decodable.
categorical_columns = ['sex', 'smoker', 'region']
label_encoders = {}
for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# Backward-compatible alias: previously `label_encoder` ended up fitted on 'region'.
label_encoder = label_encoders['region']

# NOTE(review): integer codes imply an ordering between categories; for a
# multi-valued column such as 'region' one-hot encoding may be preferable.

# Check the first few rows again to confirm the transformation
df.head()


In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition repeatable between runs
train_dataset, test_dataset = train_test_split(df, random_state=42, test_size=0.2)

# Detach the 'expenses' target from each split. pop() removes the column in
# place, so the remaining frames contain only the input features.
train_labels, test_labels = train_dataset.pop('expenses'), test_dataset.pop('expenses')
train_features, test_features = train_dataset, test_dataset

# Report the (rows, columns) shape of each feature frame
print(
    f"Training dataset shape: {train_features.shape}",
    f"Test dataset shape: {test_features.shape}",
    sep="\n",
)

In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation (and the shuffling
# performed later during training) is repeatable on a fresh kernel.
tf.keras.utils.set_random_seed(42)

# Standardise each input column with statistics learned from the training
# features only (never the test set). The feature columns are unlikely to share
# a common scale, and unscaled inputs slow gradient-based training.
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(np.asarray(train_features, dtype='float32'))

# Build the model: explicit Input -> normalisation -> two hidden layers -> 1 output
model = tf.keras.Sequential([
    # Declare the input width explicitly instead of passing `input_shape` to Dense
    tf.keras.Input(shape=(train_features.shape[1],)),
    normalizer,
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)  # Output layer for regression (1 value - expenses)
])

# Compile the model; only a loss is configured (no extra metrics), so
# evaluate() keeps returning a single scalar
model.compile(optimizer='adam', loss='mean_absolute_error')

In [None]:
# Fit for 100 epochs, setting aside 20% of the training rows for validation;
# the returned History object records the per-epoch losses
history = model.fit(
    x=train_features,
    y=train_labels,
    validation_split=0.2,
    epochs=100,
    verbose=1,
)

In [None]:
# Score the trained model on the held-out test split. The model is compiled
# with a mean-absolute-error loss, so the value reported here is that loss.
test_loss = model.evaluate(x=test_features, y=test_labels)

print(f"Test Mean Absolute Error: {test_loss}")


In [None]:
# Predict on the test dataset. `predictions` keeps the shape returned by
# predict(); a flattened 1-D view is used for plotting so it lines up with the
# 1-D label series.
predictions = model.predict(test_features)
predicted_costs = predictions.ravel()

# Upper end of the reference diagonal: cover the largest value on either axis
# so over-predictions are not drawn beyond the end of the line.
axis_limit = max(test_labels.max(), predicted_costs.max())

# Plot the actual vs predicted healthcare costs
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(test_labels, predicted_costs, color='blue', alpha=0.5)
ax.plot([0, axis_limit], [0, axis_limit], color='red', linewidth=2)  # Line of equality
ax.set_title("Actual vs Predicted Healthcare Costs")
ax.set_xlabel("Actual Healthcare Costs")
ax.set_ylabel("Predicted Healthcare Costs")
plt.show()