<a href="https://colab.research.google.com/github/KonstantinosGkoutis/email-app-/blob/main/fcc_predict_health_costs_with_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

In [None]:
# Read the healthcare cost records from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='healthcare_costs.csv')

In [None]:
# Group the feature columns by how they are preprocessed further down:
# the categorical ones are one-hot encoded, the numerical ones are scaled.
categorical_columns = [
    "sex",
    "smoker",
    "region",
]
numerical_columns = [
    "age",
    "bmi",
    "children",
]

In [None]:
# Preprocessing pipeline: one-hot encode the categorical columns (omitting the
# first level of each feature via drop='first') and standardize the numerical
# columns with StandardScaler.
preprocessor = ColumnTransformer([
    ('onehot', OneHotEncoder(drop='first'), categorical_columns),
    ('scale', StandardScaler(), numerical_columns)
])

# Prepare features and labels
X = df.drop(columns=['expenses'])
y = df['expenses']

# Split the raw rows into training (80%) and testing (20%) sets BEFORE fitting
# the preprocessor. Fitting on the full dataset would let the test rows
# influence the scaler's mean/variance and the encoder's categories, leaking
# test-set information into training and making the test score optimistic.
X_train, X_test, train_labels, test_labels = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the transformation from the training rows only, then apply that same
# fitted transformation to the held-out test rows.
train_dataset = preprocessor.fit_transform(X_train)
test_dataset = preprocessor.transform(X_test)

# Transformed view of the full dataset (using the training-set fit), kept so
# that any later cell referring to X_transformed continues to work.
X_transformed = preprocessor.transform(X)

# Build the regression model: three ReLU hidden layers (64 -> 32 -> 16 units)
# followed by a single output unit with no activation.
# The input width is declared with an explicit Input object as the first
# entry instead of the `input_shape=` layer argument, which Keras 3 reports
# as deprecated with a UserWarning.
model = Sequential([
    tf.keras.Input(shape=(train_dataset.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1)  # Single output neuron for regression
])

# Compile the model: Adam optimizer, mean absolute error as the loss, and MAE
# also tracked as a metric so evaluate() returns a (loss, mae) pair.
model.compile(optimizer='adam', loss='mae', metrics=['mae'])

# Train the model for 100 epochs in batches of 32, holding out 20% of the
# training data for validation; verbose=0 suppresses per-epoch output.
history = model.fit(
    train_dataset,
    train_labels,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    verbose=0,
)

# Score the trained model on the test split. evaluate() yields the loss first
# and then the compiled metric; both are MAE for this model.
test_loss, test_mae = model.evaluate(
    test_dataset,
    test_labels,
    verbose=0,
)
print(f"Mean Absolute Error on Test Data: ${test_mae:.2f}")

# Report whether the test MAE is below the 3500 threshold.
print(
    "Challenge Passed! Model meets the required accuracy."
    if test_mae < 3500
    else "Challenge Failed. Try adjusting the model."
)

import matplotlib.pyplot as plt

# Run the model on the test features and collapse the output to a 1-D array
# so it lines up with the test labels.
predicted_expenses = model.predict(test_dataset).flatten()

# Scatter the predictions against the true values.
plt.scatter(test_labels, predicted_expenses, alpha=0.5)
plt.title("Actual vs Predicted Healthcare Costs")
plt.ylabel("Predicted Expenses")
plt.xlabel("Actual Expenses")

# Red reference line y = x spanning the range of the actual values; points on
# this line are predictions that match the actual expense exactly.
diagonal = [min(test_labels), max(test_labels)]
plt.plot(diagonal, diagonal, color='red')
plt.show()
