In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# The FCC dataset is expected to be preloaded as 'insurance'; if it is not, uncomment the next line to load it manually.
# insurance = pd.read_csv('insurance.csv')

In [None]:
# One-hot encode the text-valued columns so the regressor receives only
# numeric inputs. drop_first=True omits one indicator per category.
categorical_cols = ['sex', 'smoker', 'region']
insurance_processed = pd.get_dummies(
    insurance,
    columns=categorical_cols,
    drop_first=True,
)

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# shuffle (and therefore the split) reproducible across runs.
train_dataset, test_dataset = train_test_split(
    insurance_processed,
    test_size=0.2,
    random_state=42,
)

# Detach the target column from each split: pop() removes 'expenses' from the
# feature frame in place and returns it as the label Series.
train_labels, test_labels = train_dataset.pop('expenses'), test_dataset.pop('expenses')

In [None]:
# Fit a linear regression on the training features against the training
# labels. The fit call stays the cell's last expression so the notebook still
# displays the fitted estimator.
model = LinearRegression()
model.fit(X=train_dataset, y=train_labels)

In [None]:
# Score the fitted model on the held-out rows: predict expenses for the test
# features, then report the mean absolute difference from the true labels.
predictions = model.predict(test_dataset)
mae = mean_absolute_error(y_true=test_labels, y_pred=predictions)
print('Mean Absolute Error:', mae)

In [None]:
# Scatter the true test-set expenses against the model's predictions; points
# close to the diagonal are the ones the model predicted accurately.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=test_labels, y=predictions, ax=ax)
ax.set_xlabel('Actual Expenses')
ax.set_ylabel('Predicted Expenses')
ax.set_title('Actual vs Predicted Health Expenses')
plt.show()