# Sections:
# 1. Data Loading
# 2. Data Preparation
# 3. Model Training
# 4. Model Evaluation
# 5. Insights and Visualizations

# Use this notebook for an interactive workflow, combining code, explanations, and outputs.


# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score

# Load Data
# Read the preprocessed dataset (the path is resolved relative to the current
# working directory) and preview its first five rows in the cell output.
# `display` is not imported here; it is expected to come from the
# notebook (IPython) environment.
data = pd.read_csv(filepath_or_buffer='../data/preprocessed_data.csv')
display(data.head(n=5))

# Split Data
# 'TotalClaims' is the regression target; every other column is a feature.
y = data['TotalClaims']
X = data.drop(columns=['TotalClaims'])

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Train Models
# Fit three regressors on the same training split so their test scores can be
# compared directly. The estimators that involve randomness are seeded (with
# the same value used for the train/test split) so that rerunning the
# notebook reproduces the same fitted models and the same reported metrics.
lr_model = LinearRegression().fit(X_train, y_train)
rf_model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
xgb_model = xgb.XGBRegressor(random_state=42).fit(X_train, y_train)

# Evaluate Models
# Map a display name to each fitted estimator; the insertion order of this
# dict determines the order of the printed report.
models = {
    'Linear Regression': lr_model,
    'Random Forest': rf_model,
    'XGBoost': xgb_model,
}

# Score every model on the held-out test split and print one summary line
# per model with its mean squared error and R^2.
for name in models:
    model = models[name]
    predictions = model.predict(X_test)
    r2 = r2_score(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)
    print(f"{name}: MSE = {mse:.2f}, R^2 = {r2:.2f}")