## 1. Introduction
- Predict total claims and optimal premium values.

In [None]:
# Notebook setup: put the project code on the import path, then load the
# helper modules used by the cells below.
import sys

# The helpers are imported as `src.<module>`, so the directory that CONTAINS
# `src` must be on sys.path -- adding '../src' alone only exposes the modules
# under their bare names (`data_loader`, ...), not as `src.data_loader`.
# Assumes the kernel's working directory is one level below the repo root, as
# the '../src' and '../data' relative paths suggest -- TODO confirm.
sys.path.append('..')
# Kept for backward compatibility in case the project modules import each
# other by bare name -- TODO confirm whether this entry is still needed.
sys.path.append('../src')
import pandas as pd
import src.data_loader as data_loader
import src.modeling as modeling
import src.utils as utils

## 2. Data Loading

In [None]:
# Load the dataset file and pass it straight through the project's cleaning
# step. The trailing `df.head()` expression is what the notebook displays for
# this cell (presumably the first rows of a pandas DataFrame -- verify against
# data_loader).
df = data_loader.clean_data(
    data_loader.load_data('../data/MachineLearningRating_v3.txt')
)
df.head()

## 3. Feature Engineering
- Encode categorical variables for modeling.

In [None]:
# Categorical columns handed to the project's encoder so that the modeling
# cells can consume them. The encoding scheme itself lives in
# utils.encode_categorical and is not visible from this notebook.
cat_cols = [
    'Province',
    'VehicleType',
    'Gender',
    'CoverType',
    'Product',
]
df_enc = utils.encode_categorical(df, cat_cols)
# Display a preview of the encoded frame as the cell output.
df_enc.head()

## 4. Fit Linear Regression Model

In [None]:
# Model TotalClaims from three columns of the encoded frame using the
# project's linear-regression helper, then report the two scores it returns.
target = 'TotalClaims'
features = [
    'PostalCode',
    'SumInsured',
    'CalculatedPremiumPerTerm',
]
# The helper's return value is unpacked into four names; their exact meaning
# (e.g. whether the scores are computed on a held-out split) is defined in
# modeling.fit_linear_regression -- verify there.
(model, preds, r2, mse) = modeling.fit_linear_regression(
    df_enc, features, target
)
print(f'R2: {r2:.3f}, MSE: {mse:.2f}')

## 5. ML Model: Predict Optimal Premium Values

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Model TotalPremium with the project's random-forest helper. `features` and
# `importances` are reused by the feature-importance plot in the next cell.
target = 'TotalPremium'
features = [
    'SumInsured',
    'TotalClaims',
    'PostalCode',
    'Province',
    'VehicleType',
    'Gender',
]
# NOTE(review): 'Province', 'VehicleType' and 'Gender' were passed through
# utils.encode_categorical earlier; this assumes the encoder keeps those
# column names in df_enc -- confirm against utils.
model, X_test, y_test, y_pred, importances = modeling.fit_random_forest(
    df_enc, features, target
)
# Score the predictions against the targets returned alongside them.
print(f'R2: {r2_score(y_test, y_pred):.3f}, MSE: {mean_squared_error(y_test, y_pred):.2f}')

### Feature Importances

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Horizontal bar chart with one bar per entry in `features`, sized by the
# matching entry in `importances` (both come from the premium-model cell
# above). Assumes the two sequences have the same length and order -- verify
# against modeling.fit_random_forest.
feat_names = features
fig, ax = plt.subplots(figsize=(8, 4))
ax.barh(feat_names, importances)
ax.set_xlabel('Importance')
ax.set_title('Feature Importances for Premium Prediction')
plt.show()