# Neural network for car insurance predictions

In this notebook, we implement and compare different models for insurance claim frequency
prediction on real-life data from the French motor third-party liability dataset (see file `freMTPL2freq.csv`),
which comprises m=678,007 car insurance policies.

We will explore three different neural network architectures and compare their results.

(credits to Gabriele Visentin for creating the exercise in the course [Machine learning for finance and insurance at ETH](https://people.math.ethz.ch/~patrickc/mlfi/))

In [1]:
import pandas as pd
import numpy as np

import sklearn.linear_model
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
from torch.nn import Sequential, Linear, ReLU
import torch.optim as optim

from prettytable import PrettyTable

In [2]:
# Load the policy table from `freMTPL2freq.csv`; its fields are separated by ';'
df = pd.read_csv(filepath_or_buffer="freMTPL2freq.csv", delimiter=";")
df.head()

Unnamed: 0,VehPower,VehAge,DrivAge,BonusMalus,VehBrand,VehGas,Density,Region,Exposure,ClaimNb
0,4,9,23,100,B6,Regular,7887,R31,76,0
1,4,6,26,100,B6,Regular,2308,R31,74,0
2,4,6,26,100,B6,Regular,2308,R31,11,0
3,7,4,44,50,B6,Regular,37,R94,83,0
4,5,2,29,90,B6,Regular,335,R91,69,0


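We first transform some features into more suitable representations (log scale for the numerical ones, three categories for the vehicle age) and parse `Exposure` as a number.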

In [3]:
# Pre-process features
# Numerical features are moved to log scale.
df["VehPower"] = np.log(df["VehPower"])

# Vehicle age is binned into three categories: New (< 6), Mid (6 to 12), Old (>= 13).
# np.select evaluates the bins on the whole column at once instead of building one
# string per row in a Python loop; rows matching no bin (e.g. NaN) get "".
veh_age = df["VehAge"].to_numpy()
df["VehAge"] = np.select(
    [veh_age < 6, (veh_age >= 6) & (veh_age < 13), veh_age >= 13],
    ["New", "Mid", "Old"],
    default="",
)

df["DrivAge"] = np.log(df["DrivAge"])
df["BonusMalus"] = np.log(df["BonusMalus"])
df["Density"] = np.log(df["Density"])

# Convert Exposure from string to float: the file writes decimals with a comma,
# so swap it for a dot (literal replacement, not a regex) before parsing.
df["Exposure"] = df["Exposure"].str.replace(",", ".", regex=False).str.strip()
df["Exposure"] = pd.to_numeric(df["Exposure"])

# Rename columns so the names reflect the transformations applied above
df = df.rename(columns={
    "VehPower": "LogVehPower",
    "VehAge": "CategorizedVehAge",
    "DrivAge": "LogDrivAge",
    "BonusMalus": "LogBonusMalus",
    "Density": "LogDensity"
})

df.head()

Unnamed: 0,LogVehPower,CategorizedVehAge,LogDrivAge,LogBonusMalus,VehBrand,VehGas,LogDensity,Region,Exposure,ClaimNb
0,1.386294,Mid,3.135494,4.60517,B6,Regular,8.972971,R31,0.76,0
1,1.386294,Mid,3.258097,4.60517,B6,Regular,7.744137,R31,0.74,0
2,1.386294,Mid,3.258097,4.60517,B6,Regular,7.744137,R31,0.11,0
3,1.94591,New,3.78419,3.912023,B6,Regular,3.610918,R94,0.83,0
4,1.609438,New,3.367296,4.49981,B6,Regular,5.814131,R91,0.69,0


Our variables for the neural network

In [4]:
def _as_float32_tensor(data):
    """Cast a pandas object to float32 and wrap its values in a float32 torch tensor."""
    return torch.tensor(data.astype(np.float32).values, dtype=torch.float32)


# Features and targets: the two right-most columns (Exposure, ClaimNb) are the
# targets, everything to their left is a feature.
X = df.iloc[:, :-2]
ExpoClaim = df.iloc[:, -2:]

# One-hot encode the categorical features
X = pd.get_dummies(X, dtype=int)

# Hold out 10% of the policies as a test set
X_train, X_test, ExpoClaim_train, ExpoClaim_test = train_test_split(
    X, ExpoClaim, test_size=0.1, shuffle=True, random_state=42
)
X_train = _as_float32_tensor(X_train)
X_test = _as_float32_tensor(X_test)

# Regression target: number of claims per unit of exposure, as a flat tensor
y_train = _as_float32_tensor(ExpoClaim_train["ClaimNb"] / ExpoClaim_train["Exposure"]).reshape(-1)
y_test = _as_float32_tensor(ExpoClaim_test["ClaimNb"] / ExpoClaim_test["Exposure"]).reshape(-1)

# Exposures are kept both as pandas Series and as tensors (used as loss weights)
Exposure_train = ExpoClaim_train["Exposure"]
Exposure_test = ExpoClaim_test["Exposure"]
expos_train = _as_float32_tensor(Exposure_train)
expos_test = _as_float32_tensor(Exposure_test)

train_size = len(y_train)

We want to train the network by minimizing the exposure-weighted loss function
$$
\mathcal{L}(x,y) = \frac{1}{\sum_{i=1}^m\text{Exposure}_i} \sum_{i=1}^m \text{Exposure}_i \cdot l(x_i,y_i)
$$
where
$$
l(x_i,y_i) = 2(x_i-y_i-y_i \log x_i+y_i\log y_i)
$$
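with the convention that $0\log 0 = 0$. Here $x_i$ denotes the predicted claim frequency and $y_i$ the observed one (`ClaimNb / Exposure`).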


Our functions used for training

In [5]:
def littlel(ypred, ybatch):
  """Element-wise unit deviance ``2*(ypred - ybatch - ybatch*log(ypred) + ybatch*log(ybatch))``.

  Both ``y*log(.)`` terms use the convention ``0*log(0) = 0``, so an observed
  value of zero contributes exactly ``2*ypred`` even when the prediction has
  underflowed to zero (a plain ``ybatch*torch.log(ypred)`` yields NaN there).

  Args:
    ypred: tensor of predictions.
    ybatch: tensor of observed values, broadcastable with ``ypred``.

  Returns:
    Tensor of per-element losses.
  """
  # torch.xlogy(a, b) computes a*log(b) and returns 0 wherever a == 0
  y_log_y = torch.xlogy(ybatch, ybatch)
  y_log_pred = torch.xlogy(ybatch, ypred)
  res = 2*(ypred-ybatch-y_log_pred+y_log_y)
  return res

def expos_dot_littlel(ypred, ybatch, exposure):
  """Return the exposure-weighted sum of the per-sample losses from ``littlel``.

  ``exposure`` and the per-sample losses are combined with a dot product,
  so the result is a scalar tensor.
  """
  per_sample = littlel(ypred, ybatch)
  return exposure.dot(per_sample)

def loss_fun_batch(ypred, ybatch, exposure):
  """Exposure-weighted mean loss of a batch.

  Divides the exposure-weighted loss sum by the total exposure of the batch.
  """
  weighted_sum = expos_dot_littlel(ypred, ybatch, exposure)
  total_exposure = exposure.sum()
  return weighted_sum / total_exposure

def training(NN, n_epochs, batch_size, learning_rate):
  """Fit ``NN`` with Adam on the notebook-level training tensors.

  Reads ``X_train``, ``y_train`` and ``expos_train`` from the enclosing scope and
  sweeps them in consecutive slices of ``batch_size`` rows, ``n_epochs`` times,
  taking one optimizer step per slice on ``loss_fun_batch``.

  Args:
    NN: the network to train; its parameters are updated in place.
    n_epochs: number of full passes over the training data.
    batch_size: number of rows per mini-batch.
    learning_rate: learning rate handed to Adam.
  """
  optimizer = optim.Adam(NN.parameters(), lr=learning_rate)
  n_samples = len(X_train)

  for _ in range(n_epochs):
    for start in range(0, n_samples, batch_size):
      rows = slice(start, start + batch_size)

      # Flatten the network output so it lines up with the 1-D targets
      predictions = NN(X_train[rows]).reshape(-1)
      batch_loss = loss_fun_batch(predictions, y_train[rows], expos_train[rows])

      optimizer.zero_grad()
      batch_loss.backward()
      optimizer.step()

A function to compute and properly display the results

In [6]:
def nn_evaluation(NN, X, y, expos, batch_size):
  """Evaluate ``NN`` on ``(X, y)`` and print a table with the loss, MAE and MSE.

  The data is processed in consecutive slices of ``batch_size`` rows. The loss
  is the exposure-weighted sum of per-sample losses divided by the total
  exposure; MAE and MSE are plain (unweighted) means over ``len(y)``.

  Args:
    NN: the network to evaluate.
    X: feature tensor, one row per sample.
    y: 1-D tensor of observed targets.
    expos: 1-D tensor of exposures, used as loss weights.
    batch_size: number of rows per evaluation batch.

  Returns:
    Tuple ``(total_loss, MAE, MSE)`` of Python floats.
  """
  total_loss = 0
  abs_err_sum = 0
  sq_err_sum = 0

  # Evaluation never back-propagates: disabling autograd avoids keeping every
  # batch's computation graph alive through the running totals.
  with torch.no_grad():
    for i in range(0, len(X), batch_size):

      Xbatch = X[i:i+batch_size]
      ypred = NN(Xbatch).reshape(1,-1)[0]
      ybatch = y[i:i+batch_size]
      exposbatch = expos[i:i+batch_size]

      total_loss += expos_dot_littlel(ypred, ybatch, exposbatch)
      # torch.sum reduces in one call; the builtin sum() would iterate over
      # the tensor element by element in Python.
      abs_err_sum += torch.sum(torch.abs(ypred-ybatch))
      sq_err_sum += torch.sum((ypred-ybatch)**2)

  total_loss = (total_loss / torch.sum(expos)).item()
  MAE = (abs_err_sum / len(y)).item()
  MSE = (sq_err_sum / len(y)).item()

  Table = PrettyTable(["Loss", "MAE", "MSE"])
  Table.add_row([total_loss, MAE, MSE])
  print(Table)

  return total_loss, MAE, MSE

We will use an Exponential Activation for the last layer

In [7]:
class ExponentialActivation(torch.nn.Module):
    """Activation module that applies the element-wise exponential to its input."""

    def forward(self, x):
        # Tensor method form of the element-wise exponential
        return x.exp()

Our first architecture

In [8]:
# First architecture: two hidden layers of 20 units each.
# NOTE(review): the earlier runtime notes for this cell disagreed ("1 minute" vs
# "about seven minutes") - re-time on your own hardware.

# Seed the RNG so the weight initialisation, and hence the reported metrics, are reproducible
torch.manual_seed(42)

#1: first parameters:
n_epochs_1 = 100
batch_size_1 = 10000
learning_rate_1 = 0.01

# The input width follows the feature matrix, so a change in the one-hot
# encoding does not silently break the first layer.
nn1 = Sequential(Linear(X_train.shape[1], 20),ReLU(),Linear(20, 20),ReLU(),Linear(20, 1),ExponentialActivation())

# Training is the slow step of this cell
training(nn1, n_epochs_1, batch_size_1, learning_rate_1)
print("TRAIN RESULTS:")
nn_evaluation(nn1, X_train, y_train, expos_train, batch_size_1)
print("TEST RESULTS:")
nn_evaluation(nn1, X_test, y_test, expos_test, batch_size_1)

TRAIN RESULTS:
+---------------------+--------------------+-------------------+
|         Loss        |        MAE         |        MSE        |
+---------------------+--------------------+-------------------+
| 0.45771288871765137 | 0.1790134608745575 | 3.857011318206787 |
+---------------------+--------------------+-------------------+
TEST RESULTS:
+--------------------+---------------------+------------------+
|        Loss        |         MAE         |       MSE        |
+--------------------+---------------------+------------------+
| 0.4495368003845215 | 0.19141551852226257 | 5.39058780670166 |
+--------------------+---------------------+------------------+


(0.4495368003845215, 0.19141551852226257, 5.39058780670166)

Second one

In [None]:
# Second architecture: three hidden layers (64, 32, 16 units).
# NOTE(review): the earlier runtime notes for this cell disagreed ("3 minutes" vs
# "about five minutes") - re-time on your own hardware.

# Seed the RNG so the weight initialisation, and hence the reported metrics, are reproducible
torch.manual_seed(42)

#Second parameters
n_epochs_2 = 50
batch_size_2 = 500
learning_rate_2 = 0.001

# The input width follows the feature matrix, so a change in the one-hot
# encoding does not silently break the first layer.
nn2 = Sequential(Linear(X_train.shape[1], 64),ReLU(),Linear(64, 32),ReLU(),Linear(32, 16), ReLU(),Linear(16,1),ExponentialActivation())

# Training is the slow step of this cell
training(nn2, n_epochs_2, batch_size_2, learning_rate_2)
print("TRAIN RESULTS:")
nn_evaluation(nn2, X_train, y_train, expos_train, batch_size_2)
print("TEST RESULTS:")
nn_evaluation(nn2, X_test, y_test, expos_test, batch_size_2)

TRAIN RESULTS:
+---------------------+---------------------+--------------------+
|         Loss        |         MAE         |        MSE         |
+---------------------+---------------------+--------------------+
| 0.45483389496803284 | 0.19500739872455597 | 3.8542675971984863 |
+---------------------+---------------------+--------------------+
TEST RESULTS:
+--------------------+---------------------+-------------------+
|        Loss        |         MAE         |        MSE        |
+--------------------+---------------------+-------------------+
| 0.4509471356868744 | 0.20766058564186096 | 5.387542724609375 |
+--------------------+---------------------+-------------------+


(0.4509471356868744, 0.20766058564186096, 5.387542724609375)

Third one

In [None]:
# Third architecture: two wider hidden layers (128 and 64 units).
# NOTE(review): the earlier runtime notes for this cell disagreed ("4 minutes" vs
# "about five minutes") - re-time on your own hardware.

# Seed the RNG so the weight initialisation, and hence the reported metrics, are reproducible
torch.manual_seed(42)

#Third parameters
n_epochs_3 = 100
batch_size_3 = 1000
learning_rate_3 = 0.005

# The input width follows the feature matrix, so a change in the one-hot
# encoding does not silently break the first layer.
nn3 = Sequential(Linear(X_train.shape[1], 128),ReLU(),Linear(128, 64),ReLU(),Linear(64,1),ExponentialActivation())

# Training is the slow step of this cell
training(nn3, n_epochs_3, batch_size_3, learning_rate_3)
print("TRAIN RESULTS:")
nn_evaluation(nn3, X_train, y_train, expos_train, batch_size_3)
print("TEST RESULTS:")
nn_evaluation(nn3, X_test, y_test, expos_test, batch_size_3)

TRAIN RESULTS:
+---------------------+---------------------+-------------------+
|         Loss        |         MAE         |        MSE        |
+---------------------+---------------------+-------------------+
| 0.45551159977912903 | 0.18678630888462067 | 3.855602502822876 |
+---------------------+---------------------+-------------------+
TEST RESULTS:
+-------------------+---------------------+-------------------+
|        Loss       |         MAE         |        MSE        |
+-------------------+---------------------+-------------------+
| 0.450625479221344 | 0.19936321675777435 | 5.389647483825684 |
+-------------------+---------------------+-------------------+


(0.450625479221344, 0.19936321675777435, 5.389647483825684)