## import the modules

In [66]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as OPT
from torch.utils.data import random_split, DataLoader, TensorDataset
import pandas as pd
import numpy as np
from IPython.display import clear_output

## Looking at the Dataset

In [67]:
df = pd.read_csv("../Datasets/insurance.csv")
df

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


## Fixing the Dataset

In [68]:
sex = {"female": 1, "male": 0}
smoker = {"yes": 1, "no": 0}
regions = {"southwest": 3, "southeast": 2, "northwest": 1, "northeast": 0}

# This way we can get all the different elements from a column, where a[...] is a specific column.
A = []
for a in df.itertuples():
    if a[6] not in A:
        A.append(a[6])        

# Modifying the hard way the dataset
for i, column in enumerate(df["sex"]):
    if column == "male":
        df["sex"][i] = 1
    else:
        df["sex"][i] = 0
        
for i, column in enumerate(df["smoker"]):
    if column == "yes":
        df["smoker"][i] = 1
    else:
        df["smoker"][i] = 0
        
for i, column in enumerate(df["region"]):
    if column == "southwest":
        df["region"][i] = 3
    elif column == "southeast":
        df["region"][i] = 2
    elif column == "northwest":
        df["region"][i] = 1
    else:
        df["region"][i] = 0
        
for i, column in enumerate(df["charges"]):
    df["charges"][i] /= 1_000
        
clear_output()

df

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.900,0,1,3,16.884924
1,18,1,33.770,1,0,2,1.725552
2,28,1,33.000,3,0,2,4.449462
3,33,1,22.705,0,0,1,21.984471
4,32,1,28.880,0,0,1,3.866855
...,...,...,...,...,...,...,...
1333,50,1,30.970,3,0,1,10.600548
1334,18,0,31.920,0,0,0,2.205981
1335,18,0,36.850,0,0,2,1.629833
1336,21,0,25.800,0,0,3,2.007945


## Hyperparameters

In [155]:
input_size = 6
hidden_size1 = 32
hidden_size2 = 64
hidden_size3 = 32
output_size = 1
train_size = 900
valid_size = 400
test_size = 38
batch_size = 64

## Creating the Data Loaders

In [156]:
numpy_dataset = df.to_numpy()

inputs = []
output = []

for row in numpy_dataset:
    inputs_row = []
    output_row = []
    for i, column in enumerate(row):
        if i == 6:
            output_row.append(column)
        else:
            inputs_row.append(column)
            
    inputs.append(inputs_row)
    output.append(output_row)
    
inputs_tensor = torch.tensor(inputs, dtype=torch.float32)
output_tensor = torch.tensor(output, dtype=torch.float32)

train_ds, valid_ds, test_ds = random_split(TensorDataset(inputs_tensor, output_tensor), [train_size, valid_size, test_size])

train_dl = DataLoader(train_ds, batch_size, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size)

## Functions to train the Model on GPU

In [157]:
def get_default_device():
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

def to_device(data, device):
    device_data = []
    if isinstance(data, (list, tuple)):
        for d in data:
            device_data.append(d.to(device))
    else:
        device_data.append(data)
        
    return device_data

class DeviceLoader:
    def __init__(self, loader, device):
        self.loader = loader
        self.device = device
        
    def __iter__(self):
        for data in self.loader:
            yield to_device(data, self.device)
            
    def __len__(self):
        return len(self.loader)

## Moving the Data Loaders and the test Dataset to device

In [158]:
device = get_default_device()

train_loader = DeviceLoader(train_dl, device)
valid_loader = DeviceLoader(valid_dl, device)
test_ds = DeviceLoader(test_ds, device)

## Creaing the Model

In [161]:
class InsuranceModel(nn.Module):
    def __init__(self, in_size, h_size1, h_size2, h_size3, out_size):
        super().__init__()
        self.linear1 = nn.Linear(in_size, h_size1)
        self.linear2 = nn.Linear(h_size1, h_size2)
        self.linear3 = nn.Linear(h_size2, h_size3)
        self.linear4 = nn.Linear(h_size3, out_size)
        
    def __call__(self, input_batch):
        out1 = self.linear1(input_batch)
        out2 = F.leaky_relu(out1)
        out3 = self.linear2(out2)
        out4 = F.relu(out3)
        out5 = self.linear3(out4)
        out5 = F.leaky_relu(out5)
        model_preds = self.linear4(out5)
        return model_preds
    
    def training_step(self, train_batch):
        inputs_batch, output_batch = train_batch
        model_preds = self(inputs_batch)
        loss = F.mse_loss(model_preds, output_batch)
        return loss
    
    def validation_step(self, valid_batch):
        inputs_batch, output_batch = valid_batch
        model_preds = self(inputs_batch)
        loss = F.mse_loss(model_preds, output_batch)
        return loss.sqrt().item()
    
    def validation_end(self, results):
        avg_loss = torch.tensor(results).mean().item()
        return {"valid_loss": avg_loss}
    
    def evaluate(self, valid_loader):
        valid_batch_losses = torch.tensor([self.validation_step(x) for x in valid_loader])
        return self.validation_end(valid_batch_losses)
    
    def epoch_end(self, epoch, results):
        return {"Epoch": epoch, "Loss": results["valid_loss"]}
    
    def predict(self, test_batch):
        inputs_batch, output_batch = test_batch
        preds = self(inputs_batch)
        loss = F.mse_loss(preds, output_batch)
        return preds.item(), output_batch.item(), loss.sqrt().item()
        

## Creating and moving the Model to the device

In [162]:
model = InsuranceModel(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)
model.to(device)

InsuranceModel(
  (linear1): Linear(in_features=6, out_features=32, bias=True)
  (linear2): Linear(in_features=32, out_features=64, bias=True)
  (linear3): Linear(in_features=64, out_features=32, bias=True)
  (linear4): Linear(in_features=32, out_features=1, bias=True)
)

## Creating the training loop

In [163]:
def fit(model, epochs, training_loader, validation_loader, opt=OPT.SGD, lr=1e-3):
    history = []
    optim = opt(model.parameters(), lr=lr)
    for epoch in range(epochs):
        for training_batch in training_loader:
            loss = model.training_step(training_batch)
            loss.backward()
            optim.step()
            optim.zero_grad()
            
        valid_result = model.evaluate(validation_loader)
        epoch_result = model.epoch_end(epoch+1, valid_result)
        history.append(epoch_result)
        
        if (epoch + 1 ) % 10 == 0:
            print({"Epoch": epoch+1, "Loss": valid_result["valid_loss"]})

In [179]:
h1 = fit(model, 100, train_loader, valid_loader, lr=1e-7)

  avg_loss = torch.tensor(results).mean().item()


{'Epoch': 10, 'Loss': 5.540953636169434}
{'Epoch': 20, 'Loss': 5.567719459533691}
{'Epoch': 30, 'Loss': 5.514923095703125}
{'Epoch': 40, 'Loss': 5.5243330001831055}
{'Epoch': 50, 'Loss': 5.5680413246154785}
{'Epoch': 60, 'Loss': 5.528293132781982}
{'Epoch': 70, 'Loss': 5.542458534240723}
{'Epoch': 80, 'Loss': 5.554141521453857}
{'Epoch': 90, 'Loss': 5.523105621337891}
{'Epoch': 100, 'Loss': 5.532444000244141}


In [180]:
for batch in test_ds:
    preds, actual, loss = model.predict(batch)
    print("Predicted: ", preds, " Actual: ", actual, " Loss: ", loss)

Predicted:  29.9804630279541  Actual:  36.837467193603516  Loss:  6.857004165649414
Predicted:  34.08074951171875  Actual:  23.56827163696289  Loss:  10.51247787475586
Predicted:  12.461176872253418  Actual:  4.915060043334961  Loss:  7.546116828918457
Predicted:  29.6442813873291  Actual:  16.115304946899414  Loss:  13.528976440429688
Predicted:  2.516085386276245  Actual:  2.7092440128326416  Loss:  0.19315862655639648
Predicted:  5.284409523010254  Actual:  5.24076509475708  Loss:  0.04364442825317383
Predicted:  10.046138763427734  Actual:  6.985507011413574  Loss:  3.06063175201416
Predicted:  11.519895553588867  Actual:  10.096969604492188  Loss:  1.4229259490966797
Predicted:  5.933268070220947  Actual:  5.472448825836182  Loss:  0.4608192443847656
Predicted:  13.96327018737793  Actual:  12.6445894241333  Loss:  1.318680763244629
Predicted:  7.737087249755859  Actual:  4.040558338165283  Loss:  3.696528911590576
Predicted:  13.33292007446289  Actual:  13.063882827758789  Loss:  