In [1]:
import torch
import torchvision
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split

In [2]:
DATASET_URL = "https://hub.jovian.ml/wp-content/uploads/2020/05/insurance.csv"
DATA_FILENAME = "insurance.csv"
download_url(DATASET_URL, '.')

Downloading https://hub.jovian.ml/wp-content/uploads/2020/05/insurance.csv to ./insurance.csv


  0%|          | 0/55628 [00:00<?, ?it/s]

In [3]:
df_raw = pd.read_csv('insurance.csv')
df_raw.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [5]:
def customize_ds(dfr, rand_str):
    df = dfr.copy(deep=True)
    #drop some rows
    df = df.sample(int(0.95*len(df)), random_state=int(ord(rand_str[0])) )
    #scale input
    df.bmi = df.bmi*ord(rand_str[1])/100
    #scale target
    df.charges = df.charges * ord(rand_str[2])/100
    if ord(rand_str[3]) %2 ==1:
        df = df.drop(['region'], axis=1)
    return df    

In [6]:
dfc = customize_ds(df_raw, 'samararora')
dfc.head()

Unnamed: 0,age,sex,bmi,children,smoker,charges
1178,23,female,33.81905,0,no,3160.443391
1295,20,male,21.34,1,no,2141.6102
205,28,female,28.0136,1,no,4728.131368
1067,39,male,41.37535,0,no,6275.58066
523,38,female,36.5981,0,no,5883.402203


In [19]:
input_cols = ['age','bmi','sex','children','smoker']
categorical_cols = ['sex','smoker']
output_cols = ['charges']

def df_to_array(df):
    df1 = df.copy(deep=True)
    for col in categorical_cols:
        df1[col] = df1[col].astype('category').cat.codes
    inputs_array = df1[input_cols].to_numpy()
    targets_array = df1[output_cols].to_numpy()
    return inputs_array, targets_array

In [20]:
inputs_array, targets_array = df_to_array(dfc)

In [26]:
len(inputs_array)

1271

In [39]:
torch.device

torch.device

In [41]:
cpu = torch.device('cpu')
inputs = torch.Tensor(inputs_array)

In [42]:
inputs.dtype

torch.float32

In [43]:
targets = torch.Tensor(targets_array)

In [44]:
targets.dtype

torch.float32

In [45]:
dataset = TensorDataset(inputs, targets)

In [46]:
val_pct = 0.1
val_size = int(len(inputs)* val_pct)
train_size = len(inputs) - val_size

train_ds, val_ds = random_split(dataset,[train_size, val_size])

In [47]:
batch_size = 32

In [48]:
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)

In [49]:
#targets and inputs specified before prob in DataLoader
for xb,yb in train_loader:
    print('inputs', xb)
    print('targets', yb)
    break
    

inputs tensor([[18.0000, 23.0375,  1.0000,  0.0000,  0.0000],
        [54.0000, 32.6211,  1.0000,  1.0000,  0.0000],
        [33.0000, 34.6775,  1.0000,  1.0000,  1.0000],
        [43.0000, 24.3276,  0.0000,  0.0000,  0.0000],
        [19.0000, 19.2060,  1.0000,  0.0000,  0.0000],
        [22.0000, 20.6416,  0.0000,  3.0000,  0.0000],
        [28.0000, 25.7147,  0.0000,  2.0000,  0.0000],
        [52.0000, 23.5904,  1.0000,  3.0000,  1.0000],
        [18.0000, 34.5563,  0.0000,  0.0000,  0.0000],
        [31.0000, 21.1024,  0.0000,  0.0000,  0.0000],
        [29.0000, 21.1945,  0.0000,  0.0000,  1.0000],
        [35.0000, 26.7817,  1.0000,  1.0000,  0.0000],
        [31.0000, 22.8920,  0.0000,  2.0000,  0.0000],
        [18.0000, 24.4198,  1.0000,  0.0000,  1.0000],
        [52.0000, 40.5460,  1.0000,  2.0000,  1.0000],
        [47.0000, 27.3685,  1.0000,  4.0000,  0.0000],
        [58.0000, 35.8464,  1.0000,  2.0000,  1.0000],
        [18.0000, 30.7296,  1.0000,  2.0000,  1.0000],
   

In [50]:
input_size = len(input_cols)
output_size = len(output_cols)

In [90]:
class InsuranceModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size,output_size)
    def forward(self,xb):
        out = self.linear(xb)
        return out
    def training_step(self,batch):
        inputs,targets = batch
        out = self(inputs)
        loss = F.mse_loss(out,targets)
        return loss
    def validation_step(self,batch):
        inputs,targets = batch
        out = self(inputs)
        loss = F.mse_loss(out,targets)
        return {'val_loss':loss}
    
    def validation_epoch_end(self,outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}
        
    def epoch_end(self, epoch,result, num_epochs):
        if (epoch+1) % 20 or epoch == num_epochs-1:
            print("Epoch [{}], val_loss: {:.4f}".format(epoch+1, result['val_loss']))
        
        

In [91]:
model = InsuranceModel()

In [92]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.2259, -0.2996, -0.4171, -0.2997, -0.0717]], requires_grad=True),
 Parameter containing:
 tensor([-0.2098], requires_grad=True)]

In [93]:
import torch.optim as optim

def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader):
    history = []
    optimizer = optim.SGD(model.parameters(), lr)
    for epoch in range(epochs):
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result, epochs)
        history.append(result)
    return history


In [95]:
model = InsuranceModel()
history1 = fit(10, 0.00001, model, train_loader, val_loader)

Epoch [1], val_loss: 187353808.0000
Epoch [2], val_loss: 180418336.0000
Epoch [3], val_loss: 180055696.0000
Epoch [4], val_loss: 180137728.0000
Epoch [5], val_loss: 179825232.0000
Epoch [6], val_loss: 180160320.0000
Epoch [7], val_loss: 179986624.0000
Epoch [8], val_loss: 179809728.0000
Epoch [9], val_loss: 179755920.0000
Epoch [10], val_loss: 179746336.0000
