####Copyright https://jovian.ai/learn/deep-learning-with-pytorch-zero-to-gans

In [None]:
import torch
import torchvision
import numpy as  np 
import pandas as pd
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split

In [None]:
# Project identifier. It is not referenced again in the visible cells
# (presumably used when saving/committing the notebook — TODO confirm).
project_name = '02-insurance-linear-regression'


In [None]:
# Remote location of the raw insurance CSV, and the local file name it is
# later read back from.
DATASET_URL = "https://gist.github.com/BirajCoder/5f068dfe759c1ea6bdfce9535acdb72d/raw/c84d84e3c80f93be67f6c069cbdc0195ec36acbd/insurance.csv"
DATA_FILENAME = "insurance.csv"
# Fetch the CSV into the current working directory.
download_url(DATASET_URL, '.')

Using downloaded and verified file: ./insurance.csv


In [None]:
# Load the downloaded CSV into a DataFrame and preview its first rows.
dataframe_raw = pd.read_csv(DATA_FILENAME)
dataframe_raw.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [None]:
def customize_dataset(dataframe_raw, rand_str):
    """Return a customized copy of ``dataframe_raw`` driven by ``rand_str``.

    The code points of the first four characters of ``rand_str`` control
    the customization:

    * character 0 seeds the random sampling that keeps 95% of the rows
      (rounded down);
    * character 1 scales the ``bmi`` column by ``code / 100``;
    * character 2 scales the ``charges`` column by ``code / 100``;
    * character 3 drops the ``region`` column when its code is odd.

    The input dataframe is left unmodified.
    """
    seed, bmi_code, charges_code, region_code = [
        ord(rand_str[position]) for position in range(4)
    ]

    # Work on a deep copy and keep a reproducible 95% subset of the rows.
    keep_count = int(0.95 * len(dataframe_raw))
    customized = dataframe_raw.copy(deep=True).sample(keep_count, random_state=seed)

    # Rescale one input column and the target column.
    customized.bmi = customized.bmi * bmi_code / 100.
    customized.charges = customized.charges * charges_code / 100.

    # Optionally remove the region column.
    if region_code % 2 == 1:
        customized = customized.drop(columns=['region'])
    return customized

In [None]:
# Derive the working dataset from the raw data, using the string 'iqranoor'
# as the customization key, then check its dimensions.
dataframe = customize_dataset(dataframe_raw, 'iqranoor')
dataframe.shape

(1271, 6)

In [None]:
# Separate the input columns from the target column of the dataframe
inputs = dataframe.drop(['charges'], axis=1)
# Column names of the input data
input_col_name = inputs.columns.values
# Target data
target = dataframe['charges']
output_col_name = ['charges']
output_col_name, input_col_name

(['charges'], array(['age', 'sex', 'bmi', 'children', 'smoker'], dtype=object))

In [None]:
# Separate the input columns by dtype: numCols keeps everything that is not
# of object dtype, catCols keeps everything that is not numeric (the
# categorical columns).
numCols = inputs.select_dtypes(exclude = ["object"])
catCols = inputs.select_dtypes(exclude=[np.number])

In [None]:
# Preparing the data or converting catagorical to numbers
def dataframe_to_array(dataframe):
    """Convert ``dataframe`` into NumPy input and target arrays.

    Works on a deep copy, so the argument is not modified. Each column
    yielded by iterating the module-level ``catCols`` is replaced by its
    integer category codes. The copy is then split into inputs and targets
    using the module-level ``input_col_name`` and ``output_col_name``.

    Returns:
        A ``(input_array, target_array)`` tuple of NumPy arrays.
    """
    encoded = dataframe.copy(deep=True)
    # Replace every non-numeric categorical column with its category codes.
    for column in catCols:
        as_category = encoded[column].astype('category')
        encoded[column] = as_category.cat.codes
    features = encoded[input_col_name].to_numpy()
    labels = encoded[output_col_name].to_numpy()
    return features, labels

In [None]:
# Get the data as numeric NumPy arrays and cast both to float32
input_array, targets_array = dataframe_to_array(dataframe)
input_array = input_array.astype('float32')
targets_array = targets_array.astype('float32')

In [None]:
# Check the dtype of the input array.
input_array.dtype

dtype('float32')

In [None]:
# Convert the NumPy arrays to tensors.
# NOTE: `inputs` previously named the input DataFrame; from here on it is
# rebound to the input tensor.
inputs = torch.from_numpy(input_array)
targets = torch.from_numpy(targets_array)
#inputs, targets

In [None]:
# Number of rows (first dimension) of the input tensor
num_rows = inputs.shape[0]

In [None]:
# Hold out 10% of the rows (rounded down) for validation; the remaining
# rows are used for training.
val_percent = 0.1
val_size = int(num_rows * val_percent)
train_size = num_rows - val_size
train_size, val_size

(1144, 127)

In [None]:
# Wrap the tensors in a dataset and split it randomly into training and
# validation subsets of the sizes computed above.
# NOTE(review): no generator/seed is passed here, so the split is presumably
# different on every run unless a seed is set elsewhere — confirm.
dataset = TensorDataset(inputs, targets)
train_ds, val_ds = random_split(dataset,[train_size, val_size] )

In [None]:
# Serve the data in batches; only the training loader shuffles.
batch_size = 256
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)

In [None]:
# Pull a single batch from the validation loader; the prints that would
# display it are currently commented out.
for xb, yb in val_loader:
  #print('inputs:', xb)
  #print('targets:', yb)
  break

#Create Linear Regression Model

In [None]:
# Layer dimensions: one input per input column, one output per target column.
input_size = len(input_col_name)
output_size = len(output_col_name)
input_size, output_size

(5, 1)

In [None]:
class InsuranceModel(nn.Module):
    """Linear regression model made of a single ``nn.Linear`` layer.

    The layer dimensions are taken from the module-level ``input_size`` and
    ``output_size`` at construction time. Besides ``forward``, the class
    provides the per-batch and per-epoch hooks used by the training and
    evaluation helpers.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, xb):
        """Return the linear layer's prediction for the input batch ``xb``."""
        return self.linear(xb)

    def training_step(self, batch):
        """Return the MSE loss for one ``(inputs, targets)`` batch."""
        inputs, target = batch
        out = self(inputs)
        return F.mse_loss(out, target)

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` for one ``(inputs, targets)`` batch.

        The loss is detached so the stored value does not keep the autograd
        graph alive.
        """
        inputs, target = batch
        out = self(inputs)
        loss = F.mse_loss(out, target)
        return {'val_loss': loss.detach()}

    def validation_epoch_end(self, output):
        """Average the per-batch validation losses.

        Args:
            output: list of dicts as returned by ``validation_step``.

        Returns:
            ``{'val_loss': mean_loss}`` with the mean as a Python float.
        """
        batch_loss = [x['val_loss'] for x in output]
        epoch_loss = torch.stack(batch_loss).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss every 20th epoch and on the last epoch.

        Args:
            epoch: zero-based index of the epoch that just finished.
            result: dict containing a ``'val_loss'`` entry.
            num_epochs: total number of epochs in the run.
        """
        if (epoch + 1) % 20 == 0 or epoch == num_epochs - 1:
            # Report the 1-based epoch number so the label matches the
            # "every 20th epoch" condition above, and use a precision of
            # four decimals ('{:.4f}'); '{:4f}' only sets a minimum width.
            print(" Epoch [{}], val_loss: {:.4f}".format(epoch + 1, result['val_loss']))

# Create the model instance used by the evaluation and training cells.
model = InsuranceModel()

In [None]:
def evaluate(model, val_loader):
    """Run the model over ``val_loader`` and return the aggregated result.

    Args:
        model: object providing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: iterable of validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the list of
        per-batch ``validation_step`` results.
    """
    # No parameter update happens during validation, so autograd tracking is
    # switched off to avoid building a computation graph for every batch.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs and return the validation history.

    Args:
        epochs: number of passes over ``train_loader``.
        lr: learning rate handed to the optimizer.
        model: module providing ``training_step`` and ``epoch_end``, plus
            the validation hooks used by ``evaluate``.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches.
        opt_func: optimizer factory called as ``opt_func(parameters, lr)``.

    Returns:
        A list with one ``evaluate`` result per epoch, in order.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch_index in range(epochs):
        # Training phase: one optimizer update per batch.
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            optimizer.zero_grad()

        # Validation phase: measure, report and record the epoch result.
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch_index, epoch_result, epochs)
        history.append(epoch_result)
    return history

In [None]:
# Print the loader object itself (its repr, not its contents).
print(val_loader)

<torch.utils.data.dataloader.DataLoader object at 0x7f5a50303810>


In [None]:
# Validation loss of the model on the validation loader before training.
evaluate(model, val_loader)

{'val_loss': 431016192.0}

In [None]:
# Fit the model: 1000 epochs at learning rate 1e-5 with the default
# optimizer; `history` collects the per-epoch validation results.
history = fit(1000, 1e-5, model, train_loader, val_loader)


 Epoch [19], val_loss: 166991952.000000
 Epoch [39], val_loss: 166930336.000000
 Epoch [59], val_loss: 167012800.000000
 Epoch [79], val_loss: 167038800.000000
 Epoch [99], val_loss: 167077840.000000
 Epoch [119], val_loss: 167072880.000000
 Epoch [139], val_loss: 167073072.000000
 Epoch [159], val_loss: 167000864.000000
 Epoch [179], val_loss: 166997152.000000
 Epoch [199], val_loss: 166973440.000000
 Epoch [219], val_loss: 166905280.000000
 Epoch [239], val_loss: 166852160.000000
 Epoch [259], val_loss: 166873040.000000
 Epoch [279], val_loss: 166792512.000000
 Epoch [299], val_loss: 166717808.000000
 Epoch [319], val_loss: 166681904.000000
 Epoch [339], val_loss: 166566560.000000
 Epoch [359], val_loss: 166528608.000000
 Epoch [379], val_loss: 166517920.000000
 Epoch [399], val_loss: 166399744.000000
 Epoch [419], val_loss: 166323664.000000
 Epoch [439], val_loss: 166274736.000000
 Epoch [459], val_loss: 166243840.000000
 Epoch [479], val_loss: 166140960.000000
 Epoch [499], val_los