<a href="https://colab.research.google.com/github/Mirjafarrr/DeepLearning/blob/main/MedicalCostPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

In [None]:
# Pick the compute backend in order of preference:
# CUDA GPU first, then Apple-silicon MPS, otherwise plain CPU.
device = (
  "cuda" if torch.cuda.is_available()
  else "mps" if torch.backends.mps.is_available()
  else "cpu"
)

In [None]:
# Download the Kaggle "insurance" dataset archive into the working directory
# (-L follows redirects, -o writes the response body to insurance.zip).
!curl -L -o insurance.zip https://www.kaggle.com/api/v1/datasets/download/mirichoi0218/insurance

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 16425  100 16425    0     0  23710      0 --:--:-- --:--:-- --:--:-- 23710


In [None]:
!unzip insurance.zip

Archive:  insurance.zip
replace insurance.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: insurance.csv           


In [None]:
# Load the extracted CSV into a DataFrame; all later cells derive from it.
insurance = pd.read_csv('insurance.csv')

In [None]:
# Preview the first rows to check that the file parsed as expected.
insurance.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [None]:
# (rows, columns) of the raw dataset.
insurance.shape

(1338, 7)

In [None]:
# Column dtypes and non-null counts, used to spot missing values and to tell
# numeric columns from categorical (object) ones.
insurance.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [None]:
# Separate the regression target (`charges`) from the predictor columns.
target_column = 'charges'
y = insurance[target_column]
X = insurance.drop(columns=target_column)

In [None]:
# Feature frame preview: every column except the target.
X.head()

Unnamed: 0,age,sex,bmi,children,smoker,region
0,19,female,27.9,0,yes,southwest
1,18,male,33.77,1,no,southeast
2,28,male,33.0,3,no,southeast
3,33,male,22.705,0,no,northwest
4,32,male,28.88,0,no,northwest


In [None]:
# Target preview: the `charges` column.
y.head()

Unnamed: 0,charges
0,16884.924
1,1725.5523
2,4449.462
3,21984.47061
4,3866.8552


In [None]:
# Column groups consumed by the ColumnTransformer:
# numeric columns are standardised, categorical (object) columns are one-hot encoded.
nums = ['age', 'bmi', 'children']
cats = ['sex', 'smoker', 'region']

In [None]:
# Preprocessing pipeline, one step per column group:
#   'num' -> standardise the numeric columns listed in `nums`
#   'cat' -> one-hot encode the columns listed in `cats`, dropping the first
#            level of each feature
numeric_step = ('num', StandardScaler(), nums)
categorical_step = ('cat', OneHotEncoder(drop='first'), cats)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

In [None]:
# Fit the preprocessor on the whole feature frame and transform it in one go.
# NOTE(review): fitting on every row means the scaler statistics also include
# rows that later land in the test split (data leakage) — prefer fitting on
# the training rows only.
X_processed = preprocessor.fit_transform(X)

In [None]:
# Split the *raw* features first (80/20, fixed seed), then fit the
# preprocessor on the training rows only and merely apply it to the test rows.
# This keeps the scaler statistics and encoder categories free of test-set
# information, so the reported test error is not optimistically biased.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

In [None]:
# Convert the split data to float32 tensors; targets become column vectors of
# shape (n, 1) so they line up with the model's single output unit.
# np.asarray / torch.as_tensor accept the original pandas / NumPy objects as
# well as already-converted CPU tensors, so re-running this cell is harmless
# (the former `.to_numpy()` call failed once `y_train` had become a tensor).
y_train = torch.as_tensor(np.asarray(y_train), dtype=torch.float32).reshape(-1, 1)
y_test = torch.as_tensor(np.asarray(y_test), dtype=torch.float32).reshape(-1, 1)
X_train = torch.as_tensor(X_train, dtype=torch.float32)
X_test = torch.as_tensor(X_test, dtype=torch.float32)

In [None]:
# Sanity check: feature/target row counts must agree within each split.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

(torch.Size([1070, 8]),
 torch.Size([1070, 1]),
 torch.Size([268, 8]),
 torch.Size([268, 1]))

In [None]:
# Number of features after preprocessing; sets the width of the first Linear layer.
input_dim = X_train.shape[1]

In [None]:
# Show the resulting input width.
input_dim

8

In [None]:
# Fully connected regressor: five ReLU-activated hidden layers followed by a
# single linear output unit. The seed is fixed first and the layers are created
# in input-to-output order so the weight initialisation is reproducible.
torch.manual_seed(42)

hidden_widths = [64, 64, 64, 64, 32]
layers = []
in_features = input_dim
for width in hidden_widths:
    layers.append(nn.Linear(in_features, width))
    layers.append(nn.ReLU())
    in_features = width
layers.append(nn.Linear(in_features, 1))

model = nn.Sequential(*layers).to(device)

In [None]:
# Print the layer-by-layer architecture.
model

Sequential(
  (0): Linear(in_features=8, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=64, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=64, bias=True)
  (5): ReLU()
  (6): Linear(in_features=64, out_features=64, bias=True)
  (7): ReLU()
  (8): Linear(in_features=64, out_features=32, bias=True)
  (9): ReLU()
  (10): Linear(in_features=32, out_features=1, bias=True)
)

In [None]:
# Training configuration: Adam step size, number of full-batch epochs, the
# optimizer bound to the model's parameters, and the loss (mean absolute
# error, i.e. the average error in the same units as `charges`).
n_epochs = 2000
learning_rate = 0.0005
mae = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
def train_bgd(model, optimizer, criterion, X_train, y_train, n_epochs):
  """Train `model` with full-batch gradient descent and return the last loss.

  Every epoch runs one forward/backward pass over the whole of `X_train`
  (no mini-batching) followed by a single optimizer step. Progress is printed
  on the first epoch, every 100th epoch after it, and on the final epoch.

  Args:
    model: Module being optimized; called as `model(X_train)`.
    optimizer: Optimizer already bound to the model's parameters.
    criterion: Callable `criterion(y_pred, y_train)` returning a scalar loss.
    X_train: Training inputs passed to the model in one batch.
    y_train: Training targets compared against the model output.
    n_epochs: Number of full-batch updates to run; must be at least 1.

  Returns:
    The detached loss tensor of the final epoch, measured on the forward pass
    that precedes that epoch's parameter update.

  Raises:
    ValueError: If `n_epochs` is smaller than 1.
  """
  if n_epochs < 1:
    # Without this guard `loss` would never be assigned and the final
    # `return` would fail with an opaque UnboundLocalError.
    raise ValueError(f'n_epochs must be >= 1, got {n_epochs}')

  # Notebook cells get re-run out of order: make sure an earlier
  # `model.eval()` does not leave the model in evaluation mode while training.
  model.train()

  for epoch in range(n_epochs):
    # Clear gradients *before* the backward pass so stale gradients left on
    # the parameters (e.g. by an interrupted earlier run) cannot leak into
    # the first update.
    optimizer.zero_grad()
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0 or (epoch + 1) == n_epochs:
      print(f'Epoch: {epoch+1}/{n_epochs}, Loss: {loss.item()}')

  # Detach so the caller does not keep the autograd graph alive.
  return loss.detach()

In [None]:
# Move the full training set to the model's device once, then run the
# full-batch training loop and keep the loss it returns.
X_train_dev = X_train.to(device)
y_train_dev = y_train.to(device)
loss = train_bgd(
    model=model,
    optimizer=optimizer,
    criterion=mae,
    X_train=X_train_dev,
    y_train=y_train_dev,
    n_epochs=n_epochs,
)

Epoch: 1/2000, Loss: 13345.9326171875
Epoch: 101/2000, Loss: 13228.3876953125
Epoch: 201/2000, Loss: 7181.3203125
Epoch: 301/2000, Loss: 6128.20556640625
Epoch: 401/2000, Loss: 5400.6357421875
Epoch: 501/2000, Loss: 4007.368408203125
Epoch: 601/2000, Loss: 3335.78515625
Epoch: 701/2000, Loss: 2830.90673828125
Epoch: 801/2000, Loss: 2363.503662109375
Epoch: 901/2000, Loss: 2118.3037109375
Epoch: 1001/2000, Loss: 1971.2042236328125
Epoch: 1101/2000, Loss: 1892.2816162109375
Epoch: 1201/2000, Loss: 1832.7545166015625
Epoch: 1301/2000, Loss: 1786.678466796875
Epoch: 1401/2000, Loss: 1739.5460205078125
Epoch: 1501/2000, Loss: 1705.3153076171875
Epoch: 1601/2000, Loss: 1675.120361328125
Epoch: 1701/2000, Loss: 1649.928955078125
Epoch: 1801/2000, Loss: 1630.60009765625
Epoch: 1901/2000, Loss: 1613.32861328125
Epoch: 2000/2000, Loss: 1598.8861083984375


In [None]:
# Score the held-out split: switch to evaluation mode and disable gradient
# tracking, since no parameters are updated here.
model.eval()
X_test_dev, y_test_dev = X_test.to(device), y_test.to(device)
with torch.no_grad():
    y_pred_test = model(X_test_dev)
    test_loss = mae(y_pred_test, y_test_dev)

In [None]:
# Report both errors in dollars. `loss` is the value returned by train_bgd
# (loss of the last training epoch); `test_loss` comes from the evaluation cell.
print(f"Final Training Loss (MAE): ${loss.item():.2f}")
print(f"Final Test Loss (MAE): ${test_loss.item():.2f}")

Final Training Loss (MAE): $1598.89
Final Test Loss (MAE): $1403.00
