In [6]:
import torch

In [7]:
# Report whether PyTorch can see a CUDA-capable device in this runtime.
torch.cuda.is_available()

True

In [8]:
!pip install kaggle



In [10]:
import kaggle



In [11]:
!chmod 600 /root/.config/kaggle/kaggle.json

In [12]:
import kagglehub

# Fetch the insurance dataset; `path` is the local directory holding its files.
path = kagglehub.dataset_download("mirichoi0218/insurance")
print(f"Path of dataset files: {path}")

Using Colab cache for faster access to the 'insurance' dataset.
Pat of dataset files: /kaggle/input/insurance


In [13]:
import pandas as pd

In [14]:
import os
# Read the raw insurance table from the downloaded dataset directory.
csv_path = os.path.join(path, "insurance.csv")
df = pd.read_csv(csv_path)

In [None]:
# Preview the first rows to check the columns and value formats.
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [15]:
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [16]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

In [17]:
# Encode categorical variables
# Work on explicit copies: the frames returned by train_test_split are derived
# from `df`, and assigning columns on them can trigger pandas'
# SettingWithCopyWarning.
train_df = train_df.copy()
test_df = test_df.copy()

label_encoder = {}  # column name -> fitted LabelEncoder, reused at prediction time
for col in ['sex', 'smoker', 'region']:
    le = LabelEncoder()
    # Fit on the training split only, then apply the same mapping to the test split.
    train_df[col] = le.fit_transform(train_df[col])
    test_df[col] = le.transform(test_df[col])
    label_encoder[col] = le

In [18]:
# Separate the predictors from the target column ('charges') for each split
x_train, y_train = train_df.drop(columns=['charges']), train_df['charges']
x_test, y_test = test_df.drop(columns=['charges']), test_df['charges']

In [19]:
# Standardise features; the scaler's statistics come from the training split only
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

In [21]:
# Convert to float32 tensors; targets become column vectors of shape (n, 1)
# so they line up with the model's single-output predictions.
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy().reshape(-1, 1), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy().reshape(-1, 1), dtype=torch.float32)


In [53]:
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

#Define NN model

class SimpleNNRegressionModel(nn.Module):
  """Fully connected feed-forward network producing one regression output.

  Args:
    input_dim: Number of input features per sample.
    hidden_dims: Widths of the hidden layers, in order. Every hidden layer is
      followed by a ReLU. Defaults to ``(64, 128)``.
  """

  def __init__(self, input_dim, hidden_dims=(64, 128)):
    super().__init__()
    layers = []
    in_features = input_dim
    for width in hidden_dims:
      layers.append(nn.Linear(in_features, width))
      layers.append(nn.ReLU())
      in_features = width
    # Final projection to a single value; no activation, so the output is unbounded.
    layers.append(nn.Linear(in_features, 1))
    self.network = nn.Sequential(*layers)

  def forward(self, x):
    """Run ``x`` through the layer stack and return the network output."""
    return self.network(x)


In [44]:
# Number of feature columns, i.e. the width of the network's first layer
input_dim = x_train_tensor.size(1)
print(input_dim)

6


In [45]:
# Seed PyTorch's RNG so the initial weights are the same on every run.
torch.manual_seed(42)
model = SimpleNNRegressionModel(input_dim)
print(model)

SimpleNNRegressionModel(
  (network): Sequential(
    (0): Linear(in_features=6, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=1, bias=True)
  )
)


In [46]:
# Loss function and optimizer

criterion = nn.MSELoss()  # mean squared error between predictions and targets
optimizer = optim.Adam(params=model.parameters(), lr=1e-2)

In [47]:
# Training loop (full batch: every step uses the whole training set)

epochs = 10000

# Switching to training mode is loop-invariant, so do it once up front.
model.train()

for k in range(epochs):
  optimizer.zero_grad()  # clear gradients left over from the previous step
  predictions = model(x_train_tensor)
  loss = criterion(predictions, y_train_tensor)
  loss.backward()
  optimizer.step()

  # Report the training loss every 1000 epochs.
  if (k+1) % 1000 == 0:
    print(f"Epoch: {k+1}/{epochs}, Loss: {loss.item():.4f}")

Epoch: 1000/10000, Loss: 23216090.0000
Epoch: 2000/10000, Loss: 20969608.0000
Epoch: 3000/10000, Loss: 18819456.0000
Epoch: 4000/10000, Loss: 15325834.0000
Epoch: 5000/10000, Loss: 11664307.0000
Epoch: 6000/10000, Loss: 9891200.0000
Epoch: 7000/10000, Loss: 8947470.0000
Epoch: 8000/10000, Loss: 8045576.5000
Epoch: 9000/10000, Loss: 7503473.0000
Epoch: 10000/10000, Loss: 7101070.5000


In [48]:
# Model evaluation

model.eval()
# Inference needs no gradients; no_grad() avoids building the autograd graph.
with torch.no_grad():
  y_pred = model(x_test_tensor).numpy()

In [49]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Ground-truth charges as a NumPy array, the same container type as y_pred
y_test_numpy = y_test_tensor.numpy()

# Regression metrics on the held-out test split
mse = mean_squared_error(y_test_numpy, y_pred)
mae = mean_absolute_error(y_test_numpy, y_pred)
r2 = r2_score(y_test_numpy, y_pred)
rmse = mse ** 0.5  # square root of MSE, so it is in the units of the target

print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAE: {mae:.2f}")
print(f"R2 Score: {r2:.2f}")

MSE: 36595176.00
RMSE: 6049.39
MAE: 3916.64
R2 Score: 0.76


In [54]:
def predict_charges(age, sex, smoker, bmi, children, region):
  """Predict the insurance charge for one person with the trained model.

  The argument order (``smoker`` before ``bmi``) differs from the column
  order used for training; the values are rearranged into that order below.

  Args:
    age: Age of the person.
    sex: Raw category label as it appears in the dataset, e.g. ``'female'``.
    smoker: Raw category label, e.g. ``'yes'``.
    bmi: Body mass index.
    children: Number of children.
    region: Raw category label, e.g. ``'southwest'``.

  Returns:
    The predicted charge as a Python float.

  Raises:
    ValueError: If a categorical value was not seen when the encoders were fitted.
  """
  # Columns must be in the order the scaler and the model were fitted on.
  feature_order = ['age', 'sex', 'bmi', 'children', 'smoker', 'region']
  input_data = pd.DataFrame([[age, sex, bmi, children, smoker, region]],
                            columns=feature_order)

  # Apply the encoders fitted on the training split to the categorical columns.
  for col in ['sex', 'smoker', 'region']:
    input_data[col] = label_encoder[col].transform(input_data[col])

  input_data = scaler.transform(input_data)
  input_tensor = torch.tensor(input_data, dtype=torch.float32)

  # Pure inference: skip autograd bookkeeping.
  with torch.no_grad():
    predicted_charges = model(input_tensor).item()

  return predicted_charges

In [55]:
# Example: the first row of the dataset (recorded charge 16884.924).
# Keyword arguments are used because the signature order differs from the column order.
predict = predict_charges(
    age=19,
    sex='female',
    smoker='yes',
    bmi=27.9,
    children=0,
    region='southwest',
)
print(f"Predicted Insurance Charge: ${predict:.2f}")

Predicted Insurance Charge: $17031.22
