In [23]:
!pip install torch



In [24]:
import pandas as pd

# Read the raw insurance dataset and preview its first rows.
csv_path = "/content/insurance.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [25]:
# Clean and encode the raw frame in one chain:
#   1. drop exact duplicate rows,
#   2. discard the `region` column,
#   3. binarise `smoker` ("yes" -> 1, "no" -> 0),
#   4. one-hot encode `sex`, then cast the two dummy columns to integers.
df = pd.get_dummies(
    df.drop_duplicates()
    .drop(columns=["region"])
    .assign(smoker=lambda frame: frame["smoker"].map({"yes": 1, "no": 0})),
    columns=["sex"],
).astype({"sex_female": int, "sex_male": int})

df.head()

Unnamed: 0,age,bmi,children,smoker,charges,sex_female,sex_male
0,19,27.9,0,1,16884.924,1,0
1,18,33.77,1,0,1725.5523,0,1
2,28,33.0,3,0,4449.462,0,1
3,33,22.705,0,0,21984.47061,0,1
4,32,28.88,0,0,3866.8552,0,1


# Training Using Neural Networks

### Train test split

In [26]:
# Separate the regression target (`charges`) from the remaining feature columns.
target_column = "charges"
y = df[target_column]
X = df.drop(columns=[target_column])

In [27]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Feature scaling

- Neural Networks are trained using gradient descent, which is sensitive to feature scale.
- If inputs have very different ranges, large-valued features dominate the learning process and slow or destabilize training.
- StandardScaler puts all features on a similar scale (mean 0, std 1), leading to faster and more stable convergence.
- Classical linear regression can often work without scaling because it can be solved in closed form (ordinary least squares), so its solution does not depend on how well gradient descent converges.

In [28]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to both splits so no statistics
# from the test rows leak into training.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [29]:
# Inspect the scaled training features (a NumPy array after the scaler's transform).
X_train

array([[-1.1576804 , -0.99692768, -0.90790804, -0.50029231, -0.97140947,
         0.97140947],
       [-1.30061876, -0.79276204,  0.7669042 , -0.50029231, -0.97140947,
         0.97140947],
       [ 0.91492586,  1.15466402,  0.7669042 , -0.50029231,  1.029432  ,
        -1.029432  ],
       ...,
       [-1.37208794, -1.4118716 , -0.07050192, -0.50029231, -0.97140947,
         0.97140947],
       [-0.08564268, -0.41997378,  1.60431032, -0.50029231,  1.029432  ,
        -1.029432  ],
       [-0.30005022,  0.87941237,  0.7669042 , -0.50029231,  1.029432  ,
        -1.029432  ]])

### Convert to PyTorch tensors

In [30]:
import torch

In [31]:
# Feature matrices are already 2-D, so they convert directly to float32 tensors.
X_train, X_test = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)

# Targets are 1-D pandas Series; reshape them into column vectors (n, 1) so
# they line up with the single-output predictions of the network.
y_train, y_test = (
    torch.tensor(target.to_numpy(), dtype=torch.float32).reshape(-1, 1)
    for target in (y_train, y_test)
)

### Design your neural network

This defines a feedforward neural network for regression using PyTorch.

The model takes 6 input features and passes them through multiple fully connected (Linear) layers with ReLU activations in between to learn non-linear relationships.

After the first layer expands the 6 inputs to 32 units, each subsequent hidden layer reduces the dimensionality (32 → 16 → 8), helping the network extract useful patterns.

The final layer outputs a single value, representing the predicted insurance charges.

In [32]:
import torch.nn as nn


class InsuranceNN(nn.Module):
    """Feedforward regressor that maps tabular features to one predicted value.

    The network is a stack of ``Linear -> ReLU`` hidden layers followed by a
    final ``Linear`` layer with a single output and no activation.

    Args:
        in_features: Number of input features per sample. Defaults to 6.
        hidden_sizes: Width of each hidden layer, in order. Defaults to
            ``(32, 16, 8)``. An empty sequence yields a single linear layer.

    With the default arguments the layers are ``Linear(6, 32)``, ``ReLU``,
    ``Linear(32, 16)``, ``ReLU``, ``Linear(16, 8)``, ``ReLU``, ``Linear(8, 1)``,
    stored in ``self.net`` in that order.
    """

    def __init__(self, in_features=6, hidden_sizes=(32, 16, 8)):
        super().__init__()
        layers = []
        previous_width = in_features
        for width in hidden_sizes:
            layers.append(nn.Linear(previous_width, width))
            layers.append(nn.ReLU())
            previous_width = width
        # Final projection to a single regression output (charges).
        layers.append(nn.Linear(previous_width, 1))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` of shape ``(batch, in_features)`` through the stack; returns ``(batch, 1)``."""
        return self.net(x)

### Create model instance, and define criterion and optimizer

- `criterion` (loss function) measures how wrong the model’s predictions are.

MSELoss computes the mean squared error, which is standard for regression problems.

- `optimizer` updates the model’s weights to reduce the loss.

Adam is an adaptive gradient descent algorithm that adjusts learning rates automatically, leading to faster and more stable training.

In [33]:
from torch.nn import MSELoss
from torch.optim import Adam

# Seed PyTorch's random number generator before building the model so the
# randomly initialised weights are the same on every run of the notebook.
torch.manual_seed(42)

model = InsuranceNN()
criterion = MSELoss()  # mean squared error between predictions and targets
optimizer = Adam(model.parameters(), lr=0.01)

## Optionally you can move your model to GPU if available

In [34]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [35]:
# Module.to() moves the parameters in place and returns the same module;
# tensors, by contrast, must be re-bound to the copy that .to() returns.
model = model.to(device)
X_train, y_train = X_train.to(device), y_train.to(device)

### Train Our model

In [36]:
epochs = 1000

# Full-batch training: every epoch is one forward/backward pass over the
# whole training set followed by a single optimizer update.
model.train()
for epoch in range(epochs):
    # Clear gradients left over from the previous step before accumulating new ones.
    optimizer.zero_grad()

    predictions = model(X_train)
    loss = criterion(predictions, y_train)

    loss.backward()
    optimizer.step()

    # Report the training loss only on every 50th epoch.
    if (epoch + 1) % 50 != 0:
        continue
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.2f}")

Epoch 50/1000, Loss: 297437440.00
Epoch 100/1000, Loss: 81041336.00
Epoch 150/1000, Loss: 34995328.00
Epoch 200/1000, Loss: 33032144.00
Epoch 250/1000, Loss: 32290446.00
Epoch 300/1000, Loss: 31417864.00
Epoch 350/1000, Loss: 30291330.00
Epoch 400/1000, Loss: 28819368.00
Epoch 450/1000, Loss: 27325726.00
Epoch 500/1000, Loss: 26077362.00
Epoch 550/1000, Loss: 25159460.00
Epoch 600/1000, Loss: 24493588.00
Epoch 650/1000, Loss: 24012086.00
Epoch 700/1000, Loss: 23626878.00
Epoch 750/1000, Loss: 23362668.00
Epoch 800/1000, Loss: 23138584.00
Epoch 850/1000, Loss: 22946796.00
Epoch 900/1000, Loss: 22784198.00
Epoch 950/1000, Loss: 22631454.00
Epoch 1000/1000, Loss: 22480446.00


### Evaluate our model

In [37]:
from sklearn.metrics import r2_score
# The test tensors were never moved off the CPU, so bring the model back to
# the CPU and switch it to evaluation mode before scoring.
model = model.cpu()
model.eval()

# Gradients are not needed for evaluation; disabling them also allows the
# direct .numpy() conversion of the model output.
with torch.no_grad():
    y_pred = model(X_test).numpy()
y_true = y_test.numpy()

r2 = r2_score(y_true, y_pred)
print("R2 score:", r2)

R2 score: 0.8792626261711121


In [48]:
def predict_insurance(age, bmi, children, smoker, sex):
    """Predict insurance charges for a single person with the trained network.

    Uses the notebook-level ``scaler`` (to standardise the inputs) and
    ``model`` (to produce the prediction).

    Args:
        age: Value for the ``age`` feature.
        bmi: Value for the ``bmi`` feature.
        children: Value for the ``children`` feature.
        smoker: Smoking status. Either a truthy/falsy value (``True``/``False``,
            ``1``/``0``) or one of the strings ``"yes"`` / ``"no"``
            (case-insensitive).
        sex: ``"female"`` or ``"male"`` (case-insensitive).

    Returns:
        float: The predicted charges, rounded to 2 decimal places.

    Raises:
        ValueError: If ``smoker`` is a string other than "yes"/"no", or if
            ``sex`` is neither "female" nor "male".
    """
    # A plain truthiness test would treat the string "no" as a smoker, so
    # strings are decoded explicitly.
    if isinstance(smoker, str):
        smoker_key = smoker.strip().lower()
        if smoker_key not in ("yes", "no"):
            raise ValueError(f"smoker must be a boolean or 'yes'/'no', got {smoker!r}")
        smoker_flag = 1 if smoker_key == "yes" else 0
    else:
        smoker_flag = 1 if smoker else 0

    # Reject unknown values instead of silently encoding them as an all-zero
    # one-hot row.
    sex_key = str(sex).strip().lower()
    if sex_key not in ("female", "male"):
        raise ValueError(f"sex must be 'female' or 'male', got {sex!r}")

    # Column names and order mirror the feature frame the scaler was fitted on.
    new_df = pd.DataFrame(
        [{
            "age": age,
            "bmi": bmi,
            "children": children,
            "smoker": smoker_flag,
            "sex_female": 1 if sex_key == "female" else 0,
            "sex_male": 1 if sex_key == "male" else 0
        }]
    )

    new_scaled = scaler.transform(new_df)
    model.eval()

    # Build the input on whichever device the model currently lives on, so the
    # function works whether or not the model has been moved back to the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    with torch.inference_mode():
        new_tensor = torch.tensor(new_scaled, dtype=torch.float32, device=target_device)
        prediction = model(new_tensor)
    # The output holds exactly one element (one row, one output unit).
    result = prediction.item()
    return round(result, 2)

In [49]:
# Example: predicted charges for a 26-year-old male smoker with three children.
predict_insurance(age=26, bmi=27.9, children=3, smoker=True, sex="male")

23735.61