In [27]:
import pandas as pd
import numpy as np

**Importing dataset**

In [28]:
# Read the preprocessed insurance CSV into a DataFrame and preview the first rows.
# NOTE(review): absolute Colab path — the file must exist at this location.
dataset_path = "/content/processed_insurance_dataset.csv"
df = pd.read_csv(dataset_path)
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,0,16884.924
1,18,1,33.77,1,0,1,1725.5523
2,28,1,33.0,3,0,1,4449.462
3,33,1,22.705,0,0,2,21984.47061
4,32,1,28.88,0,0,2,3866.8552


In [32]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column
df.describe()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
count,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0,1338.0
mean,39.207025,0.505232,30.663397,1.094918,0.204783,1.484305,13270.422265
std,14.04996,0.50016,6.098187,1.205493,0.403694,1.104885,12110.011237
min,18.0,0.0,15.96,0.0,0.0,0.0,1121.8739
25%,27.0,0.0,26.29625,0.0,0.0,1.0,4740.28715
50%,39.0,1.0,30.4,1.0,0.0,1.0,9382.033
75%,51.0,1.0,34.69375,2.0,0.0,2.0,16639.912515
max,64.0,1.0,53.13,5.0,1.0,3.0,63770.42801


Selecting feature and target

In [52]:
# Pull the single predictor and the regression target out of the frame
# as plain NumPy arrays.
x = df["smoker"].to_numpy()    # selected feature
y = df["charges"].to_numpy()   # target


Shuffling the dataset

In [53]:
# Shuffle feature and target with one shared permutation so that each x[i]
# stays paired with its y[i].
# A fixed seed makes the permutation -- and therefore the train/test split and
# every metric computed from it -- identical on each fresh Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)
indices = rng.permutation(len(x))

# NOTE: x and y are overwritten here, so re-running only this cell permutes
# the already-shuffled arrays again.
x = x[indices]
y = y[indices]


Train test split

In [54]:
# Index of the first test sample: the leading 80% of the (shuffled) rows are
# used for training, the remainder for testing.
split = int(len(x) * 0.8)

# Training portion
x_train, y_train = x[:split], y[:split]
# Held-out portion
x_test, y_test = x[split:], y[split:]


Normalizing the feature (train and test) using training-set statistics

In [55]:
# Standardization statistics are taken from the training split only and then
# applied to both splits.
x_mean, x_std = x_train.mean(), x_train.std()

# NOTE: x_train / x_test are overwritten, so re-running only this cell would
# recompute x_mean / x_std from the already-standardized values.
x_train, x_test = (
    (x_train - x_mean) / x_std,
    (x_test - x_mean) / x_std,
)


Scaling target variable

In [56]:
# Standardize the training target; y_mean and y_std are kept so predictions
# can be mapped back to the original scale later.
y_mean, y_std = y_train.mean(), y_train.std()

y_train_scaled = (y_train - y_mean) / y_std


Adding bias term

In [57]:
# Design matrices: a leading column of ones (bias) followed by the
# standardized feature.
X_train = np.column_stack((np.ones(len(x_train)), x_train))
X_test = np.column_stack((np.ones(len(x_test)), x_test))


Training the model

In [58]:
# Batch gradient descent on the mean squared error of a linear model fitted
# to the standardized target.
lr, epochs = 0.01, 3000
theta = np.zeros(2)     # [bias, slope]
n = len(y_train_scaled)
grad_scale = 2 / n      # constant factor of the MSE gradient

for _ in range(epochs):
    # Predictions and residuals under the current parameters
    y_pred = np.dot(X_train, theta)
    error = y_pred - y_train_scaled
    # Gradient of mean((X @ theta - y) ** 2) with respect to theta
    gradients = grad_scale * np.dot(X_train.T, error)
    theta -= lr * gradients


In [59]:
# Predict on the test design matrix (standardized target scale) ...
y_test_pred_scaled = np.dot(X_test, theta)
# ... then undo the target standardization to get charges in original units.
y_test_pred = y_mean + y_std * y_test_pred_scaled


Evaluation

In [60]:
# Squared residuals on the held-out set, in original charge units
squared_errors = (y_test - y_test_pred) ** 2

mse = np.mean(squared_errors)
rmse = np.sqrt(mse)

# R^2 = 1 - SS_res / SS_tot
ss_res = np.sum(squared_errors)
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
r2 = 1 - ss_res / ss_tot

for metric_label, metric_value in (("MSE:", mse), ("RMSE:", rmse), ("R2 Score:", r2)):
    print(metric_label, metric_value)


MSE: 62364954.85719438
RMSE: 7897.148526980759
R2 Score: 0.627708858823785


In [61]:
def predict_insurance_smoker(smoker_value):
    """
    Predict insurance charges from smoker status using the fitted linear model.

    smoker_value: 0 (non-smoker) or 1 (smoker). May be a single value or an
        array-like of values; array input yields one prediction per element.

    Returns the predicted charges on the original (unscaled) target scale:
    a NumPy scalar for scalar input, an ndarray for array-like input.

    Reads the module-level training statistics x_mean, x_std, y_mean, y_std
    and the fitted parameters theta ([bias, slope]).
    """
    # Normalize input using training stats
    x_norm = (np.asarray(smoker_value, dtype=float) - x_mean) / x_std

    # Predict (scaled). Writing the model as bias + slope * x instead of
    # building a [1, x] vector lets the same expression broadcast over arrays.
    y_scaled = theta[0] + theta[1] * x_norm

    # Convert back to original scale
    return y_scaled * y_std + y_mean


In [62]:
# Report the model's prediction for each of the two smoker categories.
for case_label, smoker_flag in (
    ("Predicted charges (Non-smoker):", 0),
    ("Predicted charges (Smoker):", 1),
):
    print(case_label, predict_insurance_smoker(smoker_flag))


Predicted charges (Non-smoker): 8556.575433070357
Predicted charges (Smoker): 31780.52438728104
