In [1]:
import torch 
import torch.nn as nn
import torch.optim as optim

### Basic Neural Network using PyTorch


###### X --> Input

###### Wx --> Weights

###### b --> Bias

###### Y --> Output

###### A --> Activation Function (Sigmoid,ReLU,Tanh)

###### Z = WX + b

###### Z` = Activation(Z)

###### Y = W2.Z` + b2


- Loss Function: MSE, Cross Entropy
- Backpropagation: Gradient Descent, Adam, RMSProp
- Optimizer: SGD, Adam, RMSProp
 


### Components of Pytorch

- Base class for defining cutom models is `torch.nn.Module`
- Layers are defined in `__init__` method
- Forward pass is defined in `forward` method
- Loss functions are defined in `torch.nn` module
- Optimizers are defined in `torch.optim` module
- Data loading and preprocessing is done using `torch.utils.data` module
- Fully connected layer is defined using `torch.nn.Linear`
- Activation functions are defined in `torch.nn.ReLU` module 
- Optimizers are defined in `torch.optim` module
- Loss functions are defined in `torch.nn.CrossEntropyLoss` module
- Loads data in batches using `torch.utils.data.DataLoader` module


### Different ways to define a model in Pytorch

1. Functional: Flexable, harder to interpret
2. Sequential: Easy to interpret, less flexable
3. Custom: Most flexable, harder to interpret

### Functional API

In [2]:
class SimpleNN(nn.Module):
   def __init__(self,input_size,hidden_size,output_size):
      super(SimpleNN,self).__init__()
       
      self.fullyConnectedLayer_1 = nn.Linear(input_size,hidden_size)
      self.relu = nn.ReLU()
      self.fullyConnnectedLayer_2 = nn.Linear(hidden_size,output_size)
       
       
   def forward(self,x):
      x = self.fullyConnectedLayer_1(x)
      x = self.relu(x)
      x = self.fullyConnnectedLayer_2(x)
      return x

### Sequential API


In [3]:
class SimpleNNSequential(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()

        self.network = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        return self.network(x)

### Training the neural network using Functional API


In [4]:
model_func = SimpleNN(input_size=4, hidden_size=8,output_size=3)
print(model_func)

SimpleNN(
  (fullyConnectedLayer_1): Linear(in_features=4, out_features=8, bias=True)
  (relu): ReLU()
  (fullyConnnectedLayer_2): Linear(in_features=8, out_features=3, bias=True)
)


In [5]:
X = torch.randn(10,4) # 10 samples, 4 features
Y = torch.randint(0,3,(10,))

print(X)
print(Y)

tensor([[ 1.1078e-02,  7.1471e-01, -2.9741e-01,  1.0817e-01],
        [-7.0193e-01, -1.5220e+00,  5.2227e-01,  2.0043e-01],
        [ 2.7192e+00, -1.1287e+00, -9.6982e-01,  9.9883e-01],
        [-2.0749e-01,  1.0156e-03, -9.5886e-01,  6.5245e-01],
        [-2.0277e-01, -1.7470e-02, -6.4231e-01, -7.9846e-01],
        [-8.2634e-01,  8.4686e-01, -4.1232e-01, -2.4988e-01],
        [-5.1659e-01,  1.4309e+00,  7.0469e-01,  6.1677e-01],
        [ 2.4391e-01,  5.5396e-01,  5.6156e-01, -1.4503e+00],
        [ 7.8056e-01, -2.9445e-02, -9.5471e-02, -1.5726e+00],
        [ 8.0370e-02,  2.0079e+00,  2.9171e-01,  1.9636e-01]])
tensor([1, 2, 0, 1, 1, 1, 2, 2, 0, 1])


In [6]:
criterion = nn.CrossEntropyLoss() # includes the advance version of sigmax function
optimizer = optim.Adam(model_func.parameters(),lr=0.01)

In [7]:
# Training Loop
epoch = 300
for e in range(epoch):
   optimizer.zero_grad() # Clear all the gradients
   outputs = model_func(X) # Passing inputs
   loss = criterion(outputs,Y)
   loss.backward()
   optimizer.step()
   
   if(e+1) % 10 == 0:
      print(f"Epoch [{e+1}]/{epoch} , Loss : {loss.item() :.4f}")
   

Epoch [10]/300 , Loss : 0.9285
Epoch [20]/300 , Loss : 0.7490
Epoch [30]/300 , Loss : 0.5819
Epoch [40]/300 , Loss : 0.4421
Epoch [50]/300 , Loss : 0.3183
Epoch [60]/300 , Loss : 0.2122
Epoch [70]/300 , Loss : 0.1375
Epoch [80]/300 , Loss : 0.0864
Epoch [90]/300 , Loss : 0.0560
Epoch [100]/300 , Loss : 0.0385
Epoch [110]/300 , Loss : 0.0284
Epoch [120]/300 , Loss : 0.0220
Epoch [130]/300 , Loss : 0.0177
Epoch [140]/300 , Loss : 0.0146
Epoch [150]/300 , Loss : 0.0124
Epoch [160]/300 , Loss : 0.0106
Epoch [170]/300 , Loss : 0.0093
Epoch [180]/300 , Loss : 0.0082
Epoch [190]/300 , Loss : 0.0073
Epoch [200]/300 , Loss : 0.0065
Epoch [210]/300 , Loss : 0.0059
Epoch [220]/300 , Loss : 0.0054
Epoch [230]/300 , Loss : 0.0049
Epoch [240]/300 , Loss : 0.0045
Epoch [250]/300 , Loss : 0.0041
Epoch [260]/300 , Loss : 0.0038
Epoch [270]/300 , Loss : 0.0035
Epoch [280]/300 , Loss : 0.0033
Epoch [290]/300 , Loss : 0.0031
Epoch [300]/300 , Loss : 0.0029


#### <b><U>Linear Regression Model using Pytorch Components
 </U> </b>

1. Data gathering
2. Data preprocessing
3. Feature engineering
4. Model training
5. Testing



In [8]:
!pip install kagglehub


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [9]:
import kagglehub
import os
import shutil

# Download dataset (cached location by kagglehub)
path = kagglehub.dataset_download("mirichoi0218/insurance")
print(f"Downloaded dataset path: {path}")

# Define your custom target directory
target_dir = "./data/insurance"

# Make sure the directory exists
os.makedirs(target_dir, exist_ok=True)

# Copy all files from kagglehub cache to your custom directory
for file_name in os.listdir(path):
    src = os.path.join(path, file_name)
    dst = os.path.join(target_dir, file_name)
    shutil.copy2(src, dst)  # copy2 preserves metadata

print(f"Dataset copied to: {target_dir}")

  from .autonotebook import tqdm as notebook_tqdm


Downloaded dataset path: /home/codespace/.cache/kagglehub/datasets/mirichoi0218/insurance/versions/1
Dataset copied to: ./data/insurance


In [10]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [11]:
df = pd.read_csv('./data/insurance/insurance.csv')

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [13]:
df.describe()

Unnamed: 0,age,bmi,children,charges
count,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265
std,14.04996,6.098187,1.205493,12110.011237
min,18.0,15.96,0.0,1121.8739
25%,27.0,26.29625,0.0,4740.28715
50%,39.0,30.4,1.0,9382.033
75%,51.0,34.69375,2.0,16639.912515
max,64.0,53.13,5.0,63770.42801


In [14]:
# Split dataset before encoding
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
print(train_df.head())

      age     sex    bmi  children smoker     region      charges
560    46  female  19.95         2     no  northwest   9193.83850
1285   47  female  24.32         0     no  northeast   8534.67180
1142   52  female  24.86         0     no  southeast  27117.99378
969    39  female  34.32         5     no  southeast   8596.82780
486    54  female  21.47         3     no  northwest  12475.35130


In [15]:
# Encode cetagorical variable
label_encoder = {}
for col in ["sex", "smoker", "region"]:
    le = LabelEncoder()
    train_df[col] = le.fit_transform(train_df[col])
    test_df[col] = le.transform(test_df[col])
    label_encoder[col] = le
    print(f"Classes for {col}: {le.classes_}")

print("Final label_encoder keys:", label_encoder.values())
print(train_df.head())

Classes for sex: ['female' 'male']
Classes for smoker: ['no' 'yes']
Classes for region: ['northeast' 'northwest' 'southeast' 'southwest']
Final label_encoder keys: dict_values([LabelEncoder(), LabelEncoder(), LabelEncoder()])
      age  sex    bmi  children  smoker  region      charges
560    46    0  19.95         2       0       1   9193.83850
1285   47    0  24.32         0       0       0   8534.67180
1142   52    0  24.86         0       0       2  27117.99378
969    39    0  34.32         5       0       2   8596.82780
486    54    0  21.47         3       0       1  12475.35130


In [16]:
# Features and target
X_train = train_df.drop(columns=["charges"]) # Here we have excluded the charges column
y_train = train_df["charges"]

X_test = test_df.drop(columns=["charges"])
y_test = test_df["charges"]

In [17]:
print("X_train :\n",X_train.head())
print("y_train :\n",y_train.head())
print(y_train.values)

X_train :
       age  sex    bmi  children  smoker  region
560    46    0  19.95         2       0       1
1285   47    0  24.32         0       0       0
1142   52    0  24.86         0       0       2
969    39    0  34.32         5       0       2
486    54    0  21.47         3       0       1
y_train :
 560      9193.83850
1285     8534.67180
1142    27117.99378
969      8596.82780
486     12475.35130
Name: charges, dtype: float64
[ 9193.8385   8534.6718  27117.99378 ... 11931.12525 46113.511
 10214.636  ]


In [18]:
# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [19]:
print(X_train)

[[ 0.47222651 -1.0246016  -1.75652513  0.73433626 -0.50874702 -0.45611589]
 [ 0.54331294 -1.0246016  -1.03308239 -0.91119211 -0.50874702 -1.35325561]
 [ 0.8987451  -1.0246016  -0.94368672 -0.91119211 -0.50874702  0.44102382]
 ...
 [ 1.3252637   0.97598911 -0.89153925 -0.91119211 -0.50874702 -1.35325561]
 [-0.16755139 -1.0246016   2.82086429  0.73433626  1.96561348  1.33816354]
 [ 1.1120044   0.97598911 -0.10932713 -0.91119211 -0.50874702  1.33816354]]


In [20]:
# Convert to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

In [21]:
print(y_train_tensor)
print(y_train_tensor.shape)

tensor([[ 9193.8389],
        [ 8534.6719],
        [27117.9941],
        ...,
        [11931.1250],
        [46113.5117],
        [10214.6357]])
torch.Size([1070, 1])


In [22]:
print(X_test_tensor)
print(X_test_tensor.shape)

tensor([[ 0.4011, -1.0246, -0.8915,  0.7343, -0.5087, -1.3533],
        [-0.2386, -1.0246, -0.0895, -0.9112, -0.5087, -0.4561],
        [ 1.7518, -1.0246, -0.6085, -0.9112,  1.9656, -0.4561],
        ...,
        [-0.0965,  0.9760, -0.4197, -0.0884, -0.5087, -1.3533],
        [ 1.0409, -1.0246,  2.7894, -0.9112,  1.9656,  0.4410],
        [ 0.8277, -1.0246,  0.6025, -0.0884, -0.5087,  1.3382]])
torch.Size([268, 6])


In [23]:
# Define Neural network model


class SimpleNNRegressionModel(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNNRegressionModel, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        return self.network(x)

In [24]:
X_train_tensor.shape

torch.Size([1070, 6])

In [25]:
input_dim = X_train_tensor.shape[1]
model = SimpleNNRegressionModel(input_dim)

In [26]:
print(model)

SimpleNNRegressionModel(
  (network): Sequential(
    (0): Linear(in_features=6, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=1, bias=True)
  )
)


In [27]:
# Loss and optmiser

criterion = nn.MSELoss()
optimiser = optim.Adam(model.parameters(), lr=0.01)

In [28]:
# Training loop
epochs = 30000

for epoch in range(epochs):
    model.train()
    optimiser.zero_grad()
    predictions = model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)
    loss.backward()

    optimiser.step()

    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss : {loss.item():.4f}")

Epoch [100/30000], Loss : 45442196.0000
Epoch [200/30000], Loss : 31621372.0000


Epoch [300/30000], Loss : 28928400.0000
Epoch [400/30000], Loss : 27083222.0000
Epoch [500/30000], Loss : 26010146.0000
Epoch [600/30000], Loss : 25244390.0000
Epoch [700/30000], Loss : 24601858.0000
Epoch [800/30000], Loss : 24110354.0000
Epoch [900/30000], Loss : 23748598.0000
Epoch [1000/30000], Loss : 23458052.0000
Epoch [1100/30000], Loss : 23227280.0000
Epoch [1200/30000], Loss : 22999318.0000
Epoch [1300/30000], Loss : 22795648.0000
Epoch [1400/30000], Loss : 22596582.0000
Epoch [1500/30000], Loss : 22412480.0000
Epoch [1600/30000], Loss : 22156996.0000
Epoch [1700/30000], Loss : 21945278.0000
Epoch [1800/30000], Loss : 21756192.0000
Epoch [1900/30000], Loss : 21586256.0000
Epoch [2000/30000], Loss : 21383810.0000
Epoch [2100/30000], Loss : 21012632.0000
Epoch [2200/30000], Loss : 20480908.0000
Epoch [2300/30000], Loss : 19696538.0000
Epoch [2400/30000], Loss : 18976982.0000
Epoch [2500/30000], Loss : 18401600.0000
Epoch [2600/30000], Loss : 17993406.0000
Epoch [2700/30000], Los

In [29]:
# Model Evaluation

model.eval()
y_pred = model(X_test_tensor).detach().numpy()

In [30]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

y_test_numpy = y_test_tensor.numpy()

# Calculate metrics
mse = mean_squared_error(y_test_numpy, y_pred)
rmse = mse**0.5
mae = mean_absolute_error(y_test_numpy, y_pred)
r2 = r2_score(y_test_numpy, y_pred)

print(f"MSE : {mse}")
print(f"RMSE : {rmse}")
print(f"MAE : {mae}")
print(f"R2-Score : {r2}")

# 0 --> 0

MSE : 50296648.0
RMSE : 7092.012972351362
MAE : 4935.44580078125
R2-Score : 0.676025390625


In [31]:
def predict_charges(age, sex, bmi, children, smoker, region):
    input_data = pd.DataFrame(
        [[age, sex, bmi, children, smoker, region]],
        columns=["age", "sex", "bmi", "children", "smoker", "region"],
    )

    for col in ["sex", "smoker", "region"]:
        input_data[col] = label_encoder[col].transform(input_data[col])
    input_data = scaler.transform(input_data)
    input_tensor = torch.tensor(input_data, dtype=torch.float32)
    predicted_charge = model(input_tensor).item()
    return predicted_charge

In [32]:
predicted = predict_charges(50, "female", 27.9, 0, "yes", "southwest")
print(f"Predicted insurance charge: ${predicted:.2f}")

Predicted insurance charge: $39509.46
