## 1. Build the neural network

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple neural network class
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

## 2. Initialize the model

In [40]:
input_size = 11
hidden_size = 10
output_size = 1  # For regression, output_size is typically 1

# Instantiate the model
model_nn = SimpleNN(input_size, hidden_size, output_size)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=1)

## 3. Import data

In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("insurance.csv")

# Perform one-hot encoding
encoded_df = pd.get_dummies(df, columns=['sex','smoker','region'], prefix=['sex','smoker','region'])
columns_to_convert = encoded_df.columns[encoded_df.columns.str.contains('sex_|smoker_|region_')]
encoded_df[columns_to_convert] = encoded_df[columns_to_convert].astype(int)

# Split the data into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(encoded_df.drop('charges', axis=1), encoded_df['charges'], test_size=0.2, random_state=42)

In [42]:
X_train_tensor = torch.FloatTensor(X_train.values)
y_train_tensor = torch.FloatTensor(y_train.values).view(-1)

num_epochs = 10000

for epoch in range(num_epochs):
    # Forward pass
    outputs = model_nn(X_train_tensor)
    loss = criterion(outputs.view(-1), y_train_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss every 100 epochs
    if (epoch + 1) % 1000 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1000/10000], Loss: 322453120.0000
Epoch [2000/10000], Loss: 322453120.0000
Epoch [3000/10000], Loss: 322453120.0000
Epoch [4000/10000], Loss: 322453120.0000
Epoch [5000/10000], Loss: 322453120.0000
Epoch [6000/10000], Loss: 322453120.0000
Epoch [7000/10000], Loss: 322453120.0000
Epoch [8000/10000], Loss: 322453120.0000
Epoch [9000/10000], Loss: 322453120.0000
Epoch [10000/10000], Loss: 322453120.0000


## 5. Test the model

In [32]:
from sklearn.metrics import mean_squared_error

X_test_tensor = torch.FloatTensor(X_test.values)
y_test_tensor = torch.FloatTensor(y_test.values)

# Set the model to evaluation mode
model_nn.eval()

# Make predictions on the test data
with torch.no_grad():
    y_pred = model(X_test_tensor)

# Convert predictions and true values to numpy arrays
y_pred_np = y_pred.numpy()
y_test_np = y_test_tensor.numpy()

# Calculate RMSE
rmse = mean_squared_error(y_test_np, y_pred_np, squared=False)
print(f'RMSE on test data: {rmse:.4f}')

RMSE on test data: 12465.6094


In [26]:
print(X_train.head())

      age    bmi  children  sex_female  sex_male  smoker_no  smoker_yes  \
560    46  19.95         2           1         0          1           0   
1285   47  24.32         0           1         0          1           0   
1142   52  24.86         0           1         0          1           0   
969    39  34.32         5           1         0          1           0   
486    54  21.47         3           1         0          1           0   

      region_northeast  region_northwest  region_southeast  region_southwest  
560                  0                 1                 0                 0  
1285                 1                 0                 0                 0  
1142                 0                 0                 1                 0  
969                  0                 0                 1                 0  
486                  0                 1                 0                 0  
