## 1. Build the neural network

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple neural network class
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

## 2. Initialize the model

In [14]:
input_size = 9
hidden_size = 10
output_size = 1  # For regression, output_size is typically 1

# Instantiate the model
model = SimpleNN(input_size, hidden_size, output_size)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

## 3. Import data

In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("insurance.csv")

# Perform one-hot encoding
encoded_df = pd.get_dummies(df, columns=['sex','smoker','region'], prefix=['sex','smoker','region'])
columns_to_convert = encoded_df.columns[encoded_df.columns.str.contains('sex_|smoker_|region_')]
encoded_df[columns_to_convert] = encoded_df[columns_to_convert].astype(int)

# Split the data into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(encoded_df.drop('charges', axis=1), encoded_df['charges'], test_size=0.2, random_state=42)

In [22]:
X_train_tensor = torch.FloatTensor(X_train.values)
y_train_tensor = torch.FloatTensor(y_train.values)

num_epochs = 1000

for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1070x11 and 9x10)

## 5. Test the model

In [None]:
from sklearn.metrics import mean_squared_error

X_test_tensor = torch.FloatTensor(X_test.values)
y_test_tensor = torch.FloatTensor(y_test.values)

# Set the model to evaluation mode
model_nn.eval()

# Make predictions on the test data
with torch.no_grad():
    y_pred = model(X_test_tensor)

# Convert predictions and true values to numpy arrays
y_pred_np = y_pred.numpy()
y_test_np = y_test_tensor.numpy()

# Calculate RMSE
rmse = mean_squared_error(y_test_np, y_pred_np, squared=False)
print(f'RMSE on test data: {rmse:.4f}')

In [19]:
print(encoded_df.head())

   age     bmi  children      charges  sex_female  sex_male  smoker_no  \
0   19  27.900         0  16884.92400           1         0          0   
1   18  33.770         1   1725.55230           0         1          1   
2   28  33.000         3   4449.46200           0         1          1   
3   33  22.705         0  21984.47061           0         1          1   
4   32  28.880         0   3866.85520           0         1          1   

   smoker_yes  region_northeast  region_northwest  region_southeast  \
0           1                 0                 0                 0   
1           0                 0                 0                 1   
2           0                 0                 0                 1   
3           0                 0                 1                 0   
4           0                 0                 1                 0   

   region_southwest  
0                 1  
1                 0  
2                 0  
3                 0  
4                 