In [1]:
import numpy as np
import pandas as pd
import sklearn as sk
from sklearn.preprocessing import StandardScaler

In [2]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def sigmoid_derivative(x):
    return x * (1 - x)
def relu(x):
    return np.maximum(0, x)
def relu_derivative(x):
    return np.where(x > 0, 1, 0)

In [3]:
def MSELoss(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

In [4]:
class Layer:
    def __init__(self, input_size, output_size, activation = 'sigmoid'):
        self.activation = activation
        self.weights = np.random.rand(input_size, output_size) * np.sqrt(2. / input_size) 
        self.biases = np.random.randn(1, output_size) * 0.01
    
    def forward(self, inputs):
        self.input = inputs
        self.z = np.dot(inputs, self.weights) + self.biases
        if self.activation == 'sigmoid':
            self.output = sigmoid(self.z)
        elif self.activation == 'relu':
            self.output = relu(self.z)
        if self.activation == 'linear':
            self.output = self.z
        return self.output
    
    def backward(self, output_error, learning_rate):
        if self.activation == 'sigmoid':
            activation_gradient = sigmoid_derivative(self.output)
        elif self.activation == 'relu':
            activation_gradient = relu_derivative(self.output)
        else:
            activation_gradient = np.ones_like(self.output)
        

        
        input_gradient = np.dot(output_error * activation_gradient, self.weights.T)
        weights_gradient = np.dot(self.input.T, output_error * activation_gradient)

        max_grad = 1.0
        weights_gradient = np.clip(weights_gradient, -max_grad, max_grad)
        input_gradient = np.clip(input_gradient, -max_grad, max_grad)
        self.weights -= learning_rate * weights_gradient
        self.biases -= learning_rate * np.sum(output_error * activation_gradient, axis=0, keepdims=True)
    
        return input_gradient
    

In [5]:
class NeuralNetwork:
    def __init__(self, layers):
        self.layers = layers

    def forward(self, inputs):
        for layer in self.layers:
            inputs = layer.forward(inputs)
        return inputs

    def backward(self, y_true, y_pred, learning_rate):
        output_error =  - y_true + y_pred
        for layer in reversed(self.layers):
            output_error = layer.backward(output_error, learning_rate)

    def train(self, X, y, epochs, learning_rate):
        for epoch in range(epochs):
            y_pred = self.forward(X)
            self.backward(y, y_pred, learning_rate)
            
            if epoch % 100 == 0:
                loss = np.mean(np.square(y - y_pred))
                print(f'Epoch {epoch}, Loss: {loss}')

    def predict(self, X):
        return self.forward(X)
    
    def Loss(self, y_true, y_pred):
        return MSELoss(y_true, y_pred)
    

In [6]:
df = pd.read_csv('insurance (1).csv')
print(df.head())

   age     sex     bmi  children smoker     region      charges
0   19  female  27.900         0    yes  southwest  16884.92400
1   18    male  33.770         1     no  southeast   1725.55230
2   28    male  33.000         3     no  southeast   4449.46200
3   33    male  22.705         0     no  northwest  21984.47061
4   32    male  28.880         0     no  northwest   3866.85520


In [7]:
df = pd.get_dummies(df, columns=["sex", "smoker", "region"], drop_first=True)

In [8]:
target_col = 'charges'
y = df[target_col].astype(int)
X = df.drop(columns=[target_col])

In [9]:
scaler = StandardScaler()
numeric_cols = ['age', 'bmi', 'children']
X_scaled_numeric = scaler.fit_transform(X[numeric_cols])
bool_cols = ['sex_male', 'smoker_yes', 'region_northwest', 'region_southeast', 'region_southwest']

region_map = {
    'region_northwest': 1,
    'region_southeast': 2,
    'region_southwest': 3
}
region_cols = ['region_northwest', 'region_southeast', 'region_southwest']

def get_region(row):
    for col, val in region_map.items():
        if row[col]:
            return val
    return 0

for col in bool_cols + region_cols:
    X[col] = X[col].astype(int)
X['region'] = X[region_cols].apply(get_region, axis=1)
bool_cols = ['sex_male', 'smoker_yes']
X_scaled = pd.DataFrame(X_scaled_numeric, columns=numeric_cols, index=X.index)
X_scaled[bool_cols] = X[bool_cols]
X_scaled['region'] = X['region']
print(X_scaled.head())

        age       bmi  children  sex_male  smoker_yes  region
0 -1.438764 -0.453320 -0.908614         0           1       3
1 -1.509965  0.509621 -0.078767         1           0       2
2 -0.797954  0.383307  1.580926         1           0       2
3 -0.441948 -1.305531 -0.908614         1           0       1
4 -0.513149 -0.292556 -0.908614         1           0       1


In [10]:
print(X_scaled.shape)
print(y.shape)

(1338, 6)
(1338,)


In [11]:
X_train = X_scaled
y_train = y
print(X_train.shape)
print(y_train.shape)

(1338, 6)
(1338,)


In [12]:
X_check = X_scaled.sample(frac=0.2, random_state=42)
# Split X_train and y_train into 60% train, 20% validation, 20% check
# First, split 20% for check set
X_check = X_train.sample(frac=0.2, random_state=42)
X_train_remain = X_train.drop(X_check.index)
y_check = y_train.loc[X_check.index]
y_train_remain = y_train.drop(X_check.index)


X_train = X_train_remain
y_train = y_train_remain
y_check = y.loc[X_check.index]

In [13]:
print(X_train.shape)
print(y_train.shape)

(1070, 6)
(1070,)


In [14]:
print(X_check.shape)
print(y_check.shape)

(268, 6)
(268,)


In [15]:
Layer1 = Layer(input_size=X_train.shape[1], output_size=128, activation='relu')
Layer2 = Layer(input_size=128, output_size=64, activation='relu')
Layer3 = Layer(input_size=64, output_size=32, activation='relu')
Layer4 = Layer(input_size=32, output_size=1, activation='linear')
model = NeuralNetwork(layers=[Layer1, Layer2, Layer3, Layer4])

In [16]:
X_train = np.array(X_train)
y_train = np.array(y_train).reshape(-1, 1)
print(X_train)

[[-1.43876426 -0.45332    -0.90861367  0.          1.          3.        ]
 [-1.50996545  0.5096211  -0.07876719  1.          0.          2.        ]
 [-0.79795355  0.38330685  1.58092576  1.          0.          2.        ]
 ...
 [-1.50996545  1.0148781  -0.90861367  0.          0.          2.        ]
 [-1.29636188 -0.79781341 -0.90861367  0.          0.          3.        ]
 [ 1.55168573 -0.26138796 -0.90861367  0.          1.          1.        ]]


In [17]:
X_check = np.array(X_check)
y_check = np.array(y_check).reshape(-1, 1)

In [18]:
model.train(X_train, y_train, epochs=1000, learning_rate=0.001)

Epoch 0, Loss: 318884497.8300604


Epoch 100, Loss: 142385384.23501605
Epoch 200, Loss: 132092914.93735643
Epoch 300, Loss: 110163284.23413542
Epoch 400, Loss: 81357646.4162517
Epoch 500, Loss: 61057812.99806573
Epoch 600, Loss: 47198588.13036938
Epoch 700, Loss: 41525602.56426427
Epoch 800, Loss: 40675128.94408897
Epoch 900, Loss: 41623505.34882893


In [19]:
print(model.Loss(y_check, model.predict(X_check)))

43261530.95714483
