In [5]:
#load file one_hot_encoded.csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split


df = pd.read_csv('one_hot_encoded.csv')
#extract the 'claim' column
y = df['claim']
#extract the feature columns
X = df.drop(columns=['claim'])


#extract the feature names
feature_names = X.columns



#split the data into training and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=69)
#create a pandas dataframe with X-train
X_train_pandas = pd.DataFrame(X_train, columns=feature_names)
# Second split: Split the 40% temporary set into 25% test and 15% evaluation
X_eval, X_test, y_eval, y_test = train_test_split(X_temp, y_temp, test_size=(0.25/0.4), random_state=69)

# Create pandas dataframes for the train, evaluation and test sets
X_train_pandas = pd.DataFrame(X_train, columns=feature_names)
X_eval_pandas = pd.DataFrame(X_eval, columns=feature_names)
X_test_pandas = pd.DataFrame(X_test, columns=feature_names)

#standardize the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)

#scale the data
X_train_scaled = scaler.transform(X_train)
X_eval_scaled = scaler.transform(X_eval)
X_test_scaled = scaler.transform(X_test)

#Create pandas dataframes for the train, evaluation and test sets
X_train_scaled_pandas = pd.DataFrame(X_train_scaled, columns=feature_names)
X_eval_scaled_pandas = pd.DataFrame(X_eval_scaled, columns=feature_names)
X_test_scaled_pandas = pd.DataFrame(X_test_scaled, columns=feature_names)




In [6]:
#convert the data to tensor
import torch
X_train_scaled_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_eval_scaled_tensor = torch.tensor(X_eval_scaled, dtype=torch.float32)
X_test_scaled_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

In [8]:
#Create a tensor for the target variable
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
#display y_train_tensor
print(y_train_tensor)
y_eval_tensor = torch.tensor(y_eval.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

tensor([18246.5000,  6184.2998,  9748.9004,  ...,  4934.7002,  8824.0000,
        12129.5996])


In [35]:
#create train loader
from torch.utils.data import TensorDataset, DataLoader
train_data_tensor_dataset = TensorDataset(X_train_scaled_tensor, y_train_tensor)
train_loader = DataLoader(train_data_tensor_dataset, batch_size=64, shuffle=True)

#Create evaluation loader
eval_data_tensor_dataset = TensorDataset(X_eval_scaled_tensor, y_eval_tensor)
eval_loader = DataLoader(eval_data_tensor_dataset, batch_size=64, shuffle=True)

#Create test loader
test_data_tensor_dataset = TensorDataset(X_test_scaled_tensor, y_test_tensor)
test_loader = DataLoader(test_data_tensor_dataset, batch_size=64, shuffle=True)

In [37]:
#Create an ANN model in pytorch with the following architecture:
#Input layer: 146 nodes
#Amount of hidden layers: 35
#Amount of nodes in each hidden layer: 110
#Output layer: 1 node
#Activation function for hidden layer: Leaky ReLU
#Activation function for output layer: Linear
#Loss function: Mean Squared Error
#Optimizer: AdamW
#Learning rate: 0.001
#Regularization: L2 with 0.3 weight decay
#Batch size: 64
#Epochs: 300
#Note: The model should be trained on the training set and evaluated on the evaluation set
#Note: The model should be tested on the test set
#metrics: MSE, RMSE, MAE, MAPE
#Note: model should train as fast as possible

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Define your custom neural network
class CustomNeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CustomNeuralNetwork, self).__init__()
        self.hidden_layers = nn.ModuleList([
            nn.Sequential(
                nn.Linear(input_size if i == 0 else hidden_size, hidden_size),
                nn.LeakyReLU(negative_slope=0.01)
            )
            for i in range(35)
        ])
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        for layer in self.hidden_layers:
            x = layer(x)
        output = self.output_layer(x)
        return output

# Set hyperparameters
input_size = 146
hidden_size = 110
output_size = 1
learning_rate = 0.001
weight_decay = 0.3
batch_size = 64
num_epochs = 300

# Instantiate the model
model = CustomNeuralNetwork(input_size, hidden_size, output_size)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Assuming you have train_loader, eval_loader, and test_loader
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        outputs = torch.squeeze(outputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Evaluate on the evaluation set
    model.eval()
    eval_loss = 0.0
    eval_predictions = []
    with torch.no_grad():
        for eval_inputs, eval_labels in eval_loader:
            eval_outputs = model(eval_inputs)
            eval_loss += criterion(eval_outputs, eval_labels).item()
            eval_predictions.extend(eval_outputs.numpy())
  

    print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss/len(train_loader):.4f}")

# Test the model on the training set
model.eval()
train_loss = 0.0
train_predictions = []
with torch.no_grad():
    for train_inputs, train_labels in train_loader:
        train_outputs = model(train_inputs)
        train_loss += criterion(train_outputs, train_labels).item()
        train_predictions.extend(train_outputs.numpy())

train_mse = mean_squared_error(train_labels, train_predictions)
train_rmse = mean_squared_error(train_labels, train_predictions, squared=False)
train_mae = mean_absolute_error(train_labels, train_predictions)
train_mape = torch.mean(torch.abs((train_labels - train_predictions) / train_labels)) * 100

print(f"Train Loss: {train_loss/len(train_loader):.4f} - Train MSE: {train_mse:.4f}, RMSE: {train_rmse:.4f}, MAE: {train_mae:.4f}, MAPE: {train_mape:.2f}%")

# Test the model on the evaluation set
model.eval()
eval_loss = 0.0
eval_predictions = []
with torch.no_grad():
    for eval_inputs, eval_labels in eval_loader:
        eval_outputs = model(eval_inputs)
        eval_loss += criterion(eval_outputs, eval_labels).item()
        eval_predictions.extend(eval_outputs.numpy())
eval_mse = mean_squared_error(eval_labels, eval_predictions)
eval_rmse = mean_squared_error(eval_labels, eval_predictions, squared=False)
eval_mae = mean_absolute_error(eval_labels, eval_predictions)
eval_mape = torch.mean(torch.abs((eval_labels - eval_predictions) / eval_labels)) * 100

print(f"Eval Loss: {eval_loss/len(eval_loader):.4f} - Eval MSE: {eval_mse:.4f}, RMSE: {eval_rmse:.4f}, MAE: {eval_mae:.4f}, MAPE: {eval_mape:.2f}%")

# Test the model on the test set
model.eval()
test_loss = 0.0
test_predictions = []
with torch.no_grad():
    for test_inputs, test_labels in test_loader:
        test_outputs = model(test_inputs)
        test_loss += criterion(test_outputs, test_labels).item()
        test_predictions.extend(test_outputs.numpy())
test_mse = mean_squared_error(test_labels, test_predictions)
test_rmse = mean_squared_error(test_labels, test_predictions, squared=False)
test_mae = mean_absolute_error(test_labels, test_predictions)
test_mape = torch.mean(torch.abs((test_labels - test_predictions) / test_labels)) * 100

print(f"Test Loss: {test_loss/len(test_loader):.4f} - Test MSE: {test_mse:.4f}, RMSE: {test_rmse:.4f}, MAE: {test_mae:.4f}, MAPE: {test_mape:.2f}%")

















  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/300] - Train Loss: 201925343.6250
Epoch [2/300] - Train Loss: 149882941.9062
Epoch [3/300] - Train Loss: 147820715.5625
Epoch [4/300] - Train Loss: 148148253.8438
Epoch [5/300] - Train Loss: 148712984.1875
Epoch [6/300] - Train Loss: 147712663.5625
Epoch [7/300] - Train Loss: 146586535.1250
Epoch [8/300] - Train Loss: 147364875.6562
Epoch [9/300] - Train Loss: 146594922.2500
Epoch [10/300] - Train Loss: 145262070.3125
Epoch [11/300] - Train Loss: 145567210.4375
Epoch [12/300] - Train Loss: 146639308.1875
Epoch [13/300] - Train Loss: 146116047.1250
Epoch [14/300] - Train Loss: 148610977.1562
Epoch [15/300] - Train Loss: 148176688.6875
Epoch [16/300] - Train Loss: 146249785.7188
Epoch [17/300] - Train Loss: 146555098.1562
Epoch [18/300] - Train Loss: 145172882.3125
Epoch [19/300] - Train Loss: 147524331.4375
Epoch [20/300] - Train Loss: 147546299.7500
Epoch [21/300] - Train Loss: 145545870.6250
Epoch [22/300] - Train Loss: 146199573.5625
Epoch [23/300] - Train Loss: 145629733.43

KeyboardInterrupt: 