# Neural Network for Regression with Abalone Dataset

In [20]:
## Imports 
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

## Load Data
train = pd.read_csv('data/reduced_new_train.csv', index_col='id')
# test = pd.read_csv('data/test.csv', index_col='id')

X = train.drop('Rings', axis=1)
y = train['Rings']

X = pd.get_dummies(X)
# test = pd.get_dummies(test)

cuda


In [21]:

class RegressionNN(nn.Module):
    def __init__(self, input_size):
        super(RegressionNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.bn1 = nn.BatchNorm1d(num_features=128)
        self.dropout1 = nn.Dropout(0.1)
        
        self.fc2 = nn.Linear(128, 512)
        self.bn2 = nn.BatchNorm1d(num_features=512)
        self.dropout2 = nn.Dropout(0.2)
        
        self.fc3 = nn.Linear(512, 512)
        self.bn3 = nn.BatchNorm1d(num_features=512)
        self.dropout3 = nn.Dropout(0.2)
        
        self.fc4 = nn.Linear(512, 128)
        self.bn4 = nn.BatchNorm1d(num_features=128)
        self.dropout4 = nn.Dropout(0.1)
        
        self.output = nn.Linear(128, 1)

    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        
        x = F.relu(self.bn3(self.fc3(x)))
        x = self.dropout3(x)
        
        x = F.relu(self.bn4(self.fc4(x)))
        x = self.dropout4(x)
        
        x = self.output(x)
        return x
    
    
def rmsle(y_true, y_pred):
    # Ensure predictions are non-negative
    y_pred = torch.clamp(y_pred, 0, None)
    log_true = torch.log(y_true + 1)
    log_pred = torch.log(y_pred + 1)
    return torch.sqrt(torch.mean(torch.square(log_pred - log_true)))

In [22]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Example dataset loading and preprocessing
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# It's a good practice to scale your data for neural network models
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)

# Create TensorDatasets and DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)


In [23]:

model = RegressionNN(input_size=X_train.shape[1]).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

def rmsle(y_pred, y_true):
    # Ensure predictions are non-negative
    y_pred = torch.clamp(y_pred, 0, None)
    log_true = torch.log(y_true + 1)
    log_pred = torch.log(y_pred + 1)
    return torch.sqrt(torch.mean(torch.square(log_pred - log_true)))


# Training loop
model.train()
for epoch in range(100):  # Number of epochs
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass
        predictions = model(inputs)
        loss = rmsle(predictions, targets)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

Epoch 1, Loss: 0.1672517955303192


KeyboardInterrupt: 

In [None]:
model.eval()  # Set the model to evaluation mode
total_rmsle = 0
with torch.no_grad():
    for inputs, targets in val_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        predictions = model(inputs)
        rmsle_val = rmsle(targets, predictions)
        total_rmsle += rmsle_val.item()

average_rmsle = total_rmsle / len(val_loader)
print(f"Validataion RMSLE: {average_rmsle}")


Validataion RMSLE: 0.1531708564206555
