Simple feedforward neural net for tabular regression 

In [2]:
import numpy as np
from numpy import genfromtxt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import math 
from sklearn.preprocessing import PolynomialFeatures

# Load data into a Pandas DataFrame
x_data = genfromtxt('../data/embs/embsNMR-brooke/1layerNN-1L1NLbias258/CembsNPMRDNMR.csv',delimiter=',',encoding='utf-8-sig',skip_header=1)

# Split data into X and y
X = x_data[:,0:128]
#128 --> pka & nmr
#138 onwards --> electron density 
y = x_data[:,128]

#label to split as well
label = x_data[:,129:]

# Split the dataset into training and testing sets
X_train, X_valtest, y_train, y_valtest, label_train, label_valtest = train_test_split(X, y, label, test_size=0.25, random_state=50)
# Split the dataset into training and testing sets
X_val, X_test, y_val, y_test, label_val, label_test = train_test_split(X_valtest, y_valtest, label_valtest, test_size=0.5, random_state=42)

#input features
n_features = 128

#Save file directory
save_dir = '../data/embs/embspKA-stef/1layerNN-1L1NLbias258/'


Define pytorch model layers, number of nodes in each layer, activation,...

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
from numpy import genfromtxt, savetxt


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.lin1 = nn.Linear(n_features, 128)
        self.lin2 = nn.Linear(128, 1)

    def forward(self, x):
        x = F.relu(self.lin1(x))
        x = self.lin2(x)
        
        return x

net = Net()
print(net)

params = list(net.parameters())

print(len(params[0])+ len(params[1])+len(params[2])+len(params[3]))




Net(
  (lin1): Linear(in_features=128, out_features=128, bias=True)
  (lin2): Linear(in_features=128, out_features=1, bias=True)
)
258


Hyperparameters
Optimizer & Loss

In [4]:
import torch.optim as optim

lr = 0.0001
n_epochs = 10000

loss_fn = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=lr)

Run Training on Defined Model

In [5]:
from sklearn.metrics import r2_score

# For each epoch...
trainloss_vs_epochs = []
valloss_vs_epochs = []

for epoch in range(n_epochs):
    net.train()  # Set the model to training mode
    optimizer.zero_grad()  # Zero the gradients

    # Convert data to PyTorch tensors
    inputs = torch.from_numpy(X_train).float()
    targets = torch.from_numpy(y_train).view(-1, 1).float()

    # Forward pass
    outputs = net(inputs)
    loss = loss_fn(outputs, targets)

    # Backward pass and optimization
    loss.backward()
    optimizer.step()



    if epoch % 50 == 0:
        # Track training loss
        trainloss_vs_epochs.append([epoch,math.sqrt(loss.detach().item())])
        print(f"Epoch {epoch+1}/{n_epochs}, Loss: {loss.item()}")
        print('Training R2', r2_score(outputs.detach().numpy(), targets.detach().numpy()))

    # Validation
    if epoch % 50 == 0:
        net.eval()  # Set the model to evaluation mode
        val_inputs = torch.from_numpy(X_val).float()
        val_targets = torch.from_numpy(y_val).view(-1, 1).float()

        val_outputs = net(val_inputs)
        val_loss = loss_fn(val_outputs, val_targets)
        valloss_vs_epochs.append([epoch,math.sqrt(val_loss.item())])

        print(f"Validation Loss: {val_loss.detach().item()}")
# Evaluate on the test set
net.eval()
test_inputs = torch.from_numpy(X_test).float()
test_targets = torch.from_numpy(y_test).view(-1, 1).float()

test_outputs = net(test_inputs)
test_loss = loss_fn(test_outputs, test_targets)






Epoch 1/10000, Loss: 6661.51806640625
Training R2 -97385.86867471927
Validation Loss: 5930.50244140625
Epoch 51/10000, Loss: 6464.9560546875
Training R2 -32468.373303058983
Validation Loss: 5748.31494140625
Epoch 101/10000, Loss: 6205.60888671875
Training R2 -9456.983909845907
Validation Loss: 5505.9326171875
Epoch 151/10000, Loss: 5863.6787109375
Training R2 -3206.3600134471394
Validation Loss: 5188.63232421875
Epoch 201/10000, Loss: 5466.802734375
Training R2 -1297.1060944030557
Validation Loss: 4820.12060546875
Epoch 251/10000, Loss: 5035.20654296875
Training R2 -591.0142677716427
Validation Loss: 4420.71484375
Epoch 301/10000, Loss: 4585.31689453125
Training R2 -294.3713164802866
Validation Loss: 4006.251953125
Epoch 351/10000, Loss: 4132.01416015625
Training R2 -156.86421444928447
Validation Loss: 3591.00341796875
Epoch 401/10000, Loss: 3689.296630859375
Training R2 -88.04128299462012
Validation Loss: 3188.382080078125
Epoch 451/10000, Loss: 3267.763916015625
Training R2 -51.29762

In [6]:

print(f"Test Loss: {test_loss.detach().item()}")
print('Test R2', r2_score(test_outputs.detach().numpy(), test_targets.detach().numpy()))
# Save the trained model
torch.save(net.state_dict(), save_dir + 'trained_model.pth')

Test Loss: 166.6793212890625
Test R2 0.9451977300761792


Label data and save in specified directory

In [None]:
val_outputs_labelled = np.column_stack((val_outputs.detach().numpy(),y_val,label_val))
test_outputs_labelled = np.column_stack((test_outputs.detach().numpy(),y_test,label_test))
train_outputs_labelled = np.column_stack((outputs.detach().numpy(),y_train,label_train))

np.savetxt(save_dir+'trainloss.csv',trainloss_vs_epochs,delimiter=',')
np.savetxt(save_dir+'valloss.csv',valloss_vs_epochs,delimiter=',')
np.savetxt(save_dir+'testpred.csv', test_outputs_labelled,delimiter=',')
np.savetxt(save_dir+'trainpred.csv', train_outputs_labelled,delimiter=',')
np.savetxt(save_dir+'valpred.csv', val_outputs_labelled,delimiter=',')