## Using neural networks to predict on Kryptonite-9 dataset

In [74]:
import numpy as np
import os
import random
import torch
from torch import nn
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [75]:
print(torch.__version__)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device: {0}'.format(device))

myseed = 6095 

random.seed(myseed)
os.environ['PYTHONHASHSEED'] = str(myseed)
np.random.seed(myseed)
torch.manual_seed(myseed)
torch.cuda.manual_seed(myseed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True


2.5.1+cu118
Device: cuda


In [76]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_layer_stack = nn.Sequential(
            nn.Linear(9, 5),
            nn.ReLU(),
            nn.Linear(5,5), 
            nn.ReLU(),
            nn.Linear(5,1), 
        ) 
        self._initialize_weights()  # Call the initialization method

    def _initialize_weights(self):
        for layer in self.linear_layer_stack:
            if isinstance(layer, nn.Linear):
                # Xavier Initialization (Glorot)
                torch.nn.init.xavier_uniform_(layer.weight)
                torch.nn.init.zeros_(layer.bias)


    def forward(self, x):
        return self.linear_layer_stack(x)

In [77]:
""" required
- data normalisation
- random seed initialisation
- weight initialisation"""

' required\n- data normalisation\n- random seed initialisation\n- weight initialisation'

In [84]:


print(torch.__version__)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = "cpu"
print('Device: {0}'.format(device))


n = 9
X = np.load('Datasets/kryptonite-%s-X.npy'%(n))
y = np.load('Datasets/kryptonite-%s-y.npy'%(n))

# Shuffle and split the data
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.6, random_state=myseed)  # 60% training
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=myseed)  # 20% validation, 20% test

train_size = len(X_train)

model = NeuralNetwork()
model = model.to(device)
params = model.parameters()
# params = params.

optimiser = torch.optim.Adam(params, lr = 0.001)

loss_function = torch.nn.BCEWithLogitsLoss()

num_iter = 8

for epoch in range(num_iter):
    model.train()

    for point in torch.randperm(train_size):


        
        datapoint = X_train[point]
        datapoint = torch.from_numpy(datapoint)
        datapoint = datapoint.to(device)

        # print(datapoint.dtype)
        # print(datapoint.shape)
        # print(datapoint.device)

        label = y_train[point].astype(np.float32)
        label = torch.tensor([label])
        label = label.to(device)

        # print(label.dtype)
        # print(label.shape)
        # print(label.device)

        logit = model(datapoint)

        # print(logit.dtype)
        # print(logit.shape)
        # print(logit.device)
        loss = loss_function(logit, label)
        optimiser.zero_grad()

        if point == train_size-1:
            print(f"Loss at epoch {epoch+1}: {loss}")
        loss.backward()
        optimiser.step()





2.5.1+cu118
Device: cuda


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [82]:
# accuracy scores:

val_data = torch.from_numpy(X_val)
val_data = val_data.to(device)

val_labels = y_val

model.eval()
with torch.no_grad():
    out = nn.Sigmoid()

    val_logits = model(val_data)
    val_logits = torch.round(out(val_logits.reshape(-1))).type(torch.int32).cpu().numpy()

    print(val_logits)
    print(val_labels)
    # test_logits.requires_grad(False)
    test_accuracy = accuracy_score(val_logits, val_labels)
    print(test_accuracy)

[1 1 1 ... 0 1 1]
[1 0 1 ... 0 0 1]
0.8427777777777777
