## Using neural networks to predict on Kryptonite-9 dataset

In [19]:
import numpy as np
import os
import random
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from itertools import product

In [20]:
print(torch.__version__)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device: {0}'.format(device))

myseed = 6095 

random.seed(myseed)
os.environ['PYTHONHASHSEED'] = str(myseed)
np.random.seed(myseed)
torch.manual_seed(myseed)
torch.cuda.manual_seed(myseed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True


2.5.1+cu118
Device: cuda


In [21]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size=9, hidden_size=5, depth= 2):
        super().__init__()
        # self.linear_layer_stack = nn.Sequential(
        #     nn.Linear(input_size, hidden_size),
        #     nn.ReLU(),
        #     nn.Linear(hidden_size,hidden_size), 
        #     nn.ReLU(),
        #     nn.Linear(hidden_size,1), 
        # )



        modules = [torch.nn.Linear(input_size,hidden_size)]

        for i in range(depth-1):
            modules.append(torch.nn.Linear(hidden_size,hidden_size))
            modules.append(torch.nn.ReLU())

        modules.append(torch.nn.Linear(hidden_size, 1))

        self.linear_layer_stack = torch.nn.Sequential(
            *modules,
        )
 
        self._initialize_weights()  

    def _initialize_weights(self):
        for layer in self.linear_layer_stack:
            if isinstance(layer, nn.Linear):
                torch.nn.init.xavier_uniform_(layer.weight)
                torch.nn.init.zeros_(layer.bias)


    def forward(self, x):
        return self.linear_layer_stack(x)

In [22]:



# train_size = len(X_train)

def train_nn(model, dataloader, criterion, optimizer, epochs=10):
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.unsqueeze(1).to(device)  # Move data to device
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item()
        
        print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(dataloader):.4f}')



#### Validation

In [23]:
def validate_nn(model, X_val, y_val):
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val.to(device))
        val_outputs = torch.round(torch.sigmoid(val_outputs)).cpu().numpy()
        accuracy = accuracy_score(y_val, val_outputs)
    return accuracy

#### Grid Search Hyperparameter Tuning

In [24]:
def grid_search(X_train, y_train, X_val, y_val, param_grid, krypto_n):
    best_accuracy = 0
    best_params = None
    best_model = None
    
    for depth, hidden_size, learning_rate, batch_size, epochs in product(*param_grid.values()):
        print(f"Training with depth={depth}, hidden_size={hidden_size}, learning_rate={learning_rate}, batch_size={batch_size}, epochs={epochs}")
        
        model = NeuralNetwork(input_size=krypto_n, hidden_size=hidden_size, depth=depth).to(device)
        criterion = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        
        train_dataset = TensorDataset(X_train, y_train)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        
        train_nn(model, train_loader, criterion, optimizer, epochs=epochs)
        
        accuracy = validate_nn(model, X_val, y_val)
        print(f"Validation Accuracy: {accuracy:.4f}")
        
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = {'depth':depth, 'hidden_size': hidden_size, 'learning_rate': learning_rate, 
                           'batch_size': batch_size, 'epochs': epochs}
            best_model = model

        print(f"Current best accuracy: {best_accuracy:.4f}")

    
    print("Best Parameters:", best_params)
    print("Best Validation Accuracy:", best_accuracy)
    print(f"Krypto variant: {krypto_n}")
    
    
    return best_model, best_params

#### Random Grid Search

In [25]:
def rand_grid_search(X_train, y_train, X_val, y_val, krypto_n):
    best_accuracy = 0
    best_params = None
    best_model = None
    
    combos = 0
    max_combos = 1

    while best_accuracy < 0.95:

        # hidden_sizes = param_grid['hidden_size']
        # learning_rates = param_grid['learning_rate']
        # batch_sizes = param_grid['batch_size']
        # epoch_list = param_grid['epochs']

        hidden_size = np.random.randint(8,100)
        learning_rate = np.random.uniform(0.00001, 0.004)
        batch_size = np.random.randint(32,160)
        epochs = np.random.randint(5, 200)
        depth = np.random.randint(2,8)

        print(f"Training with depth={depth}, hidden_size={hidden_size}, learning_rate={learning_rate}, batch_size={batch_size}, epochs={epochs}")
        
        model = NeuralNetwork(input_size=krypto_n, hidden_size=hidden_size, depth=depth).to(device)
        criterion = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        
        train_dataset = TensorDataset(X_train, y_train)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        
        train_nn(model, train_loader, criterion, optimizer, epochs=epochs)
        
        accuracy = validate_nn(model, X_val, y_val)
        print(f"Validation Accuracy: {accuracy:.4f}")
        
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = {'depth': depth, 'hidden_size': hidden_size, 'learning_rate': learning_rate, 
                           'batch_size': batch_size, 'epochs': epochs}
            best_model = model

        print(f"Current best accuracy: {best_accuracy:.4f}")
        combos += 1


    print("Best Parameters:", best_params)
    print("Best Validation Accuracy:", best_accuracy)
    print(f"Krypto variant: {krypto_n}")
    print(combos)
    return best_model, best_params

In [27]:
print(torch.__version__)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = "cpu"
print('Device: {0}'.format(device))


n = 15
scaler = StandardScaler()
X = np.load('Datasets/kryptonite-%s-X.npy'%(n))
y = np.load('Datasets/kryptonite-%s-y.npy'%(n))
X = scaler.fit_transform(X)

# Shuffle and split the data
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.6, random_state=myseed)  # 60% training
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=myseed)  # 20% validation, 20% test

X_train, y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32)
X_val, y_val = torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32)



# param_grid = {
#     'depth': [2,4,8,16],
#     'hidden_size': [8, 16, 24, 30], 
#     'learning_rate': [0.001, 0.005, 0.01, 0.05 , 0.1], 
#     'batch_size': [16, 32, 64, 96],     
#     'epochs': [5, 10, 20, 30]              
# }


param_grid = {
    'depth': [2],
    'hidden_size': [84],
    'learning_rate': [0.0009],
    'batch_size': [155],     
    'epochs': [146]               
}


best_model, best_params = grid_search(X_train, y_train, X_val, y_val, param_grid, n)

# best_model, best_params = rand_grid_search(X_train, y_train, X_val, y_val, n)


# Current Hyperparameters for each Kryptonite Variant:

# Best Parameters: {'hidden_size': 20, 'learning_rate': 0.007, 'batch_size': 64, 'epochs': 5}
# Best Validation Accuracy: 0.9538888888888889
# Krypto variant: 9

# Best Parameters: {'hidden_size': 16, 'learning_rate': 0.01, 'batch_size': 64, 'epochs': 20}
# Best Validation Accuracy: 0.9084722222222222 (subject to slight fluctuations due to non-deterministic nature of parallel computation used in training)
# Krypto variant: 12

# Best Parameters: {'depth': 2, 'hidden_size': 54, 'learning_rate': 0.002077090842646019, 'batch_size': 147, 'epochs': 165}
# Best Validation Accuracy: 0.9342222222222222
# Krypto variant: 15

# Best Parameters: {'hidden_size': 8, 'learning_rate': 0.001, 'batch_size': 16, 'epochs': 20}
# Best Validation Accuracy: 0.5137962962962963
# Krypto variant: 18

# Training with depth=2, hidden_size=84, learning_rate=0.0008952525037794653, batch_size=155, epochs=146
# Validation Accuracy: 0.9427
# Krypto variant: 15



2.5.1+cu118
Device: cuda
Training with depth=2, hidden_size=84, learning_rate=0.0009, batch_size=155, epochs=146
Epoch 1/146, Loss: 0.7013
Epoch 2/146, Loss: 0.6927
Epoch 3/146, Loss: 0.6901
Epoch 4/146, Loss: 0.6890
Epoch 5/146, Loss: 0.6868
Epoch 6/146, Loss: 0.6868
Epoch 7/146, Loss: 0.6855
Epoch 8/146, Loss: 0.6828
Epoch 9/146, Loss: 0.6825
Epoch 10/146, Loss: 0.6804
Epoch 11/146, Loss: 0.6778
Epoch 12/146, Loss: 0.6752
Epoch 13/146, Loss: 0.6726
Epoch 14/146, Loss: 0.6701
Epoch 15/146, Loss: 0.6658
Epoch 16/146, Loss: 0.6612
Epoch 17/146, Loss: 0.6536
Epoch 18/146, Loss: 0.6487
Epoch 19/146, Loss: 0.6412
Epoch 20/146, Loss: 0.6351
Epoch 21/146, Loss: 0.6261
Epoch 22/146, Loss: 0.6161
Epoch 23/146, Loss: 0.6026
Epoch 24/146, Loss: 0.5913
Epoch 25/146, Loss: 0.5783
Epoch 26/146, Loss: 0.5620
Epoch 27/146, Loss: 0.5492
Epoch 28/146, Loss: 0.5351
Epoch 29/146, Loss: 0.5207
Epoch 30/146, Loss: 0.5065
Epoch 31/146, Loss: 0.4897
Epoch 32/146, Loss: 0.4718
Epoch 33/146, Loss: 0.4546
Epoch