In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from itertools import product
from sklearn.model_selection import KFold

In [2]:
class ThreeLayerNN(nn.Module):
    """Fully connected network with two ReLU hidden layers and a sigmoid output.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width shared by both hidden layers.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        # First hidden block: input features -> hidden units.
        self.layer1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu1 = nn.ReLU()

        # Second hidden block keeps the same width.
        self.layer2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.relu2 = nn.ReLU()

        # Output block: the sigmoid maps every output into (0, 1).
        self.layer3 = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through both hidden blocks and return the sigmoid output."""
        first_hidden = self.relu1(self.layer1(x))
        second_hidden = self.relu2(self.layer2(first_hidden))
        return self.sigmoid(self.layer3(second_hidden))

In [3]:
class MyDataset:
    """Minimal map-style dataset that pairs each sample with its label."""

    def __init__(self, data, labels):
        # Both containers are stored as given: no copy and no length check.
        self.labels = labels
        self.data = data

    def __len__(self):
        """Return the number of samples, measured on ``data`` only."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        sample = self.data[index]
        label = self.labels[index]
        return sample, label
    

In [55]:
# Load the MONK-1 train/test files. Every field is read as a string so the
# attribute codes stay categorical. Column 0 is used as the class label and the
# last column is dropped (presumably a sample id -- confirm against the files).
# NOTE: `path` is an absolute, machine-specific location.
path=r'/home/ludovico/ML-project/data/monks-1'
# sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True.
monk1_train = pd.read_csv(path+'.train', header=None, sep=r'\s+', dtype=str)
monk1_test = pd.read_csv(path+'.test', header=None, sep=r'\s+', dtype=str)

train_labels=monk1_train[0].astype(np.float32)
val_labels=monk1_test[0].astype(np.float32)

# One-hot encode the attributes while they are still strings: get_dummies only
# expands object/string/category columns, so it must run before any float cast.
train_features=pd.get_dummies(monk1_train[monk1_train.columns[1:-1]])
# Pick the test attributes from the test frame's own columns, then align the
# encoded columns with the training layout so both tensors have the same width
# (a category missing from the test split becomes an all-zero column).
val_features=pd.get_dummies(monk1_test[monk1_test.columns[1:-1]])
val_features=val_features.reindex(columns=train_features.columns, fill_value=0)

# Convert everything to float32 tensors for the network and the MSE loss.
train_data=torch.tensor(train_features.to_numpy(dtype=np.float32))
val_data=torch.tensor(val_features.to_numpy(dtype=np.float32))
train_labels=torch.tensor(train_labels.values)
val_labels=torch.tensor(val_labels.values)



In [None]:
# Grid search over hidden-layer width and SGD learning rate. Each configuration
# is trained full-batch on the training tensors and scored on the held-out set.
# The grids have their own plural names so the loop variables below do not
# overwrite the lists they iterate over.
hidden_sizes=[2,3,4]
learning_rates=[0.1,1]
num_epochs=1000

# Size the input layer from the feature tensor rather than a hard-coded width,
# so the network always matches the number of columns actually loaded.
input_size=train_data.shape[1]
criterion=nn.MSELoss()

for hidden_size,learning_rate in product(hidden_sizes,learning_rates):
    net= ThreeLayerNN(input_size,hidden_size,1)
    optimizer= torch.optim.SGD(net.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        # squeeze(1) removes only the single output-unit axis, so a one-row
        # batch keeps its batch dimension and still lines up with the labels.
        outputs = net(train_data).squeeze(1)
        loss = criterion(outputs, train_labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluation only: no gradients are needed for the held-out pass.
    with torch.no_grad():
        val_outputs = net(val_data).squeeze(1)
        val_loss = criterion (val_outputs, val_labels)

    # Threshold the sigmoid output at 0.5 and compare in the labels' dtype.
    y_pred=(val_outputs>=0.5).to(val_labels.dtype)
    test_acc = (y_pred == val_labels).float().mean()

    print(hidden_size,learning_rate)
    # `loss` is the training loss from the last forward pass, i.e. measured
    # before the final optimizer step.
    print(f'Train loss: {loss:.4f}, Val loss: {val_loss:.4f}, test Acc: {test_acc:.4f}')

    

In [None]:
# Shuffled k-fold splitter.
num_folds = 5
kf = KFold(shuffle=True, n_splits=num_folds)

# Wrap the training arrays in a dataset and a shuffling loader.
# NOTE(review): x_train / y_train are not defined in the cells shown here --
# confirm they exist before this cell runs.
dataset = MyDataset(x_train, y_train)
dataloader = DataLoader(dataset, shuffle=True)

# Candidate values for every hyperparameter; key order fixes the tuple order
# produced by product() below.
param_grid = dict(
    dropout_rate=[0],
    num_hidden_layers=[0],
    h_units=[8],
    activation=[nn.ReLU],
    lr=[0.1, 0.2, 0.3],
    momentum=[0.1, 0.2, 0.3],
    weight_decay=[0],
)

# Cartesian product of all candidate values, one tuple per configuration.
param_combinations = product(*param_grid.values())

# Evaluate every configuration.
# NOTE(review): train_and_evaluate is not defined in the cells shown here.
res = [
    train_and_evaluate(params, x_train, y_train, dataset, param_grid)
    for params in param_combinations
]

# Split the (config, score) pairs into two parallel lists.
configs, mean_val_acc_config = [], []
for param_dict, mean_val_acc in res:
    configs.append(param_dict)
    mean_val_acc_config.append(mean_val_acc)

# Report the configuration with the highest mean validation accuracy.
best_index = np.argmax(mean_val_acc_config)
print(f'\nThe best configuration is {configs[best_index]}' )