In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import pandas as pd
import numpy as np

In [2]:
# parameters
test_split = 0.2
random_state = 1
batch_size = 128
learning_rate = 0.01
num_epochs = 999

In [3]:
# Define the neural network
class CustomModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CustomModel, self).__init__()
        self.hidden_layers = nn.ModuleList([
            nn.Linear(input_size, hidden_size),
        ])
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        for layer in self.hidden_layers:
            x = torch.relu(layer(x))
        x = self.output_layer(x)
        return x

In [4]:
# create data path function
csv_path = lambda r: f'../data/raw/R{r}_train.csv'  

In [5]:
# load labels
labels = pd.read_csv('../data/raw/labels_train.csv')
y = labels.values

# Split lables into test and train
y_train, y_test = train_test_split(y, test_size=test_split, random_state=random_state)

# Convert data to PyTorch tensors
y_train_tensor = torch.FloatTensor(y_train)
y_test_tensor = torch.FloatTensor(y_test)

In [6]:
# metrics init
losses = []
f1_micro = []

In [7]:
for r in range(1,6+1):
    data = pd.read_csv(csv_path(r))

    # Assuming your CSV has a column 'target' for the 19 binary labels and other columns as features
    X = data.values

    # Split the data into training and testing sets
    X_train, X_test = train_test_split(X, test_size=test_split, random_state=random_state)

    # Convert data to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train)

    X_test_tensor = torch.FloatTensor(X_test)

    # Create DataLoader for training and testing sets
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Set the input, hidden, and output sizes
    input_size = X_train.shape[1]
    hidden_size = input_size * 10
    output_size = y_train.shape[1]

    # Create an instance of the model
    model = CustomModel(input_size, hidden_size, output_size)

    # Define the loss function
    criterion = nn.BCEWithLogitsLoss()

    # Define the optimizer
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    f1_micro.append([])
    losses.append([])

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Evaluate the model on the test set
        model.eval()
        with torch.no_grad():
            test_loss = 0.0
            all_predictions = []
            all_targets = []
            for inputs, labels in test_loader:
                outputs = model(inputs)
                test_loss += criterion(outputs, labels).item()
                predictions = torch.sigmoid(outputs)
                all_predictions.append(predictions.numpy())
                all_targets.append(labels.numpy())

            average_test_loss = test_loss / len(test_loader)

            # Calculate micro F1 score
            all_predictions = np.concatenate(all_predictions)
            all_targets = np.concatenate(all_targets)
            micro_f1 = f1_score(all_targets, (all_predictions > 0.5).astype(int), average='micro')

            # append metrics
            f1_micro[r-1].append(micro_f1)
            losses[r-1].append(average_test_loss)

            print(f'R: {r}, Epoch {epoch + 1}/{num_epochs}, Test Loss: {average_test_loss:.4f} Micro F1 Score: {micro_f1:.4f}')
        
 

R: 1, Epoch 1/999, Test Loss: 0.3830 Micro F1 Score: 0.0010
R: 1, Epoch 2/999, Test Loss: 0.3426 Micro F1 Score: 0.0977
R: 1, Epoch 3/999, Test Loss: 0.3232 Micro F1 Score: 0.2318
R: 1, Epoch 4/999, Test Loss: 0.3098 Micro F1 Score: 0.3191
R: 1, Epoch 5/999, Test Loss: 0.2995 Micro F1 Score: 0.3628
R: 1, Epoch 6/999, Test Loss: 0.2911 Micro F1 Score: 0.3967
R: 1, Epoch 7/999, Test Loss: 0.2842 Micro F1 Score: 0.4323
R: 1, Epoch 8/999, Test Loss: 0.2784 Micro F1 Score: 0.4561
R: 1, Epoch 9/999, Test Loss: 0.2734 Micro F1 Score: 0.4798
R: 1, Epoch 10/999, Test Loss: 0.2692 Micro F1 Score: 0.5000
R: 1, Epoch 11/999, Test Loss: 0.2655 Micro F1 Score: 0.5163
R: 1, Epoch 12/999, Test Loss: 0.2622 Micro F1 Score: 0.5297
R: 1, Epoch 13/999, Test Loss: 0.2593 Micro F1 Score: 0.5395
R: 1, Epoch 14/999, Test Loss: 0.2568 Micro F1 Score: 0.5504
R: 1, Epoch 15/999, Test Loss: 0.2545 Micro F1 Score: 0.5576
R: 1, Epoch 16/999, Test Loss: 0.2524 Micro F1 Score: 0.5644
R: 1, Epoch 17/999, Test Loss: 0.