In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import pandas as pd
import numpy as np

# Hyperparameters used for batching and for the optimizer
batch_size = 64
learning_rate = 0.01

# Read the feature table and the label table from their separate CSV files
csv_path = '../data/raw/R6_train.csv'  # Update with your CSV file path
data = pd.read_csv(csv_path)
labels = pd.read_csv('../data/raw/labels_train.csv')

# Every column of `data` is used as a feature and every column of `labels` as a target
X, y = data.values, labels.values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert the four NumPy splits to float PyTorch tensors
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.FloatTensor(split) for split in (X_train, y_train, X_test, y_test)
)

# Pair features with targets and batch them; only the training batches are shuffled
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the neural network
class CustomModel(nn.Module):
    """Fully connected network: ReLU hidden layer(s) followed by a linear head.

    The output layer applies no activation, so ``forward`` returns raw
    scores (logits) rather than probabilities.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of every hidden layer.
        output_size: Number of values produced per sample.
        num_hidden_layers: How many hidden layers to stack. Defaults to 1,
            which builds the same single-hidden-layer network (and the same
            parameter names) as before this argument existed.

    Raises:
        ValueError: If ``num_hidden_layers`` is less than 1. The output
            layer always consumes ``hidden_size`` features, so at least one
            hidden layer is required.
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        output_size: int,
        num_hidden_layers: int = 1,
    ) -> None:
        super().__init__()
        if num_hidden_layers < 1:
            raise ValueError(
                f"num_hidden_layers must be >= 1, got {num_hidden_layers}"
            )
        # The first hidden layer maps from the input width; any further
        # hidden layers map hidden_size -> hidden_size.
        in_widths = [input_size] + [hidden_size] * (num_hidden_layers - 1)
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(in_width, hidden_size) for in_width in in_widths]
        )
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply each hidden layer with ReLU, then the linear output layer."""
        for layer in self.hidden_layers:
            x = torch.relu(layer(x))
        return self.output_layer(x)

# Layer dimensions: one input per feature column, a hidden layer ten times
# as wide as the input, and one output per label column
input_size, output_size = X_train.shape[1], y_train.shape[1]
hidden_size = 10 * input_size

# Build the network
model = CustomModel(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

# Binary cross-entropy computed directly on the model's raw outputs
criterion = nn.BCEWithLogitsLoss()

# Plain stochastic gradient descent over all model parameters
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Training loop: one optimisation pass over the training batches per epoch,
# followed by an evaluation pass over the test batches.
num_epochs = 100

for epoch in range(num_epochs):
    model.train()
    # The batch targets are bound to `targets` rather than `labels` so the
    # module-level `labels` DataFrame read from CSV is not overwritten.
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        test_loss = 0.0
        num_samples = 0
        all_predictions = []
        all_targets = []
        for inputs, targets in test_loader:
            outputs = model(inputs)
            batch_count = inputs.size(0)
            # The criterion yields a per-batch mean. Weight it by the batch
            # size so a smaller final batch does not count as much as a
            # full one in the epoch average.
            test_loss += criterion(outputs, targets).item() * batch_count
            num_samples += batch_count
            # Sigmoid turns the raw outputs into per-label probabilities.
            predictions = torch.sigmoid(outputs)
            all_predictions.append(predictions.numpy())
            all_targets.append(targets.numpy())

        # Mean loss per test sample across the whole test set
        average_test_loss = test_loss / num_samples

        # Calculate micro F1 score, thresholding probabilities at 0.5
        all_predictions = np.concatenate(all_predictions)
        all_targets = np.concatenate(all_targets)
        micro_f1 = f1_score(all_targets, (all_predictions > 0.5).astype(int), average='micro')
        print(f'Epoch {epoch + 1}/{num_epochs}, Test Loss: {average_test_loss:.4f} Micro F1 Score: {micro_f1:.4f}')
 

Epoch 1/100, Test Loss: 0.3022 Micro F1 Score: 0.3724
Epoch 2/100, Test Loss: 0.2874 Micro F1 Score: 0.4252
Epoch 3/100, Test Loss: 0.2795 Micro F1 Score: 0.4393
Epoch 4/100, Test Loss: 0.2746 Micro F1 Score: 0.4583
Epoch 5/100, Test Loss: 0.2703 Micro F1 Score: 0.4956
Epoch 6/100, Test Loss: 0.2664 Micro F1 Score: 0.4948
Epoch 7/100, Test Loss: 0.2635 Micro F1 Score: 0.5007
Epoch 8/100, Test Loss: 0.2611 Micro F1 Score: 0.5098
Epoch 9/100, Test Loss: 0.2588 Micro F1 Score: 0.5037
Epoch 10/100, Test Loss: 0.2566 Micro F1 Score: 0.5304
Epoch 11/100, Test Loss: 0.2548 Micro F1 Score: 0.5365
Epoch 12/100, Test Loss: 0.2530 Micro F1 Score: 0.5354
Epoch 13/100, Test Loss: 0.2514 Micro F1 Score: 0.5436
Epoch 14/100, Test Loss: 0.2500 Micro F1 Score: 0.5541
Epoch 15/100, Test Loss: 0.2484 Micro F1 Score: 0.5623
Epoch 16/100, Test Loss: 0.2475 Micro F1 Score: 0.5605
Epoch 17/100, Test Loss: 0.2463 Micro F1 Score: 0.5539
Epoch 18/100, Test Loss: 0.2449 Micro F1 Score: 0.5731
Epoch 19/100, Test 