In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from tabular_model import ArceneClassifier 
from sklearn.preprocessing import StandardScaler

In [8]:
# Loading helper: the features and the labels are stored in separate files. It also remaps the labels to 0 and 1.
def load_data(file_data, file_labels=None):
    """Read a feature matrix and, optionally, its labels from text files.

    Args:
        file_data: Path of the feature file, parsed with ``np.loadtxt``.
        file_labels: Optional path of the label file. When given, the labels
            are parsed with ``np.loadtxt`` and rescaled with
            ``(labels + 1) / 2`` (so -1 becomes 0 and +1 stays 1).

    Returns:
        ``(data, labels)`` when ``file_labels`` is given, otherwise only
        ``data``.
    """
    features = np.loadtxt(file_data)

    # No label file (e.g. an unlabeled test split): return the features alone.
    if not file_labels:
        return features

    raw_labels = np.loadtxt(file_labels)
    targets = (raw_labels + 1) / 2
    return features, targets

In [9]:
# Read the labeled train/validation splits and the unlabeled test split.
X_train, y_train = load_data(
    "data/arcene/arcene_train.data",
    "data/arcene/arcene_train.labels",
)
X_valid, y_valid = load_data(
    "data/arcene/arcene_valid.data",
    "data/arcene/arcene_valid.labels",
)
X_test = load_data("data/arcene/arcene_test.data")

# Normalize values: the scaler is fitted on the training split only, then the
# same fitted scaler is applied to the validation and test splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

# Convert everything to float32 tensors (features first, then labels).
X_train = torch.tensor(X_train, dtype=torch.float32)
X_valid = torch.tensor(X_valid, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_valid = torch.tensor(y_valid, dtype=torch.float32)

In [10]:
# Pair each feature tensor with its label tensor.
train_dataset = TensorDataset(X_train, y_train)
valid_dataset = TensorDataset(X_valid, y_valid)

# Mini-batches of 16; only the training batches are shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=16, shuffle=False)

In [11]:
# Model
# Seed PyTorch's global RNG before anything stochastic happens, so that a
# Restart & Run All reproduces the same run. This covers DataLoader shuffling
# (which draws from the default generator) and presumably the model's weight
# initialisation as well; confirm against ArceneClassifier.
SEED = 42
torch.manual_seed(SEED)

input_dim = X_train.shape[1]  # Number of features
model = ArceneClassifier(input_dim)

# BCELoss expects probabilities in [0, 1].
# NOTE(review): assumes ArceneClassifier ends with a sigmoid; confirm in tabular_model.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [12]:
# Training loop: one full pass over train_loader per epoch.
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0  # sum of the per-batch losses for this epoch

    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        # Flatten to shape (batch,) so the predictions line up with batch_y.
        # A bare .squeeze() would also drop the batch dimension of a one-sample
        # batch, and BCELoss rejects a 0-d input against a (1,) target.
        # NOTE(review): assumes the model emits one value per sample; confirm.
        outputs = model(batch_X).reshape(-1)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Reported value is the sum over batches, not the mean.
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")


Epoch 1/50, Loss: 5.3465
Epoch 2/50, Loss: 3.4849
Epoch 3/50, Loss: 2.5115
Epoch 4/50, Loss: 0.8428
Epoch 5/50, Loss: 0.1644
Epoch 6/50, Loss: 0.0491
Epoch 7/50, Loss: 0.0101
Epoch 8/50, Loss: 0.0056
Epoch 9/50, Loss: 0.0021
Epoch 10/50, Loss: 0.0012
Epoch 11/50, Loss: 0.0006
Epoch 12/50, Loss: 0.0006
Epoch 13/50, Loss: 0.0004
Epoch 14/50, Loss: 0.0003
Epoch 15/50, Loss: 0.0003
Epoch 16/50, Loss: 0.0002
Epoch 17/50, Loss: 0.0002
Epoch 18/50, Loss: 0.0002
Epoch 19/50, Loss: 0.0001
Epoch 20/50, Loss: 0.0001
Epoch 21/50, Loss: 0.0001
Epoch 22/50, Loss: 0.0001
Epoch 23/50, Loss: 0.0001
Epoch 24/50, Loss: 0.0001
Epoch 25/50, Loss: 0.0001
Epoch 26/50, Loss: 0.0001
Epoch 27/50, Loss: 0.0001
Epoch 28/50, Loss: 0.0001
Epoch 29/50, Loss: 0.0001
Epoch 30/50, Loss: 0.0001
Epoch 31/50, Loss: 0.0000
Epoch 32/50, Loss: 0.0001
Epoch 33/50, Loss: 0.0000
Epoch 34/50, Loss: 0.0000
Epoch 35/50, Loss: 0.0000
Epoch 36/50, Loss: 0.0000
Epoch 37/50, Loss: 0.0000
Epoch 38/50, Loss: 0.0000
Epoch 39/50, Loss: 0.

In [16]:
# Validation step: count how many validation samples are classified correctly.
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for batch_X, batch_y in valid_loader:
        outputs = model(batch_X).squeeze()
        # Threshold at 0.5 to turn the model outputs into 0/1 predictions.
        predicted = outputs.gt(0.5).float()
        correct += predicted.eq(batch_y).sum().item()
        total += batch_y.shape[0]

accuracy = correct / total

In [17]:
# Validation accuracy: fraction of validation samples whose thresholded
# prediction matched the label (correct / total from the validation step).
print(accuracy)

0.77


In [None]:
# Conclusion: this dataset is not very useful here; the model trains so quickly
# that dimensionality reduction is not worth it for saving training time.
# If we want to improve the accuracy, it may be worth a second try.