In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
class MLP(nn.Module):
    def __init__(self, hidden_size=1024, num_layers=5, input_shape=310, output_shape=20):
        super().__init__()
        self.input = nn.Linear(input_shape, hidden_size)
        self.layers = nn.ModuleList([
            nn.Linear(hidden_size, hidden_size) for _ in range(num_layers)
        ])
        self.output = nn.Linear(hidden_size, output_shape)

    def forward(self, x):
        x = nn.functional.leaky_relu(self.input(x))
        for layer in self.layers:
            x = nn.functional.leaky_relu(layer(x))
        return self.output(x)

In [3]:
class SimpleTransformer(nn.Module):
    def __init__(self, input_dim=310, num_classes=20, num_heads=5, num_encoder_layers=2, dropout=0.1):
        super().__init__()
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=input_dim, nhead=num_heads, dropout=dropout),
            num_layers=num_encoder_layers
        )
        self.fc = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        x = x.unsqueeze(1)
        x = x.transpose(0, 1) 
        x = self.transformer(x)
        x = x.mean(dim=0)
        x = self.fc(x)  
        return x

In [4]:
class CSVDataset(Dataset):
    def __init__(self, file_path):
        self.data = pd.read_csv(file_path)
        self.X = self.data.iloc[:, :-1].values
        self.y = self.data.iloc[:, -1].values

        # Standardize features
        self.scaler = StandardScaler()
        self.X = self.scaler.fit_transform(self.X)
        
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.long)

In [5]:
# Load dataset
dataset = CSVDataset('../data/raw/test.csv')

# Split dataset into training and testing
train_size = int(0.8 * len(dataset))
train_dataset, test_dataset = random_split(dataset, [train_size, len(dataset) - train_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=2**12, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=2**12, shuffle=False)

In [6]:
# Initialize the model
model = MLP().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for inputs, targets in train_loader:
        # Forward pass
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/10], Loss: 0.6142
Epoch [2/10], Loss: 0.4436
Epoch [3/10], Loss: 0.3357
Epoch [4/10], Loss: 0.3182
Epoch [5/10], Loss: 0.2560
Epoch [6/10], Loss: 0.1997
Epoch [7/10], Loss: 0.1769
Epoch [8/10], Loss: 0.1290
Epoch [9/10], Loss: 0.1207
Epoch [10/10], Loss: 0.1243


In [7]:
# Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the model on the test data: {accuracy:.2f}%')

Accuracy of the model on the test data: 92.38%
