In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ParameterGrid
from torch.optim.lr_scheduler import StepLR

In [6]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda:1


In [7]:
# Load data
data_train = pd.read_csv('train.csv').drop(['ID'], axis=1)
data_test = pd.read_csv('test.csv') #.drop(['ID'], axis=1)

X = data_train.drop(['label'], axis=1)
y = data_train['label']

# Split data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.4, random_state=42)
X_test = data_test.drop(['ID'], axis=1)

In [8]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors and move to GPU
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train.values, dtype=torch.long).to(device)
X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val = torch.tensor(y_val.values, dtype=torch.long).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)

RuntimeError: CUDA error: invalid device ordinal
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [22]:
class Net(nn.Module):
    def __init__(self, dropout_rate=0.0, activation=nn.ReLU):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(64, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, 100)
        self.dropout = nn.Dropout(dropout_rate)
        self.activation = activation()
    
    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.dropout(x)
        x = self.activation(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return torch.softmax(x, dim=1)


In [26]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    model.to(device)  # Move model to GPU
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
        scheduler.step()
        
        if epoch % 10 == 0:
            model.eval()
            with torch.no_grad():
                val_outputs = model(X_val)
                val_loss = criterion(val_outputs, y_val)
                val_accuracy = (val_outputs.argmax(dim=1) == y_val).float().mean()
                print(f'Epoch {epoch}/{num_epochs - 1}, Loss: {loss.item()}, Val Loss: {val_loss.item()}, Val Accuracy: {val_accuracy.item()}')
                
# Hyperparameter tuning
param_grid = {
    'lr': [0.07, 0.06, 0.05, 0.04, 0.03, 0.02, 0.01],
    'dropout_rate': [0.1, 0.2, 0.3],
    'activation': [nn.Tanh],
    'num_epochs': [30, 50, 65]
}

best_accuracy = 0.0
best_params = None
best_model = None

In [27]:
for params in ParameterGrid(param_grid):
    model = Net(dropout_rate=params['dropout_rate'], activation=params['activation']).to(device)  # Move model to GPU
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=params['lr'])
    scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
    
    train_model(model, criterion, optimizer, scheduler, num_epochs=params['num_epochs'])
    
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_accuracy = (val_outputs.argmax(dim=1) == y_val).float().mean().item()
    
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_params = params
        best_model = model

print(f"Best Accuracy: {best_accuracy} with params: {best_params}")


Epoch 0/49, Loss: 4.605208396911621, Val Loss: 4.600996971130371, Val Accuracy: 0.021135985851287842
Epoch 10/49, Loss: 4.5524067878723145, Val Loss: 4.55354118347168, Val Accuracy: 0.0685877576470375
Epoch 20/49, Loss: 4.550892353057861, Val Loss: 4.552826881408691, Val Accuracy: 0.06936175376176834
Epoch 30/49, Loss: 4.551560401916504, Val Loss: 4.552826881408691, Val Accuracy: 0.06930221617221832
Epoch 40/49, Loss: 4.551501274108887, Val Loss: 4.552826881408691, Val Accuracy: 0.06930221617221832
Epoch 0/99, Loss: 4.605223178863525, Val Loss: 4.573415756225586, Val Accuracy: 0.047392237931489944
Epoch 10/99, Loss: 4.544630527496338, Val Loss: 4.547597885131836, Val Accuracy: 0.07454156130552292
Epoch 20/99, Loss: 4.543896198272705, Val Loss: 4.5475172996521, Val Accuracy: 0.07460109889507294
Epoch 30/99, Loss: 4.5442609786987305, Val Loss: 4.547520160675049, Val Accuracy: 0.07460109889507294
Epoch 40/99, Loss: 4.544368743896484, Val Loss: 4.547520160675049, Val Accuracy: 0.0746010988

In [28]:
best_model.eval()
with torch.no_grad():
    predictions = best_model(X_test).argmax(dim=1).cpu().numpy()  # Move predictions back to CPU

# Create submission file
submission = pd.DataFrame({'ID': data_test['ID'], 'label': predictions})
submission.to_csv('submission_nn.csv', index=False)

In [16]:
data_test['ID']

0        41988
1        41989
2        41990
3        41991
4        41992
         ...  
17991    59979
17992    59980
17993    59981
17994    59982
17995    59983
Name: ID, Length: 17996, dtype: int64

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

# Check if GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(64, 1024)  # input layer (64) -> hidden layer (128)
        self.fc2 = nn.Linear(1024, 1024)  # hidden layer (128) -> hidden layer (256)
        self.fc3 = nn.Linear(1024, 100)  # hidden layer (256) -> output layer (100)

    def forward(self, x):
        x = torch.relu(self.fc1(x))  # activation function for hidden layer
        x = torch.relu(self.fc2(x))
        x = torch.softmax(self.fc3(x), dim=1)  # output layer with softmax
        return x

    def predict(self, x):
        with torch.no_grad():
            x = x.to(device)
            outputs = self.forward(x)
            _, predicted = torch.max(outputs, 1)
        return predicted

In [3]:
model = Net()
model.to(device)  # Move the model to the GPU
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [4]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Convert data to PyTorch tensors
X_tensor = torch.FloatTensor(X.values)
y_tensor = torch.LongTensor(y.values)

# Create dataset and dataloader
dataset = TensorDataset(X_tensor, y_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Training loop
for epoch in range(100):  # loop over the dataset multiple times
    total_loss = 0
    for batch_x, batch_y in dataloader:
        # Move data to GPU if available
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        # forward pass
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)

        # backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f'Epoch {epoch+1}, Average Loss: {total_loss / len(dataloader)}')

Epoch 1, Average Loss: 4.279366338879483


KeyboardInterrupt: 