In [9]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as F
import torch.nn as nn
import numpy as np

# Assuming the training data is named 'train.csv' and the testing data is named 'test.csv'
train_data_path = '/kaggle/input/new-dataset/train.csv'
test_data_path = '/kaggle/input/new-dataset/test.csv'

# Load the data
train_data = pd.read_csv(train_data_path, sep=' ', header=None)
test_data = pd.read_csv(test_data_path, sep=' ', header=None)

# Split train data into features and labels
X = train_data.iloc[:, 1:].values
y = train_data.iloc[:, 0].values - 1  # Assuming labels are 1-12, we shift to 0-11 for PyTorch

# Normalize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a PyTorch dataset
class CustomDataset(Dataset):
    def __init__(self, features, labels=None, mode='train'):
        self.features = features
        self.labels = labels
        self.mode = mode
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        if self.mode == 'train' or self.mode == 'val':
            return torch.tensor(self.features[idx], dtype=torch.float), torch.tensor(self.labels[idx], dtype=torch.long)
        else:
            return torch.tensor(self.features[idx], dtype=torch.float)

# Create datasets and dataloaders
train_dataset = CustomDataset(X_train, y_train, mode='train')
val_dataset = CustomDataset(X_val, y_val, mode='val')
test_dataset = CustomDataset(scaler.transform(test_data.values), mode='test')

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
class MultiLayerNNModified(nn.Module):
    def __init__(self, input_size, output_size):
        super(MultiLayerNNModified, self).__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.dropout1 = nn.Dropout(0.1)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout2 = nn.Dropout(0.1)
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.dropout3 = nn.Dropout(0.1)
        self.fc4 = nn.Linear(128, output_size)

    def forward(self, x):
        x = F.leaky_relu(self.bn1(self.fc1(x)), negative_slope=0.01)
        x = self.dropout1(x)
        x = F.leaky_relu(self.bn2(self.fc2(x)), negative_slope=0.01)
        x = self.dropout2(x)
        x = F.leaky_relu(self.bn3(self.fc3(x)), negative_slope=0.01)
        x = self.dropout3(x)
        x = self.fc4(x)
        return x


# Initialize the model, loss criterion, and optimizer
model = MultiLayerNNModified(input_size=400, output_size=12)  # 400 features and 12 possible labels
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5, min_lr=1e-6, verbose=True)

# Function to train the model
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=100):
    for epoch in range(epochs):
        model.train()  # Set model to training mode
        train_loss = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()          # Reset gradients
            outputs = model(inputs)        # Forward pass
            loss = criterion(outputs, labels)
            loss.backward()                # Backward pass
            optimizer.step()               # Update weights
            train_loss += loss.item() * inputs.size(0)
        train_loss = train_loss / len(train_loader.dataset)

        # Validation phase
        model.eval()   # Set model to evaluation mode
        val_loss = 0
        correct = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                correct += torch.sum(preds == labels.data)
        val_loss = val_loss / len(val_loader.dataset)
        val_acc = correct.double() / len(val_loader.dataset)

        print(f'Epoch {epoch+1}/{epochs}.. '
              f'Train loss: {train_loss:.4f}.. '
              f'Validation loss: {val_loss:.4f}.. '
              f'Validation accuracy: {val_acc:.4f}')
        
        scheduler.step(val_loss)        

# Train the model
train_model(model, train_loader, val_loader, criterion, optimizer, epochs=100)

Epoch 1/100.. Train loss: 1.6645.. Validation loss: 1.3921.. Validation accuracy: 0.3822
Epoch 2/100.. Train loss: 1.3884.. Validation loss: 1.1368.. Validation accuracy: 0.5069
Epoch 3/100.. Train loss: 1.2163.. Validation loss: 1.0117.. Validation accuracy: 0.5790
Epoch 4/100.. Train loss: 1.1107.. Validation loss: 0.9294.. Validation accuracy: 0.6315
Epoch 5/100.. Train loss: 1.0293.. Validation loss: 0.8257.. Validation accuracy: 0.6321
Epoch 6/100.. Train loss: 0.9627.. Validation loss: 0.7725.. Validation accuracy: 0.6718
Epoch 7/100.. Train loss: 0.9123.. Validation loss: 0.7047.. Validation accuracy: 0.6947
Epoch 8/100.. Train loss: 0.8708.. Validation loss: 0.6802.. Validation accuracy: 0.7125
Epoch 9/100.. Train loss: 0.8403.. Validation loss: 0.6274.. Validation accuracy: 0.7340
Epoch 10/100.. Train loss: 0.8080.. Validation loss: 0.6017.. Validation accuracy: 0.7537
Epoch 11/100.. Train loss: 0.7685.. Validation loss: 0.5633.. Validation accuracy: 0.7594
Epoch 12/100.. Trai

In [None]:
def generate_predictions(model, test_loader):
    model.eval()  # Set model to evaluation mode
    predictions = []
    with torch.no_grad():
        for inputs in test_loader:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            predictions.extend(preds.cpu().numpy() + 1)  # Adjust predictions to match original label range (1-12)
    return predictions

# Generate predictions
predictions = generate_predictions(model, test_loader)

# Prepare the submission file
submission = pd.DataFrame({'ID': np.arange(0, len(predictions)), 'Target': predictions})
submission.to_csv('/kaggle/working/submission3.csv', index=False)

print("Submission file has been created.")