# Imports

In [109]:
import os
import math
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import time
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import math

# Dataset

In [110]:
# Custom Dataset
# StandardScaler lives in sklearn.preprocessing; importing it from
# sklearn.discriminant_analysis only works through an internal re-export.
from sklearn.preprocessing import StandardScaler


class CustomDataset(Dataset):
    """Labelled dataset read from a CSV file, standardized and label-encoded.

    Column layout, by 0-based position as used with ``iloc``:

    * column 0 is not used as a feature (presumably an ID/index column -
      TODO confirm against the CSV),
    * columns 1..128 hold the first sequence,
    * columns 129..256 hold the second sequence,
    * column 257 holds one extra scalar feature,
    * the last column holds the class label.

    Each item is ``(features, label)``: a float32 tensor of 257 standardized
    values and an int64 class index.

    NOTE(review): the scaler is fitted on every row of the file, so a later
    train/validation split shares normalization statistics between splits.
    """

    def __init__(self, csv_path="data/dataset_train_2024.csv"):
        """Load ``csv_path`` and precompute the feature and label tensors.

        Args:
            csv_path: Path of the CSV file to read with ``pandas.read_csv``.
        """
        data = pd.read_csv(csv_path)

        # Both sequences are multiplied by 100 before standardization; any code
        # that reuses ``self.scaler`` on new data must apply the same factor.
        self.sequences_1 = data.iloc[:, 1:129].values * 100
        self.sequences_2 = data.iloc[:, 129:257].values * 100
        self.extra_feature = data.iloc[:, 257].values.reshape(-1, 1)

        # One row per sample: [sequence 1 | sequence 2 | extra feature].
        all_features = np.hstack([self.sequences_1, self.sequences_2, self.extra_feature])

        # Per-column standardization fitted on this file.
        self.scaler = StandardScaler()
        self.normalized_features = self.scaler.fit_transform(all_features)
        self.features = torch.tensor(self.normalized_features, dtype=torch.float32)

        # Map the raw labels of the last column to integer class indices.
        self.label_encoder = LabelEncoder()
        self.labels = torch.tensor(self.label_encoder.fit_transform(data.iloc[:, -1]), dtype=torch.long)

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.features[idx], self.labels[idx]

    def inverseTransform(self, array):
        """Map integer class indices back to the original label values."""
        return self.label_encoder.inverse_transform(array)

# Classifier

In [111]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a sequence-first tensor.

    The table is computed once in the constructor and registered as the
    non-trainable buffer ``pe`` of shape ``[seq_len, 1, d_model]``: even
    embedding indices hold sines, odd indices hold cosines. Dropout is applied
    after the addition.

    Args:
        d_model: Embedding dimension. Odd values are supported.
        dropout: Dropout probability applied to the encoded output.
        seq_len: Number of positions to precompute; inputs passed to
            ``forward`` must not be longer than this.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, seq_len: int = 128):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(seq_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term  # [seq_len, ceil(d_model / 2)]

        pe = torch.zeros(seq_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd index than even index, so
        # the cosine columns are truncated to fit. For an even d_model the
        # slice keeps every column and the table is unchanged.
        pe[:, 0, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``

        Returns:
            Tensor of the same shape as ``x``: ``x`` plus the encodings of its
            first ``x.size(0)`` positions, passed through dropout.
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

def reformat_tensor(tensor, seq_len=128):
    """Reshape flat rows into per-step channels for the transformer encoder.

    Each input row is read as ``[sequence 1 | sequence 2 | ... | noise]``,
    where both sequences have ``seq_len`` values and the noise value is the
    last column. The noise value is repeated at every step.

    Args:
        tensor: 2-D tensor of shape ``[batch_size, width]`` with
            ``width >= 2 * seq_len + 1`` (e.g. ``[32, 257]`` for the default).
        seq_len: Length of each of the two sequences. Defaults to 128.

    Returns:
        Tensor of shape ``[batch_size, seq_len, 3]`` whose last dimension is
        ``(sequence 1 value, sequence 2 value, noise)``.

    Raises:
        ValueError: If ``tensor`` is not 2-D or has too few columns.
    """
    if tensor.dim() != 2 or tensor.shape[1] < 2 * seq_len + 1:
        raise ValueError(
            f"expected a 2-D tensor with at least {2 * seq_len + 1} columns, "
            f"got shape {tuple(tensor.shape)}"
        )
    # Extract sequences
    seq1 = tensor[:, :seq_len]
    seq2 = tensor[:, seq_len:2 * seq_len]
    # Broadcast the trailing noise column across all steps (a view, no copy)
    noise = tensor[:, -1:].expand(-1, seq_len)
    # Stack the measures and noise along the last dimension
    return torch.stack([seq1, seq2, noise], dim=2)

# Transformer Encoder
class TransformerClassifier(nn.Module):
    """Transformer-encoder classifier over two aligned sequences plus a scalar.

    ``forward`` takes flat rows ``[sequence 1 | sequence 2 | noise]``, turns
    them into ``seq_len`` steps of 3 channels, projects each step to
    ``d_model``, adds positional encodings, runs the encoder stack,
    mean-pools over the sequence and classifies with a small MLP.

    Args:
        seq_len: Length of each of the two sequences in an input row. It sizes
            both the positional-encoding table and the reshape in ``forward``.
        input_dim: Channels per step fed to the input projection. ``forward``
            always builds 3 channels, so this is expected to be 3.
        d_model: Embedding width of the encoder.
        nhead: Number of attention heads.
        dim_feedforward: Hidden width of each encoder layer's feed-forward part.
        num_layers: Number of stacked encoder layers.
        num_classes: Number of output logits.
    """

    def __init__(self, seq_len, input_dim, d_model, nhead, dim_feedforward, num_layers, num_classes):
        super().__init__()
        # Previously accepted but ignored; stored so forward() can honour it.
        self.seq_len = seq_len
        # Project input to d_model
        self.input_fc = nn.Linear(input_dim, d_model)
        # Positional encoding sized for the configured sequence length
        self.pos_encoder = PositionalEncoding(d_model, seq_len=seq_len)
        # Transformer encoder layers
        encoder_layers = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=0.1
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)
        # NOTE: `fc` is not used by forward() (the MLP head below is). It is
        # kept so parameter names and initialization order stay compatible
        # with existing checkpoints.
        self.fc = nn.Linear(d_model, num_classes)

        mlp_hidden_dim = 64
        self.mlp = nn.Sequential(
            nn.Linear(d_model, mlp_hidden_dim),  # Input layer
            nn.ReLU(),  # Activation
            nn.Linear(mlp_hidden_dim, num_classes)  # Output layer
        )

    def _to_sequence(self, x):
        """Reshape ``[batch, 2 * seq_len + 1]`` rows into ``[batch, seq_len, 3]``."""
        steps = self.seq_len
        if x.dim() != 2 or x.size(1) < 2 * steps + 1:
            raise ValueError(
                f"expected input of shape [batch_size, {2 * steps + 1}], "
                f"got {tuple(x.shape)}"
            )
        first = x[:, :steps]
        second = x[:, steps:2 * steps]
        # The trailing scalar is repeated at every step as a third channel.
        noise = x[:, -1:].expand(-1, steps)
        return torch.stack([first, second, noise], dim=2)

    def forward(self, x):
        """Return class logits for a batch of flat input rows.

        Args:
            x: Tensor of shape ``[batch_size, 2 * seq_len + 1]``.

        Returns:
            Tensor of shape ``[batch_size, num_classes]`` (unnormalized logits).

        Raises:
            ValueError: If ``x`` is not 2-D or has too few columns.
        """
        # Step 1: [batch_size, seq_len, 3]
        x = self._to_sequence(x)
        # Step 1.1: the encoder is sequence-first -> [seq_len, batch_size, 3]
        x = x.permute(1, 0, 2)
        # Step 2: Project input to d_model
        x = self.input_fc(x)  # [seq_len, batch_size, d_model]
        # Step 3: Add positional encoding
        x = self.pos_encoder(x)  # [seq_len, batch_size, d_model]
        # Step 4: Pass through the Transformer encoder
        x = self.transformer_encoder(x)  # [seq_len, batch_size, d_model]
        # Step 5: Pool over the sequence dimension (Global Average Pooling)
        x = x.mean(dim=0)  # [batch_size, d_model]
        # Step 6: MLP classification head
        return self.mlp(x)  # [batch_size, num_classes]


# Initialization

In [112]:
# Parameters

batch_size = 32
epochs = 50

learning_rate = 0.001
momentum = 0.001  # NOTE: unused - the AdamW optimizer below takes no momentum argument
weight_decay = 0.001

seq_len = 128
input_dim = 3

num_layers = 1
nhead = 4
num_classes = 5
d_model = 128
dim_feedforward = 4 * d_model

# Seed for the train/test split so reported metrics are comparable across runs.
split_seed = 42

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load dataset
dataset = CustomDataset()

# Fail early with a readable message: more labels than output logits would
# otherwise surface as an opaque error inside the loss during training.
found_classes = len(dataset.label_encoder.classes_)
if found_classes > num_classes:
    raise ValueError(
        f"dataset has {found_classes} classes but num_classes is {num_classes}"
    )

train_size = int(0.8 * len(dataset))  # 80% for training
test_size = len(dataset) - train_size  # 20% for testing
# A dedicated generator makes the split reproducible without touching the
# global RNG state used for weight initialization and shuffling.
split_generator = torch.Generator().manual_seed(split_seed)
train_data, test_data = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size)

# Model
model = TransformerClassifier(
    seq_len=seq_len,
    input_dim=input_dim,
    d_model=d_model,
    nhead=nhead,
    num_layers=num_layers,
    dim_feedforward=dim_feedforward,
    num_classes=num_classes,
).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)


Using device: cuda




# Training

In [113]:
# Training Loop
# Runs `epochs` passes over `train_loader`; after each pass prints the mean
# batch loss and the wall-clock duration of the pass.
print("Training the model...")
for epoch in range(epochs):
    model.train()
    start_time = time.time()
    batch_losses = []

    for features, labels in train_loader:
        # Move the batch to the model's device.
        features = features.to(device)
        labels = labels.to(device)

        # Standard optimization step: reset grads, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    total_loss = sum(batch_losses)
    end_time = time.time()
    epoch_time = end_time - start_time
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}, Time: {epoch_time:.2f} seconds")


Training the model...
Epoch 1/50, Loss: 1.0127, Time: 2.44 seconds
Epoch 2/50, Loss: 0.5030, Time: 2.05 seconds
Epoch 3/50, Loss: 0.3602, Time: 2.01 seconds
Epoch 4/50, Loss: 0.2958, Time: 1.93 seconds
Epoch 5/50, Loss: 0.2696, Time: 2.15 seconds
Epoch 6/50, Loss: 0.2347, Time: 1.88 seconds
Epoch 7/50, Loss: 0.2205, Time: 1.96 seconds
Epoch 8/50, Loss: 0.2082, Time: 2.22 seconds
Epoch 9/50, Loss: 0.1907, Time: 2.31 seconds
Epoch 10/50, Loss: 0.1681, Time: 1.92 seconds
Epoch 11/50, Loss: 0.1699, Time: 2.10 seconds
Epoch 12/50, Loss: 0.1649, Time: 1.94 seconds
Epoch 13/50, Loss: 0.1437, Time: 2.25 seconds
Epoch 14/50, Loss: 0.1405, Time: 2.13 seconds
Epoch 15/50, Loss: 0.1315, Time: 2.10 seconds
Epoch 16/50, Loss: 0.1379, Time: 2.25 seconds
Epoch 17/50, Loss: 0.1251, Time: 2.29 seconds
Epoch 18/50, Loss: 0.1136, Time: 2.75 seconds
Epoch 19/50, Loss: 0.1259, Time: 2.13 seconds
Epoch 20/50, Loss: 0.1171, Time: 2.11 seconds
Epoch 21/50, Loss: 0.1043, Time: 2.14 seconds
Epoch 22/50, Loss: 0.

# Testing

In [114]:
# Testing Loop
# Scores the model on `test_loader` and reports weighted F1 and accuracy.
from sklearn.metrics import f1_score

print("Testing the model...")
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for features, labels in test_loader:
        features = features.to(device)
        labels = labels.to(device)
        outputs = model(features)
        # Predicted class = index of the largest logit in each row.
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

accuracy = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds, average='weighted')  # or 'macro', 'micro', depending on your use case
print(f"F1 Score: {f1:.4f}")
print(f"Accuracy: {accuracy:.4f}")


Testing the model...
F1 Score: 0.9499
Accuracy: 0.9504


# Prepare for Kaggle


In [115]:
#Using the model for prediction with the evaluation dataset

import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader

# Define the dataset class
class UnlabeledDataset(Dataset):
    """Dataset of unlabeled feature rows taken from a DataFrame.

    Args:
        dataframe: DataFrame whose columns are all numeric features.
        transform: Optional callable applied to each float32 row on access.
        scaler: Optional already-fitted scaler exposing ``transform``. When
            given, it is applied as-is so evaluation data is normalized with
            the same statistics as the training data. When omitted, a new
            ``StandardScaler`` is fitted on ``dataframe`` itself (the
            original behaviour).
    """

    def __init__(self, dataframe, transform=None, scaler=None):
        self.data = dataframe

        if scaler is None:
            self.scaler = StandardScaler()
            normalized_values = self.scaler.fit_transform(self.data.values)
        else:
            # Reuse the given statistics; never re-fit on evaluation data.
            self.scaler = scaler
            normalized_values = self.scaler.transform(self.data.values)
        self.normalized_data = pd.DataFrame(
            normalized_values, columns=self.data.columns, index=self.data.index
        )
        self.transform = transform

    def __len__(self):
        """Return the number of rows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return row ``idx`` as a float32 NumPy array (after ``transform``, if set)."""
        inputs = self.normalized_data.iloc[idx].values.astype('float32')  # Retrieve row as NumPy array
        if self.transform:
            inputs = self.transform(inputs)
        return inputs

# Load the unlabeled dataset
csv_path = "data/dataset_test_no_label_2024.csv"  # Path to the dataset CSV file
unlabeled_df = pd.read_csv(csv_path)
# Drop the first column so the remaining columns line up with the training
# features (presumably an ID/index column - TODO confirm against the CSV).
unlabeled_df = unlabeled_df.drop(unlabeled_df.columns[0], axis=1)

# Mirror the training preprocessing of CustomDataset: the 256 sequence columns
# are multiplied by 100, then the scaler fitted on the training data is
# applied. Re-fitting a scaler on the evaluation set would feed the model
# inputs normalized differently from those it was trained on.
eval_features = unlabeled_df.astype("float64")
eval_features.iloc[:, :256] *= 100
unlabeled_dataset = UnlabeledDataset(eval_features, scaler=dataset.scaler)
unlabeled_dataloader = DataLoader(unlabeled_dataset, batch_size=64, shuffle=False)

# Set the model to evaluation mode
model.eval()

# Generate predictions; with shuffle=False they come out in file order
predictions = []
with torch.no_grad():
    for inputs in unlabeled_dataloader:
        inputs = inputs.to(device)  # Send inputs to the same device as the model

        # Forward pass
        outputs = model(inputs)
        preds = torch.argmax(outputs, dim=1)  # Get predicted class
        predictions.extend(preds.cpu().numpy())

# IDs are the 0-based row positions in the evaluation file
indices = list(range(len(predictions)))

# Create a DataFrame with indices and predictions
output_df = pd.DataFrame({"ID": indices, "MODULATION": dataset.inverseTransform(predictions)})

# Save to a CSV file
output_df.to_csv("predictions_with_indices.csv", index=False)

print("Predictions saved to 'predictions_with_indices.csv'")

Predictions saved to 'predictions_with_indices.csv'
