In [145]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import TensorDataset, DataLoader
import os
import torch.nn as nn
import matplotlib.pyplot as plt

In [146]:
batch_size = 64
LR = 1e-3
epochs = 200
hidden_size = 100


# === Step 1: Load the dataset ===
train_path = "TRAIN.parquet"
df = pd.read_parquet(train_path)

# === Step 2: Explore the dataset ===
print("Shape:", df.shape)
# print("\nColumns:", df.columns.tolist())
print("\nFirst few rows:\n", df.head(2))
print("\nInfo:")
print(df.info())


Shape: (1428, 26001)

First few rows:
    label        1x1        1x2        1x3        1x4        1x5        1x6  \
0    0.0 -83.136936 -83.126828 -83.224906 -83.365157 -83.152547 -83.041702   
1    0.0 -82.332726 -82.306786 -82.309695 -82.244179 -82.442501 -82.280497   

         1x7        1x8        1x9  ...     500x43     500x44     500x45  \
0 -83.074658 -82.703755 -82.391378  ... -81.203511 -81.079094 -80.975485   
1 -82.319521 -82.142830 -81.959694  ... -79.930847 -79.863267 -79.863267   

      500x46     500x47     500x48     500x49     500x50     500x51     500x52  
0 -81.124954 -81.368314 -81.602038 -81.507725 -81.609008 -81.796603 -81.903254  
1 -79.613390 -80.108240 -80.384186 -80.500896 -80.451736 -80.620828 -80.746208  

[2 rows x 26001 columns]

Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1428 entries, 0 to 1427
Columns: 26001 entries, label to 500x52
dtypes: float64(26001)
memory usage: 283.3 MB
None


In [147]:
# === Step 3: Identify features and labels ===
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# You may need to change 'label' to the actual column name in your file
label_col = "label" if "label" in df.columns else df.columns[-1]

X = df.drop(columns=[label_col]).values
y = df[label_col].values

print("\nFeature shape:", X.shape)
print("Label shape:", y.shape)

# === Step 4: Normalize features ===
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


Feature shape: (1428, 26000)
Label shape: (1428,)


In [148]:
n_classes = int(max(df['label'].tolist())) + 1
n_features = int(len(df.columns)) - 1
TIME_FRAMES = 500
N_SUBCARRIERS = 52

In [149]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cpu":
    print("CUDA not available — running on CPU. You asked to transfer to GPU; if you have a CUDA GPU, ensure CUDA is available in this environment.")
else:
    print("CUDA is available — using device:", torch.cuda.get_device_name(0))


# === Step 5: Split for training and validation ===
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

X_train = X_train.reshape(-1, TIME_FRAMES, N_SUBCARRIERS)
X_val   = X_val.reshape(-1, TIME_FRAMES, N_SUBCARRIERS)


# === Step 6: Convert to PyTorch tensors ===
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.long)
X_val_t = torch.tensor(X_val, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.long)

print(X_train_t.shape)


train_ds = TensorDataset(X_train_t, y_train_t)
val_ds = TensorDataset(X_val_t, y_val_t)
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)

print("\nData prepared for training!")

# for idx, data in enumerate(X_train[0:3]):
#     plt.plot(data, label=f"{y_train[idx]}")

# plt.legend()
# plt.xlabel("x")
# plt.ylabel("amplitude")
# plt.show()



CUDA is available — using device: NVIDIA GeForce RTX 4060 Laptop GPU
torch.Size([1142, 500, 52])

Data prepared for training!


In [150]:
class LSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)
        self.act = nn.LogSigmoid()

    def forward(self, x):
        # x: (batch, time, features)
        out, _ = self.lstm(x)
        out = out[:, -1, :]          # take the last time step
        out = self.act(self.fc1(out))
        out = self.act(self.fc2(out))
        out = self.fc3(out)
        return out




def evaluate(model, loader, device):
    model.eval()
    correct = 0
    total = 0
    running_loss = 0.0
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(device)
            yb = yb.to(device)
            logits = model(xb)
            loss = criterion(logits, yb)
            running_loss += loss.item() * xb.size(0)
            preds = logits.argmax(dim=1)
            correct += (preds == yb).sum().item()
            total += xb.size(0)
    return running_loss / total, correct / total if total>0 else 0.0

In [152]:
torch.manual_seed(42)
model = LSTMClassifier(input_size=52, hidden_size=100, num_layers=2, num_classes=5)
model.to(device)
print("Model parameters:", sum(p.numel() for p in model.parameters()))

optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

print("Starting training...")
for epoch in range(1, epochs + 1):
    model.train()
    running_loss = 0.0
    total = 0
    correct = 0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * xb.size(0)
        preds = logits.argmax(dim=1)
        correct += (preds == yb).sum().item()
        total += xb.size(0)
    if epoch%10 == 0:
        train_loss = running_loss / total
        train_acc = correct / total if total>0 else 0.0
        val_loss, val_acc = evaluate(model, val_loader, device)
        print(f"Epoch {epoch}/{EPOCHS}  Train loss: {train_loss:.4f}  Train acc: {train_acc:.4f}  Val loss: {val_loss:.4f}  Val acc: {val_acc:.4f}")

print("Training finished. Final evaluation on validation set:")
val_loss, val_acc = evaluate(model, val_loader, device)
print(f"Val loss: {val_loss:.4f}, Val accuracy: {val_acc:.4f}")

Model parameters: 163105
Starting training...
Epoch 10/200  Train loss: 0.7684  Train acc: 0.6340  Val loss: 0.7710  Val acc: 0.6224
Epoch 20/200  Train loss: 0.4505  Train acc: 0.7907  Val loss: 0.3715  Val acc: 0.8287
Epoch 30/200  Train loss: 0.3052  Train acc: 0.8616  Val loss: 0.2848  Val acc: 0.8636
Epoch 40/200  Train loss: 0.3344  Train acc: 0.8257  Val loss: 0.2864  Val acc: 0.8636
Epoch 50/200  Train loss: 0.3404  Train acc: 0.8599  Val loss: 0.3186  Val acc: 0.8811
Epoch 60/200  Train loss: 0.3720  Train acc: 0.8406  Val loss: 0.3337  Val acc: 0.8427
Epoch 70/200  Train loss: 0.2775  Train acc: 0.8844  Val loss: 0.2075  Val acc: 0.9231
Epoch 80/200  Train loss: 0.1917  Train acc: 0.9159  Val loss: 0.2500  Val acc: 0.8986
Epoch 90/200  Train loss: 0.1776  Train acc: 0.9221  Val loss: 0.1675  Val acc: 0.9196
Epoch 100/200  Train loss: 0.1687  Train acc: 0.9299  Val loss: 0.1393  Val acc: 0.9406
Epoch 110/200  Train loss: 0.1282  Train acc: 0.9518  Val loss: 0.1699  Val acc: 0.