In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# ✅ Improved MLP definition
class ImprovedMLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(ImprovedMLP, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )
    def forward(self, x):
        return self.net(x)

# ✅ Load and prepare data
df = pd.read_csv("Sleep Train 5000.csv")
X = df.drop(columns=[df.columns[0]])
y = df[df.columns[0]]

if y.dtype == 'object':
    y = LabelEncoder().fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ✅ Prepare PyTorch data
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.long)


train_ds = TensorDataset(X_train_tensor, y_train_tensor)
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)

# ✅ Model, loss, optimizer, scheduler
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mlp = ImprovedMLP(X_train.shape[1], len(np.unique(y))).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mlp.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

# ✅ Training loop
for epoch in range(80):
    mlp.train()
    running_loss = 0
    for xb, yb in train_dl:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        preds = mlp(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    scheduler.step(running_loss)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_dl):.4f}")

# ✅ Evaluation
mlp.eval()
with torch.no_grad():
    preds_mlp = mlp(X_test_tensor.to(device))
    pred_labels = torch.argmax(preds_mlp, dim=1).cpu().numpy()

acc_mlp = accuracy_score(y_test, pred_labels)
print("\n📊 MLP Accuracy:", acc_mlp)
print(classification_report(y_test, pred_labels))


Epoch 10, Loss: 0.9913
Epoch 20, Loss: 0.8453
Epoch 30, Loss: 0.7218
Epoch 40, Loss: 0.6475
Epoch 50, Loss: 0.5802
Epoch 60, Loss: 0.4872
Epoch 70, Loss: 0.4656
Epoch 80, Loss: 0.4255

📊 MLP Accuracy: 0.561
              precision    recall  f1-score   support

           0       0.61      0.44      0.51       111
           1       0.34      0.28      0.30       147
           2       0.61      0.65      0.63       386
           3       0.70      0.73      0.72       202
           4       0.41      0.47      0.44       154

    accuracy                           0.56      1000
   macro avg       0.53      0.51      0.52      1000
weighted avg       0.56      0.56      0.56      1000



In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, classification_report

from imblearn.over_sampling import SMOTE

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# 🔧 Load and preprocess data
df = pd.read_csv("Sleep Train 5000.csv")
X = df.drop(columns=[df.columns[0]])
y = df[df.columns[0]]

if y.dtype == 'object':
    y = LabelEncoder().fit_transform(y)

# Apply SMOTE for balancing
X_res, y_res = SMOTE().fit_resample(X, y)

# Scale and reduce features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_res)

# Reduce dimensionality
pca = PCA(n_components=0.95)  # preserve 95% variance
X_pca = pca.fit_transform(X_scaled)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_pca, y_res, test_size=0.2, random_state=42)

# Torch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.long)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.long)

train_ds = TensorDataset(X_train_tensor, y_train_tensor)
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)

# ✅ Improved MLP with GELU and weight init
class SuperMLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(SuperMLP, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.BatchNorm1d(256),
            nn.GELU(),
            nn.Dropout(0.4),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.GELU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.GELU(),
            nn.Linear(64, num_classes)
        )
        self.init_weights()

    def init_weights(self):
        for m in self.net:
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        return self.net(x)

# ✅ Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SuperMLP(X_train.shape[1], len(np.unique(y))).to(device)

# Class weights to handle imbalance
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
optimizer = optim.AdamW(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)

# ✅ Training loop with early stopping
best_acc = 0
epochs_no_improve = 0
for epoch in range(100):
    model.train()
    running_loss = 0
    for xb, yb in train_dl:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    scheduler.step()

    model.eval()
    with torch.no_grad():
        val_preds = model(X_test_tensor.to(device))
        val_pred_labels = torch.argmax(val_preds, dim=1).cpu().numpy()
        val_acc = accuracy_score(y_test, val_pred_labels)
    
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_dl):.4f}, Val Acc: {val_acc:.4f}")
    
    # Early stopping
    if val_acc > best_acc:
        best_acc = val_acc
        epochs_no_improve = 0
        best_model = model.state_dict()
    else:
        epochs_no_improve += 1
        if epochs_no_improve == 10:
            print(f"⏹️ Early stopping at epoch {epoch+1}")
            break

# ✅ Evaluation
model.load_state_dict(best_model)
model.eval()
with torch.no_grad():
    preds = model(X_test_tensor.to(device))
    pred_labels = torch.argmax(preds, dim=1).cpu().numpy()

acc = accuracy_score(y_test, pred_labels)
print("\n📊 Final MLP Accuracy:", round(acc, 4))
print(classification_report(y_test, pred_labels))


Epoch 1, Loss: 1.3661, Val Acc: 0.4850
Epoch 2, Loss: 1.2123, Val Acc: 0.5025
Epoch 3, Loss: 1.1455, Val Acc: 0.5625
Epoch 4, Loss: 1.0880, Val Acc: 0.5795
Epoch 5, Loss: 1.0355, Val Acc: 0.6135
Epoch 6, Loss: 0.9942, Val Acc: 0.6215
Epoch 7, Loss: 0.9739, Val Acc: 0.6435
Epoch 8, Loss: 0.9462, Val Acc: 0.6500
Epoch 9, Loss: 0.9148, Val Acc: 0.6575
Epoch 10, Loss: 0.8891, Val Acc: 0.6655
Epoch 11, Loss: 0.8702, Val Acc: 0.6855
Epoch 12, Loss: 0.8510, Val Acc: 0.6810
Epoch 13, Loss: 0.8412, Val Acc: 0.6910
Epoch 14, Loss: 0.8303, Val Acc: 0.6830
Epoch 15, Loss: 0.8099, Val Acc: 0.6885
Epoch 16, Loss: 0.7989, Val Acc: 0.6945
Epoch 17, Loss: 0.7986, Val Acc: 0.6935
Epoch 18, Loss: 0.7946, Val Acc: 0.6985
Epoch 19, Loss: 0.7937, Val Acc: 0.6875
Epoch 20, Loss: 0.7940, Val Acc: 0.6960
Epoch 21, Loss: 0.7955, Val Acc: 0.6980
Epoch 22, Loss: 0.7964, Val Acc: 0.6925
Epoch 23, Loss: 0.7941, Val Acc: 0.7010
Epoch 24, Loss: 0.7950, Val Acc: 0.6975
Epoch 25, Loss: 0.7991, Val Acc: 0.7040
Epoch 26,