In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import joblib

# ============================================================
# 1. LOAD DATA
# ============================================================
df = pd.read_csv("emotions.csv")

# ============================================================
# 2. CLEANING
# ============================================================
df = df.drop_duplicates()
df = df.dropna(subset=["label"])

numeric_cols = df.select_dtypes(include=[np.number]).columns
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].median())

# ============================================================
# 3. FEATURE SPLITTING
# ============================================================

# Identify LSTM sequential FFT features
seq_cols = [c for c in df.columns if "fft_" in c]

# Identify ANN statistical features
stat_cols = [c for c in df.columns if c not in seq_cols + ["label"]]

X_seq = df[seq_cols]           # shape → (N, 750)
X_stat = df[stat_cols]         # shape → (N, num_stats)

y = df["label"]

# ============================================================
# 4. LABEL ENCODING
# ============================================================
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print("\nLabel Mapping:")
for i, c in enumerate(label_encoder.classes_):
    print(f"{c} → {i}")

# ============================================================
# 5. TRAIN–TEST SPLIT
# ============================================================
X_seq_train, X_seq_test, X_stat_train, X_stat_test, y_train, y_test = train_train_test_split = train_test_split(
    X_seq, X_stat, y_encoded,
    test_size=0.2, random_state=42, stratify=y_encoded
)

# ============================================================
# 6. SCALING (only statistical features)
# ============================================================
stat_scaler = StandardScaler()
X_stat_train = stat_scaler.fit_transform(X_stat_train)
X_stat_test = stat_scaler.transform(X_stat_test)

# Convert to float32 numpy
X_seq_train = X_seq_train.to_numpy().astype(np.float32)
X_seq_test = X_seq_test.to_numpy().astype(np.float32)

# Reshape FFT data for LSTM → (batch, seq_len, features)
X_seq_train = X_seq_train.reshape(len(X_seq_train), len(seq_cols), 1)
X_seq_test = X_seq_test.reshape(len(X_seq_test), len(seq_cols), 1)

X_stat_train = X_stat_train.astype(np.float32)
X_stat_test = X_stat_test.astype(np.float32)

y_train = np.array(y_train, dtype=np.int64)
y_test = np.array(y_test, dtype=np.int64)

# ============================================================
# 7. DATASET & DATA LOADER
# ============================================================
class HybridDataset(Dataset):
    def __init__(self, X_seq, X_stat, y):
        self.X_seq = torch.tensor(X_seq, dtype=torch.float32)
        self.X_stat = torch.tensor(X_stat, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X_seq[idx], self.X_stat[idx], self.y[idx]

train_dataset = HybridDataset(X_seq_train, X_stat_train, y_train)
test_dataset = HybridDataset(X_seq_test, X_stat_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# ============================================================
# 8. HYBRID LSTM + ANN MODEL
# ============================================================
class HybridModel(nn.Module):
    def __init__(self, seq_len, stat_dim, num_classes):
        super(HybridModel, self).__init__()

        # ---- LSTM BRANCH ----
        self.lstm = nn.LSTM(
            input_size=1,
            hidden_size=64,
            num_layers=2,
            batch_first=True,
            dropout=0.3,
            bidirectional=True
        )

        self.lstm_fc = nn.Linear(64 * 2, 128)  # BiLSTM → 128

        # ---- ANN BRANCH ----
        self.ann = nn.Sequential(
            nn.Linear(stat_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

        # ---- COMBINED CLASSIFIER ----
        self.classifier = nn.Sequential(
            nn.Linear(128 + 64, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )

    def forward(self, seq_input, stat_input):
        # LSTM branch
        lstm_out, _ = self.lstm(seq_input)
        lstm_last = lstm_out[:, -1, :]          # last timestep
        lstm_feat = self.lstm_fc(lstm_last)

        # ANN branch
        ann_feat = self.ann(stat_input)

        # Concatenate
        combined = torch.cat([lstm_feat, ann_feat], dim=1)

        # Classification
        out = self.classifier(combined)
        return out

# ============================================================
# 9. TRAINING SETUP
# ============================================================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = HybridModel(
    seq_len=len(seq_cols),
    stat_dim=len(stat_cols),
    num_classes=len(label_encoder.classes_)
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# ============================================================
# 10. TRAINING LOOP
# ============================================================
epochs = 40

for epoch in range(epochs):
    model.train()
    total_loss = 0
    correct = 0

    for seq_batch, stat_batch, y_batch in train_loader:
        seq_batch, stat_batch, y_batch = seq_batch.to(device), stat_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(seq_batch, stat_batch)
        loss = criterion(outputs, y_batch)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct += (preds == y_batch).sum().item()

    acc = correct / len(train_dataset)
    print(f"Epoch {epoch+1}/{epochs} | Loss: {total_loss:.4f} | Accuracy: {acc:.4f}")

# ============================================================
# 11. TESTING
# ============================================================
model.eval()
correct = 0

with torch.no_grad():
    for seq_batch, stat_batch, y_batch in test_loader:
        seq_batch, stat_batch, y_batch = seq_batch.to(device), stat_batch.to(device), y_batch.to(device)

        outputs = model(seq_batch, stat_batch)
        _, preds = torch.max(outputs, 1)
        correct += (preds == y_batch).sum().item()

test_acc = correct / len(test_dataset)
print(f"\nTest Accuracy: {test_acc:.4f}")

# ============================================================
# 12. SAVE EVERYTHING
# ============================================================
torch.save(model.state_dict(), "hybrid_lstm_ann.pth")
joblib.dump(stat_scaler, "stat_scaler.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")

print("\nHybrid model + scaler + label encoder saved successfully.")



Label Mapping:
NEGATIVE → 0
NEUTRAL → 1
POSITIVE → 2
Epoch 1/40 | Loss: 27.3493 | Accuracy: 0.8194
Epoch 2/40 | Loss: 8.0683 | Accuracy: 0.9402
Epoch 3/40 | Loss: 4.3506 | Accuracy: 0.9701
Epoch 4/40 | Loss: 1.3628 | Accuracy: 0.9906
Epoch 5/40 | Loss: 1.0058 | Accuracy: 0.9947
Epoch 6/40 | Loss: 1.2619 | Accuracy: 0.9953
Epoch 7/40 | Loss: 0.7281 | Accuracy: 0.9965
Epoch 8/40 | Loss: 0.9949 | Accuracy: 0.9971
Epoch 9/40 | Loss: 0.8614 | Accuracy: 0.9953
Epoch 10/40 | Loss: 0.5500 | Accuracy: 0.9971
Epoch 11/40 | Loss: 0.3641 | Accuracy: 0.9982
Epoch 12/40 | Loss: 2.8043 | Accuracy: 0.9842
Epoch 13/40 | Loss: 1.1589 | Accuracy: 0.9947
Epoch 14/40 | Loss: 0.3119 | Accuracy: 0.9988
Epoch 15/40 | Loss: 0.8301 | Accuracy: 0.9953
Epoch 16/40 | Loss: 0.9585 | Accuracy: 0.9971
Epoch 17/40 | Loss: 0.8163 | Accuracy: 0.9959
Epoch 18/40 | Loss: 1.0623 | Accuracy: 0.9941
Epoch 19/40 | Loss: 0.7919 | Accuracy: 0.9953
Epoch 20/40 | Loss: 0.1268 | Accuracy: 1.0000
Epoch 21/40 | Loss: 0.1024 | Accur

In [5]:
import pandas as pd
df1=pd.read_csv('emotions.csv')
df1.head()

Unnamed: 0,# mean_0_a,mean_1_a,mean_2_a,mean_3_a,mean_4_a,mean_d_0_a,mean_d_1_a,mean_d_2_a,mean_d_3_a,mean_d_4_a,...,fft_741_b,fft_742_b,fft_743_b,fft_744_b,fft_745_b,fft_746_b,fft_747_b,fft_748_b,fft_749_b,label
0,4.62,30.3,-356.0,15.6,26.3,1.07,0.411,-15.7,2.06,3.15,...,23.5,20.3,20.3,23.5,-215.0,280.0,-162.0,-162.0,280.0,NEGATIVE
1,28.8,33.1,32.0,25.8,22.8,6.55,1.68,2.88,3.83,-4.82,...,-23.3,-21.8,-21.8,-23.3,182.0,2.57,-31.6,-31.6,2.57,NEUTRAL
2,8.9,29.4,-416.0,16.7,23.7,79.9,3.36,90.2,89.9,2.03,...,462.0,-233.0,-233.0,462.0,-267.0,281.0,-148.0,-148.0,281.0,POSITIVE
3,14.9,31.6,-143.0,19.8,24.3,-0.584,-0.284,8.82,2.3,-1.97,...,299.0,-243.0,-243.0,299.0,132.0,-12.4,9.53,9.53,-12.4,POSITIVE
4,28.3,31.3,45.2,27.3,24.5,34.8,-5.79,3.06,41.4,5.52,...,12.0,38.1,38.1,12.0,119.0,-17.6,23.9,23.9,-17.6,NEUTRAL
