In [15]:
import os
import librosa
import numpy as np

# Define the directory containing the audio files
AUDIO_DIR = r"D:\Program\ML\Emotion Recognition\audiodata"

# Initialize lists to hold features and labels
features = []
labels = []

# Loop through each file in the directory
for filename in os.listdir(AUDIO_DIR):
    if filename.endswith('.wav'):  # Check if the file is a .wav file
        # Load the audio file
        file_path = os.path.join(AUDIO_DIR, filename)
        audio, sr = librosa.load(file_path, sr=None)  # Load with original sampling rate
        
        # Extract features (e.g., MFCCs)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
        mfccs_mean = np.mean(mfccs.T, axis=0)  # Take the mean of the MFCCs
        
        # Append features and label (you may need to extract the label from the filename)
        features.append(mfccs_mean)
        
        # Extract label from filename (assuming the label is part of the filename)
        label = filename.split('-')[2]  # Adjust this based on your filename format
        labels.append(label)

# Convert features and labels to numpy arrays
features = np.array(features)
labels = np.array(labels)

print("Features shape:", features.shape)
print("Labels shape:", labels.shape)


Features shape: (1440, 13)
Labels shape: (1440,)


In [18]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
epochs = 79
batch_size = 32
learning_rate = 0.001

# === Load features from CSV ===
df = pd.read_csv(r"D:\Program\ML\Emotion Recognition\features.csv")

# === Encode emotion labels (string → int) ===
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# === Normalize features ===
scaler = StandardScaler()
X = scaler.fit_transform(df.drop('label', axis=1))  # (samples, 193 features)
y = df['label'].values

# === Train-test split (stratified to balance classes) ===
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# === Convert to PyTorch tensors ===
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# === Wrap tensors into TensorDatasets ===
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# === Create DataLoaders ===
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# === Define FCNN Model ===
class EmotionClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(EmotionClassifier, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        return self.net(x)

input_size = X.shape[1]
num_classes = len(np.unique(y))

model = EmotionClassifier(input_size, num_classes).to(device)

# === Loss and Optimizer ===
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# === Training Loop ===
for epoch in range(epochs):
    model.train()
    correct, total, running_loss = 0, 0, 0.0

    for features, labels in train_loader:
        features, labels = features.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * labels.size(0)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    train_acc = correct / total
    train_loss = running_loss / total

    # === Evaluation on test set ===
    model.eval()
    test_correct, test_total = 0, 0
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)
            outputs = model(features)
            _, predicted = torch.max(outputs, 1)
            test_correct += (predicted == labels).sum().item()
            test_total += labels.size(0)

    test_acc = test_correct / test_total

    print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f} - Train Acc: {train_acc:.4f} - Test Acc: {test_acc:.4f}")



Epoch 1/79 - Train Loss: 1.8170 - Train Acc: 0.3102 - Test Acc: 0.4109
Epoch 2/79 - Train Loss: 1.4731 - Train Acc: 0.4650 - Test Acc: 0.5278
Epoch 3/79 - Train Loss: 1.2935 - Train Acc: 0.5286 - Test Acc: 0.5868
Epoch 4/79 - Train Loss: 1.1339 - Train Acc: 0.5923 - Test Acc: 0.6331
Epoch 5/79 - Train Loss: 1.0317 - Train Acc: 0.6285 - Test Acc: 0.6620
Epoch 6/79 - Train Loss: 0.9558 - Train Acc: 0.6574 - Test Acc: 0.7037
Epoch 7/79 - Train Loss: 0.8512 - Train Acc: 0.6892 - Test Acc: 0.7384
Epoch 8/79 - Train Loss: 0.7886 - Train Acc: 0.7245 - Test Acc: 0.7604
Epoch 9/79 - Train Loss: 0.7228 - Train Acc: 0.7459 - Test Acc: 0.7836
Epoch 10/79 - Train Loss: 0.6942 - Train Acc: 0.7509 - Test Acc: 0.7975
Epoch 11/79 - Train Loss: 0.6623 - Train Acc: 0.7700 - Test Acc: 0.8194
Epoch 12/79 - Train Loss: 0.5875 - Train Acc: 0.7925 - Test Acc: 0.8461
Epoch 13/79 - Train Loss: 0.5738 - Train Acc: 0.8079 - Test Acc: 0.8449
Epoch 14/79 - Train Loss: 0.5180 - Train Acc: 0.8189 - Test Acc: 0.8519
E