<a href="https://colab.research.google.com/github/Boyaradhika123/ZomatoEDA/blob/main/Speechemotionrecognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import os
import librosa
import torch
from torch.utils.data import Dataset
import numpy as np
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torchvision.models as models
import matplotlib.pyplot as plt
import librosa
import torch

class EmotionDataset(Dataset):
    def __init__(self, data_path, emotions, transform=None):
        self.data_path = data_path
        self.emotions = emotions
        self.file_list = []
        self.labels = []
        self.transform = transform

        if not os.path.exists(data_path):
            print(f"Base data path does not exist: {data_path}")
            return

        for idx, emotion in enumerate(emotions):
            emotion_folders = [f'YAF_{emotion}', f'OAF_{emotion}']
            for folder in emotion_folders:
                folder_path = os.path.join(data_path, folder)
                print(f"Checking folder path: {folder_path}")
                if os.path.exists(folder_path):
                    print(f"Folder path exists: {folder_path}")
                    for file_name in os.listdir(folder_path):
                        file_path = os.path.join(folder_path, file_name)
                        self.file_list.append(file_path)
                        self.labels.append(idx)
                        print(f"Processing file: {file_path}")

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        file_path = self.file_list[idx]
        label = self.labels[idx]
        y, sr = librosa.load(file_path, sr=16000)
        mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
        max_length = 128
        pad_width = max_length - mel_spectrogram_db.shape[1]
        if pad_width > 0:
            mel_spectrogram_db = np.pad(mel_spectrogram_db, pad_width=((0, 0), (0, pad_width)), mode='constant')
        else:
            mel_spectrogram_db = mel_spectrogram_db[:, :max_length]
        mel_spectrogram_3ch = np.repeat(mel_spectrogram_db[np.newaxis, :, :], 3, axis=0)
        return torch.tensor(mel_spectrogram_3ch, dtype=torch.float32), torch.tensor(label)

class EmotionRecognitionModel(nn.Module):
    def __init__(self, num_classes):
        super(EmotionRecognitionModel, self).__init__()
        self.vgg = models.vgg16(pretrained=True)
        for param in self.vgg.parameters():
            param.requires_grad = False
        self.vgg.classifier[6] = nn.Linear(self.vgg.classifier[6].in_features, num_classes)

    def forward(self, x):
        return self.vgg(x)

emotions = ['anger', 'disgust', 'fear', 'happiness', 'pleasant_surprise', 'sadness', 'neutral']
data_path = '/content/drive/MyDrive/extract_speech/TESS Toronto emotional speech set data'
dataset = EmotionDataset(data_path, emotions)

print(f"Length of dataset: {len(dataset)}")

train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)


model = EmotionRecognitionModel(num_classes=len(emotions))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


total_train_correct = 0
total_train_samples = 0
total_val_correct = 0
total_val_samples = 0


num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0


    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        total_train_correct += (outputs.argmax(dim=1) == labels).sum().item()
        total_train_samples += labels.size(0)

    avg_train_loss = train_loss / len(train_loader)
    train_accuracy = total_train_correct / total_train_samples
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Training Accuracy: {train_accuracy:.4f}")


    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            total_val_correct += (outputs.argmax(dim=1) == labels).sum().item()
            total_val_samples += labels.size(0)

    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = total_val_correct / total_val_samples
    print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")


final_train_accuracy = total_train_correct / total_train_samples
final_val_accuracy = total_val_correct / total_val_samples
print(f"Final Training Accuracy: {final_train_accuracy:.4f}")
print(f"Final Validation Accuracy: {final_val_accuracy:.4f}")


torch.save(model.state_dict(), 'emotion_recognition_model.pth')

test_loader = DataLoader(test_dataset, batch_size=32)

model.load_state_dict(torch.load('emotion_recognition_model.pth'))

model.eval()
test_loss = 0.0
total_test_correct = 0
total_test_samples = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        test_loss += loss.item()
        total_test_correct += (outputs.argmax(dim=1) == labels).sum().item()
        total_test_samples += labels.size(0)

avg_test_loss = test_loss / len(test_loader)
test_accuracy = total_test_correct / total_test_samples
print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


def predict_emotion(audio_path):

    y, sr = librosa.load(audio_path, sr=16000)
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
    max_length = 128
    pad_width = max_length - mel_spectrogram_db.shape[1]
    if pad_width > 0:
        mel_spectrogram_db = np.pad(mel_spectrogram_db, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mel_spectrogram_db = mel_spectrogram_db[:, :max_length]
    mel_spectrogram_3ch = np.repeat(mel_spectrogram_db[np.newaxis, :, :], 3, axis=0)
    input_tensor = torch.tensor(mel_spectrogram_3ch, dtype=torch.float32).unsqueeze(0)
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        predicted_class = output.argmax(dim=1).item()
    return emotions[predicted_class]

audio_file_path = '/content/drive/MyDrive/extract_speech/TESS Toronto emotional speech set data/OAF_Fear/OAF_bar_fear.wav'  # Replace with your audio file path
predicted_emotion = predict_emotion(audio_file_path)
print(f'Predicted Emotion: {predicted_emotion}')

Base data path does not exist: /content/drive/MyDrive/extract_speech/TESS Toronto emotional speech set data
Length of dataset: 0


ValueError: num_samples should be a positive integer value, but got num_samples=0

**Error Explanation:**

The error `ValueError: num_samples should be a positive integer value, but got num_samples=0` occurred because the dataset is empty. This is indicated by `Length of dataset: 0` in the output. The `EmotionDataset` class was unable to find any audio files in the specified `data_path` and its subfolders.

**Possible Causes:**

1.  **Incorrect Data Path:** The path `/content/drive/MyDrive/extract_speech/TESS Toronto emotional speech set data` might be incorrect. Double-check the path to ensure it points to the correct location of your data on Google Drive.
2.  **Google Drive Not Mounted:** Your Google Drive might not be correctly mounted in your Colab environment. You need to mount your drive to access files stored there.
3.  **Folder Structure:** Ensure that the emotion folders (e.g., `YAF_anger`, `OAF_anger`) exist within the specified `data_path` and contain the audio files.

**Troubleshooting Steps:**

1.  **Mount Google Drive:** If you haven't already, mount your Google Drive by running the following code in a new cell: