<a href="https://colab.research.google.com/github/LittlePandaCode/CodeAlpha_Tasks/blob/main/codeAlpha_Emotion_Recognition_from__Speech.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**In this project we will work on the RAVDESS dataset to train our deep learning model. Here is the link to the dataset on Kaggle:**
[https://www.kaggle.com/datasets/uwrfkaggler/ravdess-emotional-speech-audio](https://www.kaggle.com/datasets/uwrfkaggler/ravdess-emotional-speech-audio)

## Install libraries

In [None]:
!pip install librosa
!pip install tensorflow
from tensorflow import keras
!pip install librosa soundfile numpy sklearn pyaudio
!pip install soundfile
!pip install resampy

## Make the necessary imports

In [None]:
import os
import librosa
import numpy as np
import resampy
import os, glob, pickle
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from tensorflow.keras import models, layers, optimizers, callbacks

In [None]:
# Root folder holding one sub-folder per actor.
# NOTE(review): this path presumes Google Drive is already mounted at
# /content/drive - no mount call is visible in this notebook; confirm.
audio_path = '/content/drive/MyDrive/Datasets/audio_speech_actors'

In [None]:
# Two-digit emotion code (third dash-separated field of a file name) -> label.
emotions = dict(zip(
    ('01', '02', '03', '04', '05', '06', '07', '08'),
    ('neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised'),
))

# Labels kept for training; currently every label in `emotions`, in code order.
observed_emotions = list(emotions.values())

## Load Dataset and Extract Features

In [None]:
def extract_features(file_path, mfcc=True, chroma=True, mel=True):
    """Build one flat feature list describing an audio file.

    The file is read with ``librosa.load(file_path, sr=None)``. For every
    enabled flag the corresponding librosa feature matrix is computed,
    transposed and averaged over axis 0, and the resulting values are appended
    in the fixed order: MFCC (``n_mfcc=40``), chroma STFT, mel spectrogram.

    Args:
        file_path: Path of the audio file to load.
        mfcc: Include the averaged MFCC values when truthy.
        chroma: Include the averaged chroma STFT values when truthy.
        mel: Include the averaged mel-spectrogram values when truthy.

    Returns:
        list: The concatenated averaged values; empty if every flag is falsy.
    """
    signal, sample_rate = librosa.load(file_path, sr=None)

    def averaged(matrix):
        # Collapse a 2-D feature matrix to one value per row of the original.
        return np.mean(matrix.T, axis=0)

    feature_vector = []
    if mfcc:
        feature_vector.extend(
            averaged(librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40))
        )
    if chroma:
        feature_vector.extend(
            averaged(librosa.feature.chroma_stft(y=signal, sr=sample_rate))
        )
    if mel:
        feature_vector.extend(
            averaged(librosa.feature.melspectrogram(y=signal, sr=sample_rate))
        )
    return feature_vector

# Load and preprocess data
# Walk <audio_path>/<actor folder>/<file>, derive the emotion label from the
# third dash-separated field of each file name and extract its features.
data = []
labels = []
# sorted(): os.listdir returns entries in arbitrary order, so without it the
# seeded train/test split below would differ from one machine to another.
for actor_folder in sorted(os.listdir(audio_path)):
    actor_path = os.path.join(audio_path, actor_folder)
    if not os.path.isdir(actor_path):
        # Stray files next to the actor folders would crash the inner listdir.
        continue
    for file_name in sorted(os.listdir(actor_path)):
        file_path = os.path.join(actor_path, file_name)
        if not os.path.isfile(file_path):
            continue
        parts = file_name.split("-")
        if len(parts) < 3:
            continue
        # Get emotion from file name; unknown codes are skipped instead of
        # aborting the whole load with a KeyError.
        emotion = emotions.get(parts[2].split(".")[0])
        if emotion is None or emotion not in observed_emotions:
            continue
        features = extract_features(file_path)
        data.append(features)
        labels.append(emotion)

if not data:
    # Fail early with an actionable message rather than inside the split.
    raise ValueError("No labelled audio files found under {!r}".format(audio_path))

X = np.array(data)
y = np.array(labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=9)


In [None]:
# Sanity check: the training features and labels must have the same row count.
n_train_samples = len(X_train)
n_train_labels = len(y_train)
print(n_train_samples, n_train_labels)

2030 2030


## Model Training

In [None]:
# Multi-layer perceptron with a single hidden layer of 300 units.
# random_state is fixed so that weight initialisation and batch shuffling are
# repeatable; without it every run reports a different accuracy.
# NOTE: per the scikit-learn docs, learning_rate='adaptive' only takes effect
# with solver='sgd'; with the default solver used here it is ignored.
model = MLPClassifier(
    alpha=0.01,
    batch_size=256,
    epsilon=1e-08,
    hidden_layer_sizes=(300,),
    learning_rate='adaptive',
    max_iter=500,
    random_state=9,
)
model.fit(X_train, y_train)

## Model Evaluation

In [None]:
# Predict an emotion label for every held-out sample.
y_pred = model.predict(X_test)


## Calculate the accuracy of the model

In [None]:
# Fraction of held-out samples whose predicted label matches the true label,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(100 * accuracy))

Accuracy: 81.10%


In [None]:
# Per-class precision / recall / F1 and support on the held-out set.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

       angry       0.95      0.69      0.80        59
        calm       0.83      0.90      0.86        58
     disgust       0.72      0.90      0.80        83
     fearful       0.87      0.78      0.82        74
       happy       0.78      0.91      0.84        70
     neutral       0.78      0.52      0.62        27
         sad       0.83      0.71      0.77        70
   surprised       0.82      0.87      0.84        67

    accuracy                           0.81       508
   macro avg       0.82      0.79      0.80       508
weighted avg       0.82      0.81      0.81       508

