<a href="https://colab.research.google.com/github/ASMASHAIKH04/Emotion/blob/main/Emotion_Prediction_using_Audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/gdrive')


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
import os

base_path = '/content/gdrive/MyDrive/emotion/Datasets/RAVDESS_audio/audio_speech_actors_01-24'

# List the contents of the base_path
print(os.listdir(base_path))


['Actor_24', 'Actor_17', 'Actor_16', 'Actor_18', 'Actor_19', 'Actor_23', 'Actor_21', 'Actor_20', 'Actor_22', 'Actor_15', 'Actor_14', 'Actor_13', 'Actor_11', 'Actor_12', 'Actor_10', 'Actor_08', 'Actor_07', 'Actor_06', 'Actor_09', 'Actor_05', 'Actor_04', 'Actor_03', 'Actor_01', 'Actor_02']


In [3]:
import os
import numpy as np
import pandas as pd
import librosa
from sklearn.preprocessing import LabelEncoder

# Define the paths and labels
emotion_labels = ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised'] # Example labels

# Function to load audio files and extract features
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    mfccs_scaled = np.mean(mfccs.T, axis=0)
    return mfccs_scaled

# Lists to hold features and labels
features = []
labels = []

for dirpath, dirnames, filenames in os.walk(base_path):
    for file in filenames:
        if file.endswith('.wav'):
            file_path = os.path.join(dirpath, file)
            emotion = file.split('-')[2]  # Adjust based on your file naming convention
            features.append(extract_features(file_path))
            labels.append(emotion)

# Convert to DataFrame
features_df = pd.DataFrame(features)
labels_df = pd.DataFrame(labels, columns=['emotion'])

# Combine features and labels into one DataFrame
df = pd.concat([features_df, labels_df], axis=1)

# Encode the labels
label_encoder = LabelEncoder()
df['emotion'] = label_encoder.fit_transform(df['emotion'])

# Output the resulting DataFrame
print(df.head())


            0          1          2          3         4         5          6  \
0 -712.388489  63.240292  -6.697998  12.928338 -9.692719  7.185036 -12.298458   
1 -748.791626  68.438736   0.857495  16.576460 -3.619491  6.739974  -9.081865   
2 -562.022461  51.749161 -19.801012   3.015481 -6.242053 -2.124168 -17.851171   
3 -717.182312  61.524731  -9.561805   4.530459 -4.689971  0.425915 -12.630183   
4 -760.015991  58.005562  -1.649008  18.476496 -4.011447  6.138883  -9.888475   

          7          8         9  ...        31        32        33        34  \
0 -2.933380  -8.629324 -5.959417  ... -0.839247 -0.979986  2.792931  1.156611   
1 -0.186805 -10.171622 -4.933789  ... -2.547779 -1.476792 -1.915742 -1.715452   
2 -5.088872 -10.204763 -8.797137  ...  3.234146  3.186999  5.048421  2.605889   
3 -3.081867 -11.383424 -8.497087  ... -1.258482 -0.510048  0.011909  0.410215   
4  0.571403 -12.488593 -3.485360  ... -4.558718 -2.542081 -2.223548 -0.417488   

         35        36     

In [4]:
from sklearn.model_selection import train_test_split

X = df.iloc[:, :-1].values
y = df['emotion'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import to_categorical

# One-hot encode the labels
y_train_encoded = to_categorical(y_train)
y_test_encoded = to_categorical(y_test)

# Define the model
model = Sequential()
model.add(Dense(256, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(emotion_labels), activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [6]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, mode='min', verbose=1)

# Train the model
history = model.fit(X_train, y_train_encoded, validation_split=0.2, epochs=100, batch_size=32, callbacks=[early_stopping, model_checkpoint])

Epoch 1/100
Epoch 1: val_loss improved from inf to 12.28604, saving model to best_model.h5
Epoch 2/100

  saving_api.save_model(


Epoch 2: val_loss improved from 12.28604 to 2.06717, saving model to best_model.h5
Epoch 3/100
Epoch 3: val_loss did not improve from 2.06717
Epoch 4/100
Epoch 4: val_loss did not improve from 2.06717
Epoch 5/100
Epoch 5: val_loss did not improve from 2.06717
Epoch 6/100
Epoch 6: val_loss did not improve from 2.06717
Epoch 7/100
Epoch 7: val_loss did not improve from 2.06717
Epoch 8/100
Epoch 8: val_loss did not improve from 2.06717
Epoch 9/100
Epoch 9: val_loss did not improve from 2.06717
Epoch 10/100
Epoch 10: val_loss did not improve from 2.06717
Epoch 11/100
Epoch 11: val_loss did not improve from 2.06717
Epoch 12/100
Epoch 12: val_loss did not improve from 2.06717
Epoch 12: early stopping


In [7]:
# Load the best model
model.load_weights('best_model.h5')

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test_encoded, verbose=0)
print(f'Test Accuracy: {test_accuracy}')

# Save the final model
model.save('emotion_detection_model.h5')

Test Accuracy: 0.0833333358168602


In [9]:
from keras.models import load_model

# Load the pre-trained model
model = load_model('/content/emotion_detection_model.h5')


In [10]:
import librosa
import numpy as np

# Function to load and preprocess the audio file
def preprocess_audio(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    mfccs_scaled = np.mean(mfccs.T, axis=0)
    return np.array([mfccs_scaled])  # Return as 2D array for model prediction

# Function to predict the emotion
def predict_emotion(model, file_path):
    # Preprocess the audio file
    features = preprocess_audio(file_path)
    # Make a prediction
    prediction = model.predict(features)
    # Get the emotion label
    predicted_emotion = emotion_labels[np.argmax(prediction)]
    return predicted_emotion


In [12]:
# Path to the audio file you want to predict
audio_file_path = '/content/03-01-01-01-01-01-02.wav'

# Predict the emotion
predicted_emotion = predict_emotion(model, audio_file_path)
print(f'Predicted Emotion: {predicted_emotion}')


Predicted Emotion: happy


In [14]:
!pip install gradio


Collecting gradio
  Downloading gradio-4.39.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.1-py3-none-any.whl.metadata (26 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.1.1 (from gradio)
  Downloading gradio_client-1.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting p

In [20]:
!pip install --upgrade gradio




In [21]:
import gradio as gr
from keras.models import load_model
import librosa
import numpy as np

# Load the pre-trained model
model = load_model('/content/emotion_detection_model.h5')

# Define emotion labels
emotion_labels = ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised']  # Example labels

# Function to load and preprocess the audio file
def preprocess_audio(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    mfccs_scaled = np.mean(mfccs.T, axis=0)
    return np.array([mfccs_scaled])  # Return as 2D array for model prediction

# Function to predict the emotion
def predict_emotion(audio_file):
    # Preprocess the audio file
    features = preprocess_audio(audio_file)
    # Make a prediction
    prediction = model.predict(features)
    # Get the emotion label
    predicted_emotion = emotion_labels[np.argmax(prediction)]
    return predicted_emotion

# Gradio Interface
def main():
    io = gr.Interface(
        fn=predict_emotion,
        inputs=[gr.Audio(label="Upload an Audio File", type="filepath")],
        outputs=[gr.Textbox(label="Predicted Emotion")],
        title="Emotion Prediction from Audio",
        description="Upload an audio file and get the predicted emotion.",
    )
    io.launch(share=True)

if __name__ == "__main__":
    main()


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://29d5f98642bccdf735.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
