In [None]:
# Install into the running kernel's environment (%pip rather than !pip) and pin
# the version that was resolved the last time this notebook ran, so a
# Restart & Run All reproduces the same dependency.
%pip install -q opendatasets==0.1.22

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl.metadata (9.2 kB)
Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Installing collected packages: opendatasets
Successfully installed opendatasets-0.1.22


In [None]:
import opendatasets as od
import pandas

# Kaggle page of the audio-emotions dataset. opendatasets asks for Kaggle
# credentials interactively and unpacks the archive under ./audio-emotions.
AUDIO_EMOTIONS_URL = "https://www.kaggle.com/datasets/uldisvalainis/audio-emotions/data"
od.download(AUDIO_EMOTIONS_URL)

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: gehansherif
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/uldisvalainis/audio-emotions
Downloading audio-emotions.zip to ./audio-emotions


100%|██████████| 1.12G/1.12G [01:06<00:00, 18.0MB/s]





In [None]:
# resampy is installed alongside librosa because the loader below asks for
# res_type='kaiser_fast'. Versions are pinned to the ones the previous run
# resolved; a single pinned %pip install replaces the uninstall/reinstall
# round-trip, which only re-installed the same cached librosa wheel.
%pip install -q librosa==0.10.2.post1 resampy==0.4.3

Found existing installation: librosa 0.10.2.post1
Uninstalling librosa-0.10.2.post1:
  Successfully uninstalled librosa-0.10.2.post1
[0mCollecting librosa
  Using cached librosa-0.10.2.post1-py3-none-any.whl.metadata (8.6 kB)
Collecting resampy
  Downloading resampy-0.4.3-py3-none-any.whl.metadata (3.0 kB)
Using cached librosa-0.10.2.post1-py3-none-any.whl (260 kB)
Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: resampy, librosa
Successfully installed librosa-0.10.2.post1 resampy-0.4.3


In [None]:
import os
import librosa
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

def extract_features(file_name, n_mfcc=40, res_type='kaiser_fast'):
    """
    Extracts MFCC features from an audio file.

    Args:
        file_name: Path of the audio file to read.
        n_mfcc: Number of MFCC coefficients computed per frame.
        res_type: Resampling method forwarded to ``librosa.load``.

    Returns:
        A 2-D array with one row per frame and ``n_mfcc`` columns (the MFCC
        matrix transposed), or ``None`` when the file cannot be decoded or
        contains no samples, so that the caller can skip it.
    """
    try:
        audio, sample_rate = librosa.load(file_name, res_type=res_type)
        if audio.size == 0:
            print(f"Skipping {file_name}: no audio samples")
            return None
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    except Exception as exc:
        # Best effort: one unreadable or non-audio file should not abort the
        # whole dataset load. The caller already filters out None results.
        print(f"Skipping {file_name}: {exc}")
        return None
    # Transpose so that time is the leading axis (frames x coefficients).
    return mfccs.T

def load_data(dataset_path="/content/audio-emotions/Emotions"):
    """
    Loads the dataset, extracts features and labels, and returns them as numpy arrays.

    Every sub-directory of ``dataset_path`` is treated as one emotion class and
    each regular file inside it is passed to ``extract_features``; files for
    which that returns ``None`` are skipped.

    Args:
        dataset_path: Root directory containing one sub-directory per emotion.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape (n_samples, max_frames, n_features),
        where shorter sequences are zero-padded at the end of the time axis.
        ``y`` is the one-hot label matrix produced by ``to_categorical``, with
        one column per entry of the fitted encoder's ``classes_``.

    Raises:
        ValueError: If no features could be extracted from ``dataset_path``.
    """
    features = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split made from X, y) reproducible.
    for emotion_dir in sorted(os.listdir(dataset_path)):
        emotion_path = os.path.join(dataset_path, emotion_dir)
        if not os.path.isdir(emotion_path):
            continue
        for file_name in sorted(os.listdir(emotion_path)):
            file_path = os.path.join(emotion_path, file_name)
            if not os.path.isfile(file_path):
                continue
            data = extract_features(file_path)
            if data is not None:
                features.append(data)
                labels.append(emotion_dir)

    if not features:
        raise ValueError(f"No audio features could be extracted from {dataset_path!r}")

    # Pad sequences to ensure they are all the same length. Writing into one
    # pre-allocated zero array avoids holding a second padded copy in memory.
    max_length = max(len(f) for f in features)
    first = features[0]
    X = np.zeros((len(features), max_length, first.shape[1]), dtype=first.dtype)
    for row, sequence in enumerate(features):
        X[row, :len(sequence)] = sequence

    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(np.array(labels))
    y = tf.keras.utils.to_categorical(y, num_classes=len(label_encoder.classes_))

    return X, y

# Load the audio files and build the padded feature array X and one-hot labels y
# (no training happens here; the model is trained in a later cell)
X, y = load_data()

In [None]:

def build_model(input_shape, num_classes):
    """
    Builds and compiles the baseline Conv1D + stacked-LSTM classifier.

    Args:
        input_shape: Shape of a single sample without the batch axis; the
            caller in this notebook passes (time_steps, n_features).
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled Sequential model using categorical cross-entropy loss, the
        Adam optimizer and accuracy as the reported metric.
    """
    model = tf.keras.models.Sequential([
        # Declare the input with an explicit Input layer instead of passing
        # `input_shape=` to the first layer, which Keras 3 warns against.
        tf.keras.layers.Input(shape=input_shape),
        tf.keras.layers.Conv1D(64, kernel_size=5, strides=1, padding="same"),
        tf.keras.layers.Activation("relu"),
        # Pooling by 8 shortens the sequence the LSTMs have to unroll over.
        tf.keras.layers.MaxPooling1D(pool_size=8),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.LSTM(128, return_sequences=True),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(num_classes, activation='softmax'),
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def train_model(X_train, y_train, X_val, y_val, num_classes,
                epochs=30, batch_size=32, build_fn=None):
    """
    Builds a model for the given data and fits it.

    Args:
        X_train: Training inputs; axes 1 and 2 of its shape define the model
            input shape.
        y_train: Training targets.
        X_val: Validation inputs, evaluated after each epoch.
        y_val: Validation targets.
        num_classes: Number of output classes handed to the model builder.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.
        build_fn: Callable ``(input_shape, num_classes) -> compiled model``.
            Defaults to ``build_model``.

    Returns:
        Tuple ``(model, history)``: the fitted model and the object returned
        by ``model.fit``.
    """
    if build_fn is None:
        # Resolved at call time so the default follows the current definition.
        build_fn = build_model
    model = build_fn((X_train.shape[1], X_train.shape[2]), num_classes)
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
    )
    return model, history

num_classes = y.shape[1]

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# First, split the data into training+validation (80%) and test (20%).
# y is one-hot, so argmax recovers integer class ids; stratifying on them keeps
# the class proportions the same in every subset.
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y.argmax(axis=1)
)

# Now, split the training+validation into training (80% of 80% -> 64%) and validation (20% of 80% -> 16%)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.2, random_state=42, stratify=y_temp.argmax(axis=1)
)

# Check shapes
print(f"Training set shape: {X_train.shape}, {y_train.shape}")
print(f"Validation set shape: {X_val.shape}, {y_val.shape}")
print(f"Test set shape: {X_test.shape}, {y_test.shape}")

# Train the model with the training and validation sets
model, history = train_model(X_train, y_train, X_val, y_val, num_classes)

# Evaluate on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_accuracy}")

Training set shape: (8190, 308, 40), (8190, 7)
Validation set shape: (2048, 308, 40), (2048, 7)
Test set shape: (2560, 308, 40), (2560, 7)
Epoch 1/30
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 121ms/step - accuracy: 0.2392 - loss: 1.7908 - val_accuracy: 0.4146 - val_loss: 1.4829
Epoch 2/30
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 121ms/step - accuracy: 0.4145 - loss: 1.4609 - val_accuracy: 0.4419 - val_loss: 1.3524
Epoch 3/30
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 118ms/step - accuracy: 0.4519 - loss: 1.3529 - val_accuracy: 0.4834 - val_loss: 1.2998
Epoch 4/30
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 114ms/step - accuracy: 0.4858 - loss: 1.2877 - val_accuracy: 0.5190 - val_loss: 1.1881
Epoch 5/30
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 123ms/step - accuracy: 0.5123 - loss: 1.1968 - val_accuracy: 0.4893 - val_loss: 1.2927
Epoch 6/30
[1m256/256[0m [32m━━━━

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, MaxPooling1D, Input, Attention, Bidirectional
from tensorflow.keras.models import Model

def build_enhanced_model(input_shape, num_classes):
    """
    Builds and compiles the Conv1D + bidirectional-LSTM + attention classifier.

    Args:
        input_shape: Shape of a single sample without the batch axis.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled functional ``Model`` (categorical cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    inputs = Input(shape=input_shape)

    # Convolutional front end, applied in order: conv -> pool -> dropout.
    front_end = [
        Conv1D(64, kernel_size=5, strides=1, padding="same", activation='relu'),
        MaxPooling1D(pool_size=8),
        Dropout(0.3),
    ]
    hidden = inputs
    for layer in front_end:
        hidden = layer(hidden)

    # Recurrent stack: the first BiLSTM keeps the full sequence so attention
    # can use it as both query and value; the second BiLSTM returns one vector.
    sequence = Bidirectional(LSTM(128, return_sequences=True))(hidden)
    attended = Attention()([sequence, sequence])
    pooled = Bidirectional(LSTM(64))(attended)

    # Class probabilities.
    probabilities = Dense(num_classes, activation='softmax')(pooled)

    enhanced = Model(inputs=inputs, outputs=probabilities)
    enhanced.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return enhanced


def train_model(X_train, y_train, X_val, y_val, num_classes,
                epochs=30, batch_size=32, build_fn=None):
    """
    Builds a model for the given data and fits it.

    NOTE: this redefinition replaces the ``train_model`` declared earlier in
    the notebook; from this cell on, the default builder is
    ``build_enhanced_model``.

    Args:
        X_train: Training inputs; axes 1 and 2 of its shape define the model
            input shape.
        y_train: Training targets.
        X_val: Validation inputs, evaluated after each epoch.
        y_val: Validation targets.
        num_classes: Number of output classes handed to the model builder.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.
        build_fn: Callable ``(input_shape, num_classes) -> compiled model``.
            Defaults to ``build_enhanced_model``.

    Returns:
        Tuple ``(model, history)``: the fitted model and the object returned
        by ``model.fit``.
    """
    if build_fn is None:
        # Resolved at call time so the default follows the current definition.
        build_fn = build_enhanced_model
    model = build_fn((X_train.shape[1], X_train.shape[2]), num_classes)
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
    )
    return model, history

# Train the model with the training and validation sets.
# `train_model` here is the definition directly above, which builds the
# enhanced architecture. NOTE: this rebinds `model` and `history`, replacing
# whatever an earlier cell assigned to those names.
model, history = train_model(X_train, y_train, X_val, y_val, y_train.shape[1])

# Evaluate on the test set (rebinds `test_loss` / `test_accuracy` as well)
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_accuracy}")

Training set shape: (8190, 308, 40), (8190, 7)
Validation set shape: (2048, 308, 40), (2048, 7)
Test set shape: (2560, 308, 40), (2560, 7)
Epoch 1/30
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 219ms/step - accuracy: 0.2638 - loss: 1.7602 - val_accuracy: 0.4536 - val_loss: 1.3656
Epoch 2/30
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 225ms/step - accuracy: 0.4546 - loss: 1.3612 - val_accuracy: 0.5200 - val_loss: 1.1861
Epoch 3/30
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 225ms/step - accuracy: 0.5289 - loss: 1.1741 - val_accuracy: 0.5635 - val_loss: 1.0862
Epoch 4/30
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 215ms/step - accuracy: 0.5655 - loss: 1.0992 - val_accuracy: 0.5503 - val_loss: 1.1140
Epoch 5/30
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 224ms/step - accuracy: 0.5892 - loss: 1.0554 - val_accuracy: 0.6011 - val_loss: 0.9828
Epoch 6/30
[1m256/256[0m [32m━━━━