In [1]:
import os

import librosa
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, Masking
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

In [2]:
# Colab-only step: attach Google Drive to the runtime so that files stored
# there can be read through ordinary filesystem paths under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [19]:

# Dataset root on the mounted Drive. load_data() treats every sub-folder of
# this directory as one speaker and uses the folder name as the label.
data_path = (
    '/content/drive/MyDrive'
    '/speaker_identification_Report_rnn'
    '/IEMOCAP Dataset'
)

In [20]:
def extract_mfcc(audio_file, n_mfcc=13, sr=16000, duration=5):
    """Compute MFCC features for the beginning of one audio file.

    Args:
        audio_file: Path of the audio file to read.
        n_mfcc: Number of MFCC coefficients requested from librosa.
        sr: Sampling rate passed to ``librosa.load``.
        duration: Maximum number of seconds read from the start of the file.

    Returns:
        The transposed MFCC matrix (librosa's output with its two axes
        swapped, i.e. presumably one row per frame for mono audio), or
        ``None`` if loading or feature extraction raised any exception.
    """
    try:
        waveform, loaded_sr = librosa.load(audio_file, sr=sr, duration=duration)
        features = librosa.feature.mfcc(y=waveform, sr=loaded_sr, n_mfcc=n_mfcc).T
    except Exception as err:
        # Best effort: report the unreadable file and let the caller skip it.
        print(f"Error processing {audio_file}: {err}")
        return None
    return features


In [21]:
def load_data(data_path, extractor=None):
    """Collect features and speaker labels from a folder-per-speaker dataset.

    Every sub-directory of ``data_path`` is treated as one speaker; each
    regular file directly inside it is passed to ``extractor`` and the
    sub-directory name is recorded as the label.

    Directory listings are sorted so the order of the returned samples is
    the same on every run (``os.listdir`` gives no ordering guarantee, which
    would otherwise make a seeded train/validation split non-reproducible).

    Args:
        data_path: Root directory containing one sub-directory per speaker.
        extractor: Callable taking a file path and returning a feature
            object, or ``None`` when the file cannot be used. Defaults to
            the notebook's ``extract_mfcc``.

    Returns:
        A tuple ``(X, y)`` of two equally long lists: the extracted features
        and the matching speaker folder names.

    Raises:
        OSError: If ``data_path`` cannot be listed (propagated from
            ``os.listdir``).
    """
    if extractor is None:
        # Looked up at call time so the notebook-level helper is used.
        extractor = extract_mfcc

    X, y = [], []
    for speaker_folder in sorted(os.listdir(data_path)):
        speaker_folder_path = os.path.join(data_path, speaker_folder)
        if not os.path.isdir(speaker_folder_path):
            continue  # stray files next to the speaker folders carry no label
        for audio_file in sorted(os.listdir(speaker_folder_path)):
            audio_file_path = os.path.join(speaker_folder_path, audio_file)
            if not os.path.isfile(audio_file_path):
                continue  # nested directories are not audio clips
            mfcc_features = extractor(audio_file_path)
            if mfcc_features is not None:
                X.append(mfcc_features)
                y.append(speaker_folder)
    return X, y

In [None]:
# Extract features for every clip under data_path (slow: reads each file).
X, y = load_data(data_path)

# Fail here with a clear message rather than later with an obscure error
# from the label-encoding / padding steps.
if not X:
    raise ValueError(f"No audio features could be loaded from {data_path!r}")

# Summarise instead of printing every feature matrix, which would flood the
# notebook output and bloat the saved file.
print(f"Loaded {len(X)} clips from {len(set(y))} speaker folders")
print(f"First clip: feature shape {X[0].shape}, label {y[0]!r}")


In [None]:
# Map speaker folder names to integer ids, then to one-hot rows for the
# softmax / categorical cross-entropy model.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Clips yield different numbers of frames; zero-pad at the end of each
# sequence up to the longest one so they stack into a single 3-D array.
X_padded = pad_sequences(X, dtype='float32', padding='post')

# Stratify the split so each speaker keeps the same share in training and
# validation. scikit-learn requires at least two samples per class and
# splits no smaller than the number of classes; when that does not hold,
# fall back to the plain random split instead of raising.
val_fraction = 0.2
n_val = int(np.ceil(val_fraction * len(y_encoded)))
n_classes = len(label_encoder.classes_)
can_stratify = (
    np.bincount(y_encoded).min() >= 2
    and min(n_val, len(y_encoded) - n_val) >= n_classes
)

X_train, X_val, y_train, y_val = train_test_split(
    X_padded,
    y_categorical,
    test_size=val_fraction,
    random_state=42,
    stratify=y_encoded if can_stratify else None,
)

# (timesteps, features) of one padded sample, used to declare the model input.
input_shape = (X_padded.shape[1], X_padded.shape[2])

In [None]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout and batch shuffling start from the same state on
# every "Restart & Run All".
tf.keras.utils.set_random_seed(42)

model = Sequential()
model.add(Input(shape=input_shape))
# X_padded is zero-padded at the end of each sequence. Masking the all-zero
# timesteps makes the LSTM skip them, so its output is the state after the
# last real frame instead of the state after running over the padding.
model.add(Masking(mask_value=0.0))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
# One output unit per speaker label known to the encoder.
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

# categorical_crossentropy pairs with the one-hot targets built by to_categorical.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=32)

# There is no separate held-out test split in this notebook: the score below
# is measured on the validation data that was monitored during training, so
# it is reported as validation accuracy. Variable names are kept unchanged
# in case later cells refer to them.
test_loss, test_acc = model.evaluate(X_val, y_val)
print(f'Validation accuracy: {test_acc}')