# In [None]:
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Attention, Flatten, Input

class EmotionRecognitionModel:
    """LSTM + self-attention classifier over sequences of MFCC frames.

    The network ends in an 8-way softmax and is compiled with the Adam
    optimizer and categorical cross-entropy, so training targets are
    expected to be one-hot encoded with 8 columns.
    """

    def __init__(self, input_shape):
        """Build and compile the network.

        Args:
            input_shape: Per-sample shape ``(time_steps, n_features)``.
                ``time_steps`` may be ``None`` for variable-length input.
        """
        self.model = self.create_model(input_shape)

    def create_model(self, input_shape):
        """Create and compile the Keras model.

        Args:
            input_shape: Per-sample shape ``(time_steps, n_features)``.

        Returns:
            The compiled ``tf.keras.Model``.
        """
        inputs = Input(shape=input_shape)
        x = LSTM(128, return_sequences=True)(inputs)
        # Self-attention: the LSTM output serves as both query and value.
        x = Attention()([x, x])
        if input_shape[0] is None:
            # With an unknown number of time steps, Flatten would produce an
            # undefined feature size, which Dense rejects. Averaging over
            # time yields a fixed-size vector for any sequence length.
            x = tf.keras.layers.GlobalAveragePooling1D()(x)
        else:
            # Fixed-length input: keep the original Flatten head.
            x = Flatten()(x)
        x = Dense(64, activation='relu')(x)
        outputs = Dense(8, activation='softmax')(x)
        model = tf.keras.Model(inputs, outputs)
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    def extract_features(self, file_path, pooled=True):
        """Compute 40 MFCCs for one audio file.

        Args:
            file_path: Path of the audio file, loaded with librosa's
                default settings.
            pooled: When true (the default), average the coefficients
                over time and return a vector of shape ``(40,)``. When
                false, return the per-frame matrix ``(n_frames, 40)``.

        Returns:
            A numpy array of MFCC features, shaped as described above.
        """
        y, sr = librosa.load(file_path)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
        frames = mfccs.T  # librosa returns (n_mfcc, n_frames); put time first
        if pooled:
            return np.mean(frames, axis=0)
        return frames

    @staticmethod
    def _pad_sequences(sequences, length=None):
        """Stack 2-D ``(n_frames, n_features)`` arrays into one 3-D batch.

        Sequences shorter than ``length`` are zero-padded at the end and
        longer ones are truncated. ``length`` defaults to the longest
        sequence in ``sequences``.
        """
        if length is None:
            length = max(len(seq) for seq in sequences)
        n_features = sequences[0].shape[1]
        batch = np.zeros((len(sequences), length, n_features), dtype=np.float32)
        for row, seq in zip(batch, sequences):
            kept = seq[:length]
            row[:len(kept)] = kept
        return batch

    def train(self, audio_files, labels, epochs=50, batch_size=32):
        """Fit the model on a collection of labelled audio files.

        Args:
            audio_files: Paths of the audio files to train on.
            labels: One-hot targets, one row per audio file.
            epochs: Number of training epochs.
            batch_size: Mini-batch size passed to ``fit``.

        Raises:
            ValueError: If ``audio_files`` is empty or its length differs
                from the number of label rows.
        """
        audio_files = list(audio_files)
        labels = np.asarray(labels)
        if not audio_files:
            raise ValueError("audio_files must not be empty")
        if len(audio_files) != len(labels):
            raise ValueError(
                f"got {len(audio_files)} audio files but {len(labels)} labels"
            )

        # The network consumes frame sequences (batch, time, features), so
        # use the per-frame MFCCs instead of the time-averaged vector.
        sequences = [self.extract_features(path, pooled=False) for path in audio_files]
        # A fixed-length model dictates the padded length; a variable-length
        # one (time dimension None) pads to the longest clip.
        # NOTE(review): padded frames are zeros and are not masked, so they
        # still take part in the LSTM/attention/pooling computation.
        features = self._pad_sequences(sequences, length=self.model.input_shape[1])
        self.model.fit(features, labels, epochs=epochs, batch_size=batch_size)

# Example run: two clips, each assigned one of the eight classes.
audio_files = [
    'audio1.wav',
    'audio2.wav',
]  # Paths to audio files
# One-hot encoded labels: clip 0 -> class 0, clip 1 -> class 1.
labels = np.eye(8, dtype=int)[[0, 1]]
# Variable-length sequences of 40 features per step.
emotion_model = EmotionRecognitionModel(input_shape=(None, 40))
emotion_model.train(audio_files=audio_files, labels=labels)