In [1]:
pip install pyttsx3 pydub numpy

Collecting pyttsx3
  Downloading pyttsx3-2.91-py3-none-any.whl.metadata (3.8 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pyttsx3-2.91-py3-none-any.whl (33 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pyttsx3, pydub
Successfully installed pydub-0.25.1 pyttsx3-2.91


In [3]:
pip install gtts pydub

Collecting gtts
  Downloading gTTS-2.5.3-py3-none-any.whl.metadata (4.1 kB)
Downloading gTTS-2.5.3-py3-none-any.whl (29 kB)
Installing collected packages: gtts
Successfully installed gtts-2.5.3


In [4]:
import os
from gtts import gTTS
from pydub import AudioSegment

# Directory to save the dataset
DATASET_PATH = 'synthetic_emotion_dataset'
if not os.path.exists(DATASET_PATH):
    os.makedirs(DATASET_PATH)

EMOTIONS = {
    'happy': ['I am so happy today!', 'This is a wonderful day!', 'I feel great!'],
    'angry': ['I am very angry with you!', 'This is so frustrating!', 'Why would you do that?'],
    'sad': ['I feel so sad right now.', 'This is a very bad day.', 'I am heartbroken.']
}

def save_audio(text, filename, lang='en'):
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)
    # Convert to .wav using pydub
    sound = AudioSegment.from_file(filename, format="mp3")
    sound.export(filename.replace(".mp3", ".wav"), format="wav")

def create_dataset(emotions, dataset_path):
    for emotion, sentences in emotions.items():
        emotion_path = os.path.join(dataset_path, emotion)
        os.makedirs(emotion_path, exist_ok=True)

        for i, sentence in enumerate(sentences):
            filename = os.path.join(emotion_path, f'{emotion}_{i}.mp3')
            save_audio(sentence, filename)
            print(f'Generated: {filename}')

create_dataset(EMOTIONS, DATASET_PATH)


Generated: synthetic_emotion_dataset/happy/happy_0.mp3
Generated: synthetic_emotion_dataset/happy/happy_1.mp3
Generated: synthetic_emotion_dataset/happy/happy_2.mp3
Generated: synthetic_emotion_dataset/angry/angry_0.mp3
Generated: synthetic_emotion_dataset/angry/angry_1.mp3
Generated: synthetic_emotion_dataset/angry/angry_2.mp3
Generated: synthetic_emotion_dataset/sad/sad_0.mp3
Generated: synthetic_emotion_dataset/sad/sad_1.mp3
Generated: synthetic_emotion_dataset/sad/sad_2.mp3


In [11]:
# Install missing dependencies
!pip install resampy




In [9]:
pip install resampy


Collecting resampy
  Downloading resampy-0.4.3-py3-none-any.whl.metadata (3.0 kB)
Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━[0m [32m2.6/3.1 MB[0m [31m78.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m47.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: resampy
Successfully installed resampy-0.4.3


In [14]:
pip uninstall librosa resampy -y

Found existing installation: librosa 0.10.2.post1
Uninstalling librosa-0.10.2.post1:
  Successfully uninstalled librosa-0.10.2.post1
Found existing installation: resampy 0.4.3
Uninstalling resampy-0.4.3:
  Successfully uninstalled resampy-0.4.3


In [15]:

pip install librosa


Collecting librosa
  Using cached librosa-0.10.2.post1-py3-none-any.whl.metadata (8.6 kB)
Using cached librosa-0.10.2.post1-py3-none-any.whl (260 kB)
Installing collected packages: librosa
Successfully installed librosa-0.10.2.post1


In [2]:
pip install soundfile scipy pydub keras




In [4]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten
from keras.optimizers import Adam

# Directory where the synthetic dataset is stored
DATASET_PATH = 'synthetic_emotion_dataset'
EMOTIONS = ['happy', 'angry', 'sad']  # Emotion classes
SAMPLE_RATE = 22050
DURATION = 3  # Duration of each audio sample in seconds
NUM_MFCC = 40

# Function to extract features from audio
def extract_features(file_name):
    try:
        audio, sample_rate = librosa.load(file_name, sr=SAMPLE_RATE)

        # Ensure audio length
        if len(audio) < SAMPLE_RATE * DURATION:
            audio = np.pad(audio, (0, max(0, SAMPLE_RATE * DURATION - len(audio))), mode='constant')
        else:
            audio = audio[:SAMPLE_RATE * DURATION]

        # Compute MFCCs
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=NUM_MFCC)
        mfccs = np.mean(mfccs.T, axis=0)  # Average MFCCs
        return mfccs
    except Exception as e:
        print(f"Error encountered while parsing file: {file_name}\n{str(e)}")
        return None

# Create dataset from synthetic data
def load_data(dataset_path, emotions):
    features, labels = [], []

    for emotion in emotions:
        emotion_path = os.path.join(dataset_path, emotion)
        files = os.listdir(emotion_path)

        for file in files:
            file_path = os.path.join(emotion_path, file)
            data = extract_features(file_path)
            if data is not None:
                features.append(data)
                labels.append(emotion)

    return np.array(features), np.array(labels)

# Load dataset
X, y = load_data(DATASET_PATH, EMOTIONS)

# Check if any valid data was loaded
if len(X) == 0:
    raise ValueError("No valid audio data was loaded. Please check the dataset files.")

# Encode labels
le = LabelEncoder()
y = to_categorical(le.fit_transform(y))

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Architecture
model = Sequential()

model.add(Conv1D(64, kernel_size=5, strides=1, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv1D(128, kernel_size=5, strides=1, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))

model.add(Dense(len(EMOTIONS), activation='softmax'))

# Compile Model
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Reshape for CNN
X_train = np.expand_dims(X_train, axis=2)
X_test = np.expand_dims(X_test, axis=2)

# Train the Model
model.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_test, y_test), verbose=1)

# Evaluate the Model
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f'Test Accuracy: {test_acc * 100:.2f}%')

# Save the model
model.save("emotion_recognition_model.h5")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.2857 - loss: 1.1014 - val_accuracy: 0.5000 - val_loss: 1.2862
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step - accuracy: 0.2857 - loss: 2.4991 - val_accuracy: 0.0000e+00 - val_loss: 1.9956
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - accuracy: 0.1429 - loss: 4.5922 - val_accuracy: 0.0000e+00 - val_loss: 1.7764
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step - accuracy: 0.2857 - loss: 3.0019 - val_accuracy: 0.5000 - val_loss: 1.9346
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step - accuracy: 0.4286 - loss: 1.8460 - val_accuracy: 0.5000 - val_loss: 1.6802
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step - accuracy: 0.2857 - loss: 2.2669 - val_accuracy: 0.5000 - val_loss: 1.2269
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━



Test Accuracy: 100.00%
