In [10]:
import os
import warnings
import time
import random
import shutil
import joblib
import numpy as np
import librosa
import logging
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from keras.models import Sequential, load_model
from keras.layers import Conv1D, Activation, Dropout, Flatten, Dense
from keras.utils import plot_model as KerasPlotModel
from absl import logging as absl_logging

In [11]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings('ignore')
absl_logging.set_verbosity(absl_logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.ERROR)

TrainingDataDirectory = r"X:\Projects\CodeAlpha\Task 2\features"
FeatureStorageDirectory = r"X:\Projects\CodeAlpha\Task 2\joblib_features"
ModelOutputDirectory = r"X:\Projects\CodeAlpha\Task 2\model"
TessSourceDirectory = r"X:\Projects\CodeAlpha\Task 2\TESS"
ExampleAudioDirectory = r"X:\Projects\CodeAlpha\Task 2\examples"

EmotionLabels = {
    0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad',
    4: 'angry', 5: 'fearful', 6: 'disgust', 7: 'surprised'
}

In [12]:
def PrepareTessAudioDirectories(sourceDir, destDir=TrainingDataDirectory):
    print(f"[TESS] Preparing WAV files from {sourceDir}")
    mappingCodes = {'01':'neutral','03':'happy','04':'sad','05':'angry','06':'fear','07':'disgust','08':'ps'}
    for basePath, _, files in os.walk(sourceDir):
        for fileName in files:
            if not fileName.endswith('.wav'): continue
            actorFolder = 'Actor_26' if fileName.startswith('OAF') else 'Actor_25'
            targetDir = os.path.join(destDir, actorFolder)
            os.makedirs(targetDir, exist_ok=True)
            nameRoot, ext = os.path.splitext(fileName)
            for code, label in mappingCodes.items():
                if nameRoot.endswith(label):
                    randomId = random.sample(range(10,99), 7)
                    newName = '-'.join(map(str, randomId))
                    finalName = newName[:6] + code + newName[8:] + ext
                    shutil.copy(os.path.join(basePath, fileName), os.path.join(targetDir, finalName))
    print("[TESS] Audio directories populated")

In [13]:
def ExtractMfccFromAudio(sourceDir=TrainingDataDirectory, outputDir=FeatureStorageDirectory):
    print(f"[Features] Extracting MFCCs from {sourceDir}")
    featureList = []
    startTime = time.time()
    for rootPath, _, files in os.walk(sourceDir):
        for audioFile in files:
            if not audioFile.endswith('.wav'): continue
            filePath = os.path.join(rootPath, audioFile)
            try:
                data, sr = librosa.load(filePath, sr=None)
                mfccVector = np.mean(librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40).T, axis=0)
                labelIndex = int(audioFile[7:8]) - 1
                featureList.append((mfccVector, labelIndex))
            except Exception as error:
                print(f"[Features] Error processing {audioFile}: {error}")
    duration = time.time() - startTime
    print(f"[Features] Extracted {len(featureList)} MFCCs in {duration:.2f}s")
    features, labels = zip(*featureList)
    features = np.array(features)
    labels = np.array(labels)
    os.makedirs(outputDir, exist_ok=True)
    joblib.dump(features, os.path.join(outputDir, 'X.joblib'))
    joblib.dump(labels, os.path.join(outputDir, 'y.joblib'))
    print(f"[Features] Features saved to {outputDir}")

In [14]:
def TrainConv1DEmotionModel(featuresDir=FeatureStorageDirectory, modelDir=ModelOutputDirectory):
    print(f"[Training] Loading data from {featuresDir}")
    X = joblib.load(os.path.join(featuresDir, 'X.joblib'))
    y = joblib.load(os.path.join(featuresDir, 'y.joblib'))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    X_train_expanded = X_train[..., np.newaxis]
    X_test_expanded = X_test[..., np.newaxis]
    print("[Training] Building Conv1D network")
    model = Sequential([
        Conv1D(64, 5, padding='same', input_shape=(40,1)),
        Activation('relu'),
        Dropout(0.2),
        Flatten(),
        Dense(8),
        Activation('softmax')
    ])
    model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    print("[Training] Starting training")
    history = model.fit(X_train_expanded, y_train, batch_size=16, epochs=50, validation_data=(X_test_expanded, y_test))
    os.makedirs(modelDir, exist_ok=True)
    print("[Training] Saving performance charts")
    plt.figure()
    plt.plot(history.history['loss'], label='TrainLoss')
    plt.plot(history.history['val_loss'], label='ValLoss')
    plt.legend()
    plt.savefig(os.path.join(modelDir, 'LossChart.png'))
    plt.close()
    plt.figure()
    plt.plot(history.history['accuracy'], label='TrainAcc')
    plt.plot(history.history['val_accuracy'], label='ValAcc')
    plt.legend()
    plt.savefig(os.path.join(modelDir, 'AccuracyChart.png'))
    plt.close()
    print("[Training] Generating evaluation report")
    preds = np.argmax(model.predict(X_test_expanded), axis=1)
    print(classification_report(y_test, preds))
    print(confusion_matrix(y_test, preds))
    modelPath = os.path.join(modelDir, 'EmotionDetectionModel.h5')
    model.save(modelPath)
    print(f"[Training] Model saved to {modelPath}")

In [15]:
def GenerateModelDiagram(modelDir=ModelOutputDirectory):
    print("[Plot] Visualizing model architecture")
    trainedModel = load_model(os.path.join(modelDir, 'EmotionDetectionModel.h5'))
    KerasPlotModel(trainedModel, to_file=os.path.join(modelDir, 'ModelDiagram.png'), show_shapes=True)
    print("[Plot] Model diagram created")

In [16]:
def RunPredictionsOnExamples(examplesDir=ExampleAudioDirectory, modelDir=ModelOutputDirectory):
    if not os.path.isdir(examplesDir):
        print(f"[Predict] No examples in {examplesDir}")
        return
    print(f"[Predict] Classifying examples in {examplesDir}")
    model = load_model(os.path.join(modelDir, 'EmotionDetectionModel.h5'))
    for fileName in os.listdir(examplesDir):
        if not fileName.endswith('.wav'): continue
        filePath = os.path.join(examplesDir, fileName)
        data, sr = librosa.load(filePath, sr=None)
        mfccVec = np.mean(librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40).T, axis=0)
        index = np.argmax(model.predict(mfccVec[np.newaxis,...,np.newaxis]))
        print(f"[Predict] {fileName} => {EmotionLabels.get(index,'Unknown')}")

In [17]:
print("[Pipeline] Running full emotion detection pipeline")
PrepareTessAudioDirectories(TessSourceDirectory)
ExtractMfccFromAudio()
TrainConv1DEmotionModel()
GenerateModelDiagram()

[Pipeline] Running full emotion detection pipeline
[TESS] Preparing WAV files from X:\Projects\CodeAlpha\Task 2\TESS
[TESS] Audio directories populated
[Features] Extracting MFCCs from X:\Projects\CodeAlpha\Task 2\features
[Features] Extracted 5252 MFCCs in 39.88s
[Features] Features saved to X:\Projects\CodeAlpha\Task 2\joblib_features
[Training] Loading data from X:\Projects\CodeAlpha\Task 2\joblib_features
[Training] Building Conv1D network
[Training] Starting training
Epoch 1/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3589 - loss: 5.9117 - val_accuracy: 0.5185 - val_loss: 2.3107
Epoch 2/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5817 - loss: 2.0285 - val_accuracy: 0.6494 - val_loss: 1.1063
Epoch 3/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6836 - loss: 1.0772 - val_accuracy: 0.6840 - val_loss: 0.8826
Epoch 4/50
[1m220/220[0m [32m━━━━━

In [18]:
RunPredictionsOnExamples()

[Predict] Classifying examples in X:\Projects\CodeAlpha\Task 2\examples
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[Predict] 03-01-01-01-01-02-05.wav => neutral
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[Predict] 10-16-07-29-82-30-63.wav => disgust
