<a href="https://colab.research.google.com/github/Rashmikr1203/Emotion_Detection_Multimodal/blob/main/AudioCombine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Rashmi

4 sessions, relu, sigmoid, with 10 features


In [4]:
#rashmi- #
#session 4 sigmoid softmax
import os
import librosa
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def extract_features(file_path):
    """Turn one audio file into a flat vector of summary statistics.

    Every librosa feature is reduced to its mean and standard deviation.
    Single-track features (RMS, zero-crossing rate, spectral centroid,
    bandwidth, flatness, roll-off) contribute two numbers each; multi-row
    features (spectral contrast, chroma CENS, 13 MFCCs, tonnetz) contribute
    one mean and one std per row.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A 1-D NumPy array of statistics, or None when loading or feature
        extraction raises (the error is printed, not re-raised).
    """
    try:
        signal, rate = librosa.load(file_path, sr=None)

        def overall(track):
            # Mean and std over every value of the feature.
            return [np.mean(track), np.std(track)]

        def per_row(matrix):
            # Mean and std along axis 1, i.e. one value per feature row.
            return [np.mean(matrix, axis=1), np.std(matrix, axis=1)]

        # The concatenation order below defines the layout of the vector.
        pieces = (
            overall(librosa.feature.rms(y=signal)[0])
            + overall(librosa.feature.zero_crossing_rate(y=signal)[0])
            + overall(librosa.feature.spectral_centroid(y=signal, sr=rate)[0])
            + overall(librosa.feature.spectral_bandwidth(y=signal, sr=rate)[0])
            + per_row(librosa.feature.spectral_contrast(y=signal, sr=rate))
            + overall(librosa.feature.spectral_flatness(y=signal))
            + overall(librosa.feature.spectral_rolloff(y=signal, sr=rate)[0])
            + per_row(librosa.feature.chroma_cens(y=signal, sr=rate))
            + per_row(librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=13))
            + per_row(librosa.feature.tonnetz(y=signal, sr=rate))
        )
        return np.hstack(pieces)

    except Exception as e:
        # Best effort: a file that cannot be processed is reported and skipped.
        print(f"Error processing {file_path}: {e}")
        return None

# Google Drive folder that holds one sub-folder of recordings per emotion
dataset_path = '/content/drive/My Drive/session 1 2 3 4'

# Sub-folder name -> integer class id used as the training target
emotion_labels = {
    'happy': 0, 'sadness': 1, 'anger': 2, 'disgust': 3,
    'fear': 4, 'neutral': 5, 'surprise': 6, 'sarcastic': 7,
}

# Parallel lists: one feature vector and one class id per usable file
X, y = [], []

for emotion, label in emotion_labels.items():
    emotion_path = os.path.join(dataset_path, emotion)
    if not os.path.exists(emotion_path):
        print(f"Directory {emotion_path} does not exist!")
        continue

    # Only .wav entries are considered
    wav_names = [name for name in os.listdir(emotion_path) if name.endswith('.wav')]
    for file_name in wav_names:
        file_path = os.path.join(emotion_path, file_name)
        if not os.path.isfile(file_path):
            print(f"File {file_path} does not exist!")
            continue

        features = extract_features(file_path)
        if features is None:
            # extract_features already printed the reason
            continue
        X.append(features)
        y.append(label)

X = np.array(X)
y = np.array(y)

print(f"Extracted features for {len(X)} files.")

if X.size == 0:
    print("No features were extracted. Exiting.")
else:
    # Split FIRST so that the imputer and scaler below only ever see training
    # rows; fitting them on the full matrix leaks test-set statistics into
    # training. Same test_size/random_state as before, so the row partition
    # is unchanged.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Impute missing values using KNN (neighbours taken from the training rows)
    imputer = KNNImputer(n_neighbors=5)
    X_train_imputed = imputer.fit_transform(X_train_raw)
    X_test_imputed = imputer.transform(X_test_raw)

    # Standardize features with the training mean/std
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_imputed)
    X_test = scaler.transform(X_test_imputed)

    # Define the Keras model: five hidden ReLU layers and a softmax output
    # with one unit per emotion class
    model = Sequential([
        Dense(10, input_dim=X_train.shape[1], activation='relu'),
        Dense(20, activation='relu'),
        Dense(30, activation='relu'),
        Dense(15, activation='relu'),
        Dense(8, activation='relu'),
        Dense(len(emotion_labels), activation='softmax')
    ])

    # Compile the Keras model (integer class ids -> sparse categorical loss)
    optimizer = Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Train the Keras model; the held-out split is only monitored here
    model.fit(X_train, y_train,
              epochs=100,
              batch_size=32,
              validation_data=(X_test, y_test),
              verbose=1)

    # Evaluate the Keras model. This cell names the network `model`; the
    # previous `keras_model.evaluate(...)` raised NameError on a fresh kernel.
    _, keras_accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f'Keras Model Accuracy: {keras_accuracy*100:.2f}%')

    # Define a model to extract embeddings: the activations of the last
    # hidden layer (the layer just before the softmax output)
    embedding_model = Model(inputs=model.input,
                            outputs=model.layers[-2].output)

    # Generate embeddings for train and test sets
    X_train_embeddings = embedding_model.predict(X_train)
    X_test_embeddings = embedding_model.predict(X_test)

    # Define the Random Forest Classifier model
    rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)

    # Train the Random Forest model on embeddings
    rf_model.fit(X_train_embeddings, y_train)

    # Evaluate the Random Forest model
    y_pred = rf_model.predict(X_test_embeddings)
    rf_accuracy = accuracy_score(y_test, y_pred)
    print(f'Random Forest Model Accuracy: {rf_accuracy*100:.2f}%')


Directory /content/drive/My Drive/session 1 2 3 4/happy does not exist!
Directory /content/drive/My Drive/session 1 2 3 4/sadness does not exist!
Directory /content/drive/My Drive/session 1 2 3 4/anger does not exist!
Directory /content/drive/My Drive/session 1 2 3 4/disgust does not exist!
Directory /content/drive/My Drive/session 1 2 3 4/fear does not exist!
Directory /content/drive/My Drive/session 1 2 3 4/neutral does not exist!
Directory /content/drive/My Drive/session 1 2 3 4/surprise does not exist!
Directory /content/drive/My Drive/session 1 2 3 4/sarcastic does not exist!
Extracted features for 0 files.
No features were extracted. Exiting.


In [5]:
#rashmi
#1185files, 16 features, keras NN input to random forest
import os
import librosa
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def extract_features(file_path):
    """Build the extended feature vector for one audio file.

    On top of mean/std statistics of the spectral, chroma, MFCC and tonnetz
    features, the vector ends with ten scalars: mean onset strength
    ("spectral flux"), mean non-zero piptrack pitch, simplified jitter and
    shimmer derived from a second, short-window piptrack pass, mean/std of
    the output of ``librosa.effects.harmonic`` (the entries the code calls
    HNR), mean/std of the order-10 LPC coefficients, onsets per second and
    the std of the detected onset positions.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A 1-D NumPy array, or None when any step raises (the error is
        printed, not re-raised).
    """
    try:
        signal, rate = librosa.load(file_path, sr=None)

        # Frame-level descriptors
        rms = librosa.feature.rms(y=signal)[0]
        zcr = librosa.feature.zero_crossing_rate(y=signal)[0]
        centroid = librosa.feature.spectral_centroid(y=signal, sr=rate)[0]
        bandwidth = librosa.feature.spectral_bandwidth(y=signal, sr=rate)[0]
        contrast = librosa.feature.spectral_contrast(y=signal, sr=rate)
        flatness = librosa.feature.spectral_flatness(y=signal)
        rolloff = librosa.feature.spectral_rolloff(y=signal, sr=rate)[0]
        chroma_cens = librosa.feature.chroma_cens(y=signal, sr=rate)

        # Mean onset strength stands in for spectral flux
        spectral_flux = np.mean(librosa.onset.onset_strength(y=signal, sr=rate))

        # Average of every non-zero pitch candidate (default analysis window)
        pitch_grid, _ = librosa.core.piptrack(y=signal, sr=rate)
        pitch = np.mean(pitch_grid[pitch_grid > 0])

        mfccs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=13)
        tonnetz = librosa.feature.tonnetz(y=signal, sr=rate)

        # Simplified jitter/shimmer from a second pass with a 25 ms window
        # and a 10 ms hop
        window = int(0.025 * rate)
        hop = int(0.01 * rate)
        fine_grid, _ = librosa.core.piptrack(y=signal, sr=rate, n_fft=window, hop_length=hop)
        voiced = fine_grid[fine_grid > 0]
        voiced_mean = np.mean(voiced)
        jitter = np.mean(np.abs(np.diff(voiced))) / voiced_mean
        shimmer = np.std(voiced) / voiced_mean

        # Statistics of the harmonic component returned by librosa
        harmonic = librosa.effects.harmonic(signal)

        # Linear-prediction coefficients, order 10
        lpc = librosa.lpc(signal, order=10)

        # Onsets per second, and spread of the onset positions
        onsets = librosa.onset.onset_detect(y=signal, sr=rate)
        duration_s = len(signal) / rate
        speech_rate = len(onsets) / duration_s
        rhythm = np.std(onsets)

        def overall(track):
            return [np.mean(track), np.std(track)]

        def per_row(matrix):
            return [np.mean(matrix, axis=1), np.std(matrix, axis=1)]

        # The concatenation order below defines the layout of the vector.
        pieces = (
            overall(rms) + overall(zcr) + overall(centroid) + overall(bandwidth)
            + per_row(contrast)
            + overall(flatness) + overall(rolloff)
            + per_row(chroma_cens) + per_row(mfccs) + per_row(tonnetz)
            + [spectral_flux, pitch, jitter, shimmer]
            + overall(harmonic)
            + overall(lpc)
            + [speech_rate, rhythm]
        )
        return np.hstack(pieces)

    except Exception as e:
        # Best effort: a file that cannot be processed is reported and skipped.
        print(f"Error processing {file_path}: {e}")
        return None

# Google Drive folder that holds one sub-folder of recordings per emotion
dataset_path = '/content/drive/My Drive/session 1 2 3 4 5'

# Sub-folder name -> integer class id used as the training target
emotion_labels = {
    'happy': 0, 'sadness': 1, 'anger': 2, 'disgust': 3,
    'fear': 4, 'neutral': 5, 'surprise': 6, 'sarcastic': 7,
}

# Parallel lists: one feature vector and one class id per usable file
X, y = [], []

for emotion, label in emotion_labels.items():
    emotion_path = os.path.join(dataset_path, emotion)
    if not os.path.exists(emotion_path):
        print(f"Directory {emotion_path} does not exist!")
        continue

    # Only .wav entries are considered
    wav_names = [name for name in os.listdir(emotion_path) if name.endswith('.wav')]
    for file_name in wav_names:
        file_path = os.path.join(emotion_path, file_name)
        if not os.path.isfile(file_path):
            print(f"File {file_path} does not exist!")
            continue

        features = extract_features(file_path)
        if features is None:
            # extract_features already printed the reason
            continue
        X.append(features)
        y.append(label)

X = np.array(X)
y = np.array(y)

print(f"Extracted features for {len(X)} files.")

if X.size == 0:
    print("No features were extracted. Exiting.")
else:
    # Split FIRST so that the imputer and scaler below only ever see training
    # rows; fitting them on the full matrix leaks test-set statistics into
    # training. Same test_size/random_state as before, so the row partition
    # is unchanged.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Impute missing values using KNN (neighbours taken from the training rows)
    imputer = KNNImputer(n_neighbors=5)
    X_train_imputed = imputer.fit_transform(X_train_raw)
    X_test_imputed = imputer.transform(X_test_raw)

    # Standardize features with the training mean/std
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_imputed)
    X_test = scaler.transform(X_test_imputed)

    # Define the Keras model: five hidden ReLU layers and a softmax output
    # with one unit per emotion class
    keras_model = Sequential([
        Dense(10, input_dim=X_train.shape[1], activation='relu'),
        Dense(20, activation='relu'),
        Dense(30, activation='relu'),
        Dense(15, activation='relu'),
        Dense(8, activation='relu'),
        Dense(len(emotion_labels), activation='softmax')
    ])

    # Compile the Keras model (integer class ids -> sparse categorical loss)
    optimizer = Adam(learning_rate=0.001)
    keras_model.compile(optimizer=optimizer,
                        loss='sparse_categorical_crossentropy',
                        metrics=['accuracy'])

    # Train the Keras model; the held-out split is only monitored here
    keras_model.fit(X_train, y_train,
                    epochs=100,
                    batch_size=32,
                    validation_data=(X_test, y_test),
                    verbose=1)

    # Evaluate the Keras model
    _, keras_accuracy = keras_model.evaluate(X_test, y_test, verbose=0)
    print(f'Keras Model Accuracy: {keras_accuracy*100:.2f}%')

    # Define a model to extract embeddings: the activations of the last
    # hidden layer (the layer just before the softmax output)
    embedding_model = Model(inputs=keras_model.input,
                            outputs=keras_model.layers[-2].output)

    # Generate embeddings for train and test sets
    X_train_embeddings = embedding_model.predict(X_train)
    X_test_embeddings = embedding_model.predict(X_test)

    # Define the Random Forest Classifier model
    rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)

    # Train the Random Forest model on embeddings
    rf_model.fit(X_train_embeddings, y_train)

    # Evaluate the Random Forest model
    y_pred = rf_model.predict(X_test_embeddings)
    rf_accuracy = accuracy_score(y_test, y_pred)
    print(f'Random Forest Model Accuracy: {rf_accuracy*100:.2f}%')


Extracted features for 1185 files.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100

In [3]:
# Mount Google Drive at /content/drive (Colab only). The dataset paths used in
# the other cells all live under this mount point, so run this cell first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Nishita

15 features, 5 sessions, keras model and random forest with hyperparameter tuning using GridSearchCV

In [6]:
import os
import librosa
import numpy as np
import soundfile as sf
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import SelectKBest, f_classif
import parselmouth
from parselmouth.praat import call

# EXTRACTING FEATURES

def extract_features(file_path):
    """Build a feature vector from librosa descriptors plus Praat measures.

    librosa supplies MFCC, chroma, spectral-contrast, RMS, zero-crossing,
    centroid, bandwidth and roll-off statistics, mean onset strength and a
    per-frame pitch track. Praat (through parselmouth) supplies the mean of
    formants 1 and 2, local jitter and mean harmonicity; each of those three
    steps is guarded on its own, so a failure is printed and leaves NaN in
    the corresponding slots instead of discarding the file.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A 1-D NumPy array, or None when an unguarded step raises (the error
        is printed, not re-raised).
    """
    try:
        signal, rate = librosa.load(file_path, sr=None)

        # librosa descriptors, all kept as 2-D (rows x frames) arrays
        mfccs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=signal, sr=rate)
        contrast = librosa.feature.spectral_contrast(y=signal, sr=rate)
        rms = librosa.feature.rms(y=signal)
        zcr = librosa.feature.zero_crossing_rate(signal)
        centroid = librosa.feature.spectral_centroid(y=signal, sr=rate)
        bandwidth = librosa.feature.spectral_bandwidth(y=signal, sr=rate)
        rolloff = librosa.feature.spectral_rolloff(y=signal, sr=rate)

        # Mean onset strength stands in for spectral flux
        spectral_flux = np.mean(librosa.onset.onset_strength(y=signal, sr=rate))

        # Pitch track from piptrack: in every frame keep the candidate with
        # the largest magnitude, then drop frames where that pitch is zero
        pitches, magnitudes = librosa.core.piptrack(y=signal, sr=rate)
        pitch = np.array([
            pitches[magnitudes[:, frame].argmax(), frame]
            for frame in range(pitches.shape[1])
        ])
        pitch = pitch[pitch > 0]

        # Intensity is the mean RMS energy
        intensity = np.mean(rms)

        # Praat view of the same file for the voice-quality measures
        snd = parselmouth.Sound(file_path)

        # Defaults used when a Praat step fails
        formant1 = np.nan
        formant2 = np.nan
        jitter = np.nan
        hnr = np.nan

        # Mean of formants 1 and 2 (Burg method)
        try:
            formant_burg = snd.to_formant_burg()
            formant1 = call(formant_burg, "Get mean", 1, 0, 0, "Hertz")
            formant2 = call(formant_burg, "Get mean", 2, 0, 0, "Hertz")
        except Exception as e:
            print(f"Error extracting formants: {e}")

        # Local jitter from a periodic point process
        try:
            point_process = call(snd, "To PointProcess (periodic, cc)", 75, 500)
            jitter = call(point_process, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
        except Exception as e:
            print(f"Error extracting jitter: {e}")

        # Mean harmonics-to-noise ratio
        try:
            harmonicity = call(snd, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
            hnr = call(harmonicity, "Get mean", 0, 0)
        except Exception as e:
            print(f"Error extracting HNR: {e}")

        def overall(track):
            return [np.mean(track), np.std(track)]

        def per_row(matrix):
            return [np.mean(matrix, axis=1), np.std(matrix, axis=1)]

        # The concatenation order below defines the layout of the vector.
        pieces = (
            per_row(mfccs) + per_row(chroma) + per_row(contrast)
            + overall(rms) + overall(zcr) + overall(centroid)
            + overall(bandwidth) + overall(rolloff) + overall(pitch)
            + [intensity, spectral_flux, formant1, formant2, jitter, hnr]
        )
        return np.hstack(pieces)
    except Exception as e:
        # Best effort: a file that cannot be processed is reported and skipped.
        print(f"Error processing {file_path}: {e}")
        return None

# Define your dataset path (one sub-folder of recordings per emotion)
dataset_path = '/content/drive/MyDrive/session 1 2 3 4 5'

# Prepare data containers: one feature vector and one class id per usable file
X = []
y = []

# Emotion labels mapping: sub-folder name -> integer class id
emotion_labels = {
    'anger': 0,
    'disgust': 1,
    'fear': 2,
    'happy': 3,
    'neutral': 4,
    'sadness': 5,
    'sarcastic': 6,
    'surprise': 7
}


# Load the dataset
for emotion, label in emotion_labels.items():
    emotion_path = os.path.join(dataset_path, emotion)
    if not os.path.isdir(emotion_path):
        # Report the missing class folder instead of skipping it silently;
        # otherwise a wrong dataset_path just yields "0 files" with no hint.
        print(f"Directory {emotion_path} does not exist!")
        continue
    for file_name in os.listdir(emotion_path):
        file_path = os.path.join(emotion_path, file_name)
        if not os.path.isfile(file_path):
            # Nested folders cannot be decoded as audio; skip them rather
            # than letting extract_features fail on each one.
            continue
        features = extract_features(file_path)
        if features is not None:
            X.append(features)
            y.append(label)

# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

print(f"Extracted features for {len(X)} files.")

ModuleNotFoundError: No module named 'parselmouth'

Keras model

In [7]:
# Imports come first: this cell previously used KNNImputer and StandardScaler
# before importing them, which raises NameError on a fresh kernel (the
# feature-extraction cell does not import KNNImputer).
import os
import librosa
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score

# Split FIRST so that the imputer and scaler below only ever see training
# rows; fitting them on the full matrix leaks test-set statistics into
# training. Same test_size/random_state as before, so the row partition is
# unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Impute missing values using KNN (neighbours taken from the training rows)
imputer = KNNImputer(n_neighbors=5)
X_train_imputed = imputer.fit_transform(X_train_raw)
X_test_imputed = imputer.transform(X_test_raw)

# Standardize features with the training mean/std
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_imputed)
X_test = scaler.transform(X_test_imputed)

# Define the Keras model: five hidden ReLU layers and one output unit per
# emotion class.
# NOTE(review): the output layer uses sigmoid although the loss is
# sparse_categorical_crossentropy over mutually exclusive classes; softmax
# is the usual pairing. Left unchanged because this run is labelled as the
# sigmoid experiment - confirm that is intended.
model = Sequential([
    Dense(10, input_dim=X_train.shape[1], activation='relu'),
    Dense(20, activation='relu'),
    Dense(30, activation='relu'),
    Dense(15, activation='relu'),
    Dense(8, activation='relu'),
    Dense(len(emotion_labels), activation='sigmoid')
])

# Compile the model (integer class ids -> sparse categorical loss)
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model; the held-out split is only monitored here
history = model.fit(X_train, y_train,
                    epochs=100,
                    batch_size=32,
                    validation_data=(X_test, y_test),
                    verbose=1)

# Evaluate the model
_, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Accuracy: {accuracy*100:.2f}%')




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Running the Keras model so that the activations of its last hidden layer become the input features for the Random Forest

In [8]:
# Imports for this cell (the duplicated import runs are merged into one)
import os
import librosa
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Split FIRST so that the imputer and scaler below only ever see training
# rows; fitting them on the full matrix leaks test-set statistics into
# training. Same test_size/random_state as before, so the row partition is
# unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Impute missing values using KNN (neighbours taken from the training rows)
imputer = KNNImputer(n_neighbors=5)
X_train_imputed = imputer.fit_transform(X_train_raw)
X_test_imputed = imputer.transform(X_test_raw)

# Standardize features with the training mean/std
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_imputed)
X_test = scaler.transform(X_test_imputed)

# Define the Keras model: five hidden ReLU layers and a softmax output with
# one unit per emotion class
model = Sequential([
    Dense(10, input_dim=X_train.shape[1], activation='relu'),
    Dense(20, activation='relu'),
    Dense(30, activation='relu'),
    Dense(15, activation='relu'),
    Dense(8, activation='relu'),
    Dense(len(emotion_labels), activation='softmax')
])

# Compile the Keras model (integer class ids -> sparse categorical loss)
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the Keras model; the held-out split is only monitored here
model.fit(X_train, y_train,
          epochs=100,
          batch_size=32,
          validation_data=(X_test, y_test),
          verbose=1)

# Define a model to extract embeddings: the activations of the last hidden
# layer (the layer just before the softmax output)
embedding_model = Model(inputs=model.input,
                        outputs=model.layers[-2].output)

# Generate embeddings for train and test sets
X_train_embeddings = embedding_model.predict(X_train)
X_test_embeddings = embedding_model.predict(X_test)

# Define the Random Forest Classifier model
rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)

# Train the Random Forest model on embeddings
rf_model.fit(X_train_embeddings, y_train)

# Evaluate the Random Forest model
y_pred = rf_model.predict(X_test_embeddings)
rf_accuracy = accuracy_score(y_test, y_pred)
print(f'Random Forest Model Accuracy: {rf_accuracy*100:.2f}%')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

### Arundhati

16 features, 5 sessions; Random Forest and Keras models trained separately, with the extracted features also collected into a DataFrame

In [9]:
#arundhati
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam

def extract_features(file_path):
    """Build the 100-value feature vector for one audio file.

    The vector holds mean/std statistics of the spectral, chroma, MFCC,
    tonnetz and onset-strength features, followed by pitch statistics and
    several placeholder measures: "jitter" is the std of the pitch track,
    "shimmer" the std of the RMS track, the "HNR" entries are mean/std of
    the output of ``librosa.effects.harmonic``, speech rate is the number of
    non-silent intervals per second, and rhythm is the std of the detected
    beat positions.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A 1-D NumPy array, or None when any step raises (the error is
        printed, not re-raised).
    """
    try:
        signal, rate = librosa.load(file_path, sr=None)

        # Frame-level descriptors
        rms = librosa.feature.rms(y=signal)[0]
        zcr = librosa.feature.zero_crossing_rate(y=signal)[0]
        centroid = librosa.feature.spectral_centroid(y=signal, sr=rate)[0]
        bandwidth = librosa.feature.spectral_bandwidth(y=signal, sr=rate)[0]
        contrast = librosa.feature.spectral_contrast(y=signal, sr=rate)
        flatness = librosa.feature.spectral_flatness(y=signal)
        rolloff = librosa.feature.spectral_rolloff(y=signal, sr=rate)[0]
        chroma_cens = librosa.feature.chroma_cens(y=signal, sr=rate)
        mfccs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=13)
        tonnetz = librosa.feature.tonnetz(y=signal, sr=rate)
        onset_env = librosa.onset.onset_strength(y=signal, sr=rate)

        # Pitch track: in every frame keep the piptrack candidate with the
        # largest magnitude (zero-pitch frames are kept)
        pitch_grid, strength_grid = librosa.core.piptrack(y=signal, sr=rate)
        per_frame_pitch = []
        for frame in range(strength_grid.shape[1]):
            loudest_bin = strength_grid[:, frame].argmax()
            per_frame_pitch.append(pitch_grid[loudest_bin, frame])
        pitch = np.array(per_frame_pitch)

        # Placeholder voice-quality measures (not true jitter/shimmer/HNR)
        jitter = np.std(pitch)
        shimmer = np.std(rms)
        harmonic = librosa.effects.harmonic(signal)

        # Order-2 linear-prediction coefficients
        lpc_coeffs = librosa.lpc(signal, order=2)

        # Placeholder tempo measures: non-silent intervals per second and
        # spread of the beat positions
        segment_count = len(librosa.effects.split(signal, top_db=20))
        speech_rate = segment_count / (len(signal) / rate)
        beat_positions = librosa.beat.beat_track(y=signal, sr=rate)[1]
        rhythm = np.std(beat_positions)

        def overall(track):
            return [np.mean(track), np.std(track)]

        def per_row(matrix):
            return [np.mean(matrix, axis=1), np.std(matrix, axis=1)]

        # The concatenation order below defines the layout of the vector.
        pieces = (
            overall(rms) + overall(zcr) + overall(centroid) + overall(bandwidth)
            + per_row(contrast)
            + overall(flatness) + overall(rolloff)
            + per_row(chroma_cens) + per_row(mfccs) + per_row(tonnetz)
            + overall(onset_env)
            + overall(pitch)
            + [jitter, shimmer]
            + overall(harmonic)
            + overall(lpc_coeffs)
            + [speech_rate, rhythm]
        )
        return np.hstack(pieces)

    except Exception as e:
        # Best effort: a file that cannot be processed is reported and skipped.
        print(f"Error processing {file_path}: {e}")
        return None

# Google Drive folder that holds one sub-folder of recordings per emotion
dataset_path = '/content/drive/MyDrive/session 1 2 3 4 5'

# Sub-folder name -> integer class id used as the training target
emotion_labels = {
    'happy': 0, 'sadness': 1, 'anger': 2, 'disgust': 3,
    'fear': 4, 'neutral': 5, 'surprise': 6, 'sarcastic': 7,
}

# Parallel lists: one feature vector and one class id per usable file
X, y = [], []

for emotion, label in emotion_labels.items():
    emotion_path = os.path.join(dataset_path, emotion)
    if not os.path.exists(emotion_path):
        print(f"Directory {emotion_path} does not exist!")
        continue

    # Only .wav entries are considered
    wav_names = [name for name in os.listdir(emotion_path) if name.endswith('.wav')]
    for file_name in wav_names:
        file_path = os.path.join(emotion_path, file_name)
        if not os.path.isfile(file_path):
            print(f"File {file_path} does not exist!")
            continue

        features = extract_features(file_path)
        if features is None:
            # extract_features already printed the reason
            continue
        X.append(features)
        y.append(label)

X = np.array(X)
y = np.array(y)

# Column names for the feature table, generated in the order in which the
# statistics are concatenated into each feature vector.
def _mean_std(name):
    # Names for a feature summarised by a single mean and a single std.
    return [f'mean_{name}', f'std_{name}']


def _indexed(name, count):
    # Names for a multi-row feature: all per-row means first, then all stds.
    means = [f'mean_{name}_{i}' for i in range(count)]
    stds = [f'std_{name}_{i}' for i in range(count)]
    return means + stds


columns = (
    _mean_std('rms')
    + _mean_std('zcr')
    + _mean_std('spectral_centroid')
    + _mean_std('spectral_bandwidth')
    + _indexed('spectral_contrast', 7)
    + _mean_std('spectral_flatness')
    + _mean_std('spectral_rolloff')
    + _indexed('chroma_cens', 12)
    + _indexed('mfcc', 13)
    + _indexed('tonnetz', 6)
    + _mean_std('spectral_flux')
    + _mean_std('pitch')
    + ['jitter', 'shimmer']
    + _mean_std('hnr')
    + _mean_std('lpc')
    + ['speech_rate', 'rhythm']
)

# Tabular view of the raw features with the class id as the last column
df = pd.DataFrame(X, columns=columns)
df['label'] = y

print(df.head())

# Split FIRST so that the imputer, scaler and PCA below only ever see
# training rows; fitting them on the full matrix leaks test-set statistics
# into training. Same test_size/random_state as before, so the row partition
# is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Preprocessing: KNN imputation, then standardization, both fitted on the
# training rows and merely applied to the test rows
imputer = KNNImputer(n_neighbors=5)
X_train_imputed = imputer.fit_transform(X_train_raw)
X_test_imputed = imputer.transform(X_test_raw)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Apply PCA, keeping enough components to preserve 95% of the training variance
pca = PCA(n_components=0.95)
X_train = pca.fit_transform(X_train_scaled)
X_test = pca.transform(X_test_scaled)

# Define the Keras model: five hidden ReLU layers and one output unit per
# emotion class.
# NOTE(review): the output layer uses sigmoid although the loss is
# sparse_categorical_crossentropy over mutually exclusive classes; softmax
# is the usual pairing. Left unchanged - confirm that is intended.
model = Sequential([
    Dense(10, input_dim=X_train.shape[1], activation='relu'),
    Dense(20, activation='relu'),
    Dense(30, activation='relu'),
    Dense(15, activation='relu'),
    Dense(8, activation='relu'),
    Dense(len(emotion_labels), activation='sigmoid')
])

# Compile the model (integer class ids -> sparse categorical loss)
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model; the held-out split is only monitored here
history = model.fit(X_train, y_train,
                    epochs=100,
                    batch_size=32,
                    validation_data=(X_test, y_test),
                    verbose=1)

# Evaluate the model
_, nn_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Neural Network Accuracy: {nn_accuracy*100:.2f}%')

# Random Forest trained directly on the PCA components (independent of the
# neural network above)
rf_clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
rf_clf.fit(X_train, y_train)
y_pred_rf = rf_clf.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print(f'Random Forest Accuracy: {rf_accuracy*100:.2f}%')



   mean_rms   std_rms  mean_zcr   std_zcr  mean_spectral_centroid  \
0  0.021318  0.034507  0.342473  0.193381             2964.015876   
1  0.035290  0.041271  0.278797  0.202399             2532.520663   
2  0.031057  0.042614  0.289204  0.184970             2588.143235   
3  0.053509  0.051106  0.259393  0.208550             2307.247640   
4  0.025330  0.033304  0.323277  0.187290             2902.953816   

   std_spectral_centroid  mean_spectral_bandwidth  std_spectral_bandwidth  \
0            1399.518880              1875.236206              560.685464   
1            1472.584819              1784.454456              537.136479   
2            1388.300873              1750.071833              577.251201   
3            1548.336571              1593.412791              590.146747   
4            1274.078400              1891.857977              436.548851   

   mean_spectral_contrast_0  mean_spectral_contrast_1  ...    std_pitch  \
0                 30.355393                 14.