In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
#package for music and audio analysis
import librosa
import librosa.display
from IPython.display import Audio
import warnings
warnings.filterwarnings('ignore')

# Load data from the TESS dataset
data = []
labels = []
for dirname, _, filenames in os.walk('/content/drive/MyDrive/TESS Toronto emotional speech set data'):
    for filename in filenames:
        data.append(os.path.join(dirname, filename))
        label = filename.split('_')[-1]
        label = label.split('.')[0]
        labels.append(label.lower())
print('Dataset is Loaded')

Dataset is Loaded


In [3]:
import os
import librosa
import numpy as np
import librosa.display
import matplotlib.pyplot as plt

# Function to generate and save spectrogram
def generate_and_save_spectrogram(file_path, output_dir):
    # Load audio file
    audio_data, sr = librosa.load(file_path)

    # Generate spectrogram
    spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sr)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)

    # Plot and save the spectrogram
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(spectrogram_db, y_axis='mel', x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Save the spectrogram as an image file if it doesn't exist
    output_file = os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '_spectrogram.png')
    if not os.path.exists(output_file):
        plt.savefig(output_file)

    plt.close()

# Path to the TESS dataset
data_path = '/content/drive/MyDrive/TESS Toronto emotional speech set data'  # Update this path to the location of your TESS dataset
output_directory = '/content/drive/MyDrive/Final Year/Spectogram'  # Update this path to the desired output directory

# Iterate through audio files in the dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith('.wav'):
            file_path = os.path.join(root, file)
            generate_and_save_spectrogram(file_path, output_directory)


In [4]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Function to extract features from spectrogram
def extract_features(file_path, target_shape):
    audio_data, _ = librosa.load(file_path)
    spectrogram = librosa.feature.melspectrogram(y=audio_data)
    spectrogram_flat = np.ravel(spectrogram)

    # Pad or truncate to the target shape
    if len(spectrogram_flat) < target_shape:
        spectrogram_flat = np.pad(spectrogram_flat, (0, target_shape - len(spectrogram_flat)))
    else:
        spectrogram_flat = spectrogram_flat[:target_shape]

    return spectrogram_flat

# Path to the TESS dataset
data_path = '/content/drive/MyDrive/TESS Toronto emotional speech set data'
output_directory = '/content/drive/MyDrive/Final Year/Spectrogram'

# Define a target shape for the features
target_shape = 10000  # You can adjust this value based on your needs

# Create a list to store features and labels
features = []
labels = []

# Iterate through audio files in the dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith('.wav'):
            file_path = os.path.join(root, file)
            spectrogram_features = extract_features(file_path, target_shape)
            features.append(spectrogram_features)
            label = file.split('_')[-1].split('.')[0].lower()
            labels.append(label)

# Convert labels to numerical format
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, encoded_labels, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a Random Forest classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 87.32%


In [5]:
import os
import librosa
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import joblib
from IPython.display import Audio

# Function to extract features from spectrogram
def extract_features(file_path, target_shape):
    audio_data, _ = librosa.load(file_path)
    spectrogram = librosa.feature.melspectrogram(y=audio_data)
    spectrogram_flat = np.ravel(spectrogram)

    # Pad or truncate to the target shape
    if len(spectrogram_flat) < target_shape:
        spectrogram_flat = np.pad(spectrogram_flat, (0, target_shape - len(spectrogram_flat)))
    else:
        spectrogram_flat = spectrogram_flat[:target_shape]

    return spectrogram_flat

# Path to the TESS dataset
data_path = '/content/drive/MyDrive/TESS Toronto emotional speech set data'
output_directory = '/content/drive/MyDrive/Final Year/Spectrogram'

# Define a target shape for the features
target_shape = 10000  # You can adjust this value based on your needs

# Create a list to store features and labels
features = []
labels = []

# Iterate through audio files in the dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith('.wav'):
            file_path = os.path.join(root, file)
            spectrogram_features = extract_features(file_path, target_shape)
            features.append(spectrogram_features)
            label = file.split('_')[-1].split('.')[0].lower()
            labels.append(label)

# Convert labels to numerical format
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, encoded_labels, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a Random Forest classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the trained model to a pickle file
model_save_path = '/content/drive/MyDrive/Final Year/RandomForestModel.h5'
joblib.dump(model, model_save_path)

# Function to predict emotion from audio path
def predict_emotion(audio_path):
    # Load the trained Random Forest model
    model = joblib.load(model_save_path)

    # Extract features from the audio file
    features = extract_features(audio_path, target_shape)

    # Standardize features
    features = scaler.transform([features])

    # Make prediction
    prediction = model.predict(features)

    # Decode the numerical prediction to emotion label
    predicted_emotion = label_encoder.inverse_transform(prediction)[0]

    return predicted_emotion

# Example usage
audio_path_to_predict = '/content/drive/MyDrive/TESS Toronto emotional speech set data/YAF_sad/YAF_back_sad.wav'  # Replace with the actual path to your audio file
predicted_emotion = predict_emotion(audio_path_to_predict)
print(f"Predicted Emotion: {predicted_emotion}")

# Optionally, play the audio for verification
audio, _ = librosa.load(audio_path_to_predict)
Audio(audio, rate=_)

Predicted Emotion: sad


In [6]:
audio_files_to_predict = [

    '/content/drive/MyDrive/Final Year/angry6.wav',
    '/content/drive/MyDrive/Final Year/angry5.wav',
    '/content/drive/MyDrive/Final Year/happy5(disgust).wav',
    '/content/drive/MyDrive/Final Year/neutral5(sad).wav',
    '/content/drive/MyDrive/Final Year/neutral4.wav',
    '/content/drive/MyDrive/Final Year/happy4(neutraldisgustsad).wav',
    '/content/drive/MyDrive/Final Year/negative(neutral).wav',
    '/content/drive/MyDrive/Final Year/angry3.wav',
    '/content/drive/MyDrive/Final Year/silences(happy).wav',
    '/content/drive/MyDrive/Final Year/happy3.wav',
    '/content/drive/MyDrive/Final Year/angry4.wav',
    '/content/drive/MyDrive/Final Year/neutral3(sad).wav',
    '/content/drive/MyDrive/Final Year/happy2.wav',
    '/content/drive/MyDrive/Final Year/neutral2(sad).wav',
    '/content/drive/MyDrive/Final Year/happy.wav'
]

# Predict emotions for each audio file
for audio_path_to_predict in audio_files_to_predict:
    predicted_emotion = predict_emotion(audio_path_to_predict)
    print(f"Audio: {audio_path_to_predict}, Predicted Emotion: {predicted_emotion}")

Audio: /content/drive/MyDrive/Final Year/angry6.wav, Predicted Emotion: disgust
Audio: /content/drive/MyDrive/Final Year/angry5.wav, Predicted Emotion: angry
Audio: /content/drive/MyDrive/Final Year/happy5(disgust).wav, Predicted Emotion: angry
Audio: /content/drive/MyDrive/Final Year/neutral5(sad).wav, Predicted Emotion: angry
Audio: /content/drive/MyDrive/Final Year/neutral4.wav, Predicted Emotion: angry
Audio: /content/drive/MyDrive/Final Year/happy4(neutraldisgustsad).wav, Predicted Emotion: angry
Audio: /content/drive/MyDrive/Final Year/negative(neutral).wav, Predicted Emotion: angry
Audio: /content/drive/MyDrive/Final Year/angry3.wav, Predicted Emotion: happy
Audio: /content/drive/MyDrive/Final Year/silences(happy).wav, Predicted Emotion: angry
Audio: /content/drive/MyDrive/Final Year/happy3.wav, Predicted Emotion: angry
Audio: /content/drive/MyDrive/Final Year/angry4.wav, Predicted Emotion: angry
Audio: /content/drive/MyDrive/Final Year/neutral3(sad).wav, Predicted Emotion: disg

In [7]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Path to the directory containing spectrogram images
spectrogram_dir = '/content/drive/MyDrive/Final Year/Spectogram'

# Function to load spectrogram images and corresponding labels
def load_data(directory):
    spectrograms = []
    labels = []

    for filename in os.listdir(directory):
        if filename.endswith('.png'):
            filepath = os.path.join(directory, filename)
            label = filename.split('_')[0]  # Assuming the label is the first part of the filename
            img = tf.keras.preprocessing.image.load_img(filepath, target_size=(224, 224))
            img_array = tf.keras.preprocessing.image.img_to_array(img)
            spectrograms.append(img_array)
            labels.append(label)

    return np.array(spectrograms), np.array(labels)

# Load spectrograms and labels
spectrograms, labels = load_data(spectrogram_dir)

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(spectrograms, encoded_labels, test_size=0.2, random_state=42)

# Define the CNN model
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(len(label_encoder.classes_), activation='softmax'))

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

# Save the model
model.save('/content/drive/MyDrive/Final Year/spectrogram_cnn_model.h5')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.9982143044471741


In [8]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Load the trained CNN model
model = tf.keras.models.load_model('/content/drive/MyDrive/Final Year/spectrogram_cnn_model.h5')

# Function to preprocess audio and make predictions
def predict_emotion(audio_path):
    # Load audio file
    audio_data, sr = librosa.load(audio_path)

    # Generate spectrogram
    spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sr)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)

    # Resize the spectrogram image to match the model input shape
    spectrogram_image = tf.image.resize(np.expand_dims(spectrogram_db, axis=-1), (224, 224))

    # Replicate the single channel to create three identical channels
    spectrogram_image = tf.image.grayscale_to_rgb(spectrogram_image)

    # Reshape and normalize the spectrogram
    spectrogram_image = np.expand_dims(spectrogram_image, axis=0)
    spectrogram_image = tf.keras.utils.normalize(spectrogram_image, axis=1)

    # Make prediction
    prediction = model.predict(spectrogram_image)

    # Decode the predicted label
    predicted_label = label_encoder.classes_[np.argmax(prediction)]

    return predicted_label

# Example usage
audio_path = '/content/drive/MyDrive/Final Year/angry3.wav'
predicted_emotion = predict_emotion(audio_path)
print(f'Predicted Emotion: {predicted_emotion}')


Predicted Emotion: OAF


In [9]:
audio_files_to_predict = [

    '/content/drive/MyDrive/Final Year/angry6.wav',
    '/content/drive/MyDrive/Final Year/angry5.wav',
    '/content/drive/MyDrive/Final Year/happy5(disgust).wav',
    '/content/drive/MyDrive/Final Year/neutral5(sad).wav',
    '/content/drive/MyDrive/Final Year/neutral4.wav',
    '/content/drive/MyDrive/Final Year/happy4(neutraldisgustsad).wav',
    '/content/drive/MyDrive/Final Year/negative(neutral).wav',
    '/content/drive/MyDrive/Final Year/angry3.wav',
    '/content/drive/MyDrive/Final Year/silences(happy).wav',
    '/content/drive/MyDrive/Final Year/happy3.wav',
    '/content/drive/MyDrive/Final Year/angry4.wav',
    '/content/drive/MyDrive/Final Year/neutral3(sad).wav',
    '/content/drive/MyDrive/Final Year/happy2.wav',
    '/content/drive/MyDrive/Final Year/neutral2(sad).wav',
    '/content/drive/MyDrive/Final Year/happy.wav'
]

# Predict emotions for each audio file
for audio_path_to_predict in audio_files_to_predict:
    predicted_emotion = predict_emotion(audio_path_to_predict)
    print(f"Audio: {audio_path_to_predict}, Predicted Emotion: {predicted_emotion}")

Audio: /content/drive/MyDrive/Final Year/angry6.wav, Predicted Emotion: OAF
Audio: /content/drive/MyDrive/Final Year/angry5.wav, Predicted Emotion: OAF
Audio: /content/drive/MyDrive/Final Year/happy5(disgust).wav, Predicted Emotion: OAF
Audio: /content/drive/MyDrive/Final Year/neutral5(sad).wav, Predicted Emotion: OAF
Audio: /content/drive/MyDrive/Final Year/neutral4.wav, Predicted Emotion: OAF
Audio: /content/drive/MyDrive/Final Year/happy4(neutraldisgustsad).wav, Predicted Emotion: OAF
Audio: /content/drive/MyDrive/Final Year/negative(neutral).wav, Predicted Emotion: OAF
Audio: /content/drive/MyDrive/Final Year/angry3.wav, Predicted Emotion: OAF
Audio: /content/drive/MyDrive/Final Year/silences(happy).wav, Predicted Emotion: OAF
Audio: /content/drive/MyDrive/Final Year/happy3.wav, Predicted Emotion: OAF
Audio: /content/drive/MyDrive/Final Year/angry4.wav, Predicted Emotion: OAF
Audio: /content/drive/MyDrive/Final Year/neutral3(sad).wav, Predicted Emotion: OAF
Audio: /content/drive/My

In [None]:
import os
import librosa
import numpy as np
from keras.models import load_model

# Function to extract features from spectrogram
def extract_features(file_path, pad_length=128):
    audio_data, _ = librosa.load(file_path)
    spectrogram = librosa.feature.melspectrogram(y=audio_data)
    spectrogram = librosa.power_to_db(spectrogram, ref=np.max)  # Convert to dB scale

    # Pad or truncate to a fixed length
    if spectrogram.shape[1] < pad_length:
        spectrogram = np.pad(spectrogram, ((0, 0), (0, pad_length - spectrogram.shape[1])))
    else:
        spectrogram = spectrogram[:, :pad_length]

    return spectrogram

# Load the trained CNN model
model_path = '/content/drive/MyDrive/emotionRecognitionModelcnn.h5'  # Update with the path to your trained model file
model = load_model(model_path)

# Function to predict emotion from an audio file
def predict_emotion(audio_path):
    # Extract features from the audio file
    spectrogram = extract_features(audio_path)
    spectrogram = spectrogram[np.newaxis, :, :, np.newaxis]  # Add batch and channel dimensions

    # Make prediction
    predictions = model.predict(spectrogram)
    predicted_class = np.argmax(predictions)

    # Decode the predicted class to emotion label
    emotion_label = label_encoder.classes_[predicted_class]

    return emotion_label

# Example usage:
audio_path_to_predict = '//content/drive/MyDrive/Final Year/call.wav'  # Update with the path to the audio file you want to predict
predicted_emotion = predict_emotion(audio_path_to_predict)

print(f"Predicted Emotion: {predicted_emotion}")


In [None]:
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Function to extract emotion label from the file name
def extract_emotion_label(file):
    # Assuming the emotion label is encoded in the file name
    # You may need to adjust this based on your file naming convention
    emotion = file.split('_')[0]  # Extract emotion from the file name (e.g., 'happy_spectrogram.png')
    return emotion

# Path to the directory containing generated spectrograms
output_directory = '/content/drive/MyDrive/Final Year/Spectogram'  # Update this path to the directory containing your generated spectrograms

# Load spectrograms and labels
X, y = [], []

for root, dirs, files in os.walk(output_directory):
    for file in files:
        if file.endswith('_spectrogram.png'):
            file_path = os.path.join(root, file)

            # Extract emotion label from the file name
            emotion_label = extract_emotion_label(file)

            # Append spectrogram data and emotion label to lists
            X.append(file_path)
            y.append(emotion_label)

# Encode emotion labels to numerical values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Print the sizes of the training and testing sets
print("Number of training samples:", len(X_train))
print("Number of testing samples:", len(X_test))


Extracted Emotion Labels: []
Encoded Labels: []


ValueError: zero-size array to reduction operation maximum which has no identity

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Function to load spectrograms and labels
def load_data(data_path):
    X, y = [], []

    for root, dirs, files in os.walk(data_path):
        for file in files:
            if file.endswith('_spectrogram.png'):  # Assuming your spectrogram files end with "_spectrogram.png"
                file_path = os.path.join(root, file)

                # Load your spectrogram image data (you may use a library like PIL or OpenCV for this)
                spectrogram_data = load_spectrogram(file_path)

                # Extract emotion label from the file name or metadata
                emotion = extract_emotion_label(file)

                X.append(spectrogram_data)
                y.append(emotion)

    return np.array(X), np.array(y)

# Placeholder functions for loading spectrogram data and extracting emotion labels
def load_spectrogram(file_path):
    # Implement code to load your spectrogram image data (e.g., using PIL or OpenCV)
    # Return the loaded data as a NumPy array
    return np.zeros((your_spectrogram_height, your_spectrogram_width, your_spectrogram_channels))

def extract_emotion_label(file):
    # Assuming the emotion label is encoded in the file name
    # You may need to adjust this based on your file naming convention
    emotion = file.split('_')[0]  # Extract emotion from the file name (e.g., 'happy_spectrogram.png')
    return emotion

# Path to the directory containing generated spectrograms
data_path = '/content/drive/MyDrive/TESS Toronto emotional speech set data'

# Load spectrograms and labels
X, y = load_data(data_path)

# Print extracted emotion labels
print("Extracted Emotion Labels:", y)

# Check if there are any extracted labels
if not y:
    print("No emotion labels found. Check your label extraction logic.")
else:
    # Encode emotion labels to numerical values
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # Print encoded labels
    print("Encoded Labels:", y_encoded)

    # Check if there are any encoded labels
    if not np.any(y_encoded):
        print("No encoded labels found. Check your label encoding logic.")
    else:
        # Convert numerical labels to one-hot encoding
        y_one_hot = to_categorical(y_encoded)

        # Print one-hot encoded labels
        print("One-Hot Encoded Labels:", y_one_hot)

        # Split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.2, random_state=42)

        # Build a simple CNN model
        model = Sequential()
        model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(your_spectrogram_height, your_spectrogram_width, your_spectrogram_channels)))
        model.add(MaxPooling2D((2, 2)))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(7, activation='softmax'))  # Adjust num_classes based on the number of emotion classes

        # Compile the model
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        # Train the model
        model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

        # Evaluate the model on the test set
        loss, accuracy = model.evaluate(X_test, y_test)
        print(f'Loss: {loss}, Accuracy: {accuracy}')


Extracted Emotion Labels: []
No emotion labels found. Check your label extraction logic.


  if not y:


In [None]:
import os
import librosa
import numpy as np
import librosa.display
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Function to extract features from audio file
def extract_features(file_path):
    audio_data, sr = librosa.load(file_path)
    spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sr)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
    return spectrogram_db

# Load and preprocess the dataset
data_path = '/content/drive/MyDrive/TESS Toronto emotional speech set data'
labels = []  # Assuming you have labels for each audio file
data = []

for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith('.wav'):
            file_path = os.path.join(root, file)
            label = # Extract label from the file name or any other way
            labels.append(label)
            features = extract_features(file_path)
            data.append(features)

# Convert labels to numerical format
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)
encoded_labels_categorical = to_categorical(encoded_labels)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(np.array(data), encoded_labels_categorical, test_size=0.2, random_state=42)

# Define the model
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(X_train.shape[1], X_train.shape[2], 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))  # Adjust num_classes based on your task

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(np.expand_dims(X_train, axis=-1), y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model
accuracy = model.evaluate(np.expand_dims(X_test, axis=-1), y_test)[1]
print(f'Test Accuracy: {accuracy}')


SyntaxError: invalid syntax (<ipython-input-3-d11b36ed2990>, line 28)

In [None]:
import os
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Path to the directory containing spectrogram images
spectrogram_dir = '/content/drive/MyDrive/Final Year/Spectogram'

# Specify input shape based on the size of your spectrogram images
input_shape = (128, 431, 3)  # Adjust based on your spectrogram dimensions

# Create a simple CNN model
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(7, activation='softmax'))  # num_classes is the number of emotion classes

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Create an ImageDataGenerator for data augmentation and normalization
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Generate training and validation datasets
batch_size = 32
train_generator = datagen.flow_from_directory(
    spectrogram_dir,
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

validation_generator = datagen.flow_from_directory(
    spectrogram_dir,
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

# Train the model
epochs = 10  # Adjust as needed
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // batch_size
)

# Save the trained model
model.save('/content/drive/MyDrive/Final Year/emotion_model.h5')


Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.


ValueError: Unexpected value for `steps_per_epoch`. Received value is 0. Please check the docstring for `model.fit()` for supported values.

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Set random seed for reproducibility
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Define paths
train_dir = '/content/drive/MyDrive/Final Year/Spectogram/train'
test_dir = '/content/drive/MyDrive/Final Year/Spectogram/test'

# Image data generators for training and testing
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
test_datagen = ImageDataGenerator(rescale=1./255)

batch_size = 32
img_height, img_width = 224, 224

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',  # Change to 'categorical' if you have more than two classes
    subset='training'
)

validation_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',  # Change to 'categorical' if you have more than two classes
    subset='validation'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',  # Change to 'categorical' if you have more than two classes
    shuffle=False
)

# Define the model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Change to the number of classes

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
epochs = 10
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // batch_size,
    epochs=epochs
)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_generator, steps=test_generator.samples // batch_size)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

# Save the trained model
model.save('/content/drive/MyDrive/Final Year/spectrogram_model.h5')


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Final Year/Spectogram/train'

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Function to load spectrogram images and labels
def load_data(data_dir):
    spectrograms = []
    labels = []

    for label in os.listdir(data_dir):
        label_path = os.path.join(data_dir, label)
        if os.path.isdir(label_path):
            for file in os.listdir(label_path):
                if file.endswith('_spectrogram.png'):
                    img_path = os.path.join(label_path, file)
                    spectrograms.append(img_path)
                    labels.append(label)

    return spectrograms, labels

# Load spectrogram data and labels
spectrograms, labels = load_data('/content/drive/MyDrive/Final Year/Spectogram')

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(spectrograms, encoded_labels, test_size=0.2, random_state=42)

# Define a simple CNN model
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 431, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(len(np.unique(encoded_labels)), activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Function to load and preprocess images for training/testing
def preprocess_images(image_paths, labels):
    images = []
    for path in image_paths:
        img = tf.keras.preprocessing.image.load_img(path, target_size=(128, 431))
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        images.append(img_array)

    images = np.array(images)
    labels = np.array(labels)

    return images, labels

# Preprocess training and testing images
X_train_processed, y_train_processed = preprocess_images(X_train, y_train)
X_test_processed, y_test_processed = preprocess_images(X_test, y_test)

# Train the model
model.fit(X_train_processed, y_train_processed, epochs=10, validation_data=(X_test_processed, y_test_processed))

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test_processed, y_test_processed, verbose=2)
print('\nTest accuracy:', test_acc)


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [None]:
# Load spectrogram data and labels
spectrograms, labels = load_data('/content/drive/MyDrive/Final Year/Spectogram')

# Check if the data is loaded correctly
print("Number of spectrograms:", len(spectrograms))
print("Number of labels:", len(labels))

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Check if labels are encoded correctly
print("Encoded labels:", encoded_labels)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(spectrograms, encoded_labels, test_size=0.2, random_state=42)

# Check the sizes of the training and testing sets
print("Training set size:", len(X_train))
print("Testing set size:", len(X_test))


Number of spectrograms: 0
Number of labels: 0
Encoded labels: []


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras
from tensorflow.keras import layers

# Path to the directory containing the spectrograms
spectrogram_dir = '/content/drive/MyDrive/Final Year/Spectogram'

# Function to load spectrograms and labels
from PIL import UnidentifiedImageError

def load_data():
    spectrograms = []
    labels = []

    for file in os.listdir(spectrogram_dir):
        file_path = os.path.join(spectrogram_dir, file)

        try:
            # Load spectrogram as an image
            spectrogram = keras.preprocessing.image.load_img(file_path, target_size=(128, 256))
            spectrogram_array = keras.preprocessing.image.img_to_array(spectrogram)
            spectrograms.append(spectrogram_array)

            # Extract label from the file name
            labels.append(file.split('_')[0])
        except UnidentifiedImageError:
            print(f"Skipping file: {file_path}. Not a valid image.")

    return np.array(spectrograms), np.array(labels)

# Load data
spectrograms, labels = load_data()

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(spectrograms, encoded_labels, test_size=0.2, random_state=42)

# Define the neural network model
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 256, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc * 100:.2f}%')


Skipping file: /content/drive/MyDrive/Final Year/Spectogram/OAF_OAF_rush_fear_spectrogram.png. Not a valid image.
Skipping file: /content/drive/MyDrive/Final Year/Spectogram/YAF_sour_disgust_spectrogram.png. Not a valid image.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 100.00%


In [None]:
model.save("/content/drive/MyDrive/RecognitionModelcnn.h5")

  saving_api.save_model(


In [None]:
import os
import librosa
import numpy as np
import librosa.display
import matplotlib.pyplot as plt
from tensorflow import keras

# Function to preprocess the spectrogram
def preprocess_spectrogram(file_path):
    # Load audio file
    audio_data, sr = librosa.load(file_path)

    # Generate spectrogram
    spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sr)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)

    # Resize spectrogram to match the input shape of the model (128, 256)
    spectrogram_resized = np.resize(spectrogram_db, (128, 256))

    # Normalize the spectrogram
    spectrogram_normalized = (spectrogram_resized - np.min(spectrogram_resized)) / (
            np.max(spectrogram_resized) - np.min(spectrogram_resized))

    # Create an input with three channels
    spectrogram_input = np.stack([spectrogram_normalized] * 3, axis=-1)

    return spectrogram_input

# Function to predict emotion from audio file
def predict_emotion(audio_path, model, label_encoder):
    # Preprocess the spectrogram
    spectrogram_input = preprocess_spectrogram(audio_path)

    # Make prediction using the trained model
    prediction_probabilities = model.predict(np.expand_dims(spectrogram_input, axis=0))[0]

    # Get the predicted label
    predicted_label_index = np.argmax(prediction_probabilities)
    predicted_label = label_encoder.classes_[predicted_label_index]

    return predicted_label

# Path to the trained model
model_path = '/content/drive/MyDrive/RecognitionModelcnn.h5'  # Update this path to the location of your trained model

# Load the trained model
model = keras.models.load_model(model_path)

# Path to the audio file you want to predict
audio_path_to_predict = '/content/drive/MyDrive/Final Year/happy.wav'  # Update this path to the location of your new audio file

# Predict emotion
predicted_emotion = predict_emotion(audio_path_to_predict, model, label_encoder)

# Display the predicted emotion
print(f'Predicted Emotion: {predicted_emotion}')


Predicted Emotion: OAF


In [None]:
!pip install resampy


Collecting resampy
  Downloading resampy-0.4.2-py3-none-any.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: resampy
Successfully installed resampy-0.4.2


In [None]:
!pip uninstall librosa resampy -y
!pip install librosa resampy

Found existing installation: librosa 0.10.1
Uninstalling librosa-0.10.1:
  Successfully uninstalled librosa-0.10.1
Found existing installation: resampy 0.4.2
Uninstalling resampy-0.4.2:
  Successfully uninstalled resampy-0.4.2
Collecting librosa
  Downloading librosa-0.10.1-py3-none-any.whl (253 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.7/253.7 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting resampy
  Using cached resampy-0.4.2-py3-none-any.whl (3.1 MB)
Installing collected packages: resampy, librosa
Successfully installed librosa-0.10.1 resampy-0.4.2


In [None]:
from pydub import AudioSegment
import numpy as np
from tensorflow import keras

# Function to preprocess an audio file for prediction
def preprocess_audio_for_prediction(file_path):
    # Load audio file using pydub
    audio = AudioSegment.from_file(file_path)

    # Set the target sample rate
    target_sr = 22050  # or any other sample rate you desire

    # Resample the audio
    audio = audio.set_frame_rate(target_sr)

    # Convert the audio to numpy array with float32 format
    audio_data = np.array(audio.get_array_of_samples(), dtype=np.float32)

    # Normalize the audio data to the range [-1, 1]
    audio_data /= np.max(np.abs(audio_data))

    # Generate spectrogram
    spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=target_sr)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)

    # Resize spectrogram to match the model input size
    spectrogram_resized = keras.preprocessing.image.smart_resize(
        spectrogram_db, (256, 256), interpolation='bilinear'
    )

    # Convert to 3D array (height, width, channels)
    spectrogram_input = np.expand_dims(spectrogram_resized, axis=-1)

    return spectrogram_input


# Function to predict emotion from an audio file
def predict_emotion(audio_file_path, model, label_encoder):
    # Preprocess audio for prediction
    preprocessed_spectrogram = preprocess_audio_for_prediction(audio_file_path)

    # Make the prediction
    predicted_probs = model.predict(np.array([preprocessed_spectrogram]))
    predicted_class = np.argmax(predicted_probs)

    # Decode the predicted class using label encoder
    predicted_emotion = label_encoder.classes_[predicted_class]

    return predicted_emotion

# Example usage for prediction
audio_file_path = '/content/drive/MyDrive/TESS Toronto emotional speech set data/OAF_Sad/OAF_back_sad.wav'
predicted_emotion = predict_emotion(audio_file_path, model, label_encoder)

print(f"Predicted Emotion: {predicted_emotion}")


ValueError: Expected an image array with shape `(height, width, channels)`, or `(batch_size, height, width, channels)`, but got input with incorrect rank, of shape (128, 111).