In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import wave
import librosa
import librosa.display
import matplotlib.pyplot as plt

In [None]:
# Source (tutorial on plotting WAV waveforms in Python):
# https://learnpython.com/blog/plot-waveform-in-python/

In [None]:
# Specify the path to the audio file.
# NOTE(review): this is a hard-coded absolute path; it presumably only
# resolves on the original author's machine -- confirm or make configurable.
audio_file_path = "/fitzingout/proxmoxve/datasets/UrbanSodcdc/audio/fold1/101415-3-0-2.wav"

# Open the audio file for reading with the standard-library wave module.
# The handle is not closed in this cell: the following cells query it for
# its frame rate, frame count and channel count and then read its frames.
wav_obj = wave.open(audio_file_path, 'rb')

In [None]:
# Pull the basic stream parameters out of the open WAV handle.
sample_freq = wav_obj.getframerate()
n_samples = wav_obj.getnframes()
num_channels = wav_obj.getnchannels()

# Clip length in seconds: number of frames divided by frames per second.
duration = n_samples / sample_freq

# Report the four values with one template, filled in positionally.
summary_template = "Sample Frequency: {} \nNumber of Samples: {} \nDuration: {} \nNumber of Channels: {}"
summary_values = (sample_freq, n_samples, duration, num_channels)
print(summary_template.format(*summary_values))

In [None]:
# Read the audio signals.
# Rewind first so this cell can be re-run: the handle keeps its read
# position, and a second readframes() would otherwise return no data.
wav_obj.rewind()
wave_signal = wav_obj.readframes(n_samples)

# Pick the NumPy dtype from the file's actual sample width instead of
# assuming 16-bit audio. WAV PCM data is little-endian, and 8-bit samples
# are unsigned.
_WAV_DTYPES = {1: np.uint8, 2: '<i2', 4: '<i4'}
_sample_width = wav_obj.getsampwidth()
if _sample_width not in _WAV_DTYPES:
    raise ValueError(
        "Unsupported sample width of {} bytes; expected 1, 2 or 4".format(_sample_width)
    )
signals = np.frombuffer(wave_signal, dtype=_WAV_DTYPES[_sample_width])

# Frames of a multi-channel file are interleaved (ch0, ch1, ch0, ch1, ...).
# Give each channel its own column so the array has one row per frame;
# mono audio stays 1-D exactly as before.
_n_channels = wav_obj.getnchannels()
if _n_channels > 1:
    signals = signals.reshape(-1, _n_channels)

In [None]:
# Visualize the waveform.
# Arrange the samples as one row per frame and one column per channel. This
# also copes with `signals` being a flat, interleaved multi-channel buffer,
# which cannot be plotted against a per-frame time axis directly.
waveform = np.asarray(signals).reshape(-1, num_channels)

# Frame k is sampled at k / sample_freq seconds. (linspace(0, duration, n)
# includes the end point and therefore stretches the axis by n / (n - 1).)
times = np.arange(waveform.shape[0]) / sample_freq

fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(times, waveform)  # a 2-D y draws one line per column, i.e. per channel
ax.set_title('Visualization of audio file waveform')
ax.set_xlabel('Time (s)')
ax.set_ylabel('Signal value')
ax.set_xlim(0, duration)
plt.show()

In [None]:
# Load the audio file using librosa.
# This decodes the file a second time, independently of the `wave` handle
# used for the waveform plot. librosa.load is called with its default
# arguments, so `sr` is whatever rate librosa returns.
# NOTE(review): librosa presumably resamples by default, so `sr` may differ
# from `sample_freq` -- confirm against the librosa.load documentation.
y, sr = librosa.load(audio_file_path)

# Calculate MFCCs, requesting 13 coefficients (n_mfcc=13).
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

In [None]:
# Plot MFCCs.
# Draw on an explicit Axes and hand the returned image to the colorbar, so
# the colorbar is tied to this plot rather than to pyplot's implicit
# "current image".
fig, ax = plt.subplots(figsize=(10, 6))

# Pass the sampling rate the audio was loaded with, so the time axis is
# derived from it instead of from specshow's own default rate.
mfcc_image = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=ax)
fig.colorbar(mfcc_image, ax=ax)

ax.set_title('MFCCs')
ax.set_ylabel('MFCC coefficient index')
fig.tight_layout()
plt.show()

In [None]:
# Select a specific frame (e.g., frame 10).
# `mfccs` is indexed as [coefficient, frame] here, so the slice below takes
# every coefficient of one frame. Indexing fails if the clip has fewer than
# frame_index + 1 frames.
frame_index = 10
frame_mfccs = mfccs[:, frame_index]


In [None]:
# Bar chart of the chosen frame: one bar per MFCC coefficient.
coefficient_positions = range(len(frame_mfccs))

plt.figure(figsize=(8, 4))
plt.bar(coefficient_positions, frame_mfccs, color='b', alpha=0.7)
plt.title(f'MFCCs for Frame {frame_index}')
plt.xlabel('MFCC Coefficient Index')
plt.ylabel('MFCC Value')
# Put a tick under every bar so each coefficient index is labelled.
plt.xticks(coefficient_positions)
plt.tight_layout()
plt.show()

In [None]:
#generate metadata for Dataset so we can organize it
import os
import pandas as pd
import librosa
import numpy as np

# Set the path to your dataset.
# Placeholder value: replace it with the directory to scan; the loop further
# down in this cell walks it recursively looking for .wav files.
dataset_path = "/path/to/your/dataset"

# Set the path for the generated metadata file.
# Placeholder value: the CSV produced at the end of this cell is written here.
metadata_path = "/path/to/your/metadata.csv"

# Function to extract emotion class and level from file name
def extract_info_from_filename(filename):
    """Split an underscore-delimited file name into (emotion_class, emotion_level).

    The third underscore-separated field is the emotion class and the fourth,
    when present, is the emotion level, e.g. "1001_DFA_ANG_XX.wav" gives
    ("ANG", "XX").

    Args:
        filename: Bare file name (no directory components).

    Returns:
        Tuple ``(emotion_class, emotion_level)``. Either element is
        "Unspecified" when the name has no corresponding field.
    """
    parts = filename.split('_')
    # Strip the extension from the last field only. Without this the
    # extension leaks into the class for three-field names such as
    # "a_b_happy.wav", which used to yield "happy.wav".
    parts[-1] = os.path.splitext(parts[-1])[0]

    # Fall back to "Unspecified" instead of raising IndexError, so one oddly
    # named file cannot abort a whole directory scan.
    emotion_class = parts[2] if len(parts) > 2 else "Unspecified"
    # Keep only the text before the first dot, as the level field always did.
    emotion_level = parts[3].split('.')[0] if len(parts) > 3 else "Unspecified"
    return emotion_class, emotion_level

# Collect one record per audio file and build the DataFrame once at the end.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# and growing a frame row by row copies it on every iteration.
metadata_columns = ['filename', 'emotion_class', 'emotion_level']
metadata_records = []

# Process each audio file
for root, dirs, files in os.walk(dataset_path):
    # Sorting `dirs` in place and iterating the files in sorted order fixes
    # the traversal order, so the CSV rows come out in the same order on
    # every run regardless of the file system.
    dirs.sort()
    for filename in sorted(files):
        if not filename.endswith(".wav"):
            continue
        file_path = os.path.join(root, filename)

        # Extract emotion class and level from the filename
        emotion_class, emotion_level = extract_info_from_filename(filename)

        # Load the audio file and extract MFCCs. A file that cannot be
        # processed is reported and left out of the metadata.
        try:
            y, sr = librosa.load(file_path)
            mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")
            continue

        # Summarise the MFCC matrix as its mean along axis 1.
        mfcc_summary = mfccs.mean(axis=1)

        metadata_records.append({
            'filename': filename,
            'emotion_class': emotion_class,
            'emotion_level': emotion_level,
        })

        # Save the MFCC summary next to the audio file for later use.
        mfcc_summary_path = os.path.join(root, f"{os.path.splitext(filename)[0]}_mfcc_summary.npy")
        np.save(mfcc_summary_path, mfcc_summary)

# Passing `columns` keeps the CSV header even when no file was processed.
metadata_df = pd.DataFrame(metadata_records, columns=metadata_columns)

# Save the metadata to a CSV file
metadata_df.to_csv(metadata_path, index=False)

print("Metadata generation completed.")


In [None]:
import os
import numpy as np
import pandas as pd
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Set the path to your dataset.
# Placeholder value: replace it with the dataset root. The feature loop in
# this cell looks for each file at <dataset_path>/<emotion_class>/<filename>.
dataset_path = "/path/to/your/dataset"

# Set the path to your metadata file.
# Placeholder value: must point at a CSV that has at least the columns
# 'filename' and 'emotion_class', which are the ones read in this cell.
metadata_path = "/path/to/your/metadata.csv"

# Load the metadata
metadata_df = pd.read_csv(metadata_path)

# Function to extract MFCC features from an audio file
def extract_mfcc(file_path, n_mfcc=13):
    """Return the mean MFCC vector of one audio file.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients to request (default 13).

    Returns:
        The MFCC matrix averaged along axis 1, or ``None`` if loading or
        feature extraction raised. The error is printed, not re-raised.
    """
    mfccs_mean = None
    try:
        signal, rate = librosa.load(file_path)
        coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)
        mfccs_mean = np.mean(coefficients, axis=1)
    except Exception as e:
        # Best effort: report the failure and fall through to return None.
        print(f"Error processing file {file_path}: {e}")
    return mfccs_mean

# Build the feature list and the label list, one entry per usable file.
mfcc_features = []
labels = []

# NOTE(review): the path built below assumes every clip lives in a directory
# named after its emotion class directly under dataset_path -- confirm this
# matches the dataset layout.
for _, record in metadata_df.iterrows():
    clip_name = record['filename']
    clip_label = record['emotion_class']

    clip_features = extract_mfcc(os.path.join(dataset_path, clip_label, clip_name))
    if clip_features is None:
        # extract_mfcc returns None for a file it could not process; skip it.
        continue

    mfcc_features.append(clip_features)
    labels.append(clip_label)

# Convert the lists to NumPy arrays
X = np.array(mfcc_features)
y = np.array(labels)

# Encode the emotion labels as integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the dataset into training and testing sets (20 % held out for testing)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train a Random Forest classifier
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)

# Request a row and column for every encoded class. Without `labels`, a class
# that is absent from both y_test and y_pred is dropped, and the matrix no
# longer lines up with label_encoder.classes_ when those are used as tick
# labels for the heatmap.
class_ids = np.arange(len(label_encoder.classes_))
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_ids)

print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)

# Plot the confusion matrix as an annotated heatmap, using the encoder's
# class names as tick labels on both axes.
class_names = label_encoder.classes_

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
plt.show()
