In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import cv2
from deepface import DeepFace
from transformers import pipeline
import librosa
import librosa.display
import soundfile as sf

# Ensure results directory exists
os.makedirs("results/plots", exist_ok=True)

2024-12-18 16:04:54.541160: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-18 16:04:54.586820: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-18 16:04:54.586861: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-18 16:04:54.586918: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-18 16:04:54.596185: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-18 16:04:54.597239: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

In [3]:
ravdess_test_data = "RAVDESS"

In [7]:
def load_ravdess_audio(data_dir):
    """
    Load RAVDESS audio files recursively and extract emotion labels from filenames.
    :param data_dir: Path to directory containing RAVDESS actor folders
    :return: List of tuples (audio path, label)
    """
    emotions_map = {
        '01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad',
        '05': 'angry', '06': 'fear', '07': 'disgust', '08': 'surprise'
    }

    data = []
    for root, _, files in os.walk(data_dir):
        for file in files:
            if file.endswith('.wav'):
                parts = file.split("-")
                if len(parts) > 2:  # Ensure filename format is correct
                    emotion_label = emotions_map.get(parts[2], "unknown")
                    data.append((os.path.join(root, file), emotion_label))
    print(f"Loaded {len(data)} audio files from {data_dir}.")
    return data


def validate_wav2vec2_ravdess(audio_data, sample_size=100):
    """
    Validate Wav2Vec2 model on RAVDESS audio data.
    :param audio_data: List of tuples (audio path, label)
    :param sample_size: Number of samples to process
    :return: Ground truth and predicted labels
    """
    ground_truth = []
    predictions = []
    
    classifier = pipeline("audio-classification", model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition")
    sampled_data = audio_data[:sample_size]
    print(f"Validating Wav2Vec2 on {len(sampled_data)} samples...")
    
    for audio_path, label in tqdm(sampled_data):
        ground_truth.append(label)
        
        try:
            result = classifier(audio_path)
            predicted_label = result[0]['label'].lower()
        except Exception as e:
            print(f"Error processing {audio_path}: {e}")
            predicted_label = "error"
        predictions.append(predicted_label)
    
    return ground_truth, predictions

In [11]:
def plot_confusion_matrix(cm, labels, title, save_path):
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
    plt.title(title)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    
    # Удаляем старый файл, если он существует
    if os.path.exists(save_path):
        os.remove(save_path)

    plt.savefig(save_path)
    plt.close()

def visualize_results(ground_truth, predictions, labels, save_dir, filename):
    """
    Generate confusion matrix and classification report
    :param ground_truth: List of ground truth labels
    :param predictions: List of predicted labels
    :param labels: List of class labels
    :param save_dir: Directory to save plots
    :param filename: Name of the saved plot
    """
    cm = confusion_matrix(ground_truth, predictions, labels=labels)
    print("\nClassification Report:")
    print(classification_report(ground_truth, predictions, labels=labels))
    
    # Save confusion matrix plot
    cm_path = os.path.join(save_dir, filename)
    plot_confusion_matrix(cm, labels, filename.split(".")[0], cm_path)
    print(f"Confusion matrix saved at: {cm_path}")

In [12]:
# Run RAVDESS Validation
print("\nStarting RAVDESS validation with Wav2Vec2...")
ravdess_data = load_ravdess_audio(ravdess_test_data)
audio_ground_truth, audio_predictions = validate_wav2vec2_ravdess(ravdess_data, sample_size=2800)
visualize_results(audio_ground_truth, audio_predictions, labels=['neutral', 'calm', 'happy', 'sad', 'angry', 'fear', 'disgust', 'surprise'], save_dir="results/plots", filename="ravdess_confusion_matrix.png")


Starting RAVDESS validation with Wav2Vec2...
Loaded 2880 audio files from RAVDESS.


Some weights of the model checkpoint at ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition were not used when initializing Wav2Vec2ForSequenceClassification: ['classifier.output.weight', 'classifier.dense.weight', 'classifier.output.bias', 'wav2vec2.encoder.pos_conv_embed.conv.weight_v', 'wav2vec2.encoder.pos_conv_embed.conv.weight_g', 'classifier.dense.bias']
- This IS expected if you are initializing Wav2Vec2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForSequenceClassification were not initialized from the model checkpoint at ehcalabres/wav2vec2-lg-xlsr-e

Validating Wav2Vec2 on 2800 samples...


100%|██████████| 2800/2800 [21:48<00:00,  2.14it/s]


Classification Report:
              precision    recall  f1-score   support

     neutral       0.00      0.00      0.00       188
        calm       0.07      0.11      0.09       374
       happy       0.19      0.17      0.18       375
         sad       0.10      0.12      0.11       371
       angry       0.01      0.01      0.01       374
        fear       0.00      0.00      0.00       373
     disgust       0.00      0.00      0.00       372
    surprise       0.00      0.00      0.00       373

   micro avg       0.09      0.06      0.07      2800
   macro avg       0.05      0.05      0.05      2800
weighted avg       0.05      0.06      0.05      2800

Confusion matrix saved at: results/plots/ravdess_confusion_matrix.png



