In [51]:
import csv
import os
from collections import Counter

import numpy as np
import pandas as pd
import torch
import torchaudio
from pydub import AudioSegment
from speechbrain.pretrained import EncoderClassifier

In [None]:
!pip install speechbrain
!pip install git+https://github.com/speechbrain/speechbrain.git@develop

In [52]:
# Make the Homebrew bin directory visible to subprocesses started from this
# kernel (presumably so pydub can locate ffmpeg -- confirm).
# The directory is appended only when it is not already listed, so re-running
# this cell does not keep growing PATH, and an unset PATH no longer raises
# KeyError.
HOMEBREW_BIN = "/opt/homebrew/bin"
_current_path = os.environ.get("PATH", "")
if HOMEBREW_BIN not in _current_path.split(os.pathsep):
    os.environ["PATH"] = (
        _current_path + os.pathsep + HOMEBREW_BIN if _current_path else HOMEBREW_BIN
    )

In [None]:
# Convert .mp4 files to .wav
# Every file in `videos_folder` whose name ends in ".mp4" (case-insensitive)
# is decoded with pydub and written next to the source as "<same name>.wav".
videos_folder = "Videos"

for file_name in os.listdir(videos_folder):
    # The original line was missing the trailing colon, which made the whole
    # cell a SyntaxError.
    if file_name.lower().endswith(".mp4"):
        video_path = os.path.join(videos_folder, file_name)
        # Same directory and base name, only the extension changes.
        wav_path = os.path.splitext(video_path)[0] + ".wav"

        try:
            audio = AudioSegment.from_file(video_path, format="mp4")
            audio.export(wav_path, format="wav")
            print(f"Converted {file_name} to {wav_path}")
        except Exception as e:
            # Best effort: report the failing file and continue with the rest.
            print(f"Error converting {file_name}: {e}")

print("All videos have been processed.")

In [None]:
# Smoke test: load the emotion-recognition model through its custom interface
# class and classify the single sample file referenced below, then print the
# predicted label.
from speechbrain.inference.interfaces import foreign_class

classifier = foreign_class(
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)

# classify_file returns a 4-tuple; only the text label is displayed here.
sample_prediction = classifier.classify_file(
    "speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav"
)
out_prob, score, index, text_lab = sample_prediction
print(text_lab)

In [29]:
from huggingface_hub import login

# SECURITY: never commit an access token to a notebook. The token that used
# to be hardcoded here must be treated as leaked and revoked in the Hugging
# Face account settings.
# The token is now read from the HF_TOKEN environment variable; when the
# variable is unset, `None` is passed and `login` falls back to its
# interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
# Load Wav2Vec
# Fetch the pretrained emotion-recognition model and keep its files under a
# local directory. Note that this rebinds `classifier`, replacing any object
# that an earlier cell stored under the same name.
emotion_model_source = "speechbrain/emotion-recognition-wav2vec2-IEMOCAP"
emotion_model_savedir = "pretrained_models/emotion_recognition"

classifier = EncoderClassifier.from_hparams(
    source=emotion_model_source, savedir=emotion_model_savedir
)

In [None]:
# Predict one emotion label per chunk for every .wav file in `audio_folder`
# and write a wide CSV: one row per chunk index, one column per file.
#
# Changes compared with the previous version of this cell:
#   * `num_chunks` is defined before the file loop, so the CSV step no longer
#     raises NameError when the folder contains no .wav files.
#   * Inference runs under `torch.no_grad()`; predictions are unchanged but no
#     autograd graph is kept for each chunk.
#   * Files are visited in sorted order so the CSV columns are reproducible
#     (os.listdir returns names in arbitrary order).
audio_folder = "Videos"
output_csv_file = "chunk_emotions.csv"
target_sample_rate = 16000  # rate every file is resampled to before inference
num_chunks = 25             # every file is cut into this many equal chunks

all_emotions = {}

for file_name in sorted(os.listdir(audio_folder)):
    if not file_name.lower().endswith(".wav"):
        continue

    file_path = os.path.join(audio_folder, file_name)
    signal, fs = torchaudio.load(file_path)

    # Dim 0 is treated as the channel axis: multi-channel audio is averaged
    # down to a single channel.
    if signal.shape[0] > 1:
        signal = signal.mean(dim=0, keepdim=True)

    if fs != target_sample_rate:
        resampler = torchaudio.transforms.Resample(orig_freq=fs, new_freq=target_sample_rate)
        signal = resampler(signal)
        fs = target_sample_rate

    num_samples = signal.shape[1]
    # Ceiling division so that num_chunks * chunk_length covers every sample;
    # the last chunk may be shorter than the others.
    # NOTE(review): for extremely short clips trailing chunks can be empty,
    # which the model call below is not guarded against.
    chunk_length = (num_samples + num_chunks - 1) // num_chunks
    chunk_emotions = []

    with torch.no_grad():
        for i in range(num_chunks):
            start = i * chunk_length
            end = min((i + 1) * chunk_length, num_samples)
            chunk = signal[:, start:end]

            # Manual forward pass through the model's sub-modules.
            features = classifier.mods.wav2vec2(chunk)
            pooled_features = classifier.mods.avg_pool(features)
            logits = classifier.mods.output_mlp(pooled_features)
            # Highest-scoring class along the last axis.
            score, index = logits.max(dim=-1)
            predicted_emotion = classifier.hparams.label_encoder.decode_torch(index)

            # decode_torch returns a nested list here; take the single label.
            chunk_emotions.append(predicted_emotion[0][0])

    video_name = os.path.splitext(file_name)[0]
    all_emotions[video_name] = chunk_emotions

with open(output_csv_file, mode='w', newline='') as file:
    writer = csv.writer(file)

    video_names = list(all_emotions.keys())
    writer.writerow(["Chunk"] + video_names)

    # Chunk numbers in the CSV are 1-based.
    for i in range(num_chunks):
        writer.writerow([i + 1] + [all_emotions[name][i] for name in video_names])

print(f"Predicted emotions for all chunks and all files saved to: {output_csv_file}")

In [None]:
# Summarise, for each video, the share of chunks assigned to each emotion
# label and save the table as CSV.
# The "*_percent" columns hold fractions in the range 0-1 (count divided by
# the number of chunks); they are not multiplied by 100.
label_to_column = {
    "neu": "neutral_percent",
    "hap": "happy_percent",
    "sad": "sad_percent",
    "ang": "angry_percent",
}

results = []
for video_name, emotions in all_emotions.items():
    total_chunks = len(emotions)
    label_tally = Counter(emotions)  # labels that never occur count as 0

    summary_row = {"video_name": video_name}
    for label, column in label_to_column.items():
        summary_row[column] = label_tally[label] / total_chunks
    results.append(summary_row)

df = pd.DataFrame(results)

output_csv_file = "video_emotion_percentages.csv"
df.to_csv(output_csv_file, index=False)

print(f"Emotion percentages have been saved to {output_csv_file}")