In [42]:
from transformers import pipeline
import soundfile as sf
from collections import Counter
import soundfile as sf
import torch
import torchaudio
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize
from deepmultilingualpunctuation import PunctuationModel
from collections import Counter


In [43]:
# Speech-to-text pipeline built from the openai/whisper-small checkpoint.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
)

# Punctuation restorer, constructed with its default arguments.
punctuator = PunctuationModel()

# Text classifier; its raw labels are remapped by normalize_emotion_label.
sentiment_model = pipeline(
    task="text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
)

# NLTK sentiment analyzer instance (not referenced by the functions defined
# in the cells shown here).
sia = SentimentIntensityAnalyzer()



In [44]:
def transcribe_audio(audio_file_path):
    """Read an audio file and return the text produced by ``asr_pipeline``.

    The samples are loaded as float32. Data with more than one dimension is
    averaged along axis 1 (presumably the channel axis -- confirm against the
    soundfile documentation), and anything not already sampled at 16 kHz is
    resampled to 16 kHz before being handed to the pipeline.

    Args:
        audio_file_path: Path of the audio file, passed straight to ``sf.read``.

    Returns:
        The ``"text"`` entry of the pipeline's result.
    """
    target_rate = 16000
    waveform, native_rate = sf.read(audio_file_path, dtype="float32")

    # Collapse multi-dimensional data into a single 1-D signal.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    # Resample only when the file's rate differs from the target rate.
    if native_rate != target_rate:
        to_target_rate = torchaudio.transforms.Resample(
            orig_freq=native_rate,
            new_freq=target_rate,
        )
        waveform = to_target_rate(torch.tensor(waveform)).numpy()

    return asr_pipeline(waveform)["text"]

def analyze_emotion(sentences):
    """Classify ``sentences`` with ``sentiment_model`` and normalize the labels.

    Args:
        sentences: The input forwarded unchanged to ``sentiment_model``.

    Returns:
        A list with one dict per classifier result, each holding the label
        mapped through ``normalize_emotion_label`` under ``'label'`` and the
        classifier's original ``'score'``.
    """
    normalized = []
    for prediction in sentiment_model(sentences):
        normalized.append({
            'label': normalize_emotion_label(prediction['label']),
            'score': prediction['score'],
        })
    return normalized

def normalize_emotion_label(label):
    """Map a raw classifier label onto the coarse emotion set.

    'joy' and 'love' become 'happy', 'anger' becomes 'angry', 'sadness'
    becomes 'sad'; 'fear', 'surprise' and any label not listed become
    'others'. Matching is exact (case-sensitive).

    Args:
        label: Raw label to translate.

    Returns:
        One of 'happy', 'angry', 'sad' or 'others'.
    """
    # Raw labels grouped by the coarse label they collapse into.
    raw_labels_by_coarse = {
        'happy': ('joy', 'love'),
        'angry': ('anger',),
        'sad': ('sadness',),
        'others': ('fear', 'surprise'),
    }
    coarse_by_raw = {
        raw: coarse
        for coarse, raw_labels in raw_labels_by_coarse.items()
        for raw in raw_labels
    }
    return coarse_by_raw.get(label, 'others')

def process(audio_file):
    """Transcribe an audio file and run sentence-level emotion analysis on it.

    Steps: transcribe with ``transcribe_audio``, restore punctuation with
    ``punctuator``, split into sentences with ``sent_tokenize``, classify the
    sentences with ``analyze_emotion`` and count the resulting labels.

    Args:
        audio_file: Path of the audio file, forwarded to ``transcribe_audio``.

    Returns:
        On success, the tuple ``(transcribed_text, punctuated_text, sentences,
        emotion_analysis, emotion_counts)`` where ``emotion_counts`` is a
        ``Counter`` of the normalized labels. When the transcription is empty
        (falsy), an empty dict is returned instead -- callers must check for
        that before indexing the result.
    """
    transcribed_text = transcribe_audio(audio_file)
    if not transcribed_text:
        return {}

    punctuated_text = punctuator.restore_punctuation(transcribed_text)
    sentences = sent_tokenize(punctuated_text)
    emotion_analysis = analyze_emotion(sentences)
    emotion_counts = Counter(entry['label'] for entry in emotion_analysis)
    return transcribed_text, punctuated_text, sentences, emotion_analysis, emotion_counts

In [45]:
# Run the whole pipeline on one recording and show every stage of the result.
data = process(r"../SER/output/TobyReturns.wav")
if data:
    # On success `data` is the tuple (transcribed_text, punctuated_text,
    # sentences, emotion_analysis, emotion_counts); print each part in order.
    for part in data:
        print(part)
else:
    # process() returns an empty dict when nothing was transcribed; indexing
    # it with data[0] would raise KeyError, so report the condition instead.
    print("No speech was transcribed from the audio file.")

 Are you swallowing them whole? You're eating them so fast, are they even touching your tongue? Yeah. Why aren't you having an angela? Oh, I ate so many already when no one was looking. Yeah, right. My baby's dieting. She wants to fit into a special child-sized wedding gown for her big day. Does one of those exist? It's from my pageant days. I was Miss Tiny Mid-Atlantic bride when I was 10. I was Miss Tiny Mid-Atlantic Bride when I was 10. Probably heard of it. Brownies, is that... pastry cubes made of sugar and fat? No, thank you. I'll stick with my jerky. So why'd you come in here? To socialize and inform. Ooh, brownies! I'm taking two so I can parcel them up and eat them at my leisure later on. Much healthier. You're taking two? Yeah, um, but one of them is for Tilly. Yeah! Why don't you send that to him in Costa Rica? Recon. Um, I'm just gonna hand it to him right now. Okay, weirdo. Why is that weird? She said she's gonna give it to him right now. She's probably going to, because t