<a href="https://colab.research.google.com/github/SankarSubbu/Machine-Learning/blob/Project/Real_time_audio_sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install the web UI toolkit (gradio), Hugging Face transformers, and OpenAI Whisper.
# Whisper is installed straight from its GitHub repository rather than from a PyPI release.
!pip install gradio
!pip install transformers
!pip install git+https://github.com/openai/whisper.git

Collecting gradio
  Downloading gradio-5.23.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [2]:
# Clone the Hugging Face Space repository for this demo into the current working directory.
# NOTE(review): no visible cell reads from the cloned directory — confirm this step is still needed.
!git clone https://huggingface.co/spaces/SankarSubbu/Real-Time-Audio-sentiment-analysis

Cloning into 'Real-Time-Audio-sentiment-analysis'...
remote: Enumerating objects: 4, done.[K
remote: Total 4 (delta 0), reused 0 (delta 0), pack-reused 4 (from 1)[K
Unpacking objects: 100% (4/4), 1.34 KiB | 1.34 MiB/s, done.


In [None]:
!pip install gradio whisper transformers scipy numpy torch

Collecting whisper
  Downloading whisper-1.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: whisper
  Building wheel for whisper (setup.py) ... [?25l[?25hdone
  Created wheel for whisper: filename=whisper-1.1.10-py3-none-any.whl size=41120 sha256=50ff83fc38bd1d728cd05b29bcc33ff9a5a6ea09ff93afc1419bff0f4790c8bc
  Stored in directory: /root/.cache/pip/wheels/21/65/ee/4e6672aabfa486d3341a39a04f8f87c77e5156149299b5a7d0
Successfully built whisper
Installing collected packages: whisper
Successfully installed whisper-1.1.10


In [None]:
import gradio as gr
import whisper
import numpy as np
import tempfile
from transformers import pipeline
from scipy.io.wavfile import write

# Load models
# Checkpoint identifiers are named up front so either model can be swapped in one place.
WHISPER_CHECKPOINT = "base"
EMOTION_CHECKPOINT = "SamLowe/roberta-base-go_emotions"

# Speech model: used below for both language detection and transcription.
model = whisper.load_model(WHISPER_CHECKPOINT)

# Text classifier applied to the transcription (PyTorch backend).
sentiment_analysis = pipeline(
    "sentiment-analysis",
    model=EMOTION_CHECKPOINT,
    framework="pt",
)

# Sentiment Emoji Mapping
# Keys are the classifier's label strings; values are the emoji shown next to each label.
emoji_mapping = dict([
    ("disappointment", "😞"),
    ("sadness", "😢"),
    ("annoyance", "😠"),
    ("neutral", "😐"),
    ("disapproval", "👎"),
    ("realization", "😮"),
    ("nervousness", "😬"),
    ("approval", "👍"),
    ("joy", "😄"),
    ("anger", "😡"),
    ("embarrassment", "😳"),
    ("caring", "🤗"),
    ("remorse", "😔"),
    ("disgust", "🤢"),
    ("grief", "😥"),
    ("confusion", "😕"),
    ("relief", "😌"),
    ("desire", "😍"),
    ("admiration", "😌"),
    ("optimism", "😊"),
    ("fear", "😨"),
    ("love", "❤️"),
    ("excitement", "🎉"),
    ("curiosity", "🤔"),
    ("amusement", "😄"),
    ("surprise", "😲"),
    ("gratitude", "🙏"),
    ("pride", "🦁"),
])

def analyze_sentiment(text):
    """Classify ``text`` with the module-level ``sentiment_analysis`` pipeline.

    Args:
        text: The string to classify.

    Returns:
        A dict mapping each predicted label to its score. If the pipeline
        returns the same label more than once, the last score wins.
    """
    scores_by_label = {}
    for prediction in sentiment_analysis(text):
        scores_by_label[prediction['label']] = prediction['score']
    return scores_by_label

def get_sentiment_emoji(sentiment):
    """Return the emoji registered for ``sentiment`` in ``emoji_mapping``.

    Labels without an entry yield an empty string.
    """
    if sentiment in emoji_mapping:
        return emoji_mapping[sentiment]
    return ""

def display_sentiment_results(sentiment_results, option):
    """Render sentiment scores as newline-terminated display lines.

    Args:
        sentiment_results: Mapping of sentiment label to score.
        option: "Sentiment Only" emits ``"<label> <emoji>"`` per line;
            "Sentiment + Score" appends ``": <score>"`` with two decimals.
            Any other value produces no lines.

    Returns:
        The concatenated lines, or an empty string when nothing was rendered.
    """
    rendered = []
    for label, confidence in sentiment_results.items():
        icon = get_sentiment_emoji(label)
        if option == "Sentiment + Score":
            rendered.append(f"{label} {icon}: {confidence:.2f}\n")
        elif option == "Sentiment Only":
            rendered.append(f"{label} {icon}\n")
    return "".join(rendered)

def _write_wav_int16(path, sample_rate, audio_data):
    """Write ``audio_data`` to ``path`` as a 16-bit PCM WAV file."""
    samples = np.asarray(audio_data)
    # Float audio is conventionally normalised to [-1, 1]; a bare int16 cast would
    # truncate such samples to silence, so scale them up first. Float data that is
    # already outside [-1, 1] is cast unchanged, exactly as before.
    if (
        np.issubdtype(samples.dtype, np.floating)
        and samples.size
        and np.abs(samples).max() <= 1.0
    ):
        samples = samples * 32767
    write(path, sample_rate, samples.astype(np.int16))


def inference(audio, sentiment_option):
    """Detect the language of an audio clip, transcribe it, and score its sentiment.

    Args:
        audio: Either a ``(sample_rate, samples)`` tuple as produced by a Gradio
            audio component with ``type="numpy"``, a path to an audio file, or
            ``None`` when the user supplied no audio.
        sentiment_option: Display mode forwarded to ``display_sentiment_results``.

    Returns:
        A ``(language, transcription, sentiment_text)`` tuple of strings. The
        language code is upper-cased. All three are empty when ``audio`` is ``None``.
    """
    # The button can be clicked before anything is recorded or uploaded.
    if audio is None:
        return "", "", ""

    if isinstance(audio, tuple):
        sample_rate, audio_data = audio
        # Whisper's loader reads from a file, so stage the samples as a WAV inside a
        # scratch directory that is removed as soon as the waveform is in memory.
        with tempfile.TemporaryDirectory() as scratch_dir:
            wav_path = f"{scratch_dir}/input.wav"
            _write_wav_int16(wav_path, sample_rate, audio_data)
            waveform = whisper.load_audio(wav_path)
    else:
        waveform = whisper.load_audio(audio)

    # Process the audio with Whisper: pad or trim to its fixed input length, then
    # build the spectrogram with the number of mel bins the loaded model expects.
    waveform = whisper.pad_or_trim(waveform)
    mel = whisper.log_mel_spectrogram(waveform, n_mels=model.dims.n_mels).to(model.device)

    # Language Detection: keep the most probable language code.
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # Transcription (fp16 disabled, so decoding runs in full precision).
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    # Sentiment Analysis
    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    return lang.upper(), result.text, sentiment_output

# UI Components
# Both strings are rendered through gr.HTML, which shows Markdown syntax literally,
# so emphasis is written with HTML tags rather than **asterisks**.
title = """<h1 align="center">🎤 Multilingual ASR + Sentiment Analysis 💬</h1>"""
description = """
💻 This demo showcases <b>Whisper</b> for multilingual speech recognition and <b>GoEmotions</b> for sentiment analysis. <br><br>
✅ <b>Features:</b> <br>
- 🎙️ Real-time microphone & file-based speech recognition <br>
- 🌍 Automatic language identification <br>
- ❤️ Sentiment analysis of transcriptions (with emojis!) <br>
"""

# Gradio App Layout
SENTIMENT_DISPLAY_CHOICES = ["Sentiment Only", "Sentiment + Score"]
FOOTER_HTML = '''
    <div class="footer">
        <p>Powered by <a href="https://github.com/openai/whisper" target="_blank">OpenAI Whisper</a> &
        <a href="https://huggingface.co/SamLowe/roberta-base-go_emotions" target="_blank">GoEmotions</a></p>
    </div>
    '''

with gr.Blocks() as block:
    # Page header: title first, then the feature overview.
    gr.HTML(title)
    gr.HTML(description)

    with gr.Group():
        # Input controls. The audio component hands the callback NumPy data.
        audio = gr.Audio(type="numpy", label="🎙️ Input Audio")
        sentiment_option = gr.Radio(
            label="📌 Select Sentiment Display Option",
            choices=SENTIMENT_DISPLAY_CHOICES,
            value=SENTIMENT_DISPLAY_CHOICES[0],  # preselect "Sentiment Only"
        )
        btn = gr.Button("Transcribe & Analyze")

        # Output fields, listed in the order `inference` returns its values.
        lang_str = gr.Textbox(label="🌍 Detected Language")
        text = gr.Textbox(label="📝 Transcription")
        sentiment_output = gr.Textbox(label="❤️ Sentiment Analysis Results")

        # Wire the button to the pipeline.
        btn.click(
            inference,
            inputs=[audio, sentiment_option],
            outputs=[lang_str, text, sentiment_output],
        )

    gr.HTML(FOOTER_HTML)

block.launch()


100%|████████████████████████████████████████| 139M/139M [00:01<00:00, 115MiB/s]
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/380 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Device set to use cpu


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://cae31a22de03624e6b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


