In [2]:
# 📦 Install dependencies
!pip install git+https://github.com/openai/whisper.git
!pip install transformers
!sudo apt install ffmpeg  # Needed to process audio

import whisper
from transformers import pipeline
import IPython.display as ipd
import os
from google.colab import files

# 🧠 Load the Whisper speech-to-text model.
# Alternative checkpoint names: "small", "medium", "large".
WHISPER_CHECKPOINT = "base"
whisper_model = whisper.load_model(WHISPER_CHECKPOINT)

# 📁 Upload audio
# files.upload() opens Colab's upload widget; the result is used below as a
# mapping whose keys are the uploaded file names.
print("Upload a voice recording (.wav or .mp3)")
uploaded = files.upload()

# Check if a file was uploaded
if uploaded:
    # Only the first uploaded file is processed; any others are ignored.
    filename = next(iter(uploaded))
    print(f"File uploaded: {filename}")

    # 🎧 Play the audio in Colab
    ipd.display(ipd.Audio(filename))

    # 🎤 Transcribe with Whisper
    result = whisper_model.transcribe(filename)
    transcribed_text = result["text"]

    print("\n📜 Transcribed Text:")
    print(transcribed_text)

    # A silent or unintelligible recording yields an empty / whitespace-only
    # transcript; classifying that would print a meaningless prediction.
    if transcribed_text.strip():
        # 🧠 Emotion Detection from Transcribed Text
        # top_k=None makes the pipeline return a score for every label
        # instead of only the best one.
        emotion_classifier = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            top_k=None,
        )

        # truncation=True clips transcripts longer than the model's maximum
        # input length; without it a long recording makes the pipeline raise.
        # [0] selects the list of {label, score} dicts for the single input.
        emotion_result = emotion_classifier(transcribed_text, truncation=True)[0]
        sorted_emotions = sorted(emotion_result, key=lambda x: x['score'], reverse=True)

        # Report every emotion as a percentage, most likely first.
        print("\n🎯 Emotion Prediction:")
        for emo in sorted_emotions:
            print(f"{emo['label']}: {round(emo['score'] * 100, 2)}%")
    else:
        print("\nNo speech was transcribed; skipping emotion detection.")
else:
    print("No file was uploaded.")

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-xc62b052
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-xc62b052
  Resolved https://github.com/openai/whisper.git to commit c0d2f624c09dc18e709e37c2ad90c039a4eb72a2
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
Upload a voice recording (.wav or .mp3)


Saving Recording.mp3.m4a to Recording.mp3.m4a
File uploaded: Recording.mp3.m4a





📜 Transcribed Text:
 I have been feeling very low and tired.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/329M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/294 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cpu



🎯 Emotion Prediction:
sadness: 98.42%
neutral: 0.54%
disgust: 0.51%
fear: 0.22%
anger: 0.13%
joy: 0.1%
surprise: 0.08%
