<a href="https://colab.research.google.com/github/Fazna-kozhipparambil/Interactive-Emotion-Detection/blob/main/interactive_emotion_detection_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Step 1: Install Dependencies

In [None]:
!pip install transformers torch torchaudio librosa soundfile pydub ipywidgets



Step 2: Import Libraries & Load Models

In [None]:
import base64
import io
import os
import tempfile

import ipywidgets as widgets
import librosa
import numpy as np
import soundfile as sf
import torch
from google.colab import files
from google.colab import output
from IPython.display import display, Javascript
from transformers import pipeline, Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor

# Text Emotion Detection
# top_k=None asks the pipeline for a score per label rather than only the best one;
# detect_text_emotion iterates over that full list.
text_emotion_detector = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None
)


# Audio Emotion Detection
AUDIO_MODEL_ID = "superb/wav2vec2-base-superb-er"
audio_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(AUDIO_MODEL_ID)
audio_model = Wav2Vec2ForSequenceClassification.from_pretrained(AUDIO_MODEL_ID)

# The class names are read from the checkpoint's own config so that index i of
# `audio_labels` always names logit i. A hand-written list can disagree with the
# model in both length and order, which silently reports the wrong emotion.
# NOTE(review): this checkpoint is expected to expose abbreviated names
# (neu/hap/ang/sad); anything not in the table below is shown unchanged.
_READABLE_AUDIO_LABELS = {"neu": "neutral", "hap": "happy", "ang": "angry", "sad": "sad"}
audio_labels = [
    _READABLE_AUDIO_LABELS.get(audio_model.config.id2label[i], audio_model.config.id2label[i])
    for i in range(audio_model.config.num_labels)
]


In [None]:
%load_ext cudf.pandas
import pandas as pd
import numpy as np

# Randomly generated dataset of parking violations-
# Define the number of rows
num_rows = 1000000

states = ["NY", "NJ", "CA", "TX"]
violations = ["Double Parking", "Expired Meter", "No Parking",
              "Fire Hydrant", "Bus Stop"]
vehicle_types = ["SUBN", "SDN"]

# Create a date range
start_date = "2022-01-01"
end_date = "2022-12-31"
dates = pd.date_range(start=start_date, end=end_date, freq='D')

# Generate random data
data = {
    "Registration State": np.random.choice(states, size=num_rows),
    "Violation Description": np.random.choice(violations, size=num_rows),
    "Vehicle Body Type": np.random.choice(vehicle_types, size=num_rows),
    "Issue Date": np.random.choice(dates, size=num_rows),
    "Ticket Number": np.random.randint(1000000000, 9999999999, size=num_rows)
}

# Create a DataFrame
df = pd.DataFrame(data)

# Which parking violation is most commonly committed by vehicles from various U.S states?

(df[["Registration State", "Violation Description"]]  # get only these two columns
 .value_counts()  # get the count of offences per state and per type of offence
 .groupby("Registration State")  # group by state
 .head(1)  # get the first row in each group (the type of offence with the largest count)
 .sort_index()  # sort by state name
 .reset_index()
)

Step 3: Text Emotion Widget

In [None]:
def detect_text_emotion(text):
    """Print each emotion label and its score for ``text``.

    Runs the module-level ``text_emotion_detector`` pipeline on ``text``, prints a
    header line, then prints one ``label: score`` line (score to two decimals)
    for every entry in the first element of the pipeline output.
    Returns nothing.
    """
    detector_output = text_emotion_detector(text)
    print("Text Emotion Detection Results:")
    # Lazy generator: each entry is formatted right before it is printed.
    report_lines = (
        f"{entry['label']}: {entry['score']:.2f}" for entry in detector_output[0]
    )
    for report_line in report_lines:
        print(report_line)

# Free-text box pre-filled with an example sentence; widened so longer inputs stay visible.
text_input = widgets.Text(
    value='I am feeling excited today!',
    description='Your Text:',
    layout=widgets.Layout(width='80%')
)
# interact_manual adds a run button, so detection only fires on demand rather than
# on every keystroke. The trailing semicolon keeps the call's return value (the
# wrapped function) from being echoed as the cell's output.
widgets.interact_manual(detect_text_emotion, text=text_input);


Step 4: Audio File Upload Widget

In [None]:
def detect_audio_file(_):
    """Ask for an audio file upload and print the emotion predicted for it.

    Button click handler. Opens Colab's upload dialog, loads the first uploaded
    file as 16 kHz audio, runs it through ``audio_model`` and prints the label
    from ``audio_labels`` at the highest-scoring index.

    Args:
        _: The button instance passed by ``on_click``; unused.
    """
    uploaded = files.upload()
    if not uploaded:
        # Dialog was cancelled or nothing was selected; there is nothing to classify.
        print("No audio file was uploaded.")
        return
    audio_file = next(iter(uploaded))

    # The uploaded name is used as a path.
    # NOTE(review): this assumes files.upload() also saves the file into the
    # working directory under that name - confirm against the Colab docs.
    speech, sr = librosa.load(audio_file, sr=16000)
    inputs = audio_feature_extractor(speech, sampling_rate=sr, return_tensors="pt", padding=True)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = audio_model(**inputs).logits

    predicted_id = torch.argmax(logits, dim=-1).item()
    print("Uploaded Audio Emotion Detected:", audio_labels[predicted_id])

# The label is longer than the default button width, so let the button size itself
# to its text instead of truncating it.
audio_button = widgets.Button(
    description="Upload Audio (.wav) for Emotion Detection",
    layout=widgets.Layout(width='auto')
)
audio_button.on_click(detect_audio_file)
display(audio_button)


Step 5: Live Microphone Widget

In [None]:
# Helper to convert JS audio to numpy array
def js_to_audio(js_audio):
    """Decode a base64 data URL sent from the browser into mono audio samples.

    Args:
        js_audio: String of the form ``data:<mime>;base64,<payload>``.

    Returns:
        ``(audio_data, samplerate)``: a 1-D NumPy array of samples and the
        sample rate reported by the decoder. Multi-channel audio is reduced to
        its first channel.

    Raises:
        ValueError: If ``js_audio`` has no comma separator or an empty payload.
    """
    _header, separator, payload = js_audio.partition(',')
    if not separator or not payload:
        raise ValueError(
            "Expected a base64 data URL of the form 'data:<mime>;base64,<payload>'."
        )
    audio_bytes = base64.b64decode(payload)

    try:
        audio_data, samplerate = sf.read(io.BytesIO(audio_bytes))
    except RuntimeError:
        # soundfile could not parse the bytes. Browser recorders commonly emit
        # WebM or Ogg containers whatever MIME type the Blob is labelled with,
        # so retry through librosa, which needs a real file path for its
        # fallback decoder.
        scratch = tempfile.NamedTemporaryFile(delete=False)
        try:
            scratch.write(audio_bytes)
            scratch.close()
            audio_data, samplerate = librosa.load(scratch.name, sr=None, mono=False)
        finally:
            scratch.close()
            os.remove(scratch.name)
        if audio_data.ndim > 1:
            # librosa is channel-first; transpose to soundfile's (samples, channels).
            audio_data = audio_data.T

    if audio_data.ndim > 1:
        audio_data = audio_data[:, 0]  # use first channel if stereo
    return audio_data, samplerate

# Callback to process recorded audio
def process_audio(js_audio):
    """Classify the emotion in a browser recording and print the result.

    Decodes ``js_audio`` with ``js_to_audio``, resamples to 16 kHz when the
    recording uses another rate, runs ``audio_model`` and prints the label from
    ``audio_labels`` at the highest-scoring index.

    Args:
        js_audio: Base64 data URL string produced by the recording script.
    """
    audio_data, sr = js_to_audio(js_audio)
    if audio_data.size == 0:
        # Nothing was captured (e.g. recording stopped immediately); the model
        # cannot run on zero samples.
        print("No audio was captured; please record again.")
        return

    # Resample to 16kHz, the same rate the file-upload path loads audio at.
    if sr != 16000:
        audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=16000)
        sr = 16000

    inputs = audio_feature_extractor(audio_data, sampling_rate=sr, return_tensors="pt", padding=True)
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = audio_model(**inputs).logits
    predicted_id = torch.argmax(logits, dim=-1).item()
    print("Live Microphone Emotion Detected:", audio_labels[predicted_id])

# Register callback: expose process_audio to browser-side JavaScript under the name
# 'notebook.process_audio', the same name the recording script passes to
# google.colab.kernel.invokeFunction.
output.register_callback('notebook.process_audio', process_audio)

# Function to trigger JS recording with start/stop
def record_and_detect_audio(_):
    """Record from the browser microphone and send the audio to ``process_audio``.

    Button click handler. Injects a script that requests microphone access,
    records until the user dismisses the alert dialog, then passes the recording
    as a base64 data URL to the Python callback registered as
    ``'notebook.process_audio'``.

    Args:
        _: The button instance passed by ``on_click``; unused.
    """
    # The script is wrapped in an async IIFE so its variables stay local and a
    # second click cannot collide with declarations from the first one.
    display(Javascript("""
    (async () => {
        let stream;
        try {
            stream = await navigator.mediaDevices.getUserMedia({audio:true});
        } catch (err) {
            // Permission denied or no input device: report it instead of failing silently.
            alert("Could not access the microphone: " + err);
            return;
        }

        const mediaRecorder = new MediaRecorder(stream);
        const chunks = [];
        mediaRecorder.ondataavailable = e => chunks.push(e.data);

        // Attach the stop handler before recording starts so the stop event
        // can never fire without a listener.
        mediaRecorder.onstop = () => {
            // The 'audio/wav' type only labels the Blob; it does not convert
            // the recorder's native encoding.
            const blob = new Blob(chunks, {type:'audio/wav'});
            const reader = new FileReader();
            reader.onloadend = () => {
                google.colab.kernel.invokeFunction('notebook.process_audio', [reader.result], {});
            };
            reader.readAsDataURL(blob);
            stream.getTracks().forEach(track => track.stop());
        };

        mediaRecorder.start();
        alert("Recording started! Press OK when done speaking.");
        mediaRecorder.stop();
    })();
    """))

# Button to start live microphone recording
# Let the button size itself to its text so the long label is not truncated.
mic_button = widgets.Button(
    description="Record Live Audio (Start/Stop)",
    layout=widgets.Layout(width='auto')
)
mic_button.on_click(record_and_detect_audio)
display(mic_button)

