In [3]:
import gradio as gr
import torch
import whisper
import warnings
warnings.filterwarnings('ignore')
from transformers import pipeline
import os
from collections import Counter

# Checkpoint name for the speech-recognition pipeline, and the batch size
# passed to that pipeline on every call.
MODEL_NAME = "openai/whisper-small"
BATCH_SIZE = 8

# Run on CUDA device 0 when torch reports CUDA as available, else on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-to-text pipeline (configured with chunk_length_s=30).
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)


# Text classifier configured to return a score for every label rather than
# only the top one.
emotion_classifier = pipeline(
    task="text-classification",
    model='MilaNLProc/xlm-emo-t',
    return_all_scores=True,
)

# Lookup from a combination of primary emotion labels to the secondary
# emotions associated with that combination. Every key is a tuple of labels
# in alphabetical order. Entry order matters to consumers that iterate the
# mapping, so keep pairs first, then triples, then the full set.
secondary_emotions_map = {
    # Pairs
    ("anger", "fear"): ["Anxiety", "Hostility"],
    ("anger", "joy"): ["Zeal", "Pride"],
    ("anger", "sadness"): ["Bitterness", "Resentment"],
    ("fear", "joy"): ["Thrill", "Relief"],
    ("fear", "sadness"): ["Desperation", "Hopelessness"],
    ("joy", "sadness"): ["Nostalgia", "Melancholy"],
    # Triples
    ("anger", "fear", "joy"): ["Exuberant Aggression", "Fierce Excitement"],
    ("anger", "fear", "sadness"): ["Despair", "Turmoil"],
    ("anger", "joy", "sadness"): ["Morose Satisfaction", "Bittersweet Victory"],
    ("fear", "joy", "sadness"): ["Complex Relief", "Tinged Joy"],
    # All four primary emotions
    ("anger", "fear", "joy", "sadness"): ["Emotional Whirlwind", "Profound Ambivalence"],
}


def transcribe(file, segment_seconds=6):
    """Transcribe an audio file in consecutive fixed-length segments.

    The audio is read with ``soundfile`` and cut into windows of
    ``segment_seconds`` seconds (the final window may be shorter). Each
    window is written to a scratch WAV file and passed to the module-level
    speech-recognition pipeline ``pipe``.

    Args:
        file: Path of the audio file, or ``None`` when nothing was provided.
        segment_seconds: Length of each segment in seconds. Defaults to 6.

    Returns:
        A list holding the transcribed text of each segment, in order.

    Raises:
        gr.Error: If ``file`` is ``None``, or if reading the audio or
            transcribing any segment fails.
    """
    import tempfile

    import soundfile as sf

    print("File received:", file)
    if file is None:
        raise gr.Error("You must provide an audio file.")

    segment_texts = []

    try:
        # Reading happens inside the handler so an unreadable upload is
        # reported through the UI like any other transcription failure.
        data, samplerate = sf.read(file)
        # range() rejects a zero step, so never let the window drop below 1.
        samples_per_segment = max(1, int(segment_seconds * samplerate))
        total_samples = len(data)

        # A private scratch directory keeps simultaneous requests from
        # overwriting each other's segment file, and it is deleted on exit.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_file = os.path.join(temp_dir, "temp_segment.wav")
            for start in range(0, total_samples, samples_per_segment):
                segment = data[start:start + samples_per_segment]
                sf.write(temp_file, segment, samplerate)
                results = pipe(temp_file, batch_size=BATCH_SIZE)
                text = results['text']
                segment_texts.append(text)
                print("Transcription segment:", text)
    except Exception as e:
        print("Transcription error:", e)
        raise gr.Error(f"An error occurred during transcription: {str(e)}") from e

    return segment_texts

import matplotlib.pyplot as plt

def translate_and_classify(audio, first_name, last_name, student_id):
    """Transcribe ``audio``, classify each segment's emotions, and save reports.

    Each transcribed segment is run through ``emotion_classifier``. Scores
    are converted to percentages rounded to two decimals; labels scoring
    above 3 count as significant. Every entry of ``secondary_emotions_map``
    whose labels are all significant contributes its secondary emotions.
    The results are then handed to ``save_results_to_csv`` and
    ``plot_emotions``.

    Args:
        audio: Path of the audio file (forwarded to ``transcribe``).
        first_name: Student's first name; used in the report file names.
        last_name: Student's last name; used in the report file names.
        student_id: Student identifier; used in the report file names.

    Returns:
        A tuple ``(segment_texts, all_segment_emotions)``. The second item
        has one dict per segment with the keys ``"primary_emotions"``
        (label -> percentage) and ``"secondary_emotions"`` (list of names).

    Raises:
        gr.Error: If transcription or emotion classification fails.
    """
    import re

    print("Starting transcription and classification...")
    segment_texts = transcribe(audio)
    all_segment_emotions = []

    for text in segment_texts:
        try:
            emotions = emotion_classifier(text)
            detected_emotion = {emotion["label"]: round(emotion["score"] * 100, 2) for emotion in emotions[0]}
            significant_labels = {label for label, score in detected_emotion.items() if score > 3}

            # Collect, in map order, the secondary emotions of every
            # combination that is fully covered by the significant labels.
            secondary_emotions = [
                name
                for combination, names in secondary_emotions_map.items()
                if significant_labels.issuperset(combination)
                for name in names
            ]

            all_segment_emotions.append({
                "primary_emotions": detected_emotion,
                "secondary_emotions": secondary_emotions
            })
        except Exception as e:
            print("Classification error:", e)
            raise gr.Error(f"An error occurred during emotion classification: {str(e)}") from e

    # The name fields come straight from the web form. Replace path
    # separators and other characters that are unsafe in a file name so the
    # reports cannot be written outside the working directory.
    raw_stem = f"{first_name}_{last_name}_{student_id}"
    safe_stem = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", raw_stem)

    # Save results to CSV and plot emotions
    csv_filename = f"{safe_stem}.csv"
    png_filename = f"{safe_stem}.png"
    save_results_to_csv(segment_texts, all_segment_emotions, csv_filename)
    plot_emotions(segment_texts, all_segment_emotions, png_filename, first_name, last_name, student_id)

    return segment_texts, all_segment_emotions

# Abbreviation used on plot axes for each secondary emotion name.
emotion_short_forms = {
    # Secondary emotions of primary-emotion pairs
    "Anxiety": "ANX",
    "Hostility": "HST",
    "Zeal": "ZEA",
    "Pride": "PRD",
    "Bitterness": "BIT",
    "Resentment": "RES",
    "Thrill": "THR",
    "Relief": "RLF",
    "Desperation": "DSP",
    "Hopelessness": "HPL",
    "Nostalgia": "NST",
    "Melancholy": "MCH",
    # Secondary emotions of primary-emotion triples
    "Exuberant Aggression": "EXA",
    "Fierce Excitement": "FEX",
    "Despair": "DSPR",
    "Turmoil": "TML",
    "Morose Satisfaction": "MSF",
    "Bittersweet Victory": "BV",
    "Complex Relief": "CRF",
    "Tinged Joy": "TJ",
    # Secondary emotions of all four primary emotions together
    "Emotional Whirlwind": "EWH",
    "Profound Ambivalence": "PAM",
}




import matplotlib.pyplot as plt
import numpy as np



def plot_emotions(segment_texts, all_segment_emotions, filename, first_name, last_name, student_id):
    """Draw the per-segment and combined emotion charts and save them as one image.

    The figure has one subplot per segment, followed by a subplot with the
    primary-emotion scores summed over all segments and a final subplot with
    the total number of occurrences of each secondary emotion (abbreviated
    through ``emotion_short_forms``).

    Args:
        segment_texts: Transcribed segment texts. Not used for drawing; kept
            so the signature stays compatible with existing callers.
        all_segment_emotions: One dict per segment with the keys
            ``"primary_emotions"`` (label -> score) and
            ``"secondary_emotions"`` (list of names).
        filename: Path the figure is written to.
        first_name: Student's first name, shown in the report title.
        last_name: Student's last name, shown in the report title.
        student_id: Student identifier. Not used for drawing; kept for
            signature compatibility.
    """
    primary_categories = ['anger', 'fear', 'joy', 'sadness']
    num_segments = len(all_segment_emotions)

    # One axes per segment, plus two for the combined primary/secondary views.
    fig, axs = plt.subplots(num_segments + 2, figsize=(15, 6 * (num_segments + 2)))
    combined_primary_ax = axs[-2]
    combined_secondary_ax = axs[-1]

    try:
        # Occurrences of each abbreviated secondary emotion over all segments.
        overall_secondary_counts = Counter()

        for i, segment_data in enumerate(all_segment_emotions):
            ax = axs[i]
            primary_scores = [segment_data['primary_emotions'].get(emotion, 0) for emotion in primary_categories]
            ax.plot(primary_categories, primary_scores, label='Primary Emotions', marker='o', linestyle='-', color='blue')

            secondary_counts = Counter(segment_data['secondary_emotions'])
            for position, (emotion, count) in enumerate(secondary_counts.items()):
                # Fall back to the full name if no abbreviation is registered.
                short_form = emotion_short_forms.get(emotion, emotion)
                overall_secondary_counts[short_form] += count
                # Label only the first scatter so the legend has one entry.
                scatter_label = 'Secondary Emotions' if position == 0 else None
                ax.scatter([short_form] * count, [count] * count, label=scatter_label, color='green', s=100)

            ax.set_title(f"Emotions in Segment {i+1}")
            ax.set_xlabel("Emotion Types")
            ax.set_ylabel("Counts")
            ax.legend()

        # Combined graph for primary emotions
        combined_primary = [
            sum(segment['primary_emotions'].get(emotion, 0) for segment in all_segment_emotions)
            for emotion in primary_categories
        ]
        combined_primary_ax.plot(primary_categories, combined_primary, label='Combined Primary Emotions', marker='o', linestyle='-', color='blue')
        combined_primary_ax.set_title("Combined Primary Emotions Across All Segments")
        combined_primary_ax.set_xlabel("Emotion Types")
        combined_primary_ax.set_ylabel("Counts")
        combined_primary_ax.legend()

        # The report title goes on the last subplot; set it once, and even
        # when there are no segments at all.
        combined_secondary_ax.set_title(f"{first_name} {last_name} Reports [Confidential]\nReport only to be viewed by SENECA COUNSELLING COMMITTEE")

        # Separate plot for combined secondary emotions
        if overall_secondary_counts:
            sorted_items = sorted(overall_secondary_counts.items())
            sorted_emotions = [name for name, _ in sorted_items]
            sorted_counts = [count for _, count in sorted_items]
            combined_secondary_ax.plot(sorted_emotions, sorted_counts, label='Combined Secondary Emotions', marker='o', linestyle='-', color='green')
            combined_secondary_ax.set_ylim(0, max(sorted_counts) + 1)  # Leave headroom above the tallest point
            combined_secondary_ax.set_xlabel("Short Form Emotion Types")
            combined_secondary_ax.set_ylabel("Counts")
            combined_secondary_ax.legend()

        fig.tight_layout()
        # Write to the path the caller asked for instead of rebuilding it.
        fig.savefig(filename)
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)
    print(f"All graphs saved in one file as {filename}")

# Gradio UI: three identity text fields, an audio input with a button, and
# two outputs (segment transcriptions and the detected emotions as JSON).
with gr.Blocks() as demo:
    with gr.Row():
        first_name = gr.Textbox(label="First Name")
        last_name = gr.Textbox(label="Last Name")
        student_id = gr.Textbox(label="Student ID")
    with gr.Tab("Record Audio"):
        audio_input = gr.Audio(label='Record Audio Input', type="filepath")
        transcribe_button = gr.Button('Transcribe')

    transcript_output = gr.Textbox(label="Transcription of each segment", lines=6)
    emotion_output = gr.Json(label="Detected Primary and Secondary Emotions for each segment")

    # Clicking the button runs the whole transcribe -> classify -> report flow.
    transcribe_button.click(translate_and_classify, inputs=[audio_input, first_name, last_name, student_id], outputs=[transcript_output, emotion_output])

# Launch exactly once, after the Blocks context has closed. The earlier
# version also called demo.launch() inside the `with` block, which started
# the server twice.
# NOTE(review): save_results_to_csv is defined below this call. When run as
# a plain script launch() is expected to block, so that helper would not yet
# exist when the callback fires -- confirm, and consider moving it above.
demo.launch(inbrowser=True)

import pandas as pd


def save_results_to_csv(segment_texts, all_segment_emotions, filename):
    """Write one CSV row per segment: its text and its emotions.

    Args:
        segment_texts: Transcribed text of each segment.
        all_segment_emotions: One dict per segment holding the keys
            ``"primary_emotions"`` and ``"secondary_emotions"``; each value
            is stored in the CSV as its ``str()`` representation.
        filename: Path of the CSV file to write.
    """
    def stringified(field):
        # str() of the requested field for every segment, in order.
        return [str(entry[field]) for entry in all_segment_emotions]

    primary_column = stringified("primary_emotions")
    secondary_column = stringified("secondary_emotions")

    frame = pd.DataFrame({
        "Segment Text": segment_texts,
        "Primary Emotions": primary_column,
        "Secondary Emotions": secondary_column,
    })
    frame.to_csv(filename, index=False)
    print(f"Results saved to {filename}")


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


Rerunning server... use `close()` to stop if you need to change `launch()` parameters.
----

To create a public link, set `share=True` in `launch()`.


Starting transcription and classification...
File received: /private/var/folders/nt/_c8pnwkx3w7f2b_nwjfxcp0m0000gn/T/gradio/91cd0c86d53663b8274df489f2f9bed7232f3f5c/audio.wav
Transcription segment:  Hello, good afternoon. This is my student ID 123.
Transcription segment:  3, 2, 3, 2, 1, 3, 4. Actually, I'm facing issue with my
Transcription segment:  that the problem that is present in my case and I wanted to book a full
Transcription segment:  up appointment, so I'm expecting your call for the rest of the process. Thanks.
Transcription segment:  Thank you.
Results saved to AAA_dsad_123232134.csv
All graphs saved in one file as AAA_dsad_123232134.png
