<a href="https://colab.research.google.com/github/Anonyious/Voice-Cognition-Analysis-Final/blob/main/Voice%20Cognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install numpy numba
!pip install git+https://github.com/openai/whisper.git --no-deps
!pip install gradio librosa matplotlib fpdf


Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-j2tics4p
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-j2tics4p
  Resolved https://github.com/openai/whisper.git to commit 517a43ecd132a2089d85f4ebc044728a71d49f6e
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: openai-whisper
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
  Created wheel for openai-whisper: filename=openai_whisper-20240930-py3-none-any.whl size=803707 sha256=61c2b5805c9184c7bf4a579471fd5de2645cf99b1e0bfd41cf2ee2e459bfd38e
  Stored in directory: /tmp/pip-ephem-wheel-cache-6avqdep6/wheels/1f/1d/98/9583695e6695a6ac0ad42d87511097dce5ba486647dbfecb0e
Successfully built openai-whisper
Installing collec

In [3]:
!pip install tiktoken

import gradio as gr
import whisper
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fpdf import FPDF
import os
import tempfile




Collecting tiktoken
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/1.2 MB[0m [31m9.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.9.0


# NOTE: this cell used to repeat the model-loading cell that follows verbatim.
# Loading the Whisper weights twice only slows down "Run All", so `model` and
# `analysis_data` are now defined once, in the next cell.



In [5]:


# Session-wide store: one metrics dict is appended here per analysed recording,
# and the CSV / PDF / plot helpers all read from it.
analysis_data = []

# Whisper "base" checkpoint, shared by every transcription request.
model = whisper.load_model("base")


In [6]:
def analyze_audio(file):
    """Transcribe one recording and compute its basic acoustic metrics.

    Parameters
    ----------
    file : str or None
        Path of the audio file to analyse. ``None`` (or an empty string) means
        that no audio was supplied.

    Returns
    -------
    tuple[str, pandas.DataFrame]
        The transcript text and a one-row frame with the columns
        ``Filename``, ``Duration (s)``, ``Loudness (RMS)``, ``Avg Pitch (Hz)``
        and ``Transcript``. When there is no usable audio, a short explanatory
        message and an empty frame with the same columns are returned instead.

    Side effects
    ------------
    On success the metrics dict is appended to the module-level
    ``analysis_data`` list. Nothing is recorded when the input is missing or
    empty.
    """
    columns = ["Filename", "Duration (s)", "Loudness (RMS)", "Avg Pitch (Hz)", "Transcript"]

    # Clicking "Analyze" without uploading/recording hands us no path at all;
    # answer with a message instead of crashing inside librosa.
    if not file:
        return "No audio provided. Please upload or record a clip first.", pd.DataFrame(columns=columns)

    # Load audio at a 16 kHz sampling rate.
    y, sr = librosa.load(file, sr=16000)
    if y.size == 0:
        # A zero-length clip would turn the means below into NaN.
        return "The audio clip is empty. Please try another recording.", pd.DataFrame(columns=columns)

    duration = librosa.get_duration(y=y, sr=sr)
    # Cast NumPy scalars to plain floats so the stored records hold ordinary
    # Python numbers when they are later written to CSV / PDF.
    rms = float(np.sqrt(np.mean(y**2)))
    # Fundamental-frequency estimate per frame, searched between 50 and 300 Hz.
    pitch = librosa.yin(y, fmin=50, fmax=300, sr=sr)
    avg_pitch = float(np.mean(pitch))

    # Transcription
    result = model.transcribe(file)
    transcript = result["text"]

    # Save data
    metrics = {
        "Filename": os.path.basename(file),
        "Duration (s)": round(float(duration), 2),
        "Loudness (RMS)": round(rms, 4),
        "Avg Pitch (Hz)": round(avg_pitch, 2),
        "Transcript": transcript.strip()
    }
    analysis_data.append(metrics)

    return transcript, pd.DataFrame([metrics])


In [7]:
def download_csv():
    """Write every recorded analysis to a CSV file and return the file's path.

    The rows come from the module-level ``analysis_data`` list. The file is
    always named ``voice_analysis_data.csv`` and is placed in the platform's
    temporary directory (instead of a hard-coded ``/tmp``), so each call
    overwrites the previous export.

    Returns
    -------
    str
        Path of the CSV file that was written.
    """
    path = os.path.join(tempfile.gettempdir(), "voice_analysis_data.csv")
    # index=False: the positional row index carries no information.
    pd.DataFrame(analysis_data).to_csv(path, index=False)
    return path

def generate_report():
    """Render every recorded analysis into a PDF report and return its path.

    One section is written per entry of the module-level ``analysis_data``
    list (file name, duration, loudness, average pitch, transcript). When
    nothing has been analysed yet the report contains a single notice line
    instead of a blank page.

    Returns
    -------
    str
        Path of the generated ``voice_report.pdf`` inside the platform's
        temporary directory.
    """

    def _latin1(text):
        # The built-in "Arial" core font can only encode Latin-1. Transcripts
        # may contain other characters (curly quotes, non-Latin scripts),
        # which would otherwise abort PDF generation with an encoding error.
        # Unsupported characters are replaced with "?".
        return str(text).encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    if not analysis_data:
        pdf.cell(200, 10, txt="No recordings have been analysed yet.", ln=True)

    for row in analysis_data:
        pdf.cell(200, 10, txt=_latin1(f"File: {row['Filename']}"), ln=True)
        pdf.cell(200, 10, txt=_latin1(f"Duration: {row['Duration (s)']}s"), ln=True)
        pdf.cell(200, 10, txt=_latin1(f"Loudness: {row['Loudness (RMS)']}"), ln=True)
        pdf.cell(200, 10, txt=_latin1(f"Avg Pitch: {row['Avg Pitch (Hz)']} Hz"), ln=True)
        # multi_cell wraps long transcripts across lines.
        pdf.multi_cell(0, 10, txt=_latin1(f"Transcript: {row['Transcript']}"))
        pdf.ln(10)  # vertical gap between recordings

    report_path = os.path.join(tempfile.gettempdir(), "voice_report.pdf")
    pdf.output(report_path)
    return report_path

def plot_metrics():
    """Draw per-file bar charts of duration and average pitch.

    Reads the module-level ``analysis_data`` list and renders two side-by-side
    bar charts (one bar per analysed file) into a PNG file.

    Returns
    -------
    str or None
        Path of the saved PNG, or ``None`` when nothing has been analysed yet.
    """
    if not analysis_data:
        return None

    df = pd.DataFrame(analysis_data)

    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    try:
        df.plot(x="Filename", y="Duration (s)", kind="bar", ax=ax[0], legend=False, color='skyblue')
        ax[0].set_title("Audio Duration")
        ax[0].set_ylabel("Duration (s)")

        df.plot(x="Filename", y="Avg Pitch (Hz)", kind="bar", ax=ax[1], legend=False, color='orange')
        ax[1].set_title("Average Pitch")
        ax[1].set_ylabel("Avg Pitch (Hz)")

        fig.tight_layout()

        # Reserve a unique .png path. The handle is closed right away so that
        # no file descriptor is leaked; delete=False keeps the file on disk
        # for the caller.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
            image_path = temp_file.name
        fig.savefig(image_path)
    finally:
        # Every click creates a new figure; close it so figures do not pile up
        # in memory for the lifetime of the app.
        plt.close(fig)

    return image_path

# Build the two-tab Gradio interface. Components are created in the order
# they appear below, so do not reorder these statements casually.
with gr.Blocks() as app:
    gr.Markdown("## 🎙️ Voice Analysis App")

    # Tab 1: analyse a single recording and export the accumulated results.
    with gr.Tab("Analyze"):
        # type="filepath": the callback receives a path string for the audio.
        audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
        transcribed = gr.Textbox(label="Transcript")
        # Headers mirror the keys of the metrics dict built by analyze_audio.
        metrics = gr.Dataframe(headers=["Filename", "Duration (s)", "Loudness (RMS)", "Avg Pitch (Hz)", "Transcript"])
        analyze_btn = gr.Button("Analyze Audio")
        download_btn = gr.Button("⬇️ Download CSV")
        report_btn = gr.Button("📄 Generate PDF Report")
        csv_out = gr.File(label="CSV File")
        pdf_out = gr.File(label="PDF Report")

        # Wire each button to its callback; analyze_audio fills both the
        # transcript box and the metrics table.
        analyze_btn.click(analyze_audio, inputs=audio_input, outputs=[transcribed, metrics])
        download_btn.click(download_csv, outputs=csv_out)
        report_btn.click(generate_report, outputs=pdf_out)

    # Tab 2: compare all recordings analysed so far in this session.
    with gr.Tab("Compare Metrics"):
        compare_btn = gr.Button("📊 Show Comparison Plots")
        image_output = gr.Image()
        # plot_metrics returns a PNG path, or None when nothing was analysed.
        compare_btn.click(plot_metrics, outputs=image_output)

# Start the app; the captured output below shows that in Colab this enables a
# public share link automatically.
app.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://615204d545f7631b95.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


