<a href="https://colab.research.google.com/github/AKesari24/Call-Quality-analzyer/blob/main/call_quality_analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
# Environment setup (IPython shell magics; these lines only work inside a notebook).
# Upgrade pip itself before installing the project dependencies.
!pip install --upgrade pip
# Python packages used below; vosk is pinned to 0.3.45, the others are unpinned.
!pip install vosk==0.3.45 pydub nltk gradio yt-dlp matplotlib

# System packages: ffmpeg is invoked through subprocess in analyze_call.
# NOTE(review): libsndfile1 is not referenced directly in this cell — presumably a
# transitive requirement of one of the audio packages; confirm it is still needed.
!apt-get install -y libsndfile1 ffmpeg

# Imports
import os, re, wave, json, subprocess, tempfile
import matplotlib.pyplot as plt
import gradio as gr
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Fetch the VADER lexicon that SentimentIntensityAnalyzer needs at runtime.
nltk.download("vader_lexicon")

# Ensure Vosk model exists
# The download is skipped when a "model" directory is already present, so
# re-running the cell does not fetch the archive again.
if not os.path.exists("model"):
    # Download the small English model archive, unpack it into the working
    # directory, rename the extracted folder to "model" (the path that
    # analyze_call passes to vosk.Model), and delete the archive.
    !wget -q https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip -O model.zip
    !unzip -q model.zip -d .
    !mv vosk-model-small-en-us-0.15 model
    !rm model.zip

# Main function
# Cache of loaded Vosk models keyed by directory, so the model is read from
# disk once instead of on every request.
_VOSK_MODELS = {}


def _get_vosk_model(model_dir="model"):
    """Return the Vosk model stored in *model_dir*, loading it on first use only."""
    if model_dir not in _VOSK_MODELS:
        _VOSK_MODELS[model_dir] = Model(model_dir)
    return _VOSK_MODELS[model_dir]


def _download_youtube_audio(url, workdir):
    """Download the audio track of *url* into *workdir* as WAV and return its path.

    Raises subprocess.CalledProcessError if yt-dlp exits with an error and
    OSError if the yt-dlp executable cannot be started.
    """
    # A fixed stem plus yt-dlp's own extension field avoids deriving a file
    # name from the (untrusted, arbitrarily shaped) URL.
    template = os.path.join(workdir, "download.%(ext)s")
    subprocess.run(
        # "--" ends option parsing so a URL starting with "-" cannot be
        # interpreted as a yt-dlp flag.
        ["yt-dlp", "-x", "--audio-format", "wav", "-o", template, "--", url],
        check=True,
    )
    return os.path.join(workdir, "download.wav")


def _transcribe(wav_path):
    """Transcribe the PCM WAV file at *wav_path* with Vosk and return the text."""
    pieces = []
    with wave.open(wav_path, "rb") as wf:
        # Use the file's real sample rate rather than assuming 16 kHz.
        rec = KaldiRecognizer(_get_vosk_model(), wf.getframerate())
        rec.SetWords(True)
        while True:
            data = wf.readframes(4000)
            if not data:
                break
            if rec.AcceptWaveform(data):
                pieces.append(json.loads(rec.Result()).get("text", ""))
        # Flush whatever the recognizer is still holding after the last chunk.
        pieces.append(json.loads(rec.FinalResult()).get("text", ""))
    return " ".join(p for p in pieces if p)


def _save_figure(fig):
    """Save *fig* to a new temporary PNG file, close the figure, return the path."""
    # mkstemp creates the file atomically; mktemp only returns a name and is
    # deprecated because another process could claim that name first.
    fd, path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        fig.savefig(path)
    finally:
        plt.close(fig)
    return path


def analyze_call(file, youtube_url):
    """Transcribe a call recording and return simple quality metrics.

    Args:
        file: Uploaded audio from Gradio (an object with a ``name`` path
            attribute, or a plain path string), or None.
        youtube_url: URL whose audio should be downloaded with yt-dlp, or an
            empty value. When both inputs are given the URL takes precedence.

    Returns:
        A 3-tuple ``(markdown_report, pie_chart_path, sentiment_chart_path)``.
        On failure the first element is an error message and the two paths
        are None.

    The speaker split is a heuristic: sentences and 5-second audio chunks are
    assigned alternately to "Speaker A" and "Speaker B"; no diarization is
    performed.
    """
    url = (youtube_url or "").strip()
    if not url and not file:
        return "No input provided", None, None

    # All intermediate audio lives in a per-call directory so concurrent
    # requests cannot overwrite each other and nothing is left behind.
    with tempfile.TemporaryDirectory() as workdir:
        if url:
            try:
                orig_audio = _download_youtube_audio(url, workdir)
            except (subprocess.CalledProcessError, OSError):
                return "Failed to download YouTube audio", None, None
        else:
            # Gradio may hand over a file-like wrapper or a bare path string.
            orig_audio = getattr(file, "name", file)

        # Convert to mono WAV
        wav_file = os.path.join(workdir, "audio.wav")
        try:
            subprocess.run(
                ["ffmpeg", "-y", "-i", orig_audio, "-ar", "16000", "-ac", "1", wav_file],
                check=True,
            )
        except (subprocess.CalledProcessError, OSError):
            return "Failed to convert audio to 16 kHz mono WAV", None, None

        # Transcription with Vosk
        full_text = _transcribe(wav_file)
        # pydub reports the clip length in milliseconds.
        duration_ms = len(AudioSegment.from_wav(wav_file))

    # Number of questions: literal question marks plus interrogative words.
    questions = len(re.findall(r"\?", full_text)) + len(
        re.findall(r"\b(what|why|how|when|where)\b", full_text, re.I)
    )
    # Longest monologue (approx): word count of the longest sentence divided
    # by two, reported as seconds.
    longest_monologue = (
        max(len(s.split()) / 2 for s in full_text.split(".")) if full_text else 0
    )
    # Sentiment
    sentiment = SentimentIntensityAnalyzer().polarity_scores(full_text)
    if sentiment["compound"] > 0.2:
        overall_sent = "positive"
    elif sentiment["compound"] < -0.2:
        overall_sent = "negative"
    else:
        overall_sent = "neutral"

    # Speaker segmentation: alternate sentences between the two speakers.
    chunk_length_ms = 5000  # 5 sec
    speaker_segments = {"Speaker A": 0.0, "Speaker B": 0.0}
    speaker_texts = {"Speaker A": "", "Speaker B": ""}

    sentences = [s.strip() for s in full_text.split(".") if s.strip()]
    for i, sentence in enumerate(sentences):
        speaker = "Speaker A" if i % 2 == 0 else "Speaker B"
        speaker_texts[speaker] += " " + sentence

    # Assign talk-time: alternate fixed-length chunks between the speakers.
    for i, start in enumerate(range(0, duration_ms, chunk_length_ms)):
        end = min(start + chunk_length_ms, duration_ms)
        speaker = "Speaker A" if i % 2 == 0 else "Speaker B"
        speaker_segments[speaker] += (end - start) / 1000

    talk_ratio_display = {k: round(v, 1) for k, v in speaker_segments.items()}

    # Role guessing based on keywords: whoever uses more sales vocabulary is
    # taken to be the sales rep (ties go to Speaker A).
    sales_keywords = ["buy", "price", "offer", "discount", "purchase", "cost"]
    sales_counts = {
        s: sum(text.lower().count(k) for k in sales_keywords)
        for s, text in speaker_texts.items()
    }
    if sales_counts["Speaker A"] >= sales_counts["Speaker B"]:
        sales_rep = "Speaker A"
        customer = "Speaker B"
    else:
        sales_rep = "Speaker B"
        customer = "Speaker A"

    insight = "Try asking more open-ended questions" if questions<3 else "Good engagement with customer questions."

    analysis = f"""
### 📊 Call Analysis
- Talk-time ratio (sec): {talk_ratio_display}
- Questions asked: {questions}
- Longest monologue: {round(longest_monologue,1)} sec
- Sentiment: **{overall_sent}**
- Insight: {insight}

### 🎭 Role Guess
- Sales Rep: {sales_rep}
- Customer: {customer}
"""

    # Talk-time pie
    fig1, ax1 = plt.subplots()
    talk_times = [speaker_segments["Speaker A"], speaker_segments["Speaker B"]]
    if sum(talk_times) > 0:
        ax1.pie(talk_times, labels=["Speaker A", "Speaker B"], autopct='%1.1f%%')
    else:
        # A pie cannot be normalised when every slice is zero (empty audio).
        ax1.text(0.5, 0.5, "No audio", ha="center", va="center")
        ax1.axis("off")
    ax1.set_title("Talk-Time Ratio (sec)")
    pie_path = _save_figure(fig1)

    # Sentiment bar
    fig2, ax2 = plt.subplots()
    ax2.bar(list(sentiment.keys()), list(sentiment.values()), color='skyblue')
    ax2.set_title("Sentiment Breakdown")
    sent_path = _save_figure(fig2)

    return analysis, pie_path, sent_path

# Gradio UI
# Input widgets, listed in the positional order analyze_call(file, youtube_url) expects.
audio_upload = gr.File(label="Upload audio file (optional)")
youtube_link = gr.Textbox(
    label="Or paste YouTube link (optional)",
    placeholder="https://www.youtube.com/watch?v=...",
)

# Output widgets, matching analyze_call's (markdown, pie image, sentiment image) return.
result_widgets = [
    gr.Markdown(label="Analysis Results"),
    gr.Image(label="Talk-Time Pie"),
    gr.Image(label="Sentiment Chart"),
]

demo = gr.Interface(
    fn=analyze_call,
    inputs=[audio_upload, youtube_link],
    outputs=result_widgets,
    title="📞 Call Quality Analyzer",
    description="Upload audio or paste YouTube sales call link. System transcribes, assigns speakers (heuristic + keyword role guesser), and analyzes.",
)

# debug=True keeps the cell running so errors and logs stay visible;
# share=True publishes the app on a temporary public URL.
demo.launch(debug=True, share=True)


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libsndfile1 is already the newest version (1.0.31-2ubuntu0.2).
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://04fe1b781795c47340.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://04fe1b781795c47340.gradio.live




# New Section