In [3]:
import librosa
import numpy as np
import scipy.signal

## Pipeline 1 : Voice & Tone Analysis

input : audio file

output : * Vocal Features (like - pitch, pace,pause)
         * Tone 
         * Negative Behavioral Traits
         * Score

In [4]:


class VoiceAnalyzer:
    def __init__(self, audio_path):
        self.audio_path = audio_path
        try:
            self.y, self.sr = librosa.load(audio_path, sr=None)
        except Exception as e:
            print(f"Error loading audio: {e}")
            self.y, self.sr = None, None

    def analyze_pitch(self):
        """
        Analyzes Fundamental Frequency (F0).
        """
        f0, voiced_flag, voiced_probs = librosa.pyin(
            self.y, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7')
        )
        valid_pitch = f0[~np.isnan(f0)]
        
        if len(valid_pitch) == 0:
            return {"avg_pitch_hz": 0, "pitch_variation": 0, "f0_series": []}

        return {
            "avg_pitch_hz": round(np.mean(valid_pitch), 2),
            "pitch_variation": round(np.std(valid_pitch), 2),
            "f0_series": valid_pitch  # Keep for filler detection
        }

    def analyze_volume(self):
        """
        Analyzes Energy/Volume.
        """
        rms = librosa.feature.rms(y=self.y)[0]
        return {
            "avg_volume": round(float(np.mean(rms)), 4),
            "volume_dynamic_range": round(float(np.max(rms) - np.min(rms)), 4),
            "rms_series": rms
        }

    def analyze_fluency(self, f0_series, rms_series):
        """
        Detects negative behaviors: Hesitations and Monotone "Fillers".
        """
        # 1. Hesitation Index (Silence Ratio)
        non_silent = librosa.effects.split(self.y, top_db=20)
        non_silent_dur = sum((end - start) / self.sr for start, end in non_silent)
        total_dur = librosa.get_duration(y=self.y, sr=self.sr)
        
        hesitation_index = (total_dur - non_silent_dur) / total_dur if total_dur > 0 else 0

        # 2. Potential Filler Words (Heuristic: Long, Flat Pitch, Low Energy)
        # "Umm" is usually >300ms of voiced sound with very low pitch variance.
        # We look for continuous voiced segments with std_dev(pitch) < threshold.
        
        # Note: This is a signal proxy. True filler detection needs ASR.
        # Here we count segments of stable pitch as "monotone holds".
        
        # Calculate pitch derivative (change over time)
        if len(f0_series) > 0:
            pitch_derivative = np.abs(np.diff(f0_series))
            # Count frames where pitch changes very little (flat tone)
            flat_pitch_frames = np.sum(pitch_derivative < 2.0) 
            monotone_ratio = flat_pitch_frames / len(f0_series)
        else:
            monotone_ratio = 0

        return {
            "hesitation_index": round(hesitation_index, 2), # 0.0 - 1.0 (High is bad)
            "monotone_ratio": round(monotone_ratio, 2)      # 0.0 - 1.0 (High is robotic)
        }

    def detect_emotional_tone(self, pitch_data, vol_data, fluency_data):
        """
        Infers emotion based on acoustic combinations.
        """
        pitch_var = pitch_data.get("pitch_variation", 0)
        vol_range = vol_data.get("volume_dynamic_range", 0)
        pace = fluency_data.get("hesitation_index", 0) # High hesitation = Nervous

        # Heuristic Decision Tree
        if pitch_var > 40 and vol_range > 0.05:
            return "Enthusiastic / Excited"
        elif pitch_var < 10 and vol_range < 0.02:
            return "Bored / Monotone"
        elif pace > 0.30: # >30% silence
            return "Nervous / Hesitant"
        else:
            return "Confident / Balanced"

    def calculate_delivery_score(self, pitch_data, vol_data, fluency_data):
        """
        Generates a 0-100 score based on weighted best practices.
        """
        score = 100
        
        # 1. Penalize Monotony (Low Pitch Variation)
        pv = pitch_data['pitch_variation']
        if pv < 15: score -= 20  # Too robotic
        elif pv > 80: score -= 10 # Too erratic
        
        # 2. Penalize Hesitation
        hes = fluency_data['hesitation_index']
        if hes > 0.20: score -= (hes * 100) # Heavy penalty for silence
        
        # 3. Reward Energy (Volume Range)
        vol = vol_data['volume_dynamic_range']
        if vol < 0.02: score -= 15 # Too quiet/flat
        
        return max(0, int(score))

    def run_full_analysis(self):
        if self.y is None: return "Error"
        
        print(f"--- üé§ Analyzing: {self.audio_path} ---")
        
        # Run sub-modules
        pitch = self.analyze_pitch()
        volume = self.analyze_volume()
        fluency = self.analyze_fluency(pitch['f0_series'], volume['rms_series'])
        
        # High-level insights
        emotion = self.detect_emotional_tone(pitch, volume, fluency)
        score = self.calculate_delivery_score(pitch, volume, fluency)
        
        
        
        results = {
            "metrics": {**pitch, **volume, **fluency},
            "emotion": emotion,
            "delivery_score": score
        }

        # Pretty Print
        print("\n--- üìä ANALYSIS RESULTS ---")
        print(f"Detected Emotion: {emotion.upper()}")
        print(f"Delivery Score:   {score}/100")
        print(f"Hesitation Index: {fluency['hesitation_index']} (Lower is better)")
        print(f"Pitch Variation:  {pitch['pitch_variation']} Hz")
        
        return results

# Example Usage
# analyzer = VoiceAnalyzer("my_pitch.wav")
# data = analyzer.run_full_analysis()

analyzer = VoiceAnalyzer("../data/sample_pitch.wav")
data = analyzer.run_full_analysis()

--- üé§ Analyzing: ../data/sample_pitch.wav ---

--- üìä ANALYSIS RESULTS ---
Detected Emotion: CONFIDENT / BALANCED
Delivery Score:   52/100
Hesitation Index: 0.28 (Lower is better)
Pitch Variation:  12.77 Hz


## Pipeline 2 : Content & Business Logic Analysis

input : audio file

output : * Business Logic
         * Transcript

In [5]:
import os
import json
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()
key = os.getenv("OPENAI_API_KEY")

# Initialize client
client = OpenAI(api_key=key)

class ContentAnalyzer:
    def __init__(self, audio_path):
        self.audio_path = audio_path
        self.transcript = ""

    def transcribe_audio(self):
        """
        Step 1: Convert Speech to Text using OpenAI Whisper.
        """
        print("--- 1. Transcribing Audio ---")
        try:
            with open(self.audio_path, "rb") as audio_file:
                transcription = client.audio.transcriptions.create(
                    model="whisper-1", 
                    file=audio_file
                )
            self.transcript = transcription.text
            print(f"Transcript Preview: {self.transcript[:100]}...")
            return self.transcript
        except Exception as e:
            print(f"Transcription Error: {e}")
            self.transcript = ""
            return None

    def analyze_business_logic(self):
        """
        Step 2: Evaluate the Pitch Structure & Business Viability using GPT-4o.
        """
        if not self.transcript:
            return {"error": "No transcript available to analyze."}

        print("--- 2. Analyzing Business Logic ---")
        
        # Updated System Prompt to include Structure Detection & Viability Score
        system_prompt = """
        You are a Venture Capital Analyst screening pitches for Shark Tank. 
        Analyze the provided pitch transcript for BUSINESS LOGIC and STRUCTURE.
        
        TASK 1: Structural Analysis
        Identify the specific sentences or phrases that correspond to these sections:
        - "Hook": The opening statement to grab attention.
        - "Problem": The pain point being solved.
        - "Solution": The product/service description.
        - "Ask": The specific investment request (amount/equity).
        (If a section is missing, return null).

        TASK 2: Scoring (1-10 Scale)
        Evaluate these 5 Key Pillars:
        1. Problem Clarity: Is the pain point clear and urgent?
        2. Solution Viability: Does the product actually solve the problem?
        3. Market Size: Is the addressable market defined?
        4. Business Model: Is it clear how they make money?
        5. The "Ask": Is the valuation realistic?

        TASK 3: Overall Viability Score (0-100)
        Calculate a weighted score based on the 5 pillars above (Ask & Model are weighted highest).
        
        Output must be a valid JSON object with this exact structure:
        {
            "pitch_structure": {
                "hook_segment": "text or null",
                "problem_segment": "text or null",
                "solution_segment": "text or null",
                "ask_segment": "text or null"
            },
            "scores": {"problem": 0, "solution": 0, "market": 0, "model": 0, "ask": 0},
            "viability_score": 0,
            "missing_elements": ["List of critical missing info"],
            "red_flags": ["List of logical fallacies"],
            "summary_critique": "A 2-sentence summary."
        }
        """

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Pitch Transcript: \"{self.transcript}\""}
            ],
            response_format={"type": "json_object"} 
        )

        try:
            analysis_json = json.loads(response.choices[0].message.content)
            return analysis_json
        except json.JSONDecodeError:
            return {"error": "Failed to parse analysis results."}

    def run_full_analysis(self):
        # 1. Transcribe
        self.transcribe_audio()
        
        # 2. Analyze
        analysis_results = self.analyze_business_logic()
        
        # 3. Merge Transcript into Final Result
        # This ensures the frontend has access to the raw text alongside the scores
        final_output = {
            "transcript": self.transcript,
            "analysis": analysis_results
        }
        
        return final_output

# Example Usage:
logic_analyzer = ContentAnalyzer("../data/sample_pitch.wav")
business_insights = logic_analyzer.run_full_analysis()
print(json.dumps(business_insights, indent=2))

--- 1. Transcribing Audio ---
Transcript Preview: Thousands of women are struggling to access critical ob-gyn services due to clinic closures and long...
--- 2. Analyzing Business Logic ---
{
  "transcript": "Thousands of women are struggling to access critical ob-gyn services due to clinic closures and long wait times. Recent closures have left a gap in women's health care, forcing patients to wait months for critical services like ob-gyn checkups, prenatal care and wellness exams. For women in rural areas, the problem is even worse. Many are simply unable to access the care they need. There is an urgent demand for a health care solution that can bridge this gap, particularly as the population continues to grow. Our advanced practice registered nurse led women's health clinic solves this problem by providing high quality affordable care with a fixed payment model, eliminating the need for insurances and surprise bills.",
  "analysis": {
    "pitch_structure": {
      "hook_segment": "

In [6]:
business_insights['analysis']

{'pitch_structure': {'hook_segment': 'Thousands of women are struggling to access critical ob-gyn services due to clinic closures and long wait times.',
  'problem_segment': "Recent closures have left a gap in women's health care, forcing patients to wait months for critical services like ob-gyn checkups, prenatal care and wellness exams.",
  'solution_segment': "Our advanced practice registered nurse led women's health clinic solves this problem by providing high quality affordable care with a fixed payment model, eliminating the need for insurances and surprise bills.",
  'ask_segment': None},
 'scores': {'problem': 9, 'solution': 8, 'market': 7, 'model': 7, 'ask': 0},
 'viability_score': 62,
 'missing_elements': ['Detailed market size analysis',
  'Specific investment request',
  'Revenue projections'],
 'red_flags': ['Lack of financial details or investment request'],
 'summary_critique': "The pitch effectively identifies a clear and urgent problem in women's healthcare access. How

## Pipeline 3 : Content & Business Logic Analysis



In [7]:
# import json
# import os
# from openai import OpenAI
# from dotenv import load_dotenv

# # Load environment variables
# load_dotenv()
# key = os.getenv("OPENAI_API_KEY")
# client = OpenAI(api_key=key)

# ==============================================================================
# CLASS 1: PERSONA SYNTHESIS ENGINE (With Final Verdict)
# ==============================================================================
class PersonaSynthesisEngine:
    def __init__(self, acoustic_full_results, business_full_results):
        """
        acoustic_full_results: Dictionary from VoiceAnalyzer containing:
                               {'metrics': {...}, 'emotion': '...', 'delivery_score': 0-100}
        business_full_results: Dictionary from ContentAnalyzer containing:
                               {'transcript': '...', 'analysis': {'scores', 'viability_score', ...}}
        """
        self.acoustic_data = acoustic_full_results
        self.business_data = business_full_results.get('analysis', {})
        self.transcript = business_full_results.get('transcript', "")
        
        self.interpreted_acoustics = self._interpret_acoustic_data()

    def _interpret_acoustic_data(self):
        """
        Translates raw audio metrics into a narrative profile for the LLM.
        """
        metrics = self.acoustic_data.get('metrics', {})
        emotion = self.acoustic_data.get('emotion', "Unknown")
        score = self.acoustic_data.get('delivery_score', 0)
        
        summary = f"The speaker's emotional tone was detected as '{emotion}'."
        summary += f" They received a Vocal Delivery Score of {score}/100."
        
        # Add nuance based on raw metrics
        pitch_var = metrics.get('pitch_variation', 0)
        hesitation = metrics.get('hesitation_index', 0)
        
        if hesitation > 0.20:
            summary += " There were frequent awkward silences (High Hesitation)."
        if pitch_var < 15:
            summary += " The voice was notably monotone and lacked energy."
            
        return summary

    def _get_persona_prompt(self, persona_type):
        base_instruction = """
        You are a judge on 'Shark Tank'. 
        Synthesize the provided data into a first-person critique. 
        React emotionally to the specific scores provided.
        
        CRITICAL INSTRUCTION:
        End your response with a separate line exactly like this:
        "FINAL RECOMMENDATION: [Invest / Not Invest / Need More Info]"
        """

        if persona_type == "The Royalty":
            return base_instruction + """
            PERSONA: 'The Royalty' (Kevin O'Leary).
            - Focus on 'Viability Score' and 'The Ask'.
            - If Viability < 50, call them a "cockroach" or tell them to stop the madness.
            - If Delivery Score is low, say "You are boring me to death."
            - TONE: Condescending, financially ruthless, impatient.
            """
        elif persona_type == "The Tech Visionary":
            return base_instruction + """
            PERSONA: 'The Tech Visionary' (Mark Cuban).
            - Focus on 'Solution Viability' and 'Problem Clarity'.
            - If 'Emotional Tone' is 'Nervous', tell them to breathe and pitch again.
            - If 'Red Flags' exist, aggressively question their competence.
            - TONE: High-energy, direct, allergic to fluff.
            """
        elif persona_type == "The Brand Guru":
            return base_instruction + """
            PERSONA: 'The Brand Guru' (Lori Greiner).
            - Focus on the 'Hook' and 'Market Size'.
            - If 'Delivery Score' > 80, compliment their passion.
            - If 'Viability' is low, say "I can't get there" gently but firmly.
            - TONE: Empathetic, warm, but intuitively sharp.
            """
        else:
            return base_instruction

    def generate_feedback(self, persona_type="The Royalty"):
        print(f"--- üß† Synthesizing Feedback: {persona_type} ---")
        
        system_prompt = self._get_persona_prompt(persona_type)
        
        # Prepare context
        scores = self.business_data.get('scores', {})
        viability = self.business_data.get('viability_score', 0)
        structure = self.business_data.get('pitch_structure', {})
        red_flags = self.business_data.get('red_flags', [])

        user_context = f"""
        Here is the entrepreneur's performance data:

        === üé§ VOCAL PERFORMANCE ===
        {self.interpreted_acoustics}
        
        === üíº BUSINESS METRICS ===
        - Overall Viability Score: {viability}/100
        - Problem Clarity: {scores.get('problem')}/10
        - Solution Viability: {scores.get('solution')}/10
        - The Ask (Valuation): {scores.get('ask')}/10
        
        === üö© RED FLAGS ===
        {", ".join(red_flags) if red_flags else "None detected."}

        === üìù PITCH STRUCTURE ===
        - Hook: "{structure.get('hook_segment', 'Not found')}"
        - Ask: "{structure.get('ask_segment', 'Not found')}"

        === TRANSCRIPT SNIPPET ===
        "...{self.transcript[:400]}..."

        VERDICT: Are you in or out? Explain why using your unique voice.
        Don't forget the FINAL RECOMMENDATION line at the very end.
        """

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_context}
            ],
            temperature=0.8
        )

        return response.choices[0].message.content

# ==============================================================================
# CLASS 2: SHARK VOICE GENERATOR
# ==============================================================================
class SharkVoiceGenerator:
    def __init__(self, client):
        self.client = client

    def generate_audio(self, text, persona_type, output_filename):
        # Map Personas to OpenAI Voices
        voice_map = {
            "The Royalty": "onyx",       # Deep, authoritative (Kevin)
            "The Tech Visionary": "echo", # Resonant, conversational (Mark)
            "The Brand Guru": "nova"     # Energetic, female tone (Lori)
        }
        
        selected_voice = voice_map.get(persona_type, "alloy")
        
        # We strip the "FINAL RECOMMENDATION" line from the audio 
        # because the Shark usually says "I'm out" in the text anyway, 
        # and reading the label "Final Recommendation" sounds robotic.
        clean_text_for_audio = text.split("FINAL RECOMMENDATION:")[0]

        print(f"   üéôÔ∏è Generating audio for {persona_type} (Voice: {selected_voice})...")

        try:
            response = self.client.audio.speech.create(
                model="tts-1",
                voice=selected_voice,
                input=clean_text_for_audio,
                response_format="wav"
            )
            
            response.stream_to_file(output_filename)
            print(f"      ‚úÖ Saved: {output_filename}")
            return output_filename
            
        except Exception as e:
            print(f"      ‚ùå Error generating audio: {e}")
            return None

# ==============================================================================
# FINAL INTEGRATION & EXECUTION
# ==============================================================================

# NOTE: Ensure 'data' and 'business_insights' are populated before running this.

if 'data' in locals() and 'business_insights' in locals():
    # 1. Initialize Engines
    engine = PersonaSynthesisEngine(data, business_insights)
    voice_gen = SharkVoiceGenerator(client)

    print("\n" + "="*50)
    print("ü¶à FINAL SHARK TANK VERDICT ü¶à")
    print("="*50)

    # Function to print nicely formatted output
    def run_shark_critique(persona, filename):
        full_response = engine.generate_feedback(persona_type=persona)
        
        # Split logic to separate the monologue from the final label
        if "FINAL RECOMMENDATION:" in full_response:
            parts = full_response.split("FINAL RECOMMENDATION:")
            monologue = parts[0].strip()
            verdict = parts[1].strip()
        else:
            monologue = full_response
            verdict = "Unknown"

        # Print Visuals
        print(f"\n{'='*20} {persona.upper()} {'='*20}")
        print(f"üó£Ô∏è \"{monologue}\"")
        print(f"\nüëâ VERDICT: {verdict}")
        print("-" * 60)
        
        # Generate Audio
        voice_gen.generate_audio(full_response, persona, filename)

    # --- RUN ALL SHARKS ---
    run_shark_critique("The Royalty", "Feedback/feedback_kevin.wav")
    run_shark_critique("The Tech Visionary", "Feedback/feedback_mark.wav")
    run_shark_critique("The Brand Guru", "Feedback/feedback_lori.wav")

else:
    print("‚ö†Ô∏è Please run VoiceAnalyzer and ContentAnalyzer first to populate 'data' and 'business_insights'.")


ü¶à FINAL SHARK TANK VERDICT ü¶à
--- üß† Synthesizing Feedback: The Royalty ---

üó£Ô∏è "Oh dear, you're trying to convince me with a violin solo about the struggles of women accessing ob-gyn services, yet somehow you can't manage to deliver this with any semblance of passion or urgency. Your Vocal Delivery Score of 52 is like a sedative; you are boring me to death. You need to wake up and make sure your pitch doesn't sound like it's directed at the walking dead.

Now, let's dissect your business metrics. An Overall Viability Score of 62 is teetering on the edge of acceptability, I'll grant you that. Problem Clarity and Solution Viability are commendably high, but what on earth were you thinking with your Valuation? A big fat zero for The Ask? Are you trying to become the hero of a rags-to-riches story without ever actually asking for the riches? This is the kind of madness that will have you scratching for pennies like a cockroach under the fridge.

And here we have red flags wav

  response.stream_to_file(output_filename)


      ‚úÖ Saved: Feedback/feedback_kevin.wav
--- üß† Synthesizing Feedback: The Tech Visionary ---

üó£Ô∏è "Alright, let's dive in. Your pitch highlighted a significant issue‚Äîwomen's access to crucial ob-gyn services. You nailed the Problem Clarity with a score of 9 out of 10, and your Solution Viability is strong at 8 out of 10. This reflects a deep understanding of the problem you're trying to solve, which is crucial for any startup seeking investment.

However, there are some glaring issues we need to address. First off, your Vocal Delivery Score of 52 out of 100 is concerning. Despite the emotional tone detection as 'Confident / Balanced', the frequent awkward silences and monotone delivery suggest otherwise. High hesitation shows a lack of preparation or perhaps nerves. We can't afford hesitation when pitching a business that aims to transform such a critical area.

But the most crucial red flag here is the absence of financial details or investment requests. This essentially 