In [5]:
pip install librosa

Note: you may need to restart the kernel to use updated packages.


In [1]:
# audio_features.py
import librosa
import numpy as np

def load_audio(path, sr=16000):
    """Read the audio file at `path` through librosa as a mono signal.

    Args:
        path: Location of the audio file, passed straight to `librosa.load`.
        sr: Sampling rate requested from librosa (16 kHz by default).

    Returns:
        A `(samples, sample_rate)` tuple exactly as produced by `librosa.load`.
    """
    samples, sample_rate = librosa.load(path, mono=True, sr=sr)
    return samples, sample_rate

def _silent_run_lengths(silence_mask):
    """Return the length, in frames, of every maximal run of True values in a boolean mask."""
    flags = np.asarray(silence_mask, dtype=bool)
    # Pad with False on both sides so runs touching either end still produce
    # a start edge and an end edge.
    padded = np.concatenate(([False], flags, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])
    # Edges alternate start, end, start, end, ...
    return edges[1::2] - edges[0::2]


def extract_basic_features(y, sr, hop_length=512, frame_length=2048,
                           fmin=50, fmax=400, silence_ratio=0.05):
    """Compute simple energy, pitch and pause statistics for a mono signal.

    Args:
        y: 1-D array of audio samples (as returned by `load_audio`).
        sr: Sampling rate of `y` in Hz.
        hop_length: Samples between successive analysis frames.
        frame_length: Samples per analysis frame.
        fmin: Lower bound (Hz) of the pitch search passed to `librosa.yin`.
        fmax: Upper bound (Hz) of the pitch search passed to `librosa.yin`.
        silence_ratio: A frame counts as silent when its mean squared
            amplitude is below `silence_ratio` times the loudest frame's.

    Returns:
        dict with float values for "rms_mean", "rms_std", "f0_mean",
        "f0_std", "f0_min", "f0_max", "mean_pause", "max_pause" (pause
        values in seconds) and an int "num_pauses".
    """
    # RMS energy
    rms = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)[0]
    rms_mean = float(np.mean(rms))
    rms_std = float(np.std(rms))

    # Pitch estimation using librosa.yin
    f0 = librosa.yin(y, fmin=fmin, fmax=fmax, sr=sr,
                     frame_length=frame_length, hop_length=hop_length)
    # NOTE(review): librosa.yin appears to return an estimate for every frame
    # (unlike librosa.pyin, which marks unvoiced frames with NaN), so this
    # filter is most likely a defensive no-op. Confirm, and consider pyin if
    # voiced-only statistics are required.
    f0_valid = f0[~np.isnan(f0)]
    if len(f0_valid) > 0:
        f0_mean = float(np.mean(f0_valid))
        f0_std = float(np.std(f0_valid))
        f0_min = float(np.min(f0_valid))
        f0_max = float(np.max(f0_valid))
    else:
        f0_mean = f0_std = f0_min = f0_max = 0.0

    # Silence/pause detection (simple energy threshold relative to the loudest frame)
    if len(y) < frame_length:
        # librosa.util.frame rejects inputs shorter than one frame; treat the
        # whole clip as a single frame instead of raising.
        frames = np.asarray(y).reshape(-1, 1)
    else:
        frames = librosa.util.frame(y, frame_length=frame_length, hop_length=hop_length)
    frame_energies = np.mean(frames**2, axis=0)
    # Strict comparison: a signal whose energy is zero everywhere yields no pauses.
    silence_mask = frame_energies < (silence_ratio * np.max(frame_energies))

    # Convert runs of consecutive silent frames into pause durations (seconds).
    seconds_per_hop = hop_length / sr
    pauses = _silent_run_lengths(silence_mask) * seconds_per_hop
    num_pauses = int(pauses.size)
    mean_pause = float(np.mean(pauses)) if num_pauses else 0.0
    max_pause = float(np.max(pauses)) if num_pauses else 0.0

    return {
        "rms_mean": rms_mean, "rms_std": rms_std,
        "f0_mean": f0_mean, "f0_std": f0_std, "f0_min": f0_min, "f0_max": f0_max,
        "num_pauses": num_pauses, "mean_pause": mean_pause, "max_pause": max_pause
    }


In [2]:
pip install whisper

Note: you may need to restart the kernel to use updated packages.


In [3]:
# `nt` is a built-in CPython module on Windows, not a PyPI distribution, so there is nothing to install.


In [4]:
# `re` ships with the Python standard library, so there is nothing to install.


In [5]:
# transcription_and_fillers.py
import whisper
import re

# Whisper checkpoint to load; change this to pick a different model size.
WHISPER_MODEL_SIZE = "small"
model = whisper.load_model(WHISPER_MODEL_SIZE)

def transcribe(path, language=None):
    """Run the module-level Whisper `model` on the audio file at `path`.

    Args:
        path: Audio file handed to `model.transcribe`.
        language: Forwarded unchanged to `model.transcribe` (None by default).

    Returns:
        A `(transcript, result)` tuple: the whitespace-stripped "text" entry
        and the full result object returned by the model.
    """
    raw_result = model.transcribe(path, language=language)
    # Per-segment timestamps stay available to callers under raw_result["segments"].
    return raw_result["text"].strip(), raw_result

# Filler words/phrases counted by default; all lower-case because matching is
# performed on the lower-cased transcript.
FILLERS = ["um", "uh", "you know", "like", "so", "actually", "basically", "right", "i mean"]

def detect_fillers(transcript, fillers=None):
    """Count whole-word occurrences of filler words in a transcript.

    Args:
        transcript: Text to scan. `None` or an empty string yields no matches
            instead of raising.
        fillers: Optional iterable of lower-case words/phrases to look for.
            Defaults to the module-level `FILLERS` list.

    Returns:
        A `(counts, total)` tuple. `counts` maps each filler that occurred at
        least once to its number of occurrences, in the order the fillers
        were listed; `total` is the sum of those counts.
    """
    if not transcript:
        return {}, 0

    vocabulary = FILLERS if fillers is None else fillers
    text = transcript.lower()
    counts = {}
    for phrase in vocabulary:
        # Word boundaries stop "um" from matching inside "summary" or "umbrella".
        pattern = r"\b" + re.escape(phrase) + r"\b"
        hits = len(re.findall(pattern, text))
        if hits > 0:
            counts[phrase] = hits
    return counts, sum(counts.values())


TypeError: argument of type 'NoneType' is not iterable

In [None]:
def speech_rate_wpm(transcript, speaking_seconds):
    """Return the speaking rate in words per minute.

    Words are the whitespace-separated tokens of `transcript`. A
    non-positive `speaking_seconds` gives 0.0 rather than dividing.
    """
    word_count = len(transcript.split())
    return 0.0 if speaking_seconds <= 0 else (word_count / speaking_seconds) * 60.0

def hesitation_index(num_pauses, mean_pause, total_seconds):
    """Combine pause-time share and pause frequency into one score.

    The result is the sum of two terms:
      * total pause time (`num_pauses * mean_pause`) divided by `total_seconds`;
      * `num_pauses` divided by the larger of 1 and half of `total_seconds`.

    A non-positive `total_seconds` gives 0.0.
    """
    if total_seconds <= 0:
        return 0.0
    paused_fraction = (num_pauses * mean_pause) / total_seconds
    pause_rate = num_pauses / max(1, total_seconds / 2.0)
    return paused_fraction + pause_rate


In [None]:
from transformers import pipeline
# NOTE: dedicated speech-emotion models exist; when none is available, fall back to
# classifying the emotion of the transcript text, as done here.
# NOTE(review): recent transformers releases appear to deprecate `return_all_scores`
# in favour of `top_k=None`; that switch changes the nesting of the returned list,
# so confirm downstream consumers before migrating.
text_emotion = pipeline(
    task="text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    return_all_scores=True,
)

def emotion_from_text(transcript):
    """Classify the emotion of a transcript with the `text_emotion` pipeline.

    Only the first 1000 characters are sent to the classifier. The raw
    pipeline output is returned unchanged; its labels can be mapped by the
    caller (e.g. 'joy' -> enthusiasm, 'fear' -> nervous).
    """
    excerpt = transcript[:1000]  # cap the input length
    return text_emotion(excerpt)


In [None]:
from sentence_transformers import SentenceTransformer, util
# Sentence-embedding checkpoint used for the semantic coverage scores.
SBERT_MODEL_NAME = "all-MiniLM-L6-v2"
sbert = SentenceTransformer(SBERT_MODEL_NAME)

# Reference descriptions, one per business category, against which a pitch
# transcript is compared; each text represents the ideal content for that category.
archetypes = dict(
    problem_clarity="A clear statement of the user's problem, who is affected, and the pain points.",
    product_differentiation="Why this product is different from current solutions and the unique value.",
    business_model="How the product makes money, unit economics, pricing, or revenue streams.",
    market_opportunity="Size of the market, target segments, and growth potential.",
    competition_awareness="Mention of competitors and how the offering compares.",
)

def semantic_coverage_score(transcript, archetype_key):
    """Score how closely a transcript matches one archetype description.

    The whole transcript and the archetype text selected by `archetype_key`
    are each embedded with `sbert`; their cosine similarity is then rescaled
    from the -1..1 range to 0..1 and clamped to that interval.

    A finer-grained alternative is to split the transcript into sentences
    and take the maximum similarity.
    """
    transcript_embedding = sbert.encode(transcript, convert_to_tensor=True)
    archetype_embedding = sbert.encode(archetypes[archetype_key], convert_to_tensor=True)
    similarity = util.cos_sim(transcript_embedding, archetype_embedding).item()

    rescaled = (similarity + 1) / 2.0
    capped = min(1.0, rescaled)
    return max(0.0, capped)


In [None]:
You are "The Finance Shark" — a blunt, numbers-first investor.
Input:
- delivery_score: {delivery_score}
- business_score: {business_score}
- transcript: {transcript}
- detected_strengths: {strengths}
- detected_weaknesses: {weaknesses}

Task: In ~200-300 words, provide:
1) One-line verdict (Invest / Not Invest / Need More Info) with justification.
2) 2 short praises (what was good).
3) 3 detailed criticisms focusing on unit economics, margins, assumptions, and actionable requests (e.g., show CAC, LTV, payback).
4) A concrete final recommendation and next step.

Write in a direct, slightly skeptical voice typical of a finance investor.


In [None]:
{
  "delivery_score": 68.3,
  "business_score": 72.5,
  "pipeline1_findings": {
     "f0_mean": 145.2, "f0_std": 12.1, "num_pauses": 8, "mean_pause": 0.45,
     "fillers": {"um": 3, "like":2}, "wpm": 135
  },
  "pipeline2_findings": {
     "structure": ["Hook","Problem","Solution","Ask"],
     "problem_clarity": 7.5,
     "business_model": 6.8
  },
  "shark_panel": {
     "Visionary": "text...",
     "Finance Shark": "text...",
     "Customer Advocate":"text...",
     "Skeptic":"text..."
  },
  "final_recommendation": "Need More Info"
}
