<a href="https://colab.research.google.com/github/AmishaSingh0408/videonotetaker/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook shell escape (not plain Python): install the third-party packages
# used by the cells below, with pip's quiet flag (-q).
!pip install -q yt-dlp openai-whisper transformers accelerate bitsandbytes

# NOTE(review): this message is unconditional; nothing here checks whether the
# install command above actually succeeded.
print("All dependencies installed!")

In [None]:
import subprocess

def download_audio(video_url: str) -> str:
    """Download the audio track of a video as ``audio.mp3`` using yt-dlp.

    Args:
        video_url: URL of the video to fetch; passed to yt-dlp as its only
            positional argument.

    Returns:
        Path of the extracted MP3 file (always ``"audio.mp3"``).

    Raises:
        subprocess.CalledProcessError: If yt-dlp exits with a non-zero
            status. Its captured stderr is printed before re-raising.
        FileNotFoundError: If the ``yt-dlp`` executable cannot be found.
    """
    output_path = "audio.mp3"
    print(" Downloading audio...")

    cmd = [
        "yt-dlp",
        "-x",
        "--audio-format", "mp3",
        # The output name is fixed, so without this a second run with a
        # different URL would reuse the audio.mp3 left by the previous run.
        "--force-overwrites",
        "-o", output_path,
        # "--" ends option parsing so a URL beginning with "-" cannot be
        # interpreted as a yt-dlp flag.
        "--",
        video_url,
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except subprocess.CalledProcessError as err:
        # Output is captured, so without this the reason for the failure
        # (bad URL, network error, missing ffmpeg, ...) would be invisible.
        if err.stderr:
            print(err.stderr.decode("utf-8", errors="replace").strip())
        raise

    print(" Downloaded!\n")
    return output_path

print(" Downloader ready")

In [None]:
import whisper

def transcribe_audio(audio_path: str) -> dict:
    """Transcribe an audio file with the Whisper ``base`` model.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.

    Returns:
        The result returned by ``model.transcribe``. Callers in this file
        read its ``"text"`` and ``"segments"`` entries.
    """
    print(" Transcribing...")

    model = whisper.load_model("base")
    result = model.transcribe(audio_path, verbose=False)

    # A transcription may come back with no segments (e.g. audio without
    # speech); indexing [-1] on an empty list would raise IndexError and hide
    # the result from the caller, so report 0s instead.
    segments = result.get("segments") or []
    duration = segments[-1]["end"] if segments else 0
    print(f" Transcribed {duration:.0f}s of audio\n")
    return result

print(" Whisper ready")

In [None]:
from typing import List, Dict

def format_timestamp(start: float, end: float) -> str:
    """Render a time range as ``"M:SS - M:SS"``.

    Each side switches to ``"H:MM:SS"`` once it reaches a full hour.

    Args:
        start: Range start in seconds.
        end: Range end in seconds.

    Returns:
        The two clock strings joined by ``" - "``. Fractions of a second are
        truncated, not rounded.
    """
    def sec_to_time(sec):
        m, s = divmod(int(sec), 60)
        h, m = divmod(m, 60)
        return f"{h}:{m:02d}:{s:02d}" if h else f"{m}:{s:02d}"
    return f"{sec_to_time(start)} - {sec_to_time(end)}"


def _build_chunk(segs: List[Dict]) -> Dict:
    """Collapse a non-empty run of segments into one chunk record."""
    start, end = segs[0]["start"], segs[-1]["end"]
    return {
        "text": " ".join(seg["text"] for seg in segs).strip(),
        "start": start,
        "end": end,
        "timestamp": format_timestamp(start, end),
    }


def chunk_transcript(segments: List[Dict], chunk_duration: int = 180) -> List[Dict]:
    """Group transcript segments into chunks of roughly ``chunk_duration`` seconds.

    A chunk is closed as soon as adding the next segment would make it span
    more than ``chunk_duration`` seconds, measured from the start of the
    chunk's first segment. Segments are never split.

    Args:
        segments: Dicts with ``"start"``, ``"end"`` (seconds) and ``"text"``
            keys, in playback order.
        chunk_duration: Target chunk length in seconds (default 180, i.e.
            three minutes).

    Returns:
        A list of dicts with ``"text"``, ``"start"``, ``"end"`` and
        ``"timestamp"`` keys; empty when ``segments`` is empty.
    """
    print(f"  Chunking (every {chunk_duration}s)...")

    chunks = []
    # Segments gathered for the chunk being built. The chunk start is always
    # read from pending[0]; a truthiness test on a stored start value would
    # misread a legitimate start of 0.0 as "not set yet".
    pending = []

    for seg in segments:
        if pending and seg["end"] - pending[0]["start"] > chunk_duration:
            chunks.append(_build_chunk(pending))
            pending = []
        pending.append(seg)

    if pending:
        chunks.append(_build_chunk(pending))

    print(f" Created {len(chunks)} chunks\n")
    return chunks

print(" Chunker ready")

In [None]:

#  LOAD FREE LLM - Flan-T5 (No authentication needed)


from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

print(" Loading summarization model...")

model_name = "google/flan-t5-base"

# Half precision is requested only when a CUDA device is present; on a
# CPU-only runtime the model is loaded in float32 instead of forcing float16
# onto hardware it is not intended for.
model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# `tokenizer` and `model` are module-level names used by generate_summary.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=model_dtype,
    device_map="auto",
)

print(" Model loaded!\n")


def generate_summary(prompt: str, max_length: int = 300) -> str:
    """Generate text from ``prompt`` with the module-level seq2seq model.

    Args:
        prompt: Input text. It is tokenized with truncation at 512 tokens,
            so anything beyond that limit is not seen by the model.
        max_length: Length limit forwarded to ``model.generate``. Defaults
            to 300, the value that was previously hard-coded, so existing
            one-argument calls behave as before.

    Returns:
        The first generated sequence, decoded with special tokens removed.
        Sampling is enabled, so repeated calls may return different text.
    """
    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).to(model.device)

    outputs = model.generate(
        **inputs,
        max_length=max_length,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print(" Generator ready")

In [None]:

#  GENERATE DETAILED NOTES WITH LLM


def _extract_bullets(section_text: str) -> List[str]:
    """Return the non-empty ``-`` bullet lines of a section, marker removed."""
    bullets = []
    for line in section_text.split('\n'):
        stripped = line.strip()
        if not stripped.startswith('-'):
            continue
        # Remove only the leading marker; stripping both ends would also eat
        # trailing hyphens that belong to the text itself.
        item = stripped.lstrip('- ')
        if item:
            bullets.append(item)
    return bullets


def analyze_chunk(chunk_text: str, timestamp: str) -> Dict:
    """Ask the LLM for structured notes on one transcript chunk and parse them.

    Args:
        chunk_text: Transcript text of the chunk.
        timestamp: Human-readable time range of the chunk; embedded in the
            prompt and copied into the result.

    Returns:
        A dict with ``"timestamp"``, ``"summary"`` (str) and the lists
        ``"key_points"``, ``"important_facts"`` and ``"action_items"``.
        When the response contains none of the expected section headers,
        the whole response is returned as the summary instead of being
        discarded.
    """
    import re

    # NOTE(review): generate_summary truncates its input to 512 tokens and the
    # formatting instructions come after the transcript, so a long chunk may
    # push them out of the model's view - consider placing the transcript last.
    prompt = f"""Analyze this video transcript segment and create detailed study notes.

Transcript ({timestamp}):
{chunk_text}

Create structured notes in this exact format:

SUMMARY:
[Write a 2-3 sentence summary of what this segment covers]

KEY POINTS:
- [First main point or concept]
- [Second main point or concept]
- [Third main point or concept]

IMPORTANT FACTS:
- [Any specific facts, numbers, dates, names mentioned]
- [Another important fact]

ACTION ITEMS:
- [Any tasks, recommendations, or things to do]
- [Or write "None" if there are no action items]

Be specific and factual. Extract actual information from the transcript."""

    response = generate_summary(prompt, max_length=600)

    result = {
        "timestamp": timestamp,
        "summary": "",
        "key_points": [],
        "important_facts": [],
        "action_items": []
    }

    # Each section runs from its header to the next known header (or the end
    # of the response); re.DOTALL lets a section span several lines.
    section_patterns = {
        "summary": r'SUMMARY:\s*(.+?)(?=KEY POINTS:|IMPORTANT FACTS:|ACTION ITEMS:|$)',
        "key_points": r'KEY POINTS:\s*(.+?)(?=IMPORTANT FACTS:|ACTION ITEMS:|$)',
        "important_facts": r'IMPORTANT FACTS:\s*(.+?)(?=ACTION ITEMS:|$)',
        "action_items": r'ACTION ITEMS:\s*(.+?)$',
    }

    found_section = False
    for key, pattern in section_patterns.items():
        match = re.search(pattern, response, re.DOTALL)
        if not match:
            continue
        found_section = True
        body = match.group(1)
        result[key] = body.strip() if key == "summary" else _extract_bullets(body)

    if not found_section:
        # The model ignored the requested layout; keep what it wrote rather
        # than returning an entirely empty record for this chunk.
        result["summary"] = response.strip()

    return result


def process_all_chunks(chunks: List[Dict]) -> List[Dict]:
    """Run the LLM analysis over every chunk, in order.

    Progress is printed before each chunk is analyzed. Returns one
    ``analyze_chunk`` result per input chunk, in the same order.
    """
    total = len(chunks)
    print(f"Generating detailed notes for {total} chunks...")
    print("   (This will take a few minutes)\n")

    def _analyze(position, item):
        # Announce the chunk first so the progress line appears while the
        # (slow) analysis is still running.
        print(f"   [{position}/{total}] Processing {item['timestamp']}...")
        return analyze_chunk(item["text"], item["timestamp"])

    analyzed = [_analyze(position, item) for position, item in enumerate(chunks, start=1)]

    print("\n All chunks analyzed!\n")
    return analyzed

print(" Analyzer ready")

In [None]:

#  GENERATE DETAILED MARKDOWN NOTES


def generate_detailed_notes(results: List[Dict], video_url: str, full_transcript: str) -> str:
    """Render the per-chunk analysis as one markdown document.

    The document has four parts: an executive summary (the first three
    non-empty chunk summaries), the collected action items, the detailed
    notes for every chunk, and the full transcript in a fenced block.

    Args:
        results: Per-chunk dicts with ``"timestamp"``, ``"summary"``,
            ``"key_points"``, ``"important_facts"`` and ``"action_items"``.
        video_url: Source URL shown at the top of the notes.
        full_transcript: Complete transcript text appended at the end.

    Returns:
        The markdown text.
    """
    placeholders = {"none", "no action items"}

    def is_placeholder(action: str) -> bool:
        # The model may answer "no action items" as `"None"`, `None.`,
        # `[None]` or an empty bullet; compare with the surrounding quotes,
        # brackets and punctuation removed so none of those is listed.
        normalized = action.strip(' \t"\'.[]*').lower()
        return not normalized or normalized in placeholders

    parts = [
        "#  Video Study Notes\n\n",
        f"**Source:** {video_url}\n\n",
        "---\n\n",
        "##  Executive Summary\n\n",
    ]
    summaries = [r["summary"] for r in results if r["summary"]]
    parts.append(" ".join(summaries[:3]))
    parts.append("\n\n---\n\n")

    parts.append("##  Action Items\n\n")
    actions = [
        f"- **[{r['timestamp']}]** {action}"
        for r in results
        for action in r["action_items"]
        if not is_placeholder(action)
    ]
    parts.append("\n".join(actions) if actions else "*No specific action items mentioned*")
    parts.append("\n\n---\n\n")

    parts.append("##  Detailed Notes by Timestamp\n\n")
    bullet_sections = (
        ("Key Concepts", "key_points"),
        ("Important Details", "important_facts"),
    )
    for r in results:
        parts.append(f"###  {r['timestamp']}\n\n")
        if r["summary"]:
            parts.append(f"**Overview:** {r['summary']}\n\n")
        for heading, key in bullet_sections:
            if r[key]:
                parts.append(f"**{heading}:**\n")
                parts.extend(f"- {item}\n" for item in r[key])
                parts.append("\n")
        parts.append("---\n\n")

    parts.append("##  Full Transcript\n\n")
    parts.append(f"```\n{full_transcript}\n```\n")

    return "".join(parts)

print(" Note generator ready")

In [None]:

#  RUN THE FULL PIPELINE


def process_video(video_url: str):
    """Run the whole pipeline for one video and save the notes.

    Steps: download the audio, transcribe it, chunk the transcript into
    180-second pieces, analyze each chunk, render the markdown, write it to
    ``detailed_notes.md`` and print a preview of the first 2000 characters.
    When running inside Google Colab, a browser download of the file is
    also triggered.

    Args:
        video_url: URL of the video to process.

    Returns:
        The generated markdown notes.
    """
    banner = "=" * 60
    print(banner)
    print(" DETAILED VIDEO NOTE TAKER")
    print(banner)
    print()

    audio_path = download_audio(video_url)

    transcript = transcribe_audio(audio_path)
    full_text = transcript["text"]

    chunks = chunk_transcript(transcript["segments"], chunk_duration=180)

    results = process_all_chunks(chunks)

    notes = generate_detailed_notes(results, video_url, full_text)

    with open("detailed_notes.md", "w", encoding="utf-8") as f:
        f.write(notes)

    print(banner)
    print(" COMPLETE!")
    print(banner)

    print("\n Preview:\n")
    print(notes[:2000])

    # The browser-download helper only exists inside Google Colab. The notes
    # are already on disk at this point, so outside Colab skip the download
    # instead of failing after all the work is done and losing the return.
    try:
        from google.colab import files
    except ImportError:
        print("\n Notes saved to detailed_notes.md")
    else:
        files.download("detailed_notes.md")

    return notes

print(" Pipeline ready!")

In [None]:
# Ask for the video to process; surrounding whitespace is dropped.
video_url = input("Enter YouTube URL: ").strip()
if not video_url:
    # Fail here with a clear message rather than letting the downloader
    # fail later on an empty argument.
    raise ValueError("No URL entered; please provide a YouTube URL.")
notes = process_video(video_url)