In [None]:
!pip install openai google-generativeai faiss-cpu sentence-transformers python-dotenv \
            chromadb fastapi uvicorn aiohttp pydantic openvino optimum[openvino] \
            transformers nltk spacy diffusers llm-guard \
             yt-dlp huggingface_hub ffmpeg-python

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting chromadb
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting openvino
  Downloading openvino-2025.2.0-19140-cp311-cp311-manylinux2014_x86_64.whl.metadata (12 kB)
Collecting llm-guard
  Downloading llm_guard-0.3.16-py3-none-any.whl.metadata (10 kB)
Collecting yt-dlp
  Downloading yt_dlp-2025.7.21-py3-none-any.whl.metadata (175 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.4/175.4 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting optimum[openvino]
  Downloading optimum-1.26.1-py3-none-any.whl.metadata (16 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading 

In [None]:
# ✅ COMPLETE LECTURE PROCESSING PIPELINE
# Integrates: Audio Processing + Transcription + Topic Identification + Notes Generation

import os, re, tempfile, subprocess, torch, torchaudio
from pathlib import Path
from transformers import AutoProcessor, AutoTokenizer
from optimum.intel.openvino import OVModelForSeq2SeqLM, OVModelForSpeechSeq2Seq
import yt_dlp
import warnings
from transformers import WhisperProcessor, WhisperTokenizer, WhisperFeatureExtractor, WhisperTokenizerFast
from openai import OpenAI
import tiktoken
import time

# ✅ DIRECTORIES
# Working folders used by the pipeline; each one is created up front if missing.
UPLOAD_FOLDER = "uploads"
MODEL_CACHE = "model_cache"
PROCESSED_AUDIO_DIR = "processed_audio"
for _folder in (UPLOAD_FOLDER, MODEL_CACHE, PROCESSED_AUDIO_DIR):
    os.makedirs(_folder, exist_ok=True)
del _folder  # keep the module namespace identical to the hand-written version

# ✅ GLOBAL VARIABLES (will be initialized)
# Populated by initialize_whisper_model(); None means "not loaded yet".
whisper_model = processor = device = None

# ✅ TRANSCRIPTION FUNCTIONS (UNCHANGED)
def initialize_whisper_model():
    """Load the Whisper-small OpenVINO model and its processor into module globals.

    On first use the Hugging Face checkpoint "openai/whisper-small" is exported
    to OpenVINO and saved under MODEL_CACHE/whisper-small-ov; later calls load
    that saved copy instead of exporting again.

    Side effects:
        - Sets the module globals `whisper_model`, `processor` and `device`.
        - Silences all Python warnings process-wide via warnings.filterwarnings.

    Returns:
        bool: always True (failures surface as exceptions from the loaders).
    """
    global whisper_model, processor, device
    warnings.filterwarnings("ignore")

    print("🔁 Loading Whisper (OpenVINO)...")
    whisper_model_path = os.path.join(MODEL_CACHE, "whisper-small-ov")

    if not os.path.exists(whisper_model_path):
        print("Downloading and exporting model...")
        whisper_model = OVModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small", export=True)
        whisper_model.save_pretrained(whisper_model_path)
        print("Whisper Small OpenVINO model saved!")
    else:
        print("Loading existing Whisper Small OpenVINO model...")
        whisper_model = OVModelForSpeechSeq2Seq.from_pretrained(whisper_model_path)

    # Recorded for callers; this function itself does not move the model anywhere.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Initialize processor components.
    # Only the tokenizer that actually goes into the processor is loaded; the
    # previous extra WhisperTokenizerFast load was never used.
    processor = WhisperProcessor(
        tokenizer=WhisperTokenizer.from_pretrained("openai/whisper-small"),
        feature_extractor=WhisperFeatureExtractor.from_pretrained("openai/whisper-small")
    )

    print("✅ Model and processor loaded successfully")
    return True

class AudioPreprocessor:
    """Turn a local media file or a YouTube URL into a cleaned 16 kHz mono WAV.

    The cleaned file is written into `output_dir`; intermediate downloads live
    in a temporary directory that is removed when `process` returns.
    """

    # Extensions accepted when falling back to scanning the download directory.
    _AUDIO_EXTENSIONS = ('.wav', '.webm', '.m4a', '.mp3', '.opus')

    def __init__(self, output_dir="processed_audio"):
        """Remember the output directory and create it (including parents)."""
        self.output_dir = Path(output_dir)
        # parents=True so nested output locations such as "out/audio" also work.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def is_youtube_url(self, input_path):
        """Return True when `input_path` starts with a youtube.com/watch or youtu.be URL.

        Accepts str or path-like input; anything else is converted with str().
        """
        youtube_patterns = [
            r'(?:https?://)?(?:www\.)?youtube\.com/watch\?v=[\w-]+',
            r'(?:https?://)?(?:www\.)?youtu\.be/[\w-]+'
        ]
        candidate = str(input_path)  # re.match rejects Path objects
        return any(re.match(pattern, candidate) for pattern in youtube_patterns)

    def download_youtube_audio(self, url, temp_dir):
        """Download the best available audio stream for `url` into `temp_dir`.

        Returns:
            tuple: (path_to_downloaded_file, sanitized_title), or (None, None)
            when no downloaded file could be located.
        """
        output_path = os.path.join(temp_dir, "%(title)s.%(ext)s")
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': output_path,
            'quiet': True,
            'no_warnings': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            # Titles made only of stripped characters (or a missing/None title)
            # fall back to "audio" so the output never becomes a bare ".wav".
            raw_title = info.get('title') or 'audio'
            title = re.sub(r'[^\w\s-]', '', raw_title).strip() or 'audio'

            # Prefer the exact path yt-dlp reports for this download: the
            # 'bestaudio/best' selector can yield containers that are not in
            # the extension list below.
            downloaded = ydl.prepare_filename(info)
            if downloaded and os.path.isfile(downloaded):
                return downloaded, title

            # Fallback: first recognised audio file in the download directory.
            for file in sorted(os.listdir(temp_dir)):
                if file.endswith(self._AUDIO_EXTENSIONS):
                    return os.path.join(temp_dir, file), title
        return None, None

    def preprocess_audio(self, input_file, output_file):
        """Run ffmpeg to band-pass, denoise and convert to 16 kHz mono 16-bit PCM.

        Raises:
            RuntimeError: if ffmpeg exits with a non-zero status (stderr is
            included in the message). RuntimeError is an Exception subclass,
            so existing `except Exception` handlers keep working.
        """
        cmd = [
            'ffmpeg', '-i', input_file,
            '-af', 'highpass=f=200,lowpass=f=3000,afftdn=nf=-25',
            '-ar', '16000', '-ac', '1',
            '-c:a', 'pcm_s16le', '-y', output_file
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"FFmpeg error: {result.stderr}")

    def process(self, input_path, output_filename=None):
        """Fetch (if needed) and clean the audio, returning the output WAV path.

        Args:
            input_path: YouTube URL or path to a local media file.
            output_filename: optional name for the WAV inside `output_dir`;
                ".wav" is appended when missing. Defaults to the video title
                for URLs and "<stem>_processed.wav" for local files.

        Returns:
            str: path of the processed WAV file.

        Raises:
            FileNotFoundError: the local input file does not exist.
            RuntimeError: the YouTube download produced no file, or ffmpeg failed.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            if self.is_youtube_url(input_path):
                print("🎬 Downloading from YouTube...")
                audio_file, title = self.download_youtube_audio(str(input_path), temp_dir)
                if not audio_file:
                    raise RuntimeError("Failed to download YouTube audio")
                if not output_filename:
                    output_filename = f"{title}.wav"
            else:
                print("📂 Processing local file...")
                if not os.path.exists(input_path):
                    raise FileNotFoundError(f"File not found: {input_path}")
                audio_file = input_path
                if not output_filename:
                    base_name = Path(input_path).stem
                    output_filename = f"{base_name}_processed.wav"

            if not output_filename.endswith('.wav'):
                output_filename += '.wav'

            output_path = self.output_dir / output_filename
            print("🎧 Preprocessing audio...")
            # ffmpeg must finish before the temporary download directory is removed.
            self.preprocess_audio(audio_file, str(output_path))
            print(f"✅ Processed audio: {output_path}")
            return str(output_path)

def transcribe_audio(audio_path):
    """Transcribe an audio file with the globally loaded Whisper model.

    The file is loaded with torchaudio, mixed down to one channel, resampled to
    16 kHz when necessary, and decoded in consecutive 30-second windows whose
    texts are joined with single spaces.

    Args:
        audio_path: path of the audio file to transcribe.

    Returns:
        str: the transcript; "No speech detected in the audio." when every
        window decodes to nothing; or "Transcription failed: <error>" when any
        step inside the processing raises (the error is printed, not re-raised).

    Raises:
        Exception: if the model or processor globals are not initialized.
    """
    global whisper_model, processor

    model_ready = whisper_model is not None and processor is not None
    if not model_ready:
        raise Exception("Model not initialized. Call initialize_whisper_model() first.")

    print(f"📝 Loading audio from: {audio_path}")

    # Window size in samples: 30 seconds at 16 kHz.
    samples_per_window = 30 * 16000

    try:
        waveform, sr = torchaudio.load(audio_path)
        print(f"Original sample rate: {sr}, channels: {waveform.shape[0]}")

        # Average the channels down to mono when more than one is present.
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Bring everything to 16 kHz.
        if sr != 16000:
            resampler = torchaudio.transforms.Resample(sr, 16000)
            waveform = resampler(waveform)

        audio_array = waveform.squeeze().numpy()
        print(f"Audio duration: {len(audio_array)/16000:.2f} seconds")

        pieces = []
        window_starts = range(0, len(audio_array), samples_per_window)
        for window_no, start in enumerate(window_starts, start=1):
            segment = audio_array[start:start + samples_per_window]
            inputs = processor(segment, sampling_rate=16000, return_tensors="pt")

            print(f"🤖 Processing chunk {window_no}...")

            # Greedy decoding; no gradients are needed for inference.
            with torch.no_grad():
                predicted_ids = whisper_model.generate(
                    inputs["input_features"],
                    max_new_tokens=444,
                    do_sample=False,
                    temperature=0.0,
                    return_dict_in_generate=False,
                    use_cache=True
                )

            decoded = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
            cleaned = decoded.strip()
            if cleaned:
                pieces.append(cleaned)

        full_transcript = " ".join(pieces)
        if full_transcript.strip():
            return full_transcript.strip()
        return "No speech detected in the audio."

    except Exception as e:
        print(f"Error during transcription: {e}")
        import traceback
        traceback.print_exc()
        return f"Transcription failed: {str(e)}"

def test_transcription_setup():
    """Smoke-test the loaded model by transcribing one second of silence.

    Returns:
        bool: True when feature extraction, generation and decoding all run
        without raising; False otherwise (the error is printed).
    """
    global whisper_model, processor

    try:
        # One second of digital silence at 16 kHz.
        silence = torch.zeros(16000).numpy()
        inputs = processor(silence, sampling_rate=16000, return_tensors="pt")

        # Keep the generation tiny: this only checks that the pipeline runs.
        generation_kwargs = {"max_length": 10, "num_beams": 1, "do_sample": False}
        with torch.no_grad():
            predicted_ids = whisper_model.generate(inputs["input_features"], **generation_kwargs)

        decoded_batch = processor.batch_decode(predicted_ids, skip_special_tokens=True)
        transcription = decoded_batch[0]

        print("✅ Transcription test successful!")
        print(f"Test result: '{transcription}' (should be empty/minimal for silence)")
        return True
    except Exception as e:
        print(f"❌ Transcription test failed: {e}")
        return False

def complete_transcription(input_source, output_filename=None, save_transcript=True):
    """Run the whole audio-to-text pipeline for one input.

    Steps: lazily initialize the Whisper model, preprocess the audio with
    AudioPreprocessor, transcribe it, print the transcript and optionally save
    it next to the processed audio.

    Args:
        input_source: YouTube URL or local file path, passed to
            AudioPreprocessor.process.
        output_filename: optional name for the processed WAV file.
        save_transcript: when True, write the transcript to
            "<processed audio name>_transcript.txt".

    Returns:
        dict with keys 'transcript', 'processed_audio_path' and
        'transcript_path' (None when nothing was saved).

    Raises:
        Any exception raised while preprocessing or saving; it is printed with
        its traceback and then re-raised.
    """
    global whisper_model, processor

    # Initialize model if not already done
    if whisper_model is None or processor is None:
        print("🔧 Initializing Whisper model...")
        initialize_whisper_model()
        test_transcription_setup()

    # Initialize audio preprocessor
    pre = AudioPreprocessor(output_dir=PROCESSED_AUDIO_DIR)

    try:
        # Process audio
        print("🎧 Processing audio...")
        processed_path = pre.process(input_source, output_filename)

        # Transcribe
        print("📝 Starting transcription...")
        transcript = transcribe_audio(processed_path)

        print("\n" + "="*50)
        print("✅ FINAL TRANSCRIPT:")
        print("="*50)
        print(transcript)
        print("="*50)

        result = {
            'transcript': transcript,
            'processed_audio_path': processed_path,
            'transcript_path': None
        }

        # Save transcript if requested
        if save_transcript:
            # Swap only the final extension. str.replace(".wav", ...) would also
            # rewrite ".wav" appearing earlier in the path (e.g. "talk.wav.wav"
            # or a directory named "x.wav").
            save_path = os.path.splitext(processed_path)[0] + "_transcript.txt"
            with open(save_path, "w", encoding="utf-8") as f:
                f.write(transcript)
            print(f"\n💾 Transcript saved at: {save_path}")
            result['transcript_path'] = save_path

        return result

    except Exception as e:
        print(f"❌ ERROR: {e}")
        import traceback
        traceback.print_exc()
        raise

# ✅ NOTES GENERATION FUNCTIONS (UNCHANGED)
def count_tokens(text, model='openai/gpt-4.1'):
    """Count tokens in `text`, using tiktoken when it knows the model.

    Args:
        text: the string to measure; None or "" counts as 0 tokens.
        model: model identifier. A provider prefix such as "openai/" is
            stripped before the tiktoken lookup.

    Returns:
        int: the tiktoken count, or the approximation len(text) // 4 when the
        tokenizer is unavailable, the model is unknown, or encoding fails.
    """
    if not text:
        return 0

    # Catalog-style ids ("openai/gpt-4.1") are not tiktoken model names; the
    # part after the last "/" is what tiktoken may recognise.
    bare_model = model.rsplit('/', 1)[-1] if isinstance(model, str) else model

    try:
        encoding = tiktoken.encoding_for_model(bare_model)
        return len(encoding.encode(text))
    except Exception:
        # Fallback approximation: ~4 characters per token
        return len(text) // 4

def chunk_transcript(transcript_text, topics_text, max_tokens=3000):
    """Split a transcript into sentence-aligned chunks that fit a token budget.

    The budget for each chunk is max_tokens minus the token count of
    `topics_text` minus 1000 tokens reserved for the system prompt.

    Args:
        transcript_text: full transcript; newlines are treated as spaces.
        topics_text: topics text that will accompany every chunk.
        max_tokens: overall token allowance per request.

    Returns:
        list[str]: chunks in original order; empty when the transcript
        contains no text.
    """
    # Split transcript into sentences for better chunking
    sentences = transcript_text.replace('\n', ' ').split('. ')

    chunks = []
    current_chunk = ""
    topics_tokens = count_tokens(topics_text)

    # Reserve tokens for system prompt (estimated ~1000 tokens).
    # If this budget is <= 0 every sentence ends up in its own chunk.
    available_tokens = max_tokens - topics_tokens - 1000

    last_index = len(sentences) - 1
    for index, sentence in enumerate(sentences):
        sentence = sentence.strip()
        if not sentence:
            # Blank input or stray separators must not become "." chunks.
            continue

        # split('. ') removed the period from every piece except the last one,
        # so restore it there; the last piece keeps its own end punctuation
        # instead of gaining a second period ("B." -> "B..").
        if index < last_index or not sentence.endswith(('.', '!', '?')):
            sentence += '.'
        sentence += ' '

        sentence_tokens = count_tokens(sentence)
        current_chunk_tokens = count_tokens(current_chunk)

        if current_chunk_tokens + sentence_tokens < available_tokens:
            current_chunk += sentence
        else:
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = sentence

    # Add the last chunk if it exists
    if current_chunk:
        chunks.append(current_chunk.strip())

    return chunks

def chunk_text(text, chunk_size=7500):
    """Cut `text` into consecutive slices of at most `chunk_size` characters.

    Returns a list of slices in order; the last one may be shorter, and an
    empty `text` gives an empty list.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        stop = start + chunk_size
        pieces.append(text[start:stop])
    return pieces

def identify_topics_from_transcript(transcript_path,
                                  token="your_token_here",
                                  endpoint="https://models.github.ai/inference",
                                  model="meta/Llama-4-Scout-17B-16E-Instruct",
                                  output_file="identified_topics.txt"):
    """
    ✅ ENHANCED TOPIC IDENTIFICATION FUNCTION
    Automatically identifies topics from transcript and returns the topics file path

    The transcript is cut into 7500-character chunks, each chunk is sent to the
    chat-completions endpoint for topic extraction, and the per-chunk lists are
    then consolidated by one more request. Both the per-chunk lists and the
    consolidated list are written to `output_file` in the transcript's directory.

    Args:
        transcript_path: path of the UTF-8 transcript file.
        token: API key passed to the OpenAI client.
        endpoint: base URL of the OpenAI-compatible API.
        model: model identifier used for every request.
        output_file: file name for the topics report.

    Returns:
        str: path of the written topics file.

    Raises:
        ValueError: the transcript file is empty or whitespace-only (raised
            before any API request is made).
    """
    print("🔍 IDENTIFYING TOPICS FROM TRANSCRIPT...")
    print("-"*40)

    # Read the transcript file
    with open(transcript_path, "r", encoding="utf-8") as f:
        transcript_text = f.read()

    # Without text there are no chunks, and the consolidation request would be
    # asked to organise an empty list of topics.
    if not transcript_text.strip():
        raise ValueError(f"Transcript is empty: {transcript_path}")

    # Chunk the transcript
    chunks = chunk_text(transcript_text, chunk_size=7500)
    print(f"📄 Transcript split into {len(chunks)} chunks for topic identification")

    # Set up OpenAI client
    client = OpenAI(
        base_url=endpoint,
        api_key=token,
    )

    # Collect identified topics
    all_topics = []

    for i, chunk in enumerate(chunks):
        print(f"🔍 Identifying topics in chunk {i+1}/{len(chunks)}...")
        response = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an expert academic assistant. Identify the key topics, concepts, and subjects covered in this transcript chunk. "
                        "Focus on:\n"
                        "- Main concepts and theories discussed\n"
                        "- Technical terms and definitions\n"
                        "- Mathematical formulas or equations mentioned\n"
                        "- Practical applications or examples\n"
                        "- Key learning objectives\n\n"
                        "Format your response as a clear list of topics with brief descriptions. "
                        "Do not hallucinate or include irrelevant topics. Only identify what is actually discussed in the text.\n\n"
                        f"TRANSCRIPT CHUNK:\n{chunk}"
                    ),
                }
            ],
            temperature=0.3,
            top_p=1.0,
            model=model,
            max_tokens=1000
        )

        # message.content can be None (e.g. filtered or empty completions);
        # treat that as an empty topic list instead of crashing on .strip().
        result = (response.choices[0].message.content or "").strip()
        all_topics.append(f"### Chunk {i+1} Topics:\n{result}\n")
        print(f"✅ Topics identified for chunk {i+1}")

    # Combine all topics and create final consolidated list
    print("🔗 Consolidating and organizing topics...")

    consolidation_prompt = f"""You are an expert academic assistant. You have been provided with topic lists from different chunks of the same lecture transcript.

Your task is to:
1. Consolidate and organize all topics into a coherent structure
2. Remove duplicate or overlapping topics
3. Group related topics together
4. Create a hierarchical organization of main topics and subtopics
5. Ensure the final list covers all important concepts from the lecture

Here are the individual chunk topics to consolidate:

{chr(10).join(all_topics)}

Create a final organized list of key topics that should be covered in detailed lecture notes. Format as a clear, structured list."""

    consolidation_response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": consolidation_prompt,
            }
        ],
        temperature=0.2,
        model=model,
        max_tokens=1500
    )

    consolidated_topics = (consolidation_response.choices[0].message.content or "").strip()

    # Save consolidated topics to file
    output_path = os.path.join(os.path.dirname(transcript_path), output_file)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write("# IDENTIFIED KEY TOPICS FOR LECTURE NOTES\n\n")
        f.write("## Individual Chunk Analysis:\n")
        f.write("\n".join(all_topics))
        f.write("\n" + "="*50 + "\n")
        f.write("## CONSOLIDATED FINAL TOPICS:\n")
        f.write(consolidated_topics)

    print(f"✅ Topics identification completed!")
    print(f"📋 Topics saved to: {output_path}")
    print(f"🔢 Processed {len(chunks)} chunks")

    return output_path

def calculate_optimal_chunking(transcript_text, topics_text, max_tokens_per_call=7500, max_calls_per_day=50):
    """
    Work out the chunk count and per-chunk token limits for notes generation.

    The chunk count is always 8, 9 or 10, picked from the transcript's token
    count. `max_calls_per_day` is accepted but not used by this function.

    Returns a dict with keys 'optimal_chunks', 'max_input_tokens_per_chunk',
    'max_output_tokens_per_chunk', 'total_transcript_tokens',
    'system_overhead_tokens' and 'estimated_total_calls' (chunks + 3).
    """
    # Fixed allowance for the base system prompt.
    base_system_tokens = 4000
    topics_tokens = count_tokens(topics_text)
    transcript_tokens = count_tokens(transcript_text)

    # Longer transcripts get one or two extra chunks.
    optimal_chunks = 8 if transcript_tokens < 5000 else (9 if transcript_tokens < 15000 else 10)

    # Prompt + topics are sent with every request.
    system_overhead = base_system_tokens + topics_tokens

    # Starting per-chunk limits.
    input_limit, output_limit = 5000, 4000

    # NOTE(review): when the request would exceed max_tokens_per_call this
    # branch *raises* both limits rather than lowering them, and with a
    # 4000-token base overhead it is taken for the default 7500 budget.
    # Left as-is to preserve current results - confirm the intended budget.
    if system_overhead + input_limit + output_limit > max_tokens_per_call:
        input_limit, output_limit = 6000, 4700

    strategy = {
        'optimal_chunks': optimal_chunks,
        'max_input_tokens_per_chunk': input_limit,
        'max_output_tokens_per_chunk': output_limit,
        'total_transcript_tokens': transcript_tokens,
        'system_overhead_tokens': system_overhead,
        'estimated_total_calls': optimal_chunks + 3
    }
    return strategy

def smart_chunk_transcript(transcript_text, max_tokens_per_chunk):
    """
    Split a transcript into at most 8-10 sentence-aligned chunks.

    The target chunk count is derived from the transcript's token count
    (8 below 10000 tokens, 9 below 20000, otherwise 10). Sentences are grouped
    evenly across that many chunks, a chunk is also closed once it reaches
    `max_tokens_per_chunk`, and surplus chunks are merged (smallest first)
    until no more than the target number remain. Merged chunks may exceed
    `max_tokens_per_chunk`.

    Args:
        transcript_text: the full transcript.
        max_tokens_per_chunk: token count at which a chunk is closed early.

    Returns:
        list[str]: the chunks in order; fewer than the target for short
        transcripts and an empty list for an empty transcript.
    """
    # Total tokens decide the target chunk count further down.
    total_tokens = count_tokens(transcript_text)

    # Split by sentences for better coherence
    sentences = []
    current_sentence = ""

    # Simple sentence splitting: end a sentence at . ! ? once it is longer than
    # 20 characters, so abbreviations and decimals rarely cause a split.
    for char in transcript_text:
        current_sentence += char
        if char in '.!?' and len(current_sentence.strip()) > 20:
            sentences.append(current_sentence.strip())
            current_sentence = ""

    # Add remaining text
    if current_sentence.strip():
        sentences.append(current_sentence.strip())

    # If we have very few sentences, fall back to fixed-size word groups
    if len(sentences) < 8:
        words = transcript_text.split()
        # Aim for 8 groups, but never let the step reach 0: range() rejects a
        # zero step, which previously crashed on transcripts under 8 words.
        chunk_size = max(1, len(words) // 8)
        sentences = []
        for i in range(0, len(words), chunk_size):
            chunk_words = words[i:i + chunk_size]
            sentences.append(' '.join(chunk_words))

    # Now distribute sentences across target number of chunks
    target_chunks = 8 if total_tokens < 10000 else (9 if total_tokens < 20000 else 10)
    sentences_per_chunk = max(1, len(sentences) // target_chunks)

    chunks = []
    current_chunk = ""
    sentence_count = 0

    for sentence in sentences:
        current_chunk += sentence + " "
        sentence_count += 1

        # Check if we should close this chunk
        chunk_tokens = count_tokens(current_chunk)

        # Close when the chunk has its share of sentences and is not tiny
        # (> 500 tokens), or when it hits the hard token limit.
        if (sentence_count >= sentences_per_chunk and chunk_tokens > 500) or chunk_tokens >= max_tokens_per_chunk:
            if current_chunk.strip():
                chunks.append(current_chunk.strip())
                current_chunk = ""
                sentence_count = 0

    # Add remaining content
    if current_chunk.strip():
        chunks.append(current_chunk.strip())

    # Ensure we do not exceed the target by merging small chunks
    while len(chunks) > target_chunks:
        # Merge the smallest chunk into its successor, or into its predecessor
        # when it is the last chunk, so the original order is preserved.
        smallest_idx = min(range(len(chunks)), key=lambda i: len(chunks[i]))
        if smallest_idx < len(chunks) - 1:
            chunks[smallest_idx] += " " + chunks[smallest_idx + 1]
            chunks.pop(smallest_idx + 1)
        else:
            chunks[smallest_idx - 1] += " " + chunks[smallest_idx]
            chunks.pop(smallest_idx)

    return chunks

def generate_detailed_notes(transcription_result, topics_path, output_dir=None,
                          token="your_token_here", endpoint="https://models.github.ai/inference",
                          model="openai/gpt-4.1"):
    """Generate detailed exam-ready notes with improved completeness and code examples."""

    # Extract transcript path from result
    transcript_path = transcription_result['transcript_path']

    if not transcript_path or not os.path.exists(transcript_path):
        raise FileNotFoundError(f"Transcript file not found: {transcript_path}")

    if not os.path.exists(topics_path):
        raise FileNotFoundError(f"Topics file not found: {topics_path}")

    # Set output directory
    if output_dir is None:
        output_dir = os.path.dirname(transcript_path)

    os.makedirs(output_dir, exist_ok=True)

    print(f"📚 Generating notes from transcript: {transcript_path}")
    print(f"📋 Using topics from: {topics_path}")
    print(f"📁 Output directory: {output_dir}")

    # Read transcript and topics
    with open(transcript_path, "r", encoding="utf-8") as f:
        transcript_text = f.read()

    with open(topics_path, "r", encoding="utf-8") as f:
        identified_topics = f.read()

    # Calculate optimal chunking strategy
    chunking_strategy = calculate_optimal_chunking(transcript_text, identified_topics)

    print(f"📊 ENHANCED TOKEN MANAGEMENT:")
    print(f"   Total transcript tokens: {chunking_strategy['total_transcript_tokens']}")
    print(f"   Optimal chunks: {chunking_strategy['optimal_chunks']}")
    print(f"   Max input tokens per chunk: {chunking_strategy['max_input_tokens_per_chunk']}")
    print(f"   Max output tokens per chunk: {chunking_strategy['max_output_tokens_per_chunk']}")
    print(f"   Estimated total API calls: {chunking_strategy['estimated_total_calls']}")

    # OpenAI configuration
    client = OpenAI(
        base_url=endpoint,
        api_key=token,
    )

    # Smart chunk the transcript
    transcript_chunks = smart_chunk_transcript(
        transcript_text,
        chunking_strategy['max_input_tokens_per_chunk']
    )

    print(f"📄 Transcript split into {len(transcript_chunks)} chunks")

    # ENHANCED base system prompt with code emphasis and completion instructions
    base_system_prompt = f"""You are an expert academic assistant and technical writer specializing in computer science, machine learning, and technical subjects. Your task is to create DETAILED, COMPREHENSIVE, EXAM-READY notes that enable deep understanding and successful exam performance.

**IDENTIFIED KEY TOPICS TO FOCUS ON:**
{identified_topics}

**CRITICAL REQUIREMENTS FOR DETAILED NOTES:**

1. **COMPLETE COVERAGE**: You MUST cover ALL concepts mentioned in the transcript chunk thoroughly. Do NOT stop abruptly or leave topics incomplete. Ensure every important concept gets full explanation.

2. **CODE EXAMPLES MANDATORY**: For any computer science, programming, machine learning, or technical topics:
   - Provide complete, working code examples
   - Include step-by-step code explanations
   - Show multiple implementation approaches when relevant
   - Add comments explaining each line of complex code
   - Include practical examples that students can run
   - Demonstrate best practices and common patterns
   - Show error handling and edge cases

3. **DEPTH OVER BREVITY**: Create detailed explanations, not summaries. Each concept should be explained thoroughly enough that a student can understand it completely from your notes alone.

4. **EXAM-READY CONTENT**: Structure content so students can:
   - Answer conceptual questions confidently
   - Solve mathematical problems step-by-step
   - Write and explain code implementations
   - Compare and contrast different approaches
   - Apply concepts to new scenarios

5. **DUAL-LEVEL EXPLANATIONS**: For every concept, provide:
   - **Intuitive/Primitive Explanation**: Simple, everyday language with analogies
   - **Technical/Mathematical Explanation**: Precise definitions, formulas, mathematical rigor
   - **Implementation Details**: Code examples and practical applications
   - **Bridge Between All Levels**: Show how intuitive connects to technical to implementation

6. **COMPREHENSIVE EXAMPLES**: Include:
   - Step-by-step worked examples with detailed explanations
   - Multiple examples per concept (simple → complex)
   - Common exam-style problems and solutions
   - Real-world applications with specific implementations
   - Code examples with line-by-line explanations
   - Algorithm implementations with complexity analysis

7. **MATHEMATICAL AND ALGORITHMIC RIGOR**: For all formulas, algorithms, and code:
   - Derive from first principles when possible
   - Explain each variable, parameter, and function
   - Show step-by-step mathematical manipulations
   - Provide time/space complexity analysis
   - Include common variations and optimizations
   - Show pseudocode AND actual implementation

8. **STRUCTURED LEARNING FLOW**: For each topic covered in this chunk:
   - **Introduction**: What is it and why does it matter?
   - **Intuitive Understanding**: Simple explanation with analogies
   - **Technical Details**: Mathematical formulations, algorithms, data structures
   - **Code Implementation**: Complete examples with explanations
   - **Detailed Examples**: Multiple worked examples with code
   - **Applications**: Real-world uses with specific implementations
   - **Performance Analysis**: Complexity, trade-offs, optimizations
   - **Common Pitfalls**: What students often get wrong (including coding mistakes)
   - **Exam Tips**: Key points to remember for tests

9. **INDUSTRY AND PRACTICAL CONTEXT**: Add relevant details about:
   - How concepts are implemented in practice
   - Popular libraries, frameworks, and tools
   - Performance considerations and benchmarks
   - Current research directions and limitations
   - Production-level considerations

10. **COMPLETION REQUIREMENT**: You must continue writing until ALL topics in the chunk are thoroughly covered. Do not stop mid-explanation or leave concepts incomplete. If approaching token limits, prioritize completing current explanations over starting new ones.

**CRITICAL FORMATTING REQUIREMENTS:**

11. **MATHEMATICAL EQUATIONS - MANDATORY FORMAT**: ALL mathematical equations, formulas, expressions, and symbols MUST be formatted using LaTeX block equations with double dollar signs. This is NON-NEGOTIABLE:
    - ✅ CORRECT: Use `$$equation$$` on separate lines for ALL math
    - ❌ NEVER use: `$equation$`, `[equation]`, or raw LaTeX without delimiters
    - Apply this to: variables with subscripts/superscripts, fractions, summations, integrals, matrices, vectors, ALL mathematical notation which are grouped together, not for solo equations

12. **RESPONSE LENGTH AND COMPLETION MANAGEMENT**:
    - You have approximately 4000-6000 tokens available for detailed responses
    - Plan your response to ensure completion of all topics within this limit
    - If you have many topics to cover, provide essential details for each rather than incomplete deep-dives
    - Always end with a complete thought, never cut off mid-sentence or mid-concept
    - If you must choose between breadth and depth due to length constraints, prioritize covering all topics with good depth rather than leaving topics completely uncovered
    - Monitor your response length and adjust detail level accordingly while maintaining quality

13. **RESPONSE COMPLETION STRATEGIES**:
    - Begin each topic with a brief overview to gauge required space
    - For lengthy topics, focus on the most exam-relevant aspects first
    - Always provide at least one complete code example per programming concept
    - Ensure each major topic has: definition, example, and key takeaway
    - If running short on space, summarize remaining topics with promise of coverage in subsequent chunks
    - Never end abruptly without proper conclusion for the current topic

**IMPORTANT**: This is chunk {{chunk_num}} of {len(transcript_chunks)} parts of a larger lecture. Focus only on the topics/concepts present in this specific chunk, but maintain the same detailed format and depth. You MUST complete ALL concepts mentioned in this chunk before finishing your response. Plan your response length to ensure completion while maintaining the required detail and proper LaTeX formatting for all mathematical content."""

    # Process each chunk with enhanced parameters
    all_notes = []
    chunk_summaries = []

    for i, chunk in enumerate(transcript_chunks):
        print(f"🔄 Processing chunk {i+1}/{len(transcript_chunks)}... (Tokens: {count_tokens(chunk)})")

        # Create chunk-specific prompt
        chunk_prompt = base_system_prompt.replace("{chunk_num}", str(i+1))
        full_prompt = f"{chunk_prompt}\n\n**TRANSCRIPT CHUNK:**\n{chunk}"

        # Calculate token limits with better utilization
        input_tokens = count_tokens(full_prompt)
        max_output = chunking_strategy['max_output_tokens_per_chunk']

        # Ensure we're using close to the full limit
        if input_tokens + max_output > 7700:  # Reduced safety buffer to use more tokens
            max_output = max(4000, 7700 - input_tokens)  # Minimum 1000 tokens for output
            print(f"   Adjusted max output to: {max_output}")

        print(f"   Input tokens: {input_tokens}, Max output: {max_output}")

        try:
            response = client.chat.completions.create(
                messages=[
                    {
                        "role": "system",
                        "content": full_prompt,
                    }
                ],
                temperature=0.2,  # Slightly lower for more consistent code generation
                top_p=1.0,
                model=model,
                max_tokens=max_output
            )

            chunk_notes = response.choices[0].message.content
            all_notes.append(f"## Chunk {i+1} Content\n\n{chunk_notes}")

            # Extract a brief summary for integration
            chunk_summaries.append(f"Chunk {i+1}: {chunk[:200]}...")

            print(f"✅ Chunk {i+1} completed ({count_tokens(chunk_notes)} tokens generated)")

            # Small delay to avoid rate limiting
            time.sleep(2)

        except Exception as e:
            print(f"❌ Error processing chunk {i+1}: {e}")
            # Add placeholder to maintain structure
            all_notes.append(f"## Chunk {i+1} Content\n\n[Error processing this chunk: {str(e)}]")

    # Create integrated final notes with enhanced integration
    print("🔗 Creating integrated final notes...")

    # Combine notes for integration, but manage size intelligently
    combined_notes = "\n\n".join(all_notes)
    combined_tokens = count_tokens(combined_notes)

    # Enhanced integration strategy
    if combined_tokens > 7000:  # Increased threshold for better integration
        print("📝 Large content detected - creating comprehensive integration...")
        # Create more detailed summaries of each chunk
        summary_notes = []
        for i, note in enumerate(all_notes):
            # Take first 800 characters and key points
            summary = note[:4000] + "..." if len(note) > 4000 else note
            summary_notes.append(summary)
        combined_notes = "\n\n".join(summary_notes)

    # Enhanced integration prompt
    integration_prompt = f"""Create comprehensive integrated lecture notes from the following chunk notes.

**IDENTIFIED TOPICS:**
{identified_topics}

**ENHANCED INTEGRATION TASK:**
1. Combine all chunks into coherent, comprehensive lecture notes
2. Ensure all code examples are complete and properly formatted
3. Remove redundancy while maintaining completeness
4. Add connections between concepts and cross-references
5. Include comprehensive overview and detailed summary sections
6. For technical subjects, ensure all algorithms and implementations are complete
7. Add a complete code examples section if applicable
8. Create proper sections and subsections for easy navigation
9. Ensure mathematical formulas and derivations are complete
10. Add practical applications and real-world examples

**CHUNK NOTES TO INTEGRATE:**
{combined_notes}

Create final integrated notes with proper structure, comprehensive coverage, and all code examples complete. Do not truncate or abbreviate - provide complete, exam-ready notes."""

    try:
        # Increased token limit for integration
        integration_tokens = count_tokens(integration_prompt)
        max_integration_output = min(4000, 8000 - integration_tokens - 200)  # Increased integration output

        final_response = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": integration_prompt,
                }
            ],
            temperature=0.2,
            model=model,
            max_tokens=max_integration_output
        )

        final_notes = final_response.choices[0].message.content
    except Exception as e:
        print(f"⚠️  Integration failed: {e}")
        # Enhanced fallback: combine with better formatting
        final_notes = f"# Comprehensive Lecture Notes\n\n{chr(10).join(all_notes)}\n\n## Summary\nThis document contains detailed notes covering all topics from the lecture transcript."

    # Save all outputs with enhanced formatting
    chunks_output_path = os.path.join(output_dir, "individual_chunk_notes.md")
    with open(chunks_output_path, "w", encoding="utf-8") as f:
        f.write("# Individual Chunk Notes\n\n")
        f.write("*This file contains detailed notes for each processed chunk*\n\n")
        f.write("\n\n---\n\n".join(all_notes))

    final_output_path = os.path.join(output_dir, "detailed_exam_ready_notes.md")
    with open(final_output_path, "w", encoding="utf-8") as f:
        f.write("# Complete Exam-Ready Lecture Notes\n\n")
        f.write("*Generated from lecture transcript with comprehensive coverage*\n\n")
        f.write(final_notes)

    print(f"📝 Individual chunk notes saved to: {chunks_output_path}")
    print(f"📖 Final integrated notes saved to: {final_output_path}")

    # Generate enhanced study guide
    print("📚 Generating comprehensive study guide...")

    # Enhanced study guide prompt
    study_guide_prompt = f"""Based on these key topics and the comprehensive notes generated, create a detailed study guide:

**KEY TOPICS:**
{identified_topics}

**ENHANCED STUDY GUIDE REQUIREMENTS:**
- Key formulas, algorithms, and code snippets for quick reference
- Important concepts with brief explanations
- Code templates and common patterns
- Quick reference format but with sufficient detail
- Include complexity analysis for algorithms
- Common interview questions and exam topics
- Under 2000 words but comprehensive

Focus on exam-ready and interview-ready content that students can quickly review before tests."""

    try:
        study_guide_response = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": study_guide_prompt
                }
            ],
            temperature=0.2,
            model=model,
            max_tokens=3000  # Increased for more comprehensive study guide
        )

        study_guide_content = study_guide_response.choices[0].message.content
    except Exception as e:
        print(f"⚠️  Study guide generation failed: {e}")
        study_guide_content = f"# Comprehensive Study Guide\n\nBased on topics:\n{identified_topics}\n\n[Study guide generation failed, but key topics are listed above]"

    # Save enhanced study guide
    study_guide_path = os.path.join(output_dir, "comprehensive_study_guide.md")
    with open(study_guide_path, "w", encoding="utf-8") as f:
        f.write("# Comprehensive Study Guide\n\n")
        f.write("*Quick reference for exam preparation and interview practice*\n\n")
        f.write(study_guide_content)

    print(f"📋 Comprehensive study guide saved to: {study_guide_path}")
    print(f"\n🎉 Enhanced processing complete! Generated {len(transcript_chunks)} chunk notes + integrated final notes + comprehensive study guide")
    print(f"📊 Token utilization: Used approximately {chunking_strategy['estimated_total_calls']} API calls with enhanced token limits")

    # Return paths for further use
    return {
        'individual_chunks_path': chunks_output_path,
        'final_notes_path': final_output_path,
        'study_guide_path': study_guide_path,
        'chunks_processed': len(transcript_chunks),
        'output_directory': output_dir,
        'token_strategy': chunking_strategy
    }
def complete_notes_generation(transcription_result, topics_path,
                            token="your_token_here",
                            endpoint="https://models.github.ai/inference",
                            model="openai/gpt-4.1",
                            output_dir=None):
    """Run notes generation end to end and surface any failure loudly.

    Forwards every argument unchanged to ``generate_detailed_notes`` and returns
    whatever that function returns. If it raises, the error message and the
    traceback are printed and the original exception is re-raised to the caller.
    """
    import traceback

    generation_kwargs = dict(
        transcription_result=transcription_result,
        topics_path=topics_path,
        output_dir=output_dir,
        token=token,
        endpoint=endpoint,
        model=model,
    )

    try:
        return generate_detailed_notes(**generation_kwargs)
    except Exception as failure:
        print(f"❌ Notes generation failed: {failure}")
        traceback.print_exc()
        raise

# ✅ COMPLETE PIPELINE FUNCTION - FIXED VERSION
def process_lecture_complete_pipeline(input_source,
                                    openai_token="your_token_here",
                                    openai_endpoint="https://models.github.ai/inference",
                                    openai_model="openai/gpt-4.1",
                                    topics_model="meta/Llama-4-Scout-17B-16E-Instruct",
                                    output_dir=None,
                                    custom_filename=None):
    """
    🎯 COMPLETE LECTURE PROCESSING PIPELINE - FULLY AUTOMATED

    This function handles the entire process:
    1. Audio processing (YouTube download or local file)
    2. Audio transcription with Whisper
    3. Automatic topic identification from transcript
    4. Detailed notes generation with AI
    5. Study guide creation

    Args:
        input_source: YouTube URL or local audio/video file path
        openai_token: API token for the OpenAI-compatible endpoint
        openai_endpoint: API endpoint URL
        openai_model: Model to use for notes generation
        topics_model: Model to use for topic identification
        output_dir: Custom output directory (optional)
        custom_filename: Custom filename for processed audio (optional)

    Returns:
        dict: On success, all output file paths plus ``pipeline_success=True``.
        On any exception, a dict with ``pipeline_success=False``, ``error``,
        ``input_source`` and ``topics_used`` (``None`` if the failure happened
        before the topics file was resolved). Exceptions are not re-raised.
    """

    print("🚀 STARTING COMPLETE LECTURE PROCESSING PIPELINE")
    print("="*60)

    # Defined before the try block so the error result can always report it.
    topics_path = None

    try:
        # Step 1: Audio Processing & Transcription
        print("📱 STEP 1: PROCESSING AUDIO & TRANSCRIPTION")
        print("-"*40)

        transcription_result = complete_transcription(
            input_source=input_source,
            output_filename=custom_filename,
            save_transcript=True
        )

        print(f"✅ Transcription completed!")
        print(f"📝 Transcript: {transcription_result['transcript_path']}")
        print(f"🎵 Processed audio: {transcription_result['processed_audio_path']}")

        # Pause between API-heavy steps to stay under the endpoint's rate limit.
        print(f"😴 Sleeping for 15 seconds...")
        time.sleep(15)

        # Step 2: Automatic Topic Identification
        print("\n🧠 STEP 2: IDENTIFYING KEY TOPICS FROM TRANSCRIPT")
        print("-"*40)

        topics_filename = "identified_topics.txt"
        transcript_dir = os.path.dirname(transcription_result['transcript_path'])
        expected_topics_path = os.path.join(transcript_dir, topics_filename)

        identify_topics_result = identify_topics_from_transcript(
            transcription_result['transcript_path'],
            token=openai_token,
            endpoint=openai_endpoint,
            model=topics_model,
            output_file=topics_filename
        )

        # Resolve where the topics file actually landed instead of assuming the
        # Colab-only "/content/processed_audio" location. The helper is defined
        # elsewhere, so its return value is trusted as a path only when it names
        # an existing file; otherwise fall back to the likely locations.
        candidate_paths = []
        if isinstance(identify_topics_result, (str, os.PathLike)):
            candidate_paths.append(os.fspath(identify_topics_result))
        candidate_paths.extend([
            expected_topics_path,                              # next to the transcript
            topics_filename,                                   # current working directory
            "/content/processed_audio/identified_topics.txt",  # legacy Colab location
        ])

        topics_path = next(
            (path for path in candidate_paths if path and os.path.isfile(path)),
            None
        )
        if topics_path is not None:
            print(f"✅ Topics identified and saved to: {topics_path}")
        else:
            # Keep going with the most plausible path; the notes step decides
            # how to deal with a missing topics file.
            topics_path = expected_topics_path
            print(f"⚠️  Topics file not found on disk; continuing with: {topics_path}")

        print(f"😴 Sleeping for 15 seconds...")
        time.sleep(15)

        # Step 3: Notes Generation
        print("\n📚 STEP 3: GENERATING DETAILED NOTES")
        print("-"*40)

        notes_result = complete_notes_generation(
            transcription_result=transcription_result,
            topics_path=topics_path,
            token=openai_token,
            endpoint=openai_endpoint,
            model=openai_model,
            output_dir=output_dir
        )

        print(f"✅ Notes generation completed!")

        print(f"😴 Sleeping for 15 seconds...")
        time.sleep(15)

        # Step 4: Combine Results
        final_result = {
            # Transcription outputs
            'transcript': transcription_result['transcript'],
            'transcript_path': transcription_result['transcript_path'],
            'processed_audio_path': transcription_result['processed_audio_path'],

            # Notes outputs
            'individual_chunks_path': notes_result['individual_chunks_path'],
            'final_notes_path': notes_result['final_notes_path'],
            'study_guide_path': notes_result['study_guide_path'],
            'chunks_processed': notes_result['chunks_processed'],
            'output_directory': notes_result['output_directory'],

            # Pipeline info
            'input_source': input_source,
            'topics_used': topics_path,
            'pipeline_success': True
        }

        # Final Summary
        print("\n" + "="*60)
        print("🎉 PIPELINE COMPLETED SUCCESSFULLY!")
        print("="*60)
        print(f"📁 Output Directory: {final_result['output_directory']}")
        print(f"📝 Transcript: {final_result['transcript_path']}")
        print(f"📖 Final Notes: {final_result['final_notes_path']}")
        print(f"📋 Study Guide: {final_result['study_guide_path']}")
        print(f"📊 Individual Chunks: {final_result['individual_chunks_path']}")
        print(f"🎵 Processed Audio: {final_result['processed_audio_path']}")
        print(f"🔢 Chunks Processed: {final_result['chunks_processed']}")
        print("="*60)

        return final_result

    except Exception as e:
        print(f"❌ PIPELINE FAILED: {e}")
        import traceback
        traceback.print_exc()

        # Return error result instead of raising so callers can branch on
        # 'pipeline_success'.
        return {
            'pipeline_success': False,
            'error': str(e),
            'input_source': input_source,
            'topics_used': topics_path
        }

# ✅ SIMPLE USAGE FUNCTION - FIXED VERSION
def process_lecture(youtube_url_or_file_path, your_openai_token="your_token_here"):
    """
    🎯 SIMPLE ONE-LINE LECTURE PROCESSOR - FULLY AUTOMATED TOPIC IDENTIFICATION

    Convenience wrapper that runs ``process_lecture_complete_pipeline`` with the
    default endpoint, notes model and topics model, and returns its result dict.

    Usage:
        result = process_lecture(
            "https://youtube.com/watch?v=...",
            "your_openai_token"
        )
    """
    pipeline_settings = {
        "input_source": youtube_url_or_file_path,
        "openai_token": your_openai_token,
        "openai_endpoint": "https://models.github.ai/inference",
        "openai_model": "openai/gpt-4.1",
        # Default topics model
        "topics_model": "meta/Llama-4-Scout-17B-16E-Instruct",
    }
    return process_lecture_complete_pipeline(**pipeline_settings)

# ✅ MAIN EXECUTION - FIXED VERSION
if __name__ == "__main__":
    # Local import: only this interactive entry point needs it.
    import getpass

    print("🎓 LECTURE PROCESSING PIPELINE")
    print("="*50)

    # Get inputs
    input_source = input("🔤 Enter YouTube URL or local file path: ").strip()
    # getpass does not echo what is typed, so the token never ends up in the
    # saved notebook output (input() would store it in plain text).
    openai_token = getpass.getpass("🔑 Enter your OpenAI token: ").strip()

    # Process lecture
    result = process_lecture_complete_pipeline(
        input_source=input_source,
        openai_token=openai_token,
        openai_endpoint="https://models.github.ai/inference", # Explicitly pass endpoint
        openai_model="openai/gpt-4.1", # Explicitly pass notes model
        topics_model="meta/Llama-4-Scout-17B-16E-Instruct" # Explicitly pass topics model
    )

    # The pipeline reports failure through the result dict rather than raising.
    if result['pipeline_success']:
        print("\n🎉 SUCCESS! All files are ready for studying.")
        print(f"📚 Check your notes at: {result['final_notes_path']}")
    else:
        print(f"\n❌ FAILED: {result['error']}")

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'
Multiple distributions found for package optimum. Picked distribution: optimum-intel


🎓 LECTURE PROCESSING PIPELINE
🔤 Enter YouTube URL or local file path: https://www.youtube.com/watch?v=P6FORpg0KVo
🔑 Enter your OpenAI token: [REDACTED]
🚀 STARTING COMPLETE LECTURE PROCESSING PIPELINE
📱 STEP 1: PROCESSING AUDIO & TRANSCRIPTION
----------------------------------------
🔧 Initializing Whisper model...
🔁 Loading Whisper (OpenVINO)...
Downloading and exporting model...


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/967M [00:00<?, ?B/s]

generation_config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

normalizer.json: 0.00B [00:00, ?B/s]

added_tokens.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Whisper Small OpenVINO model saved!


Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.


✅ Model and processor loaded successfully


`generation_config` default values have been modified to match model-specific defaults: {'suppress_tokens': [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50360, 50361, 50362], 'begin_suppress_tokens': [220, 50257], 'forced_decoder_ids': [[1, None], [2, 50359]]}. If this is not desired, please set these values explicitly.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
A custom logits processo

✅ Transcription test successful!
Test result: ' you' (should be empty/minimal for silence)
🎧 Processing audio...
🎬 Downloading from YouTube...
🎧 Preprocessing audio...
✅ Processed audio: processed_audio/How to Make Learning as Addictive as Social Media  Duolingos Luis Von Ahn  TED.wav
📝 Starting transcription...
📝 Loading audio from: processed_audio/How to Make Learning as Addictive as Social Media  Duolingos Luis Von Ahn  TED.wav
Original sample rate: 16000, channels: 1
Audio duration: 774.11 seconds
🤖 Processing chunk 1...
🤖 Processing chunk 2...
🤖 Processing chunk 3...
🤖 Processing chunk 4...
🤖 Processing chunk 5...
🤖 Processing chunk 6...
🤖 Processing chunk 7...
🤖 Processing chunk 8...
🤖 Processing chunk 9...
🤖 Processing chunk 10...
🤖 Processing chunk 11...
🤖 Processing chunk 12...
🤖 Processing chunk 13...
🤖 Processing chunk 14...
🤖 Processing chunk 15...
🤖 Processing chunk 16...
🤖 Processing chunk 17...
🤖 Processing chunk 18...
🤖 Processing chunk 19...
🤖 Processing chunk 20...
🤖 

In [None]:
# RAG Agent for Notes App - Part 2: Gemini Integration and Complete Pipeline
# Improved version with bug fixes and better architecture


import os
import json
import asyncio
from typing import List, Dict, Any, Optional, Tuple, Union
from dataclasses import dataclass, asdict
from datetime import datetime
import logging
from pathlib import Path

# HTTP and API
import aiohttp
import requests
from fastapi import FastAPI, HTTPException, Query, BackgroundTasks
from pydantic import BaseModel, Field
import uvicorn

# Google Gemini
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

# Advanced retrieval
from sentence_transformers import CrossEncoder
import numpy as np

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FIXED: Proper base classes that should be imported from Part 1
# These would normally be imported, but included here for completeness
@dataclass
class DocumentChunk:
    """A single chunk of a source document plus the bookkeeping fields stored with it.

    Instances are built by ``DocumentProcessor.create_chunks``;
    ``VectorDatabase.add_documents`` reads ``content``, ``metadata`` and ``chunk_id``.
    """
    content: str  # chunk text
    metadata: Dict[str, Any]  # create_chunks stores source_file, file_type and created_at here
    chunk_id: str  # create_chunks builds this as "<source file stem>_<chunk_index>"
    source_file: str  # path of the file the chunk was cut from
    chunk_index: int  # zero-based position of the chunk within its source file
    token_count: int  # create_chunks fills this with a whitespace word count, not a tokenizer count

class DocumentProcessor:
    """Loads note files from disk and splits their text into overlapping word chunks."""

    def load_markdown_file(self, file_path: str) -> str:
        """Read a UTF-8 text file and return its contents.

        Errors are logged rather than raised: a missing or unreadable file
        yields an empty string, which callers treat as "nothing to ingest".
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except FileNotFoundError:
            logger.error(f"File not found: {file_path}")
            return ""
        except Exception as e:
            logger.error(f"Error loading file {file_path}: {e}")
            return ""

    def create_chunks(self, text: str, source_file: str, file_type: str,
                     chunk_size: int = 1000, chunk_overlap: int = 200) -> List["DocumentChunk"]:
        """Split ``text`` into overlapping chunks of whitespace-separated words.

        Args:
            text: Document text; blank text produces no chunks.
            source_file: Path recorded on every chunk; its stem prefixes the chunk id.
            file_type: Label stored in each chunk's metadata.
            chunk_size: Maximum number of words per chunk (must be positive).
            chunk_overlap: Words shared between consecutive chunks
                (must satisfy ``0 <= chunk_overlap < chunk_size``).

        Returns:
            Chunks in document order. ``token_count`` is the chunk's word count.

        Raises:
            ValueError: If ``chunk_size`` or ``chunk_overlap`` is out of range.
        """
        if not text.strip():
            return []

        # Validate up front: an overlap >= chunk_size used to give a zero or
        # negative stride, i.e. an opaque range() error or silently no chunks;
        # a negative overlap silently skipped words between chunks.
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")
        if not 0 <= chunk_overlap < chunk_size:
            raise ValueError(
                f"chunk_overlap must satisfy 0 <= chunk_overlap < chunk_size, "
                f"got chunk_overlap={chunk_overlap}, chunk_size={chunk_size}"
            )

        words = text.split()
        stride = chunk_size - chunk_overlap
        file_stem = Path(source_file).stem
        chunks = []

        for chunk_index, start in enumerate(range(0, len(words), stride)):
            chunk_words = words[start:start + chunk_size]
            chunks.append(DocumentChunk(
                content=' '.join(chunk_words),
                metadata={
                    "source_file": source_file,
                    "file_type": file_type,
                    "created_at": datetime.now().isoformat()
                },
                chunk_id=f"{file_stem}_{chunk_index}",
                source_file=source_file,
                chunk_index=chunk_index,
                token_count=len(chunk_words)
            ))

            # This chunk already reaches the end of the text; another step
            # would only emit a tail that is fully contained in it.
            if start + chunk_size >= len(words):
                break

        return chunks

class VectorDatabase:
    """Mock vector database - replace with actual ChromaDB in production.

    Documents are kept in an in-memory list and "similarity" is plain keyword
    overlap; no embeddings are computed and nothing is written to ``db_path``
    beyond creating the directory.
    """

    def __init__(self, db_path: str = "./chroma_db", collection_name: str = "notes_collection"):
        """Create the storage directory (including parents) and an empty store."""
        self.db_path = Path(db_path)
        self.collection_name = collection_name
        self.documents = []  # In-memory storage for demo
        # parents=True so a nested db_path does not fail on a fresh machine.
        self.db_path.mkdir(parents=True, exist_ok=True)
        logger.info(f"Initialized VectorDatabase at {db_path}")

    def add_documents(self, chunks: List["DocumentChunk"]) -> None:
        """Store the given chunks as plain dicts.

        The records are built first and appended in one step, so a malformed
        chunk cannot leave the store half-updated. Errors are logged and re-raised.
        """
        try:
            # In a real implementation, you'd generate embeddings here
            new_records = [
                {
                    'content': chunk.content,
                    'metadata': chunk.metadata,
                    'chunk_id': chunk.chunk_id,
                    'similarity_score': 0.0  # Placeholder
                }
                for chunk in chunks
            ]
            self.documents.extend(new_records)

            logger.info(f"Added {len(chunks)} documents to database")
        except Exception as e:
            logger.error(f"Error adding documents: {e}")
            raise

    def search(self, query: str, n_results: int = 5,
              file_type_filter: Optional[str] = None) -> List[Dict[str, Any]]:
        """Rank stored documents by keyword overlap with ``query`` (mock implementation).

        The score is the fraction of distinct query words that also occur in the
        document. Each hit is a shallow copy of the stored record, so scoring one
        query never rewrites results returned for an earlier one.

        Args:
            query: Free-text query.
            n_results: Maximum number of hits; values <= 0 return no hits.
            file_type_filter: If given, only documents whose metadata
                ``file_type`` equals this value are considered.

        Returns:
            Up to ``n_results`` records sorted by descending ``similarity_score``;
            an empty list if anything goes wrong (the error is logged).
        """
        try:
            # Filter by file type if specified
            candidates = self.documents
            if file_type_filter:
                candidates = [
                    doc for doc in self.documents
                    if doc['metadata'].get('file_type') == file_type_filter
                ]

            # Mock similarity scoring based on keyword matching
            query_words = set(query.lower().split())
            denominator = max(len(query_words), 1)

            hits = []
            for doc in candidates:
                content_words = set(doc['content'].lower().split())
                hit = dict(doc)  # copy: keep per-query scores out of the store
                hit['similarity_score'] = len(query_words & content_words) / denominator
                hits.append(hit)

            # Stable sort keeps insertion order among equally scored documents.
            hits.sort(key=lambda hit: hit['similarity_score'], reverse=True)
            return hits[:max(n_results, 0)]

        except Exception as e:
            logger.error(f"Error searching database: {e}")
            return []

    def get_collection_stats(self) -> Dict[str, Any]:
        """Return the stored document count, collection name, path and embedding label."""
        return {
            "total_chunks": len(self.documents),
            "collection_name": self.collection_name,
            "db_path": str(self.db_path),
            "embedding_model": "mock_embeddings"
        }

class NotesRAGSystem:
    """Ties document loading, chunking and the vector store together for note search."""

    def __init__(self, notes_directory: str = "/content/processed_audio", db_path: str = "./chroma_db"):
        """Create the processor and the vector store.

        Args:
            notes_directory: Directory holding the generated note files.
            db_path: Directory handed to ``VectorDatabase``.
        """
        self.notes_directory = Path(notes_directory)
        self.db_path = db_path
        self.doc_processor = DocumentProcessor()
        self.vector_db = VectorDatabase(db_path=db_path)

        # Substring of the file name (lower-cased stem) -> file type label.
        # The first matching key in insertion order wins during ingestion.
        self.file_types = {
            "individual": "main_notes",
            "comprehensive": "study_guide",
            "detailed": "quick_summary",
            "exam": "quick_summary"
        }

    def setup_directories(self):
        """Create the notes and database directories, including missing parents.

        Raises:
            Exception: Any error from directory creation is logged and re-raised.
        """
        try:
            # parents=True: the default notes directory is nested under /content,
            # which only exists on Colab; without it mkdir raises FileNotFoundError.
            self.notes_directory.mkdir(parents=True, exist_ok=True)
            Path(self.db_path).mkdir(parents=True, exist_ok=True)
            logger.info("Directories setup complete")
        except Exception as e:
            logger.error(f"Error setting up directories: {e}")
            raise

    def ingest_documents(self, file_paths: List[str]) -> Dict[str, Any]:
        """Load, chunk and store the given files.

        Args:
            file_paths: Paths of the note files to ingest.

        Returns:
            Dict with ``successful`` / ``failed`` file counts, ``total_chunks``
            and a list of human-readable ``errors``. Per-file and database
            errors are recorded in the result instead of being raised.
        """
        results = {
            "successful": 0,
            "failed": 0,
            "total_chunks": 0,
            "errors": []
        }

        all_chunks = []

        for file_path in file_paths:
            try:
                content = self.doc_processor.load_markdown_file(file_path)
                if not content:
                    results["failed"] += 1
                    results["errors"].append(f"Empty or missing file: {file_path}")
                    continue

                # Determine file type from the first mapping key found in the name
                file_name = Path(file_path).stem.lower()
                file_type = next(
                    (label for key, label in self.file_types.items() if key in file_name),
                    "unknown"
                )

                chunks = self.doc_processor.create_chunks(content, file_path, file_type)
                all_chunks.extend(chunks)
                results["successful"] += 1
                results["total_chunks"] += len(chunks)

                logger.info(f"Processed {file_path}: {len(chunks)} chunks")

            except Exception as e:
                results["failed"] += 1
                results["errors"].append(f"Error processing {file_path}: {str(e)}")
                logger.error(f"Error processing {file_path}: {e}")

        # Add all chunks to database
        if all_chunks:
            try:
                self.vector_db.add_documents(all_chunks)
            except Exception as e:
                results["errors"].append(f"Database error: {str(e)}")
                logger.error(f"Database ingestion error: {e}")

        return results

    def search_notes(self, query: str, n_results: int = 5,
                    file_type: Optional[str] = None) -> List[Dict[str, Any]]:
        """Search the stored notes; a missing or blank query returns no results."""
        if not query or not query.strip():
            logger.warning("Empty query provided")
            return []

        return self.vector_db.search(query, n_results, file_type)

    def get_system_stats(self) -> Dict[str, Any]:
        """Return the vector store statistics plus this system's directory and file type mapping."""
        stats = self.vector_db.get_collection_stats()
        stats.update({
            "notes_directory": str(self.notes_directory),
            "supported_file_types": list(self.file_types.values()),
            "file_type_mapping": self.file_types
        })
        return stats

@dataclass
class RAGResponse:
    """Structured container for one RAG answer.

    NOTE(review): the class itself performs no validation (there is no
    ``__post_init__``); any checks must happen where instances are built.
    """
    answer: str  # presumably the generated answer text - confirm against the generator
    sources: List[Dict[str, Any]]  # presumably the retrieved documents backing the answer - confirm
    confidence_score: float  # NOTE(review): scale/range is not established here - confirm
    query: str  # presumably the user query this response answers - confirm
    response_time: float  # NOTE(review): unit (seconds?) is not established here - confirm
    metadata: Dict[str, Any]  # free-form extra information about the response

class GeminiConfig:
    """Configuration for the Gemini API with basic validation of the API key."""

    def __init__(self, api_key: str, model: str = "gemini-1.5-flash-latest",
                 temperature: float = 0.3, max_output_tokens: int = 2048,
                 top_p: float = 0.8, top_k: int = 40):
        """Store the API key, model name and generation settings.

        Args:
            api_key: Gemini API key; surrounding whitespace is stripped.
            model: Model name passed to the Gemini client.
            temperature: Sampling temperature.
            max_output_tokens: Upper bound on generated tokens.
            top_p: Nucleus sampling parameter.
            top_k: Top-k sampling parameter.

        Raises:
            ValueError: If ``api_key`` is missing, not a string, or blank.
        """
        # A whitespace-only key is as useless as an empty one; reject it here
        # rather than letting it fail later as an opaque authentication error.
        if not isinstance(api_key, str) or not api_key.strip():
            raise ValueError("API key is required")

        self.api_key = api_key.strip()
        self.model = model
        self.temperature = temperature
        self.max_output_tokens = max_output_tokens
        self.top_p = top_p
        self.top_k = top_k

class AdvancedRetriever:
    """Query expansion, optional cross-encoder reranking and relevance filtering."""

    def __init__(self, cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
        """Try to load the cross-encoder; fall back to basic retrieval if that fails."""
        self.has_reranker = False
        # Always define the attribute so it exists even when loading fails.
        self.cross_encoder = None
        try:
            self.cross_encoder = CrossEncoder(cross_encoder_model)
            self.has_reranker = True
            logger.info(f"Loaded cross-encoder: {cross_encoder_model}")
        except Exception as e:
            logger.warning(f"Could not load cross-encoder: {e}. Using basic retrieval.")

    def expand_query(self, query: str) -> List[str]:
        """Generate up to three distinct query variations for better retrieval.

        The original query always comes first. "how"/"what" questions are
        rephrased (matching whole words, ignoring case), then generic academic
        terms are appended to fill the remaining slots. A blank query is
        returned unchanged as the only element.
        """
        import re  # local import: the module-level imports of this cell do not include re

        if not query.strip():
            return [query]

        max_variations = 3
        query_lower = query.lower()
        query_words = set(re.findall(r"\w+", query_lower))

        def rephrase(pattern: str, replacement: str) -> str:
            # Case-insensitive so "How to ..." is rewritten just like "how to ...".
            return re.sub(pattern, replacement, query, flags=re.IGNORECASE)

        variations = [query]

        # Add common academic variations. Whole-word checks keep words such as
        # "show" or "somewhat" from being mangled.
        if "how" in query_words:
            variations.append(rephrase(r"\bhow to\b", "method for"))
            variations.append(rephrase(r"\bhow\b", "what is the process of"))

        if "what" in query_words:
            variations.append(rephrase(r"\bwhat is\b", "definition of"))
            variations.append(rephrase(r"\bwhat\b", "explain"))

        # Add context-specific terms
        academic_terms = ["study", "learn", "understand", "concept", "method", "approach"]
        variations.extend(
            f"{query} {term}" for term in academic_terms if term not in query_lower
        )

        # Drop duplicates (a rewrite that matched nothing equals the query)
        # while keeping order, then limit to avoid too many queries.
        unique_variations = list(dict.fromkeys(variations))
        return unique_variations[:max_variations]

    def rerank_results(self, query: str, results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Rerank results with the cross-encoder, if one is loaded.

        Adds a ``rerank_score`` key to each result dict in place and returns a
        new list sorted by it. Without a reranker, with at most one result, or
        if scoring fails, the input list is returned unchanged.
        """
        if not self.has_reranker or len(results) <= 1:
            return results

        try:
            # Prepare pairs for reranking; content is cut to 512 characters
            pairs = [(query, result['content'][:512]) for result in results]

            # Get reranking scores
            scores = self.cross_encoder.predict(pairs)

            # Add rerank scores to results
            for result, score in zip(results, scores):
                result['rerank_score'] = float(score)

            # Sort by rerank score
            reranked = sorted(results, key=lambda x: x.get('rerank_score', 0), reverse=True)

            logger.info(f"Reranked {len(results)} results")
            return reranked

        except Exception as e:
            logger.error(f"Reranking failed: {e}")
            return results

    def filter_results_by_relevance(self, results: List[Dict[str, Any]],
                                  min_similarity: float = 0.1) -> List[Dict[str, Any]]:
        """Keep only results whose ``similarity_score`` is at least ``min_similarity``.

        Results without a ``similarity_score`` are treated as scoring 0.
        """
        filtered = [r for r in results if r.get('similarity_score', 0) >= min_similarity]
        logger.info(f"Filtered {len(results)} -> {len(filtered)} results (min_sim: {min_similarity})")
        return filtered

class GeminiRAGGenerator:
    """Turns retrieved note chunks into a prompt and generates an answer with Gemini.

    The class configures the Gemini SDK once at construction time and then
    offers four steps used by the pipeline: ``create_context`` (budgeted
    context assembly), ``create_prompt``, ``generate_response`` (async call
    with timeout) and ``calculate_confidence`` (heuristic score in [0, 1]).
    """

    # Rule of thumb for English text: one token is roughly 0.75 words, i.e.
    # tokens ≈ words / 0.75 and words ≈ tokens * 0.75.
    WORDS_PER_TOKEN = 0.75

    def __init__(self, config: GeminiConfig):
        """Configure the Gemini SDK and build the generative model.

        Args:
            config: Supplies ``api_key``, ``model``, ``temperature``,
                ``max_output_tokens``, ``top_p`` and ``top_k``.

        Raises:
            Exception: Whatever the SDK raises during configuration or model
                construction; it is logged and re-raised unchanged.
        """
        self.config = config

        try:
            # Configure Gemini
            genai.configure(api_key=config.api_key)

            # Initialize model
            self.model = genai.GenerativeModel(
                model_name=config.model,
                generation_config=genai.types.GenerationConfig(
                    temperature=config.temperature,
                    max_output_tokens=config.max_output_tokens,
                    top_p=config.top_p,
                    top_k=config.top_k,
                ),
                safety_settings={
                    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
                    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
                    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
                    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
                }
            )

            logger.info(f"Initialized Gemini model: {config.model}")

        except Exception as e:
            logger.error(f"Failed to initialize Gemini: {e}")
            raise

    def create_context(self, results: List[Dict[str, Any]], max_context_length: int = 4000) -> str:
        """Join retrieved chunks into one context string within a token budget.

        Each chunk is prefixed with a ``[Source N: <file_type> - <file name>]``
        header. Token usage is estimated from the word count
        (``words / WORDS_PER_TOKEN``). The first chunk that would exceed the
        budget is truncated to the remaining space, provided more than 50
        tokens are left, and no further chunks are added.

        Args:
            results: Retrieved chunks; each needs a ``'content'`` string and
                may carry a ``'metadata'`` dict with ``file_type`` and
                ``source_file``.
            max_context_length: Budget in estimated tokens.

        Returns:
            The chunks separated by ``---`` lines, or a fixed placeholder
            sentence when ``results`` is empty.
        """
        if not results:
            return "No relevant context found."

        context_parts = []
        used_tokens = 0.0

        for i, result in enumerate(results):
            # A chunk without metadata is still usable; label it as unknown.
            metadata = result.get('metadata') or {}
            source_info = f"[Source {i+1}: {metadata.get('file_type', 'unknown')} - {Path(metadata.get('source_file', 'unknown')).name}]"
            content = result['content']
            words = content.split()

            # 1 token ≈ 0.75 words, so a chunk of N words costs about N / 0.75 tokens.
            estimated_tokens = len(words) / self.WORDS_PER_TOKEN

            if used_tokens + estimated_tokens > max_context_length:
                remaining_tokens = max_context_length - used_tokens
                if remaining_tokens > 50:  # Only add if there's meaningful space
                    words_to_take = int(remaining_tokens * self.WORDS_PER_TOKEN)
                    truncated = ' '.join(words[:words_to_take]) + "..."
                    context_parts.append(f"{source_info}\n{truncated}\n")
                # The budget is exhausted either way, so stop here.
                break

            context_parts.append(f"{source_info}\n{content}\n")
            used_tokens += estimated_tokens

        return "\n---\n".join(context_parts)

    def create_prompt(self, query: str, context: str, conversation_history: Optional[List[Dict]] = None) -> str:
        """Assemble the full prompt sent to Gemini.

        Args:
            query: The student's question.
            context: Context string, normally produced by ``create_context``.
            conversation_history: Optional earlier turns (dicts with
                ``'question'`` and ``'answer'``). Only the last two are
                included, with questions cut to 100 and answers to 150
                characters.

        Returns:
            System instructions, context, optional history and the question
            as a single string.
        """

        system_prompt = """You are an AI study assistant helping students with their notes. You have access to three types of study materials:

1. **Main Notes**: Comprehensive individual study notes
2. **Study Guide**: Guidelines on how to use the notes effectively
3. **Quick Summary**: Condensed exam-ready notes

Instructions:
- Provide accurate, helpful answers based ONLY on the provided context
- If the context doesn't contain enough information, say so clearly
- Reference specific sources when possible using [Source X] notation
- Use clear, student-friendly language
- Structure your response with headings and bullet points when helpful
- If asked about study methods, reference the study guide material
- For quick reviews, prioritize the summary material
- Do not make up information not present in the context"""

        # Add conversation history if provided
        history_text = ""
        if conversation_history:
            history_text = "\n\nRecent conversation context:\n"
            for turn in conversation_history[-2:]:  # Last 2 turns to avoid too much context
                history_text += f"Q: {turn.get('question', '')[:100]}...\nA: {turn.get('answer', '')[:150]}...\n"

        prompt = f"""{system_prompt}

Context from your study materials:
{context}
{history_text}

Student Question: {query}

Please provide a comprehensive answer based on the context above:"""

        return prompt

    async def generate_response(self, prompt: str, timeout: float = 30.0) -> str:
        """Ask Gemini for an answer, bounded by a timeout.

        Args:
            prompt: The complete prompt text.
            timeout: Maximum seconds to wait for the model (default 30).

        Returns:
            The response text.

        Raises:
            Exception: On timeout ("Response generation timed out") or on any
                other failure, including an empty response ("Failed to
                generate response: ..."). The original error is attached as
                ``__cause__``.
        """
        try:
            # generate_content is a blocking call: run it in a worker thread so the
            # event loop stays free. On timeout only the wait is abandoned; the
            # worker thread itself cannot be cancelled.
            response = await asyncio.wait_for(
                asyncio.to_thread(self.model.generate_content, prompt),
                timeout=timeout
            )

            if not response or not response.text:
                raise Exception("Empty response from Gemini")

            return response.text

        except asyncio.TimeoutError as e:
            logger.error("Gemini API timeout")
            raise Exception("Response generation timed out") from e
        except Exception as e:
            logger.error(f"Error generating response: {e}")
            raise Exception(f"Failed to generate response: {str(e)}") from e

    def calculate_confidence(self, query: str, results: List[Dict[str, Any]], response: str) -> float:
        """Heuristic confidence score for a generated answer.

        Combines the mean ``similarity_score`` of the sources (weight 0.4), the
        number of sources saturating at 3 (weight 0.3) and the answer length
        saturating at 100 words (weight 0.3). The total is multiplied by 0.6
        when the answer contains a phrase signalling uncertainty, then clamped
        to [0, 1].

        Args:
            query: Accepted for interface symmetry; not used in the score.
            results: The sources that were given to the model.
            response: The generated answer text.

        Returns:
            A plain ``float`` in [0, 1]; 0.0 when there are no sources or no
            answer, 0.5 if the calculation itself fails.
        """
        if not results or not response:
            return 0.0

        try:
            # Factors for confidence calculation
            avg_similarity = np.mean([r.get('similarity_score', 0) for r in results])
            num_sources = len(results)
            response_length = len(response.split())

            # Check if response indicates uncertainty
            uncertainty_phrases = ["don't know", "not sure", "unclear", "cannot find", "no information"]
            lowered = response.lower()
            has_uncertainty = any(phrase in lowered for phrase in uncertainty_phrases)

            # Simple confidence calculation
            confidence = (
                avg_similarity * 0.4 +  # Similarity weight
                min(num_sources / 3, 1.0) * 0.3 +  # Source diversity weight
                min(response_length / 100, 1.0) * 0.3  # Response completeness weight
            )

            # Reduce confidence if uncertainty is detected
            if has_uncertainty:
                confidence *= 0.6

            # Clamp to [0, 1] and hand back a builtin float rather than a numpy scalar.
            return float(min(max(confidence, 0.0), 1.0))

        except Exception as e:
            logger.error(f"Error calculating confidence: {e}")
            return 0.5  # Default moderate confidence

class ComprehensiveRAGPipeline:
    """End-to-end RAG pipeline: retrieve note chunks, rerank them and answer with Gemini.

    Wires together a ``NotesRAGSystem`` (storage and search), an
    ``AdvancedRetriever`` (query expansion, filtering, reranking) and a
    ``GeminiRAGGenerator`` (prompting and generation), and keeps a bounded
    conversation history.
    """

    def __init__(self,
                 gemini_api_key: str,
                 notes_directory: str = "/content/processed_audio",
                 db_path: str = "./chroma_db"):
        """Build all pipeline components.

        Args:
            gemini_api_key: API key passed to ``GeminiConfig``.
            notes_directory: Directory handed to ``NotesRAGSystem``.
            db_path: Database path handed to ``NotesRAGSystem``.

        Raises:
            ValueError: If ``gemini_api_key`` is empty.
            Exception: Any component initialization error is logged and
                re-raised.
        """

        # Validate inputs
        if not gemini_api_key:
            raise ValueError("Gemini API key is required")

        # Initialize components with error handling
        try:
            self.rag_system = NotesRAGSystem(notes_directory, db_path)
            self.retriever = AdvancedRetriever()

            # Initialize Gemini
            gemini_config = GeminiConfig(gemini_api_key)
            self.generator = GeminiRAGGenerator(gemini_config)

            # Conversation history with size limit
            self.conversation_history = []
            self.max_history_size = 10

            logger.info("Initialized Complete RAG Pipeline")

        except Exception as e:
            logger.error(f"Failed to initialize RAG pipeline: {e}")
            raise

    @staticmethod
    def _deduplicate_results(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Drop repeated hits while keeping first-seen order.

        A result is identified by its ``chunk_id``; when that is missing or
        empty, its ``content`` is used instead so such results are not thrown
        away. Results with neither are skipped because nothing identifies them.
        """
        seen_keys = set()
        unique = []
        for result in results:
            chunk_id = result.get('chunk_id', '')
            if chunk_id:
                key = ('chunk_id', chunk_id)
            else:
                content = result.get('content')
                if not content:
                    continue
                key = ('content', content)

            if key not in seen_keys:
                seen_keys.add(key)
                unique.append(result)
        return unique

    async def query(self,
                   question: str,
                   file_type_filter: Optional[str] = None,
                   n_results: int = 5,
                   use_history: bool = True) -> RAGResponse:
        """Answer a question from the ingested notes.

        Steps: expand the query, search once per expansion (fetching
        ``2 * n_results`` hits each), de-duplicate, filter by similarity,
        rerank, keep the top ``n_results``, build context and prompt, generate
        the answer and score its confidence.

        Args:
            question: The user's question; must not be blank.
            file_type_filter: Optional file type passed through to the search.
            n_results: Number of chunks used as context.
            use_history: Include recent turns in the prompt and record this
                turn afterwards.

        Returns:
            A ``RAGResponse``. Failures after input validation do not raise;
            they yield a response with an apology message, no sources,
            confidence 0.0 and the error text in ``metadata['error']``.

        Raises:
            ValueError: If ``question`` is empty or whitespace.
        """

        start_time = datetime.now()

        # Validate input
        if not question or not question.strip():
            raise ValueError("Question cannot be empty")

        try:
            # Step 1: Query expansion
            expanded_queries = self.retriever.expand_query(question)
            logger.info(f"Expanded queries: {len(expanded_queries)}")

            # Step 2: Retrieve documents for each expanded query.
            # A failed search for one expansion must not sink the whole request.
            all_results = []
            for query in expanded_queries:
                try:
                    results = self.rag_system.search_notes(
                        query,
                        n_results=n_results * 2,  # Get more results for filtering
                        file_type=file_type_filter
                    )
                    all_results.extend(results)
                except Exception as e:
                    logger.warning(f"Search failed for query '{query}': {e}")

            # Step 3: Remove duplicates (the same chunk can match several expansions)
            unique_results = self._deduplicate_results(all_results)

            logger.info(f"Retrieved {len(unique_results)} unique results")

            # Step 4: Filter by relevance
            filtered_results = self.retriever.filter_results_by_relevance(unique_results)

            # Step 5: Rerank results
            reranked_results = self.retriever.rerank_results(question, filtered_results)

            # Take top results after reranking
            final_results = reranked_results[:n_results]

            # Step 6: Create context
            context = self.generator.create_context(final_results)

            # Step 7: Generate prompt
            history = self.conversation_history if use_history else None
            prompt = self.generator.create_prompt(question, context, history)

            # Step 8: Generate response
            response_text = await self.generator.generate_response(prompt)

            # Step 9: Calculate confidence
            confidence = self.generator.calculate_confidence(question, final_results, response_text)

            # Step 10: Create response object
            response_time = (datetime.now() - start_time).total_seconds()

            rag_response = RAGResponse(
                answer=response_text,
                sources=[{
                    # Source previews are capped at 200 characters.
                    'content': r['content'][:200] + "..." if len(r['content']) > 200 else r['content'],
                    'source_file': r['metadata'].get('source_file', 'unknown'),
                    'file_type': r['metadata'].get('file_type', 'unknown'),
                    'similarity_score': r.get('similarity_score', 0.0),
                    'rerank_score': r.get('rerank_score', 0.0)
                } for r in final_results],
                confidence_score=confidence,
                query=question,
                response_time=response_time,
                metadata={
                    'num_results_retrieved': len(unique_results),
                    'num_results_filtered': len(filtered_results),
                    'num_results_used': len(final_results),
                    'context_length': len(context),
                    'expanded_queries': expanded_queries,
                    'has_reranker': self.retriever.has_reranker
                }
            )

            # Step 11: Update conversation history
            if use_history:
                self.conversation_history.append({
                    'question': question,
                    'answer': response_text[:300],  # Truncated for memory efficiency
                    'timestamp': datetime.now().isoformat(),
                    'confidence': confidence
                })

                # Keep only recent conversations
                if len(self.conversation_history) > self.max_history_size:
                    self.conversation_history = self.conversation_history[-self.max_history_size:]

            logger.info(f"Query completed in {response_time:.2f}s with confidence {confidence:.3f}")
            return rag_response

        except Exception as e:
            logger.error(f"Error in RAG pipeline: {e}")
            # Return error response instead of raising
            response_time = (datetime.now() - start_time).total_seconds()
            return RAGResponse(
                answer=f"I apologize, but I encountered an error while processing your question: {str(e)}",
                sources=[],
                confidence_score=0.0,
                query=question,
                response_time=response_time,
                metadata={"error": str(e)}
            )

    def clear_history(self):
        """Clear conversation history"""
        self.conversation_history = []
        logger.info("Conversation history cleared")

    def get_pipeline_status(self) -> Dict[str, Any]:
        """Return a status snapshot.

        Includes the history size, whether a reranker is available, the
        configured Gemini model name and the storage layer's own stats
        (``rag_system.get_system_stats()``).
        """
        return {
            "status": "active",
            "conversation_history_size": len(self.conversation_history),
            "has_reranker": self.retriever.has_reranker,
            "gemini_model": self.generator.config.model,
            "system_stats": self.rag_system.get_system_stats()
        }

# FastAPI application object. The route handlers and the startup hook defined
# later in this file register themselves on it through its decorators.
app = FastAPI(
    title="Notes RAG API",
    version="2.0.0",
    description="Enhanced RAG system for educational notes with Gemini integration"
)

# Global RAG pipeline instance shared by all endpoints. It stays None until
# startup_event() assigns it; the endpoints answer HTTP 503 while it is unset.
rag_pipeline: Optional[ComprehensiveRAGPipeline] = None

# Enhanced utility functions and CLI interface

def validate_environment():
    """Check that the third-party packages this system needs can be imported.

    Returns:
        A ``(ok, missing)`` tuple. ``ok`` is ``True`` when every import
        succeeded. ``missing`` lists the pip package names that failed to
        import, in probe order, and is empty when ``ok`` is ``True``.
    """
    # (importable module name, pip package name), probed in this order.
    required = (
        ("google.generativeai", "google-generativeai"),
        ("sentence_transformers", "sentence-transformers"),
        ("fastapi", "fastapi"),
        ("uvicorn", "uvicorn"),
    )

    missing_deps = []
    for module_name, package_name in required:
        try:
            __import__(module_name)
        except ImportError:
            missing_deps.append(package_name)

    if not missing_deps:
        return True, []

    logger.error(f"Missing dependencies: {missing_deps}")
    return False, missing_deps

async def run_cli_interface():
    """Run an interactive command-line session against the RAG pipeline.

    Flow:
        1. Check dependencies with ``validate_environment``.
        2. Read ``GEMINI_API_KEY`` from the environment, falling back to
           Colab secrets.
        3. Build a ``ComprehensiveRAGPipeline``, ingest every ``*.md`` file
           under ``/content/processed_audio`` and, if the store is still
           empty, offer to create sample documents.
        4. Read questions in a loop. ``quit``/``exit``, ``stats``, ``clear``
           and ``help`` are handled locally; anything else is sent to
           ``pipeline.query`` and the answer, sources and timing are printed.

    Ctrl-C or end-of-input at any prompt ends the session cleanly. Other
    failures are printed instead of raised; a failed question does not end
    the session.
    """
    import glob

    print("\n🤖 Notes RAG System - Interactive CLI")
    print("=" * 50)

    # Check environment
    valid_env, missing_deps = validate_environment()
    if not valid_env:
        print(f"❌ Missing dependencies: {missing_deps}")
        print("Please install them with: pip install " + " ".join(missing_deps))
        return

    # Get API key
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        try:
            from google.colab import userdata
            api_key = userdata.get('GEMINI_API_KEY')
        except Exception:
            # Not running in Colab, or the secret is missing / not accessible.
            # Deliberately best-effort: the check below reports the missing key.
            pass

    if not api_key:
        print("❌ GEMINI_API_KEY not found!")
        print("Please set it as an environment variable or in Colab secrets.")
        return

    try:
        # Initialize pipeline
        print("🔄 Initializing RAG pipeline...")
        pipeline = ComprehensiveRAGPipeline(api_key)
        pipeline.rag_system.setup_directories()
        print("✅ Pipeline initialized successfully!")

        # Ingest any notes already sitting in the notes directory.
        audio_files = glob.glob("/content/processed_audio/*.md")
        if audio_files:
            print(f"🔄 Found {len(audio_files)} files, ingesting...")
            results = pipeline.rag_system.ingest_documents(audio_files)
            print(f"✅ Ingested {results['successful']} files with {results['total_chunks']} chunks")

        # Check if there are any documents
        stats = pipeline.get_pipeline_status()
        total_docs = stats['system_stats']['total_chunks']

        if total_docs == 0:
            print("\n⚠️  No documents found in the database.")
            print("You can add documents using the /ingest endpoint or by placing files in the /content/processed_audio directory")

            # Offer to create sample documents
            create_sample = input("\nWould you like to create sample documents for testing? (y/n): ").lower().strip()
            if create_sample == 'y':
                await create_sample_documents(pipeline)
        else:
            print(f"📚 Found {total_docs} document chunks in the database")

        # Interactive query loop
        print("\n💬 You can now ask questions about your notes!")
        print("Commands:")
        print("  'quit' or 'exit' - Exit the CLI")
        print("  'stats' - Show system statistics")
        print("  'clear' - Clear conversation history")
        print("  'help' - Show this help message")
        print("-" * 50)

        while True:
            try:
                question = input("\n🤔 Your question: ").strip()

                if not question:
                    continue

                command = question.lower()

                if command in ['quit', 'exit']:
                    print("👋 Goodbye!")
                    break

                if command == 'stats':
                    stats = pipeline.get_pipeline_status()
                    print("\n📊 System Statistics:")
                    print(f"  - Documents: {stats['system_stats']['total_chunks']} chunks")
                    print(f"  - Conversation history: {stats['conversation_history_size']} entries")
                    print(f"  - Reranker available: {stats['has_reranker']}")
                    print(f"  - Model: {stats['gemini_model']}")
                    continue

                if command == 'clear':
                    pipeline.clear_history()
                    print("🗑️ Conversation history cleared!")
                    continue

                if command == 'help':
                    print("\n💡 Help:")
                    print("Ask questions about your study notes, and I'll search through them to provide answers.")
                    print("You can ask about specific topics, request explanations, or get study guidance.")
                    print("Examples:")
                    print("  - 'What is the main concept in chapter 5?'")
                    print("  - 'How should I study for the exam?'")
                    print("  - 'Explain the difference between X and Y'")
                    continue

                # Process the question
                print("🔍 Searching and generating response...")

                response = await pipeline.query(question)

                # Display response
                print(f"\n🤖 Answer (confidence: {response.confidence_score:.2f}):")
                print("-" * 30)
                print(response.answer)

                # Show sources if available
                if response.sources:
                    print(f"\n📖 Sources ({len(response.sources)}):")
                    for i, source in enumerate(response.sources, 1):
                        file_name = Path(source['source_file']).name
                        print(f"  {i}. {file_name} ({source['file_type']}) - Score: {source['similarity_score']:.3f}")

                print(f"\n⏱️ Response time: {response.response_time:.2f}s")

            except (KeyboardInterrupt, EOFError):
                # EOFError means stdin is closed: every further input() would fail
                # too, so leave the loop instead of retrying forever.
                print("\n\n👋 Interrupted by user. Goodbye!")
                break
            except Exception as e:
                print(f"\n❌ Error: {e}")
                print("Please try again with a different question.")

    except (KeyboardInterrupt, EOFError):
        # Interrupted during setup or at the sample-documents prompt.
        print("\n\n👋 Interrupted by user. Goodbye!")
    except Exception as e:
        print(f"❌ Failed to initialize pipeline: {e}")

async def create_sample_documents(pipeline: ComprehensiveRAGPipeline):
    """Write three sample physics note files and ingest them into the pipeline.

    The files (main notes, a study guide and exam-ready notes) are written
    to ``/content/processed_audio`` and then passed to
    ``pipeline.rag_system.ingest_documents``. Progress and failures are
    printed; nothing is raised and nothing is returned.

    Args:
        pipeline: The pipeline whose ``rag_system`` ingests the created files.
    """
    print("📝 Creating sample documents...")

    # Create notes directory. parents=True so this also works where /content
    # does not exist yet (i.e. outside Colab).
    notes_dir = Path("/content/processed_audio")
    try:
        notes_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"❌ Failed to create notes directory {notes_dir}: {e}")
        return

    # Sample documents
    sample_docs = {
        "individual_chunk_notes_physics.md": """# Physics Study Notes

## Newton's Laws of Motion

### First Law (Law of Inertia)
An object at rest stays at rest and an object in motion stays in motion with the same speed and in the same direction unless acted upon by an unbalanced force.

### Second Law
The acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass.
F = ma

### Third Law
For every action, there is an equal and opposite reaction.

## Energy and Work

### Kinetic Energy
KE = 1/2 * m * v²
The energy possessed by an object due to its motion.

### Potential Energy
PE = mgh
The energy possessed by an object due to its position.

### Conservation of Energy
Energy cannot be created or destroyed, only transformed from one form to another.
""",

        "comprehensive_study_guide.md": """# Study Guide: How to Use These Physics Notes

## Effective Study Strategies

### 1. Active Reading
- Don't just read passively
- Take notes while reading
- Ask questions about the material
- Relate new concepts to what you already know

### 2. Practice Problems
- Work through example problems step by step
- Try variations of problems
- Focus on understanding the process, not memorizing solutions

### 3. Concept Mapping
- Create visual connections between related topics
- Use diagrams to represent relationships
- Connect mathematical formulas to physical concepts

## Review Schedule

### Daily Review (15 minutes)
- Review notes from today's class
- Identify any unclear concepts
- Write down questions for next class

### Weekly Review (1 hour)
- Go through all notes from the week
- Practice key formulas
- Complete practice problems

### Before Exams
- Create summary sheets of key concepts
- Practice old exam questions
- Form study groups for discussion
""",

        "detailed_exam_ready_notes.md": """# Physics Quick Reference - Exam Ready

## Key Formulas

**Newton's Second Law:** F = ma
**Kinetic Energy:** KE = ½mv²
**Potential Energy:** PE = mgh
**Work:** W = Fd cos θ
**Power:** P = W/t

## Important Constants
- g = 9.8 m/s² (acceleration due to gravity)
- c = 3 × 10⁸ m/s (speed of light)

## Problem-Solving Strategy
1. Identify what's given and what you need to find
2. Choose the appropriate formula
3. Substitute values and solve
4. Check units and reasonableness of answer

## Common Mistakes to Avoid
- Forgetting to convert units
- Using wrong signs for vectors
- Not considering direction in vector problems
- Mixing up kinetic and potential energy

## Exam Tips
- Read questions carefully
- Show all work
- Include units in final answers
- Double-check calculations
"""
    }

    # Write sample files; one failed file does not stop the others.
    created_files = []
    for filename, content in sample_docs.items():
        file_path = notes_dir / filename
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
            created_files.append(str(file_path))
            print(f"  ✅ Created: {filename}")
        except Exception as e:
            print(f"  ❌ Failed to create {filename}: {e}")

    # Ingest the documents
    if created_files:
        try:
            print("🔄 Ingesting sample documents...")
            results = pipeline.rag_system.ingest_documents(created_files)
            print(f"✅ Successfully ingested {results['successful']} documents with {results['total_chunks']} chunks")

            # .get(): a result without an 'errors' entry is not an ingestion failure.
            ingest_errors = results.get('errors')
            if ingest_errors:
                print("⚠️ Some errors occurred:")
                for error in ingest_errors:
                    print(f"  - {error}")

        except Exception as e:
            print(f"❌ Failed to ingest documents: {e}")

def run_api_server(host: str = "0.0.0.0", port: int = 8000):
    """Start the FastAPI app under uvicorn and block until the server stops.

    Prints where the interactive API docs will be served, applies the
    ``nest_asyncio`` patch when that package is installed (needed inside
    Jupyter/Colab), then runs a uvicorn server for ``app``. Startup errors
    are printed rather than raised.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
    """
    print(f"🚀 Starting Notes RAG API server on {host}:{port}")
    print("📖 API Documentation will be available at:")
    print(f"   - Swagger UI: http://{host}:{port}/docs")
    print(f"   - ReDoc: http://{host}:{port}/redoc")

    try:
        # Notebook kernels already run an event loop; nest_asyncio lets uvicorn
        # start its own on top of it.
        try:
            import nest_asyncio
            nest_asyncio.apply()
            print("✅ Applied nest_asyncio patch for Jupyter/Colab")
        except ImportError:
            print("ℹ️ nest_asyncio not available (not needed for standalone execution)")

        server_options = {
            "app": app,
            "host": host,
            "port": port,
            "log_level": "info",
            "reload": False,  # auto-reload stays off for stability
        }
        uvicorn.Server(uvicorn.Config(**server_options)).run()

    except Exception as e:
        print(f"❌ Failed to start server: {e}")
        print("💡 Try using a different port or check if the port is already in use")

# Main execution functions
async def main_cli():
    """Main CLI entry point: awaits ``run_cli_interface()`` and returns when it ends."""
    await run_cli_interface()

def main_api():
    """Main API entry point: calls ``run_api_server()`` with its default host and port."""
    run_api_server()

# Example usage and testing
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1 and sys.argv[1] == "api":
        # Run API server
        main_api()
    else:
        # Run CLI interface.
        # asyncio.run() refuses to start inside an already-running event loop
        # (Jupyter/Colab). Probe for a running loop up front instead of matching
        # on the error text; this also avoids creating a coroutine that is
        # never awaited.
        try:
            asyncio.get_running_loop()
            _loop_running = True
        except RuntimeError:
            _loop_running = False

        if _loop_running:
            print("🔄 Detected running event loop (Jupyter/Colab), starting CLI directly...")
            # For Jupyter/Colab environments
            import nest_asyncio
            nest_asyncio.apply()
            asyncio.get_event_loop().run_until_complete(main_cli())
        else:
            asyncio.run(main_cli())

# Convenience functions for Jupyter/Colab usage
def start_api():
    """Convenience function to start API server in Jupyter/Colab.

    Simply delegates to ``main_api()``.
    """
    main_api()

def start_cli():
    """Convenience function to start the CLI from a script or from Jupyter/Colab.

    When no event loop is running the CLI is driven by ``asyncio.run``.
    Inside a running loop (notebook kernels) ``nest_asyncio`` is applied and
    the CLI runs on the existing loop. Exceptions raised by the CLI itself
    propagate to the caller.
    """
    # Decide up front which situation we are in. Catching RuntimeError around
    # asyncio.run() would also catch RuntimeErrors raised inside the CLI and
    # then start the CLI a second time.
    try:
        asyncio.get_running_loop()
        loop_running = True
    except RuntimeError:
        loop_running = False

    if not loop_running:
        asyncio.run(main_cli())
        return

    # Handle nested event loop in Jupyter/Colab
    import nest_asyncio
    nest_asyncio.apply()
    asyncio.get_event_loop().run_until_complete(main_cli())

# Quick test function
async def quick_test():
    """Smoke-test the pipeline end to end with one tiny document.

    Looks up ``GEMINI_API_KEY`` (environment first, then Colab secrets),
    builds a pipeline, adds a single chunk with id ``"test_1"`` directly to
    the vector store and asks one question about it.

    Returns:
        ``True`` when the query completed, ``False`` when the API key is
        missing or any step raised. Outcomes are also printed.

    Note:
        The test chunk is written to the pipeline's regular vector store and
        is not removed afterwards.
    """
    print("🧪 Running quick test...")

    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        try:
            from google.colab import userdata
            api_key = userdata.get('GEMINI_API_KEY')
        except Exception:
            # Not in Colab or the secret is unavailable; reported just below.
            # Ctrl-C and SystemExit are deliberately not swallowed here.
            pass

    if not api_key:
        print("❌ GEMINI_API_KEY not found!")
        return False

    try:
        # Initialize minimal pipeline
        pipeline = ComprehensiveRAGPipeline(api_key)

        # Create a test document
        test_content = "The capital of France is Paris. Paris is known for the Eiffel Tower."
        chunks = [DocumentChunk(
            content=test_content,
            metadata={"source_file": "test.md", "file_type": "main_notes"},
            chunk_id="test_1",
            source_file="test.md",
            chunk_index=0,
            token_count=12
        )]

        pipeline.rag_system.vector_db.add_documents(chunks)

        # Test query
        response = await pipeline.query("What is the capital of France?")

        print(f"✅ Test completed successfully!")
        print(f"   Answer: {response.answer[:100]}...")
        print(f"   Confidence: {response.confidence_score:.3f}")
        print(f"   Response time: {response.response_time:.2f}s")

        return True

    except Exception as e:
        print(f"❌ Test failed: {e}")
        return False

# Usage instructions
print("""
📚 Notes RAG System - Enhanced Version

🚀 Quick Start:
  1. Set your GEMINI_API_KEY environment variable
  2. Run one of these commands:

CLI Interface:
  python script.py
  # OR in Jupyter/Colab:
  start_cli()

API Server:
  python script.py api
  # OR in Jupyter/Colab:
  start_api()

Quick Test:
  # In Jupyter/Colab:
  await quick_test()

📖 For full documentation, visit the API docs at /docs when running the server.
""")

# Pydantic request/response models
# Request body for POST /query. The fields map one-to-one onto the keyword
# arguments that query_notes() forwards to the pipeline's query() method.
class QueryRequest(BaseModel):
    # Question text; validation requires 1-1000 characters.
    question: str = Field(..., min_length=1, max_length=1000, description="The question to ask")
    # Optional restriction of the search to a single file type.
    file_type_filter: Optional[str] = Field(None, description="Filter by file type: main_notes, study_guide, or quick_summary")
    # How many chunks to use as context; validation allows 1-20.
    n_results: int = Field(5, ge=1, le=20, description="Number of results to retrieve")
    # When true, recent conversation turns are added to the prompt and this turn is recorded.
    use_history: bool = Field(True, description="Whether to use conversation history")

# Response body for POST /query. query_notes() builds it from the fields of
# the object returned by the pipeline, so the names here must match those.
class QueryResponse(BaseModel):
    answer: str  # generated answer text (or the apology message on failure)
    sources: List[Dict[str, Any]]  # one dict per chunk used as context
    confidence_score: float  # heuristic score; 0.0 on failure
    query: str  # the question as received
    response_time: float  # seconds spent handling the query
    metadata: Dict[str, Any]  # retrieval counters, or {"error": ...} on failure

# Request body for POST /ingest.
class IngestRequest(BaseModel):
    # Paths of the files to ingest; at least one is required.
    # NOTE(review): `min_items` is the Pydantic v1 spelling; under Pydantic v2 it
    # is reported as deprecated in favour of `min_length` - confirm the target version.
    file_paths: List[str] = Field(..., min_items=1, description="List of file paths to ingest")

@app.on_event("startup")
async def startup_event():
    """Create the global RAG pipeline when the API server starts.

    The Gemini API key is read from the ``GEMINI_API_KEY`` environment
    variable, with Colab secrets as a fallback. On success the module-level
    ``rag_pipeline`` is set and its storage directories are prepared. Any
    failure, including a missing key, is logged and re-raised.
    """
    global rag_pipeline

    try:
        # First choice: the process environment.
        gemini_api_key = os.environ.get("GEMINI_API_KEY")

        # Second choice: Colab secrets. Failing to import or read them is fine.
        if not gemini_api_key:
            try:
                from google.colab import userdata
                gemini_api_key = userdata.get('GEMINI_API_KEY')
            except Exception:
                pass

        if not gemini_api_key:
            logger.error("GEMINI_API_KEY not found in environment variables or Colab secrets")
            raise Exception("GEMINI_API_KEY is required. Please set it as an environment variable.")

        # Build the pipeline, then make sure its directories exist.
        rag_pipeline = ComprehensiveRAGPipeline(gemini_api_key)
        rag_pipeline.rag_system.setup_directories()

        logger.info("RAG API startup complete")

    except Exception as e:
        logger.error(f"Startup failed: {e}")
        raise

@app.post("/query", response_model=QueryResponse)
async def query_notes(request: QueryRequest):
    """Answer a question from the ingested notes.

    Responds with 503 while the pipeline is not initialized, 400 when the
    pipeline rejects the input with a ``ValueError``, and 500 for any other
    failure (which is logged).
    """
    if not rag_pipeline:
        raise HTTPException(status_code=503, detail="RAG pipeline not initialized")

    query_args = {
        "question": request.question,
        "file_type_filter": request.file_type_filter,
        "n_results": request.n_results,
        "use_history": request.use_history,
    }

    try:
        rag_result = await rag_pipeline.query(**query_args)
        # Copy the pipeline's result fields into the API response model.
        return QueryResponse(**asdict(rag_result))

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Query failed: {e}")
        raise HTTPException(status_code=500, detail="Internal server error occurred")

@app.post("/ingest")
async def ingest_documents(request: IngestRequest, background_tasks: BackgroundTasks):
    """Ingest the listed files into the RAG system.

    Responds with 503 while the pipeline is not initialized and with 400
    naming the first listed path that does not exist. Ingestion runs inside
    the request; any failure is logged and reported as 500.
    ``background_tasks`` is accepted but not used.
    """
    if not rag_pipeline:
        raise HTTPException(status_code=503, detail="RAG pipeline not initialized")

    try:
        # Find the first path that is not on disk, checking in request order.
        first_missing = next(
            (candidate for candidate in request.file_paths if not Path(candidate).exists()),
            None,
        )
        if first_missing is not None:
            raise HTTPException(status_code=400, detail=f"File not found: {first_missing}")

        ingestion_report = rag_pipeline.rag_system.ingest_documents(request.file_paths)

        return {
            "message": "Ingestion completed",
            "results": ingestion_report
        }

    except HTTPException:
        # Pass the 400 from above through untouched.
        raise
    except Exception as e:
        logger.error(f"Ingestion failed: {e}")
        raise HTTPException(status_code=500, detail="Ingestion failed")

@app.get("/stats")
async def get_stats():
    """Return the pipeline's status snapshot.

    Responds with 503 while the pipeline is not initialized and with 500
    (after logging) when the status cannot be collected.
    """
    if not rag_pipeline:
        raise HTTPException(status_code=503, detail="RAG pipeline not initialized")

    try:
        status_snapshot = rag_pipeline.get_pipeline_status()
    except Exception as e:
        logger.error(f"Stats retrieval failed: {e}")
        raise HTTPException(status_code=500, detail="Could not retrieve stats")

    return status_snapshot

@app.delete("/history")
async def clear_conversation_history():
    """Forget all stored conversation turns.

    Responds with 503 while the pipeline is not initialized and with 500
    (after logging) when clearing fails.
    """
    if not rag_pipeline:
        raise HTTPException(status_code=503, detail="RAG pipeline not initialized")

    try:
        rag_pipeline.clear_history()
    except Exception as e:
        logger.error(f"History clearing failed: {e}")
        raise HTTPException(status_code=500, detail="Could not clear history")

    return {"message": "Conversation history cleared successfully"}

@app.get("/health")
async def health_check():
    """Report service health.

    ``status`` is ``"healthy"`` once the RAG pipeline has been initialized
    and ``"unhealthy"`` before that. The current timestamp and the API
    version are included as well.
    """
    state = "unhealthy" if not rag_pipeline else "healthy"
    checked_at = datetime.now().isoformat()
    return {
        "status": state,
        "timestamp": checked_at,
        "version": "2.0.0"
    }

🔄 Detected running event loop (Jupyter/Colab), starting CLI directly...

🤖 Notes RAG System - Interactive CLI
🔄 Initializing RAG pipeline...


config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

✅ Pipeline initialized successfully!

⚠️  No documents found in the database.
You can add documents using the /ingest endpoint or by placing files in the /content/processed_audio directory


KeyboardInterrupt: Interrupted by user