# Lecture Extraction System - LLM Server
Run all cells in order. Copy the ngrok URL at the end.

In [None]:
!pip install flask pyngrok transformers torch accelerate opencv-python openai-whisper easyocr -q

In [None]:
from pyngrok import ngrok

# Paste your token from https://dashboard.ngrok.com/get-started/your-authtoken
# NOTE: the auth token is a credential. Replace the placeholder locally and
# avoid saving or sharing the notebook with the real value filled in.
ngrok.set_auth_token("YOUR_NGROK_TOKEN_HERE")

In [None]:
from flask import Flask, request, jsonify
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
import cv2
import whisper
import easyocr
import subprocess
import os
import tempfile
import uuid
import threading
import numpy as np

# Flask application exposing the health/upload/status/result/generate routes.
app = Flask(__name__)
# LLM and tokenizer; both stay None until load_llm() assigns them.
model = None
tokenizer = None
# In-memory job registry keyed by job id. Entries are created by the upload
# route and updated by the background processing thread; nothing is persisted.
jobs = {}
# Despite the name, this is used as a sampling interval: only every
# FRAME_RATE-th decoded frame is passed to OCR.
FRAME_RATE = 30

def process_video_task(job_id, video_path):
    """Run the extraction pipeline for one uploaded video.

    Steps: extract a 16 kHz mono WAV with ffmpeg, transcribe it with Whisper,
    then run EasyOCR on every ``FRAME_RATE``-th frame. Status, progress and
    the final result are written to ``jobs[job_id]`` so the HTTP endpoints
    can report them.

    Args:
        job_id: Key of an existing entry in ``jobs``.
        video_path: Path of the uploaded video. The file and the derived
            audio file are deleted when the task ends, whether it succeeded
            or failed.

    Returns:
        None. On success ``jobs[job_id]["result"]`` holds a dict with the
        keys ``transcript``, ``frames`` and ``duration``. On failure the
        status becomes ``"failed"`` and ``error``/``message`` carry the
        exception text.
    """
    job = jobs[job_id]
    # Build the audio path from the stem plus a fixed suffix so it can never
    # collide with the input, whatever the upload's extension or letter case.
    audio_path = os.path.splitext(video_path)[0] + "_audio.wav"

    try:
        job["status"] = "processing"
        job["progress"] = 10
        job["message"] = "Extracting audio..."

        subprocess.run(
            ["ffmpeg", "-i", video_path, "-vn", "-acodec", "pcm_s16le",
             "-ar", "16000", "-ac", "1", "-y", audio_path],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)

        job["progress"] = 25
        job["message"] = "Transcribing with Whisper..."

        whisper_model = whisper.load_model("base")
        result = whisper_model.transcribe(audio_path, verbose=False)
        segments = result.get("segments", [])
        # "confidence" is a placeholder; no per-segment score is computed here.
        transcript = [
            {
                "start": s["start"],
                "end": s["end"],
                "text": s.get("text", "").strip(),
                "confidence": 0.0,
            }
            for s in segments
        ]
        duration = segments[-1]["end"] if segments else 0

        job["progress"] = 50
        job["message"] = "Extracting frames..."

        ocr_reader = easyocr.Reader(["en"], gpu=True)
        frames_data = []
        cap = cv2.VideoCapture(video_path)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                # Some containers report 0/NaN; fall back to the nominal rate
                # instead of dividing by zero when computing timestamps.
                fps = float(FRAME_RATE)
            # Loop-invariant: number of frames we expect to OCR.
            expected_samples = max(
                1, int(cap.get(cv2.CAP_PROP_FRAME_COUNT) / FRAME_RATE))

            frame_count = 0
            saved = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_count % FRAME_RATE == 0:
                    # Keep only detections the OCR engine is reasonably sure of.
                    printed_text = [
                        text
                        for (_bbox, text, conf) in ocr_reader.readtext(frame)
                        if conf >= 0.5
                    ]
                    frames_data.append({
                        "timestamp": frame_count / fps,
                        "printed_text": " ".join(printed_text),
                        # No handwriting recognition is performed; the key is
                        # kept so the result schema stays unchanged.
                        "handwritten_text": "",
                        "ocr_confidence": 0.0
                    })
                    saved += 1
                    if saved % 10 == 0:
                        # Clamp: the reported frame count can be wrong or 0,
                        # which would otherwise push progress past 100.
                        job["progress"] = min(
                            90, 50 + int(40 * saved / expected_samples))
                        job["message"] = f"OCR on frames... {saved} done"
                frame_count += 1
        finally:
            cap.release()

        job["progress"] = 95
        job["message"] = "Finalizing..."

        job["result"] = {"transcript": transcript, "frames": frames_data, "duration": duration}
        job["status"] = "completed"
        job["progress"] = 100
        job["message"] = "Done"
    except Exception as e:
        job["status"] = "failed"
        job["error"] = str(e)
        job["message"] = str(e)
    finally:
        # Best-effort cleanup on every path. A failed delete must not turn a
        # completed job into a failed one, and failures must not leak files.
        for path in (video_path, audio_path):
            try:
                os.remove(path)
            except OSError:
                pass

@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe that also reports whether the LLM has been loaded."""
    payload = {
        "status": "healthy",
        "model_loaded": model is not None,
    }
    return jsonify(payload), 200

@app.route('/upload', methods=['POST'])
def upload_video():
    """Accept a video upload and start background processing.

    Expects a multipart form with the file under the ``video`` field. The
    file is saved to the system temp directory and handed to
    ``process_video_task`` on a new thread.

    Returns:
        ``({"job_id": ...}, 200)`` on success, ``({"error": ...}, 400)`` when
        no file was supplied, or ``({"error": ...}, 500)`` on unexpected
        errors.
    """
    try:
        if 'video' not in request.files:
            return jsonify({"error": "No video file"}), 400
        file = request.files['video']
        # Covers both an empty string and a missing (None) filename.
        if not file.filename:
            return jsonify({"error": "No file selected"}), 400
        job_id = str(uuid.uuid4())
        # The filename is client-controlled: drop any directory components
        # (either separator style) so it cannot steer the save location.
        safe_name = os.path.basename(file.filename.replace("\\", "/")) or "upload"
        video_path = os.path.join(tempfile.gettempdir(), f"{job_id}_{safe_name}")
        file.save(video_path)
        jobs[job_id] = {"status": "processing", "progress": 0, "message": "Starting...", "result": None, "error": None}
        threading.Thread(target=process_video_task, args=(job_id, video_path)).start()
        return jsonify({"job_id": job_id}), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route('/status/<job_id>', methods=['GET'])
def get_status(job_id):
    """Return status, progress, message and error for a job, or 404 if unknown."""
    try:
        job = jobs[job_id]
    except KeyError:
        return jsonify({"error": "Job not found"}), 404

    payload = {field: job[field] for field in ("status", "progress", "message")}
    payload["error"] = job.get("error")
    return jsonify(payload), 200

@app.route('/result/<job_id>', methods=['GET'])
def get_result(job_id):
    """Return the stored result of a completed job.

    Responds 404 for an unknown job id and 400 while the job has any status
    other than ``"completed"``.
    """
    try:
        job = jobs[job_id]
    except KeyError:
        return jsonify({"error": "Job not found"}), 404

    if job["status"] == "completed":
        return jsonify(job["result"]), 200
    return jsonify({"error": "Job not ready"}), 400

@app.route('/generate', methods=['POST'])
def generate():
    """Answer a question about lecture content with the loaded LLM.

    Expects a JSON object with optional keys ``prompt`` (str), ``context``
    (str), ``max_tokens`` (int, default 500) and ``temperature`` (float,
    default 0.7). A temperature of 0 or below selects greedy decoding.

    Returns:
        ``({"text": ..., "metadata": {"prompt_length", "tokens_generated"}}, 200)``
        on success; 400 for a missing/invalid JSON body or non-numeric
        parameters; 503 when the model has not been loaded; 500 on any other
        error.
    """
    try:
        if model is None or tokenizer is None:
            return jsonify({"error": "Model not loaded"}), 503

        # silent=True yields None instead of raising on a bad/missing body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        prompt = data.get('prompt', '')
        context = data.get('context', '')
        try:
            max_tokens = int(data.get('max_tokens', 500))
            temperature = float(data.get('temperature', 0.7))
        except (TypeError, ValueError):
            return jsonify({"error": "max_tokens and temperature must be numeric"}), 400

        # Phi-2's window is 2048 tokens shared by input and output, so the
        # input budget is whatever the requested completion leaves over.
        context_window = 2048
        max_tokens = max(1, min(max_tokens, context_window - 1))
        max_input_tokens = context_window - max_tokens

        # Truncate context to fit Phi-2's 2048 token limit (leave room for prompt + response)
        max_context_chars = 4000
        if len(context) > max_context_chars:
            context = context[:max_context_chars] + "\n\n[Context truncated...]"

        full_prompt = f"""Based on the following lecture content, answer the question.

Lecture Content:
{context}

Question: {prompt}

Answer:"""

        inputs = tokenizer(
            full_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=max_input_tokens,
        ).to(model.device)

        generation_kwargs = {
            "max_new_tokens": max_tokens,
            "pad_token_id": tokenizer.eos_token_id,
        }
        if temperature > 0:
            generation_kwargs["do_sample"] = True
            generation_kwargs["temperature"] = temperature
        else:
            # Sampling requires a strictly positive temperature; treat
            # non-positive values as a request for deterministic output.
            generation_kwargs["do_sample"] = False

        with torch.no_grad():
            outputs = model.generate(**inputs, **generation_kwargs)

        # Decode only the newly generated tokens. Slicing the decoded text by
        # len(full_prompt) is wrong whenever the tokenizer round-trip or the
        # truncation above changes the prompt's character length.
        input_len = inputs["input_ids"].shape[1]
        new_tokens = outputs[0][input_len:]
        answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        return jsonify({
            "text": answer,
            "metadata": {
                "prompt_length": len(full_prompt),
                "tokens_generated": int(new_tokens.shape[0])
            }
        }), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500

def load_llm():
    """Load the Phi-2 tokenizer and model into the module-level globals.

    The tokenizer falls back to its EOS token for padding when no pad token
    is defined, and the same pad id is written into the model config. The
    model is loaded in float16 with automatic device placement.
    """
    global model, tokenizer

    checkpoint = "microsoft/phi-2"
    print(f"Loading model: {checkpoint}")

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model_config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)
    model_config.pad_token_id = tokenizer.pad_token_id

    load_options = {
        "config": model_config,
        "torch_dtype": torch.float16,
        "device_map": "auto",
        "trust_remote_code": True,
    }
    model = AutoModelForCausalLM.from_pretrained(checkpoint, **load_options)

    print("Model loaded successfully!")

def start_server():
    """Load the LLM, open an ngrok tunnel to port 5000 and serve the Flask app.

    Prints the public tunnel URL so it can be copied into the local client.
    The call to ``app.run`` blocks until the server stops.
    """
    load_llm()
    public_url = ngrok.connect(5000)
    # The emoji were stored as mojibake (UTF-8 bytes decoded as cp1252);
    # restored to the intended characters.
    print("\n🚀 Server is running!")
    print(f"📡 Public URL: {public_url}")
    print("Copy this URL to your local app\n")
    app.run(port=5000)

In [None]:
# Loads the model, opens the ngrok tunnel, then blocks while Flask serves on port 5000.
start_server()