In [17]:
# --- CELL 1: INSTALLATION ---
!pip install gradio transformers sentencepiece torch nltk spacy yt-dlp faster-whisper
!python -m spacy download en_core_web_sm
!apt-get install -y ffmpeg

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m12.8/12.8 MB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m‚úî Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m‚ö† Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 2 not upgraded.


In [18]:
%%writefile requirements.txt
gradio
transformers
sentencepiece
torch
nltk
spacy
yt-dlp
faster-whisper
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl

Overwriting requirements.txt


In [19]:
%%writefile packages.txt
ffmpeg

Overwriting packages.txt


In [None]:
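# --- CELL 2: MAIN APP ---
# To export this cell as app.py, move the line below to the very top of the cell
# and uncomment it (a cell magic only works as the first line of a cell).
# %%writefile app.py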
import gradio as gr
import os
import torch
import spacy
import re
import random
import math
import yt_dlp
import nltk
import nltk.data
from transformers import pipeline, M2M100ForConditionalGeneration, M2M100Tokenizer
from faster_whisper import WhisperModel

# --- 1. SAFE DEPENDENCY LOADING ---
# NLTK Data Check
# Each resource is probed on its own: a machine that already has `punkt` may
# still be missing `punkt_tab` or `stopwords`, and checking only `punkt` would
# skip those downloads and fail later at tokenization / stop-word lookup time.
_NLTK_RESOURCES = {
    'punkt': 'tokenizers/punkt',
    'punkt_tab': 'tokenizers/punkt_tab',
    'stopwords': 'corpora/stopwords',
}
_missing_nltk = []
for _pkg, _resource_path in _NLTK_RESOURCES.items():
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        _missing_nltk.append(_pkg)

if _missing_nltk:
    print("‚¨áÔ∏è Downloading NLTK resources...")
    for _pkg in _missing_nltk:
        nltk.download(_pkg)

# Spacy Model Check
# spacy.load raises OSError when the model package is not installed; download
# it once and retry.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load('en_core_web_sm')

# Device Configuration
# Single source of truth for where tensors are sent (used by translate_text).
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"üöÄ AI Core initialized on {DEVICE}")

# --- 2. AI ENGINE (MODELS) ---
class AI_Engine:
    """Container that loads and keeps every model the app uses.

    Attributes set by the constructor:
        whisper          -- faster-whisper speech-to-text model
        summarizer       -- transformers summarization pipeline (BART)
        trans_model_name -- checkpoint id of the translation model
        trans_tokenizer  -- M2M100 tokenizer
        trans_model      -- M2M100 model (moved to CUDA when available)
        qg_pipe          -- transformers text2text pipeline used for quiz questions
    """

    def __init__(self):
        print("‚è≥ Initializing AI Models...")

        # Speech-to-text: attempt the GPU build first and fall back to an
        # int8 CPU build if constructing it fails for any reason.
        try:
            self.whisper = WhisperModel("medium", device="cuda", compute_type="float16")
            print("   ‚úÖ Whisper loaded on CUDA")
        except Exception:
            print("   ‚ö†Ô∏è CUDA failed. Switching to CPU Mode...")
            self.whisper = WhisperModel("medium", device="cpu", compute_type="int8")

        # transformers pipelines take a device index: 0 = first GPU, -1 = CPU.
        gpu_present = torch.cuda.is_available()
        pipeline_device = 0 if gpu_present else -1

        # Summarizer (BART)
        self.summarizer = pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
            device=pipeline_device,
        )

        # Translator (M2M100)
        self.trans_model_name = "facebook/m2m100_418M"
        self.trans_tokenizer = M2M100Tokenizer.from_pretrained(self.trans_model_name)
        translator = M2M100ForConditionalGeneration.from_pretrained(self.trans_model_name)
        self.trans_model = translator.to("cuda") if gpu_present else translator

        # Quiz question generator (T5)
        self.qg_pipe = pipeline(
            'text2text-generation',
            model='valhalla/t5-small-qa-qg-hl',
            device=pipeline_device,
        )

# Build the shared engine once when the cell/module runs; the processing
# functions read their models from this single instance.
engine = AI_Engine()

# --- 3. ACCURACY CALCULATION LOGIC (ON DEMAND) ---
def calculate_metrics(segments_data, transcript_text, summary_text, notes_text):
    """Compute the three on-demand quality scores shown in the UI.

    This runs only when the user presses the 'Calculate Accuracy' button.

    Args:
        segments_data: iterable of transcription segments exposing an
            ``avg_logprob`` attribute, or None/empty.
        transcript_text: full transcript used as the reference text.
        summary_text: generated summary (may be None/empty).
        notes_text: generated notes (may be None/empty).

    Returns:
        Tuple ``(trans_score, summ_score, notes_score)`` of percentages rounded
        to one decimal. A score is 0.0 when its inputs are missing.
    """
    print("üìä Calculating Accuracy Metrics...")

    # 1. Transcription confidence: mean of exp(avg_logprob) over all segments,
    #    expressed as a percentage.
    trans_score = 0.0
    if segments_data:
        probabilities = [math.exp(seg.avg_logprob) for seg in segments_data]
        if probabilities:
            trans_score = round((sum(probabilities) / len(probabilities)) * 100, 1)

    def get_text_overlap(source, target):
        """Percentage of distinct content words of `target` that also occur in `source`.

        This is target-side precision (|source ∩ target| / |target|), not a
        symmetric Jaccard index.
        """
        if not source or not target:
            return 0.0
        stop_words = set(nltk.corpus.stopwords.words('english'))

        def tokenize(txt):
            # Lower-case BEFORE the stop-word test: the stop-word list is
            # lower-case, so "The"/"And" would otherwise slip through and
            # inflate the overlap.
            return {
                tok.lower()
                for tok in nltk.word_tokenize(txt)
                if tok.isalnum() and tok.lower() not in stop_words
            }

        src_tokens = tokenize(source)
        tgt_tokens = tokenize(target)

        if not tgt_tokens:
            return 0.0

        # Faithfulness check: how many target words are present in the source.
        shared = src_tokens & tgt_tokens
        return round((len(shared) / len(tgt_tokens)) * 100, 1)

    # 2. Summary score
    summ_score = get_text_overlap(transcript_text, summary_text)

    # 3. Notes score
    # NOTE(review): generate_notes prefixes its output with a
    # "### Key Takeaways:" heading whose words count as target tokens here;
    # confirm whether that heading should be excluded from the score.
    notes_score = get_text_overlap(transcript_text, notes_text)

    return trans_score, summ_score, notes_score

# --- 4. PROCESSING FUNCTIONS ---

def download_video(url, progress=gr.Progress()):
    """Fetch the video behind `url` with yt-dlp into the local ``downloads`` folder.

    Returns:
        ``(file_path, status_message)`` on success, or ``(None, message)`` when
        the URL is empty or yt-dlp raises.
    """
    if not url:
        return None, "Please enter a URL."

    progress(0, desc="Downloading...")

    output_path = "downloads"
    os.makedirs(output_path, exist_ok=True)

    # Prefer mp4 video + m4a audio, then any single mp4, then whatever is best.
    ydl_opts = dict(
        format='bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        outtmpl=os.path.join(output_path, '%(title)s.%(ext)s'),
        noplaylist=True,
    )

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            saved_file = ydl.prepare_filename(info)
            status = f"‚úÖ Video downloaded: {info.get('title')}"
            return saved_file, status
    except Exception as e:
        # Report the failure as a status string instead of raising.
        return None, f"‚ùå Error: {str(e)}"

def process_transcription(video_path, progress=gr.Progress()):
    """Transcribe the file at `video_path` with the shared Whisper model.

    Returns:
        ``(transcript_text, segments, status_message)``; the first two are None
        when no path was given or transcription raised.
    """
    if not video_path:
        return None, None, "Please download a video first."

    progress(0.2, desc="Transcribing...")
    try:
        segment_iter, _info = engine.whisper.transcribe(video_path, beam_size=5)
        # Materialise the segments so they can be kept in State and reused by
        # the accuracy calculation later.
        segments = [*segment_iter]
        pieces = (seg.text for seg in segments)
        transcript_text = " ".join(pieces).strip()
    except Exception as e:
        return None, None, f"‚ùå Error: {str(e)}"
    else:
        return transcript_text, segments, "‚úÖ Transcription Complete"

def generate_summary(text, max_chunk_chars=2500, max_chunks=3):
    """Summarize a transcript by summarizing its leading chunks and joining them.

    The text is split into sentences and packed greedily into chunks of fewer
    than `max_chunk_chars` characters; only the first `max_chunks` chunks are
    summarized.

    Args:
        text: transcript text; falsy input yields "No transcript.".
        max_chunk_chars: size limit used when packing sentences into a chunk.
        max_chunks: number of leading chunks that are summarized.

    Returns:
        The chunk summaries joined by single spaces (empty string if none
        could be produced).
    """
    if not text:
        return "No transcript."

    # Greedy sentence-based chunking.
    sentences = nltk.tokenize.sent_tokenize(text)
    chunks = []
    current_chunk = ""
    for sent in sentences:
        if len(current_chunk) + len(sent) < max_chunk_chars:
            current_chunk += sent + " "
        else:
            # Do not emit an empty chunk when the very first sentence is
            # already over the limit; it would waste one of the slots below.
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = sent + " "
    if current_chunk:
        chunks.append(current_chunk)

    summaries = []
    for chunk in chunks[:max_chunks]:
        # Scale the summary length bounds to the chunk's word count.
        input_len = len(chunk.split())
        max_l = min(150, int(input_len * 0.6))
        min_l = min(30, int(input_len * 0.2))
        if max_l <= min_l:
            continue
        try:
            # truncation=True keeps an over-long chunk (e.g. one huge
            # unpunctuated sentence) within the model's input limit.
            summary = engine.summarizer(
                chunk,
                max_length=max_l,
                min_length=min_l,
                do_sample=False,
                truncation=True,
            )
            summaries.append(summary[0]['summary_text'])
        except Exception as e:
            # Best effort: skip this chunk but leave a trace in the logs.
            print(f"Summary generation failed for one chunk: {e}")

    return " ".join(summaries)

def generate_notes(text):
    """Build a markdown bullet list of key takeaways from a transcript.

    Sentences are grouped seven at a time; each of the first five groups is
    summarized into one bullet point.

    Args:
        text: transcript text; falsy input yields "No transcript.".

    Returns:
        Markdown string starting with a "### Key Takeaways:" heading followed
        by one "- ..." line per successfully summarized group.
    """
    if not text:
        return "No transcript."

    sentences = nltk.tokenize.sent_tokenize(text)
    chunk_size = 7
    chunks = [' '.join(sentences[i:i+chunk_size]) for i in range(0, len(sentences), chunk_size)]

    notes = []
    for chunk in chunks[:5]:
        try:
            # Cap the characters sent to the summarizer for each group.
            res = engine.summarizer(chunk[:2500], max_length=60, min_length=15, do_sample=False)
            point = res[0]['summary_text']
            notes.append(f"- {point}")
        except Exception as e:
            # Best effort: skip this group, but do not swallow
            # KeyboardInterrupt/SystemExit and do not hide the reason.
            print(f"Note extraction failed for one chunk: {e}")

    return "### Key Takeaways:\n" + "\n".join(notes)

def generate_quiz(text):
    """Generate up to five question/answer pairs from a transcript.

    Candidate answers are named entities (DATE, ORG, PERSON, GPE, EVENT) found
    in the first 5000 characters; when fewer than three distinct entities are
    found, nouns longer than four characters are used instead. For each
    selected answer, the first sentence (shorter than 200 characters) that
    contains it and yields a question is used.

    Args:
        text: transcript text; falsy input yields "No transcript.".

    Returns:
        Markdown string with one "**Q:** ... / **A:** ||...||" entry per
        generated question (empty string if none could be generated).
    """
    if not text:
        return "No transcript."

    doc = nlp(text[:5000])
    entities = [ent.text for ent in doc.ents if ent.label_ in ['DATE', 'ORG', 'PERSON', 'GPE', 'EVENT']]
    unique_entities = list(set(entities))

    if len(unique_entities) < 3:
        unique_entities = [token.text for token in doc if token.pos_ == "NOUN" and len(token.text) > 4]
        unique_entities = list(set(unique_entities))

    random.shuffle(unique_entities)
    selected_answers = unique_entities[:5]

    # Sentence splitting does not depend on the answer, so do it once.
    sentences = nltk.tokenize.sent_tokenize(text)

    quiz_items = []
    for ans in selected_answers:
        pattern = re.compile(re.escape(ans), re.IGNORECASE)
        highlighted = f"<hl>{ans}<hl>"
        for sent in sentences:
            if ans in sent and len(sent) < 200:
                # A callable replacement inserts the text literally; a plain
                # replacement string would interpret backslashes in `ans` as
                # regex escapes and could raise re.error.
                input_text = pattern.sub(lambda _match: highlighted, sent)
                try:
                    q = engine.qg_pipe(f"generate question: {input_text}")[0]['generated_text']
                except Exception as e:
                    # Try the next sentence containing this answer.
                    print(f"Question generation failed for one sentence: {e}")
                    continue
                quiz_items.append(f"**Q:** {q}\n**A:** ||{ans}||\n\n")
                break

    return "".join(quiz_items)

def translate_text(text, target_lang):
    """Translate the first 600 characters of `text` from English with M2M100.

    Args:
        text: source text; falsy input yields "No transcript.".
        target_lang: one of "Hindi", "French", "English"; any other value is
            treated as English.

    Returns:
        The decoded translation string.
    """
    if not text:
        return "No transcript."

    target_code = {"Hindi": "hi", "French": "fr", "English": "en"}.get(target_lang, "en")

    tok = engine.trans_tokenizer
    translator = engine.trans_model

    # The source language is fixed to English.
    tok.src_lang = "en"
    batch = tok(text[:600], return_tensors="pt").to(DEVICE)

    # Force the decoder to start with the target-language token.
    output_ids = translator.generate(
        **batch,
        forced_bos_token_id=tok.get_lang_id(target_code),
    )
    decoded = tok.batch_decode(output_ids, skip_special_tokens=True)
    return decoded[0]

# --- 5. UI SETUP ---
# Stylesheet handed to gr.Blocks(css=...): dark gradient background, light text,
# translucent rounded boxes for inputs/outputs (including the #output_box and
# #status_box element ids), a gradient-filled h1, and primary/secondary button colours.
custom_css = """
.gradio-container { background: linear-gradient(to bottom right, #0F172A, #1E1B4B) !important; }
body, .prose, .markdown-text, label, span, p, h1, h2, h3, h4, h5, h6 { color: #F8FAFC !important; font-family: 'Inter', sans-serif; }
.markdown-text, .prose p, .prose h1, .prose h2, .prose h3, .prose li, label span { margin-left: 12px !important; }
textarea, input, .gr-box, .prose, #output_box, #status_box { background-color: rgba(30, 41, 59, 0.8) !important; color: #FFFFFF !important; border: 1px solid #334155 !important; border-radius: 12px !important; backdrop-filter: blur(5px); }
h1 { background: linear-gradient(90deg, #818CF8, #22D3EE); -webkit-background-clip: text; -webkit-text-fill-color: transparent; text-align: center; font-weight: 800 !important; margin-bottom: 10px !important; margin-left: 0 !important; }
button.primary { background: linear-gradient(90deg, #4F46E5, #7C3AED) !important; color: white !important; border: none !important; }
button.secondary { background-color: #334155 !important; color: #E2E8F0 !important; border: 1px solid #475569 !important; }
"""

# Build the Gradio UI: layout first, then the event wiring that connects the
# buttons to the processing functions defined above.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Team Racoon Tool") as demo:

    # STATE VARIABLES (To store data for on-demand accuracy check)
    # Per-session values passed between handlers without being shown directly.
    video_path_state = gr.State()
    transcript_state = gr.State()
    segments_state = gr.State()
    summary_state = gr.State()
    notes_state = gr.State()

    # --- HEADER ---
    with gr.Row():
        with gr.Column():
            gr.Markdown("# üéì AI Powered Lecture Intelligence Tool")

    # --- INPUT ---
    with gr.Row():
        with gr.Column(scale=1, variant="panel"):
            gr.Markdown('### 1. Source & Status')
            url_input = gr.Textbox(label="YouTube URL", placeholder="Paste video link...", lines=1)
            download_btn = gr.Button("‚¨áÔ∏è Load Video", variant="primary", size="lg")
            # Read-only box that receives the status strings returned by the handlers.
            status_msg = gr.Textbox(label="System Logs", value="System Ready...", interactive=False, lines=6, elem_id="status_box")

        with gr.Column(scale=1):
            gr.Markdown('### 2. Video Preview')
            video_player = gr.Video(label="Player", interactive=True, elem_id="video_player")

    gr.Markdown("---")

    # --- OUTPUT ---
    with gr.Row(variant="panel"):

        # CONTROLS
        with gr.Column(scale=1):
            gr.Markdown("### 3. Controls")
            # Starts disabled; on_download enables it once a video path exists.
            analyze_btn = gr.Button("‚ú® Transcribe", variant="primary", size="lg", interactive=False)

            gr.Markdown("#### Generation Tools")
            summ_btn = gr.Button("üìù Generate Summary", variant="secondary")
            notes_btn = gr.Button("üìå Extract Notes", variant="secondary")
            quiz_btn = gr.Button("‚ùì Create Quiz", variant="secondary")

            gr.Markdown("#### Translation")
            with gr.Row():
                lang_select = gr.Dropdown(["Hindi", "French", "English"], label="Target Language", value="Hindi")
                trans_btn = gr.Button("GO", variant="secondary")

        # RESULTS TAB
        # NOTE(review): the same elem_id ("output_box") is given to all five
        # result components below; confirm that sharing one id is intended.
        with gr.Column(scale=2):
            gr.Markdown("### 4. Intelligence Output")
            with gr.Tabs():
                with gr.TabItem("üìÑ Transcript"):
                    transcript_output = gr.Textbox(label="Full Text", lines=15, show_copy_button=True, elem_id="output_box")
                with gr.TabItem("üìù Summary"):
                    summary_output = gr.Textbox(label="Abstract", lines=10, show_copy_button=True, elem_id="output_box")
                with gr.TabItem("üìå Notes"):
                    notes_output = gr.Markdown(elem_id="output_box")
                with gr.TabItem("‚ùì Quiz"):
                    quiz_output = gr.Markdown(elem_id="output_box")
                with gr.TabItem("üåç Translate"):
                    trans_output = gr.Textbox(label="Translation", lines=10, elem_id="output_box")

    # --- ACCURACY SECTION (ON DEMAND) ---
    gr.Markdown("---")
    with gr.Row(variant="panel"):
        with gr.Column(scale=1):
            acc_btn = gr.Button("üìä Calculate Accuracy Scores", variant="primary", size="lg")
            gr.Markdown("*Generate Summary & Notes first.*")

        with gr.Column(scale=3):
            with gr.Row():
                acc_trans_out = gr.Number(label="Transcription Confidence %", value=0)
                acc_summ_out = gr.Number(label="Summary Fidelity %", value=0)
                acc_notes_out = gr.Number(label="Notes Relevance %", value=0)

    # --- EVENTS ---

    # 1. Download
    # Returns the path twice (player + state), the status text, and an update
    # that enables the Transcribe button only when the download produced a path.
    def on_download(url):
        path, msg = download_video(url)
        return path, path, msg, gr.update(interactive=(path is not None))

    download_btn.click(on_download, inputs=[url_input], outputs=[video_player, video_path_state, status_msg, analyze_btn])

    # 2. Transcribe (Saves segments to State)
    # The transcript text goes to both the visible textbox and the state.
    def on_transcribe(video_path):
        text, segments, msg = process_transcription(video_path)
        return text, text, segments, msg

    analyze_btn.click(on_transcribe,
                      inputs=[video_path_state],
                      outputs=[transcript_output, transcript_state, segments_state, status_msg])

    # 3. Summary (Saves summary to State)
    def on_summary(text):
        summ = generate_summary(text)
        return summ, summ

    summ_btn.click(on_summary, inputs=[transcript_state], outputs=[summary_output, summary_state])

    # 4. Notes (Saves notes to State)
    def on_notes(text):
        notes = generate_notes(text)
        return notes, notes

    notes_btn.click(on_notes, inputs=[transcript_state], outputs=[notes_output, notes_state])

    # 5. Quiz & Translate
    # These two are display-only: their results are not stored in any state.
    quiz_btn.click(generate_quiz, inputs=[transcript_state], outputs=[quiz_output])
    trans_btn.click(translate_text, inputs=[transcript_state, lang_select], outputs=[trans_output])

    # 6. ACCURACY CALCULATION (Separate Trigger)
    # Reads the stored segments/transcript/summary/notes and fills the three Number fields.
    acc_btn.click(calculate_metrics,
                  inputs=[segments_state, transcript_state, summary_state, notes_state],
                  outputs=[acc_trans_out, acc_summ_out, acc_notes_out])

# Enable the request queue and start the server with a public share link;
# debug=True keeps the cell running so errors and logs stay visible.
demo.queue().launch(share=True, debug=True)



‚¨áÔ∏è Downloading NLTK resources...


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


üöÄ AI Core initialized on cpu
‚è≥ Initializing AI Models...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocabulary.txt: 0.00B [00:00, ?B/s]

model.bin:   0%|          | 0.00/1.53G [00:00<?, ?B/s]

   ‚ö†Ô∏è CUDA failed. Switching to CPU Mode...


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


tokenizer_config.json:   0%|          | 0.00/298 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/908 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.94G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.94G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/233 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/656 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/242M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Device set to use cpu
  with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Team Racoon Tool") as demo:
  with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Team Racoon Tool") as demo:


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://85abad55f47a188464.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


[youtube] Extracting URL: https://www.youtube.com/shorts/sYOxk41KoW8
[youtube] sYOxk41KoW8: Downloading webpage




[youtube] sYOxk41KoW8: Downloading android sdkless player API JSON
[youtube] sYOxk41KoW8: Downloading web safari player API JSON




[youtube] sYOxk41KoW8: Downloading m3u8 information




[info] sYOxk41KoW8: Downloading 1 format(s): 399+140-11
[download] Destination: downloads/Bro Roasted Himselfü§£ü§Ø l  ‚Å®@Puravjha_   Mimicry Day by Day üìà.f399.mp4
[download] 100% of    3.53MiB in 00:00:00 at 16.68MiB/s  
[download] Destination: downloads/Bro Roasted Himselfü§£ü§Ø l  ‚Å®@Puravjha_   Mimicry Day by Day üìà.f140-11.m4a
[download] 100% of  381.17KiB in 00:00:00 at 1.14MiB/s   
[Merger] Merging formats into "downloads/Bro Roasted Himselfü§£ü§Ø l  ‚Å®@Puravjha_   Mimicry Day by Day üìà.mp4"
Deleting original file downloads/Bro Roasted Himselfü§£ü§Ø l  ‚Å®@Puravjha_   Mimicry Day by Day üìà.f399.mp4 (pass -k to keep)
Deleting original file downloads/Bro Roasted Himselfü§£ü§Ø l  ‚Å®@Puravjha_   Mimicry Day by Day üìà.f140-11.m4a (pass -k to keep)


In [None]:
import os

from google.colab import files

# Trigger a browser download of each deployment file.
# Every file is handled on its own so that one missing or failing file
# (e.g. app.py when the %%writefile line of the main cell was not enabled)
# does not prevent the remaining files from being downloaded.
for export_name in ('app.py', 'requirements.txt', 'packages.txt'):
    if not os.path.exists(export_name):
        print(f"Skipping {export_name}: file not found in the working directory.")
        continue
    try:
        files.download(export_name)
    except Exception as e:
        print("Error:", e)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>