In [None]:
# pytube provides the YouTube class imported by the content-fetcher cell.
!pip install pytube



In [None]:
# pytesseract is only the Python wrapper; the Tesseract OCR engine itself is
# installed from the system package manager.
!pip install pytesseract
# apt-get (rather than apt) is the interface intended for scripted use, and -y
# answers the confirmation prompt so the cell also completes under "Run All"
# when nobody is there to type "Y".
!sudo apt-get update
!sudo apt-get install -y tesseract-ocr

Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13
Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Hit:3 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:7 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:8 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:9 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [3,103 kB]
Get:10 http://security.ubuntu.com/ubuntu jammy-security/universe amd6

In [None]:
# OpenAI's speech-to-text model is published on PyPI as "openai-whisper".
# The similarly named "whisper" distribution is an unrelated project and does
# not provide whisper.load_model(), so remove it if an earlier run installed it.
!pip uninstall -y whisper
!pip install openai-whisper

Collecting whisper
  Downloading whisper-1.1.10.tar.gz (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: whisper
  Building wheel for whisper (setup.py) ... [?25l[?25hdone
  Created wheel for whisper: filename=whisper-1.1.10-py3-none-any.whl size=41120 sha256=d02300e28bab25f4ae2d7412c78d32d058daeae70d8680bb98d20a2adffd94f8
  Stored in directory: /root/.cache/pip/wheels/21/65/ee/4e6672aabfa486d3341a39a04f8f87c77e5156149299b5a7d0
Successfully built whisper
Installing collected packages: whisper
Successfully installed whisper-1.1.10


# Detailed Implementation Plan
# 1. YouTube Content Fetcher
Purpose: Retrieve video content from YouTube for processing.

In [None]:
from pytube import YouTube
import os

video_url = "https://www.youtube.com/watch?v=sWSLLO3DS1I&ab_channel=TheOrganicChemistryTutor"


def download_youtube_content(video_url):
    """Download the audio track and an MP4 stream of a YouTube video.

    Args:
        video_url: URL of the YouTube video to fetch.

    Returns:
        A tuple ``(audio_path, video_path, title)``. The audio file is saved
        under ``temp_audio`` and the video file under ``temp_video``. If
        anything fails (for example the video cannot be loaded or no matching
        stream exists) the error is printed and ``(None, None, None)`` is
        returned instead.
    """
    try:
        yt = YouTube(video_url)

        # Order by bitrate so the highest-quality audio-only stream is picked;
        # a bare .first() returns whichever stream happens to be listed first.
        audio_stream = (
            yt.streams.filter(only_audio=True).order_by("abr").desc().first()
        )
        if audio_stream is None:
            raise ValueError("no audio-only stream available")
        audio_path = audio_stream.download(output_path='temp_audio')

        # Optional: download video for visual content extraction
        video_stream = yt.streams.filter(file_extension='mp4').first()
        if video_stream is None:
            raise ValueError("no mp4 stream available")
        video_path = video_stream.download(output_path='temp_video')

        return audio_path, video_path, yt.title
    except Exception as e:
        # Best-effort: failures are reported as an all-None tuple, not raised.
        print(f"Error downloading video: {e}")
        return None, None, None

# 2. Audio/Visual Processing Pipeline
Purpose: Extract meaningful content from the video's audio and visuals.

Audio Processing:

In [None]:
import whisper  # OpenAI's Whisper model for speech-to-text

def transcribe_audio(audio_path, model_name="base"):
    """Transcribe an audio file to text with Whisper.

    Args:
        audio_path: Path of the audio file to transcribe.
        model_name: Name of the Whisper checkpoint to load. Defaults to
            ``"base"``; ``"small"``, ``"medium"`` or ``"large"`` can be used
            for better quality.

    Returns:
        A tuple ``(text, segments)`` taken from the ``"text"`` and
        ``"segments"`` entries of the transcription result: the full text and
        the timed segments.
    """
    model = whisper.load_model(model_name)
    result = model.transcribe(audio_path)
    return result["text"], result["segments"]

##### Visual Processing (for slides/code extraction):


In [None]:
import cv2
import pytesseract
from PIL import Image

def extract_text_from_video(video_path, sample_rate=5):
    """Run OCR on a sample of a video's frames and return the recognised text.

    Args:
        video_path: Path of the video file to read.
        sample_rate: OCR is applied to every ``sample_rate``-th frame,
            starting with the first one (default 5).

    Returns:
        The OCR output of each sampled frame, joined with newlines. An empty
        string is returned when no frame could be read.

    Raises:
        ValueError: If ``sample_rate`` is smaller than 1.
    """
    if sample_rate < 1:
        raise ValueError(f"sample_rate must be a positive integer, got {sample_rate!r}")

    cap = cv2.VideoCapture(video_path)
    texts = []
    try:
        frame_index = 0
        while cap.isOpened():
            ok, frame = cap.read()
            if not ok:
                break

            if frame_index % sample_rate == 0:
                # Convert frame to grayscale for better OCR
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                texts.append(pytesseract.image_to_string(Image.fromarray(gray)))

            frame_index += 1
    finally:
        # Release the capture even when decoding or OCR raises.
        cap.release()

    return "\n".join(texts)

# 3. Content Analysis & Structuring Engine
Purpose: Analyze the extracted content and structure it into educational components.

In [None]:
from transformers import pipeline
import re

class ContentAnalyzer:
    """Classify, summarise and mine a piece of text for educational structure."""

    # Checkpoints the pipelines otherwise pick as their defaults. Naming them
    # explicitly keeps runs reproducible and avoids the "No model was supplied"
    # warning.
    SUMMARIZER_MODEL = "sshleifer/distilbart-cnn-12-6"
    CLASSIFIER_MODEL = "facebook/bart-large-mnli"

    # Candidate labels for the zero-shot classifier.
    CATEGORIES = ("programming", "mathematics", "science", "history", "language", "other")

    # Whitespace that follows a "." or "?" but not an abbreviation-like token.
    _SENTENCE_BOUNDARY = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s')
    # Fenced blocks, inline backtick spans, or a "def name(...):" statement up
    # to the next non-indented line. Text without newlines (such as a spoken
    # transcript) lets the "def" branch run to the end of the text.
    _CODE_PATTERN = re.compile(r'(```[\s\S]*?```|`[^`]+`|def\s+\w+\(.*?\):[\s\S]*?(?=\n\S|\Z))')

    def __init__(self):
        """Load the summarization and zero-shot-classification pipelines."""
        self.summarizer = pipeline("summarization", model=self.SUMMARIZER_MODEL)
        self.classifier = pipeline("zero-shot-classification", model=self.CLASSIFIER_MODEL)

    def structure_content(self, text):
        """Build a structured description of ``text``.

        Args:
            text: The text to analyse (for example a transcript).

        Returns:
            A dict with the keys ``"content_type"`` (top-ranked label from
            ``CATEGORIES``), ``"summary"``, ``"key_points"`` and
            ``"code_blocks"``. ``"code_blocks"`` is only populated when the
            content type is ``"programming"``. Empty or whitespace-only text
            yields ``"other"`` with empty fields and does not call the models.
        """
        if not text or not text.strip():
            return {
                "content_type": "other",
                "summary": "",
                "key_points": [],
                "code_blocks": [],
            }

        # Identify content type (programming, math, history, etc.)
        classification = self.classifier(text, list(self.CATEGORIES))
        content_type = classification["labels"][0]

        # Summarize key points. truncation=True keeps inputs longer than the
        # model's maximum length from raising.
        summary = self.summarizer(
            text, max_length=130, min_length=30, do_sample=False, truncation=True
        )

        # For programming videos, extract code blocks
        code_blocks = []
        if content_type == "programming":
            code_blocks = self.extract_code_blocks(text)

        return {
            "content_type": content_type,
            "summary": summary[0]["summary_text"],
            "key_points": self.extract_key_points(text),
            "code_blocks": code_blocks,
        }

    def extract_key_points(self, text, max_points=5):
        """Return up to ``max_points`` leading sentences of ``text``.

        Sentences are stripped of surrounding whitespace and empty fragments
        (for example after a trailing space) are dropped.
        """
        sentences = (part.strip() for part in self._SENTENCE_BOUNDARY.split(text))
        return [sentence for sentence in sentences if sentence][:max_points]

    def extract_code_blocks(self, text):
        """Return every substring of ``text`` matched by the code pattern."""
        # Simple regex for code patterns - can be enhanced with AST parsing
        return self._CODE_PATTERN.findall(text)

# 4. Notebook/Exercise Generator
Purpose: Create Jupyter notebooks or other interactive exercises.

In [None]:
import nbformat as nbf

def generate_jupyter_notebook(code_blocks, explanations, title):
    """Assemble a notebook that interleaves explanations with code.

    Args:
        code_blocks: Code strings, each emitted as a code cell.
        explanations: Text strings, each emitted as a markdown cell placed
            before the code block at the same position.
        title: Text of the level-1 heading in the first cell.

    Returns:
        A new v4 notebook: title cell, the interleaved explanation/code
        cells, then a "Practice Exercises" heading and an empty solution
        cell. Falsy entries in either list are skipped.
    """
    from itertools import zip_longest

    nb = nbf.v4.new_notebook()

    # Add title cell
    cells = [nbf.v4.new_markdown_cell(f"# {title}")]

    # Alternate between explanations and code. zip_longest (not zip) keeps the
    # surplus entries when the two lists have different lengths.
    for exp, code in zip_longest(explanations or [], code_blocks or []):
        if exp:
            cells.append(nbf.v4.new_markdown_cell(exp))
        if code:
            cells.append(nbf.v4.new_code_cell(code))

    # Add practice exercises at the end
    cells.append(nbf.v4.new_markdown_cell("## Practice Exercises"))
    cells.append(nbf.v4.new_code_cell("# Your solution here"))

    nb["cells"] = cells
    return nb

def save_notebook(nb, filename):
    """Write notebook ``nb`` to ``filename``.

    The parent directory is created when it does not exist yet, and the file
    is opened as UTF-8 so non-ASCII cell content is written the same way
    regardless of the platform's default encoding.

    Args:
        nb: The notebook object to serialise.
        filename: Destination path of the ``.ipynb`` file.
    """
    import os

    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(filename, 'w', encoding='utf-8') as f:
        nbf.write(nb, f)

# 5. Quiz Generation Module
Purpose: Generate assessment questions based on video content.

In [None]:
from transformers import pipeline

class QuizGenerator:
    """Generate quiz questions from text with a text2text-generation model."""

    def __init__(self):
        """Load the question-generation pipeline."""
        # NOTE(review): confirm that the "generate questions: " prefix used in
        # generate_questions matches the input format this checkpoint expects.
        self.qg_pipeline = pipeline("text2text-generation", model="mrm8488/t5-base-finetuned-question-generation-ap")

    def generate_questions(self, text, num_questions=5, chunk_size=1000):
        """Return up to ``num_questions`` generated questions for ``text``.

        Args:
            text: Source text to generate questions from.
            num_questions: Maximum number of questions to return.
            chunk_size: Number of characters sent to the model per call, to
                stay within the model's input limit. Must be positive.

        Returns:
            A list of generated strings, at most ``num_questions`` long. Empty
            when ``text`` is empty or ``num_questions`` is smaller than 1.
        """
        if num_questions < 1 or not text:
            return []

        questions = []
        # Split text into chunks to avoid model limits
        for start in range(0, len(text), chunk_size):
            chunk = text[start:start + chunk_size]
            # Several return sequences require beam search (or sampling);
            # plain greedy decoding can only return one sequence.
            outputs = self.qg_pipeline(
                f"generate questions: {chunk}",
                num_beams=num_questions,
                num_return_sequences=num_questions,
            )
            questions.extend(item["generated_text"] for item in outputs)

            # Only the first num_questions results are returned, so stop as
            # soon as there are enough instead of running every chunk.
            if len(questions) >= num_questions:
                break

        return questions[:num_questions]  # Return up to requested number

# 6. Multilingual Support System
Purpose: Make content accessible in multiple languages.

In [None]:
from transformers import MarianMTModel, MarianTokenizer

class Translator:
    """Translate English text into the supported target languages."""

    def __init__(self):
        """Register the supported languages; no model is loaded yet."""
        # target language -> (English-to-target model, target-to-English model)
        self.models = {
            "es": ("Helsinki-NLP/opus-mt-en-es", "Helsinki-NLP/opus-mt-es-en"),
            "fr": ("Helsinki-NLP/opus-mt-en-fr", "Helsinki-NLP/opus-mt-fr-en"),
            # Add more languages as needed
        }
        # target language -> (tokenizer, model), filled on first use so that
        # repeated translate() calls do not reload the weights each time.
        self._loaded = {}

    def _get_translator(self, target_lang):
        """Return the cached (tokenizer, model) pair for ``target_lang``."""
        if target_lang not in self._loaded:
            model_name = self.models[target_lang][0]
            self._loaded[target_lang] = (
                MarianTokenizer.from_pretrained(model_name),
                MarianMTModel.from_pretrained(model_name),
            )
        return self._loaded[target_lang]

    def translate(self, text, target_lang):
        """Translate ``text`` from English into ``target_lang``.

        Args:
            text: The English text to translate. Input longer than the
                tokenizer's limit is truncated.
            target_lang: Language code that is a key of ``self.models``.

        Returns:
            The translated string.

        Raises:
            ValueError: If ``target_lang`` is not a key of ``self.models``.
        """
        if target_lang not in self.models:
            raise ValueError(f"Unsupported language: {target_lang}")

        tokenizer, model = self._get_translator(target_lang)

        # Tokenize and translate
        batch = tokenizer([text], return_tensors="pt", truncation=True)
        gen = model.generate(**batch)
        translated = tokenizer.batch_decode(gen, skip_special_tokens=True)

        return translated[0]

# 7. User Interface/Delivery System
Options:

Web Application (Flask/Django)

Browser Extension

Desktop Application

Example Flask API endpoint:

In [None]:
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/process_video', methods=['POST'])
def process_video():
    """Run the full pipeline for one video and return the results as JSON.

    Expects a JSON object with a required ``"url"`` and an optional
    ``"language"`` (default ``"en"``).

    Returns:
        On success, a JSON object with ``"title"``, ``"summary"``,
        ``"notebook"`` (path of the generated notebook, or ``None`` when the
        content was not classified as programming) and ``"quiz"``.
        ``400`` with an ``"error"`` message when the body is not a JSON
        object, the URL is missing, or the language is unsupported.
        ``502`` with an ``"error"`` message when the video cannot be downloaded.
    """
    import os
    import re

    # silent=True yields None instead of raising on a missing or invalid body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or not data.get('url'):
        return jsonify({"error": "Request body must be a JSON object with a 'url' field"}), 400
    video_url = data['url']
    target_lang = data.get('language', 'en')

    # Reject unsupported languages before doing any expensive work.
    translator = None
    if target_lang != 'en':
        translator = Translator()
        if target_lang not in translator.models:
            return jsonify({"error": f"Unsupported language: {target_lang}"}), 400

    # Download content
    audio_path, video_path, title = download_youtube_content(video_url)
    if audio_path is None:
        # download_youtube_content signals failure with an all-None tuple.
        return jsonify({"error": "Could not download the requested video"}), 502

    # Transcribe audio
    transcript, segments = transcribe_audio(audio_path)

    # Analyze content
    # NOTE(review): the analyzer and quiz generator are rebuilt per request,
    # which reloads their models each time; consider creating them once.
    analyzer = ContentAnalyzer()
    structured_content = analyzer.structure_content(transcript)

    # Generate notebook if programming
    notebook_path = None
    if structured_content["content_type"] == "programming":
        notebook = generate_jupyter_notebook(
            structured_content["code_blocks"],
            structured_content["key_points"],
            title
        )
        # The title comes from the remote video, so replace every character
        # that is not a word character, dot or dash (spaces, path separators,
        # ...) before using it as a file name.
        safe_title = re.sub(r'[^\w.-]', '_', title) or "notebook"
        os.makedirs('output', exist_ok=True)
        notebook_path = f"output/{safe_title}.ipynb"
        save_notebook(notebook, notebook_path)

    # Generate quiz
    quiz_gen = QuizGenerator()
    questions = quiz_gen.generate_questions(transcript)

    # Translate if needed
    if translator is not None:
        translated_content = translator.translate(structured_content["summary"], target_lang)
        translated_questions = [translator.translate(q, target_lang) for q in questions]
    else:
        translated_content = structured_content["summary"]
        translated_questions = questions

    return jsonify({
        "title": title,
        "summary": translated_content,
        "notebook": notebook_path,
        "quiz": translated_questions
    })

The error `ModuleNotFoundError: No module named 'pytube'` means that the Python interpreter could not find the `pytube` library when it tried to import it. This usually happens when the library is not installed in your environment. The code above installs the necessary library. After running this cell, you should be able to run the code to download YouTube content.

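The error `ModuleNotFoundError: No module named 'whisper'` means that the Python interpreter could not find the `whisper` library when it tried to import it. This usually happens when the library is not installed in your environment. Note that OpenAI's Whisper is published on PyPI as `openai-whisper`; the similarly named `whisper` package is an unrelated project and does not provide `whisper.load_model`. Install `openai-whisper` (for example with `pip install openai-whisper`), and after that you should be able to run the code to transcribe audio.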

In [None]:
# Example usage of ContentAnalyzer.
# Creating the analyzer builds its summarization and zero-shot-classification
# pipelines (see ContentAnalyzer.__init__).
analyzer = ContentAnalyzer()
# structure_content() takes the text to analyze. In the full pipeline this
# would be the transcript returned by transcribe_audio(); a short placeholder
# is used here so the cell can run without downloading a video.
sample_text = "This is a sample text about programming. def my_function(x): return x + 1. This is a key point. Another key point."

# The result is a dict with content_type, summary, key_points and code_blocks.
structured_content = analyzer.structure_content(sample_text)
display(structured_content)

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.
ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documenta

config.json: 0.00B [00:00, ?B/s]

ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.
ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 5

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.
ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 5

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.
Device set to use cpu
No model was supplied, defaulted to facebook/bart-large-mnli and revision d7645e1 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline wi

config.json: 0.00B [00:00, ?B/s]

ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.


model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.
ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 5

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.
Device set to use cpu
ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_de

{'content_type': 'programming',
 'summary': ' This is a sample text about programming . Def my_function(x) return x + 1 is a key point. Another key point is another key point .',
 'key_points': ['This is a sample text about programming.',
  'def my_function(x): return x + 1.',
  'This is a key point.',
  'Another key point.'],
 'code_blocks': ['def my_function(x): return x + 1. This is a key point. Another key point.']}

In [None]:
# To run the Flask app, you can use this:
if __name__ == '__main__':
    # Make sure you have the 'output' directory created
    os.makedirs('output', exist_ok=True)

    # Debug mode enables Werkzeug's interactive debugger, which lets anyone
    # who can reach the port execute code. It is therefore off unless
    # FLASK_DEBUG=1 is set, and when it is on the server binds to localhost
    # only.
    debug_enabled = os.environ.get("FLASK_DEBUG", "0") == "1"

    app.run(
        debug=debug_enabled,
        # The reloader re-executes the script in a child process, which is
        # not meaningful from inside a notebook kernel.
        use_reloader=False,
        host='127.0.0.1' if debug_enabled else '0.0.0.0',
        port=5000,
    )

 * Serving Flask app '__main__'
 * Debug mode: on


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.
ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionali