# CodeSensei: AI-Powered Learning From YouTube Tutorials

**CodeSensei** is an intelligent, interactive assistant built to help learners understand programming by analyzing educational YouTube videos. It leverages Large Language Models (LLMs) to provide level-specific explanations, concept breakdowns, quizzes, and code debugging support.

---

### Key Features

- **Multi-Video Analysis**: Analyze and summarize multiple YouTube videos on the same topic.
- **Skill-Level Personalization**: Tailored explanations for Beginner, Intermediate, and Advanced learners.
- **Topic Breakdown with Timestamps**: Extract key concepts with precise time mappings from transcripts.
- **Multi-Video Concept Aggregation**: Combines overlapping concepts across videos and presents the best explanations.
- **LLM-Based Summarization**: Uses the LLaMA 3 (Groq API) model for natural language summarization and instruction.
- **Quiz Generator**: Automatically generates quizzes based on full video content and evaluates user answers.
- **Python Code Debugging**: Upload or paste code to receive structured feedback, optimization tips, and detailed explanations.

---

**Technologies Used**: Python 3.10+, Google Colab, Groq API (LLaMA 3), YouTube Transcript API, ipywidgets



**Install Required Packages**

In [None]:
import nbformat
import json

# Path to your current Colab notebook (save to Google Drive first)
notebook_path = '/content/Codesensie_fixed.ipynb'


# Create a copy of current notebook without broken widget metadata
def clean_metadata():
    """Write a copy of the running notebook without its ``widgets`` metadata.

    The notebook content is requested from the Colab front end, the
    ``widgets`` entry is dropped from the notebook-level metadata when it is
    present, and the result is written to ``notebook_path``.
    """
    # Imported lazily so the rest of the cell can at least be parsed outside
    # Colab. NOTE(review): `_message` is a private Colab module - confirm it is
    # still available in the runtime you target.
    from google.colab import _message

    # Get current notebook content as a dictionary
    response = _message.blocking_request('get_ipynb')

    # Load the dictionary into an nbformat object
    nb = nbformat.from_dict(response['ipynb'])

    # Remove broken widget metadata if it exists (no-op when it is absent)
    nb['metadata'].pop('widgets', None)

    # Save cleaned notebook
    with open(notebook_path, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)

    print(f"✅ Cleaned notebook saved to: {notebook_path}")


clean_metadata()

In [None]:

!pip install --quiet youtube-transcript-api groq python-dotenv ipywidgets



**Import Libraries and Setup**

In [None]:
import os
import re
import datetime
from IPython.display import display, Markdown, clear_output
import ipywidgets as widgets
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled, VideoUnavailable
from groq import Groq

# Prefer a key supplied through the GROQ_API_KEY environment variable so the
# secret does not have to be stored in the notebook itself. The original
# placeholder is kept as a fallback for users who paste the key in by hand.
api_key = os.environ.get("GROQ_API_KEY") or "Add your API Key"
client = Groq(api_key=api_key)

#  Multi-video storage setup
multi_video_data = {
    "videos": [],        # Individual results per video
    "combined_text": "", # Merged unique content
}


**Get YouTube Transcript**

These functions extract the video ID from one or more YouTube URLs and attempt to fetch the English transcript using the YouTubeTranscriptApi. If no English transcript is found, they fall back to an auto-generated one; otherwise, they return an appropriate error.

In [None]:
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or ``None``.

    Recognises ``watch?v=``, ``youtu.be/``, ``embed/``, ``v/``, ``/videos/``
    and, additionally, ``shorts/`` and ``live/`` links. Anything that is not a
    string (for example ``None``) yields ``None`` instead of raising.
    """
    if not isinstance(url, str):
        return None
    match = re.search(
        r'(?:v=|youtu\.be/|embed/|shorts/|live/|v/|/videos/|/watch\?v=|\.be/)'
        r'([a-zA-Z0-9_-]{11})',
        url,
    )
    return match.group(1) if match else None

def get_transcript_for_video(video_id):
    """Fetch a transcript for *video_id*.

    Returns a ``(transcript, error)`` pair: on success the fetched transcript
    and ``None``; on failure ``None`` and a human-readable message. An English
    transcript is requested first; when none is found, the first
    auto-generated transcript in the listing is used instead.
    """
    try:
        available = YouTubeTranscriptApi.list_transcripts(video_id)
        try:
            chosen = available.find_transcript(['en'])
        except NoTranscriptFound:
            # Fall back to whichever generated transcript is listed first.
            chosen = None
            for candidate in available:
                if candidate.is_generated:
                    chosen = candidate
                    break
            if not chosen:
                return None, "No transcript available."
        return chosen.fetch(), None
    except (NoTranscriptFound, TranscriptsDisabled, VideoUnavailable) as err:
        return None, str(err)

def process_multiple_youtube_links(url_list):
    """Fetch transcripts for every URL and store the results in ``multi_video_data``.

    ``multi_video_data["videos"]`` receives one entry per URL (either the
    transcript data or an ``error`` message) and
    ``multi_video_data["combined_text"]`` the newline-joined text of all
    videos, where lines already contributed by an earlier video are left out.
    """
    lines_from_earlier_videos = set()
    per_video_texts = []
    results = []
    multi_video_data["videos"] = results

    for url in url_list:
        video_id = extract_video_id(url.strip())
        if not video_id:
            results.append({"url": url, "error": "Invalid YouTube URL."})
            continue

        transcript_data, error = get_transcript_for_video(video_id)
        if error:
            results.append({"url": url, "error": error})
            continue

        # Keep non-empty lines that no previous video contributed. The set is
        # only updated afterwards, so repeats inside this video are retained.
        stripped = (seg.text.strip() for seg in transcript_data)
        fresh_lines = [
            line for line in stripped
            if line and line not in lines_from_earlier_videos
        ]
        lines_from_earlier_videos.update(fresh_lines)

        merged_text = " ".join(fresh_lines)
        results.append({
            "url": url,
            "video_id": video_id,
            "transcript": transcript_data,
            "merged_text": merged_text
        })
        per_video_texts.append(merged_text)

    multi_video_data["combined_text"] = "\n".join(per_video_texts).strip()


**LLM Interaction with ask_groq**

`ask_groq` sends a user prompt to the Groq API with a role-specific system message tailored to the learner’s level (Beginner, Intermediate, or Advanced). It returns the generated response or an error message if the API call fails.

In [None]:
def ask_groq(prompt, level="Intermediate", model="llama3-8b-8192",
             temperature=0.5, max_tokens=4000):
    """Send *prompt* to the Groq chat API and return the reply text.

    Args:
        prompt: The user message.
        level: "Beginner", "Intermediate" or "Advanced"; selects the system
            message. Any other value falls back to a generic assistant role.
        model: Groq model identifier. NOTE(review): hosted model IDs get
            retired over time - confirm the default is still served.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.

    Returns:
        The stripped reply, or a string starting with ``"Groq API error:"``
        when anything goes wrong. Failures are reported as text rather than
        raised, so callers can display the result directly.
    """
    system_messages = {
        "Beginner": (
            "You are a patient and beginner-friendly Python tutor. "
            "Use simple language, analogies, and walk through each line of code clearly. "
            "Avoid technical jargon unless explained."
        ),
        "Intermediate": (
            "You are a helpful programming mentor. "
            "The learner knows syntax but needs help understanding best practices, patterns, and common pitfalls."
        ),
        "Advanced": (
            "You are a senior software engineer and technical educator. "
            "Offer deep, precise explanations with code improvements, edge-case handling, and performance considerations."
        )
    }
    system_msg = system_messages.get(
        level, "You are a helpful and knowledgeable programming assistant."
    )

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_msg},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_tokens=max_tokens
        )
        # Guard against a missing message body so the caller gets "" instead
        # of an AttributeError wrapped in an error string.
        return (response.choices[0].message.content or "").strip()

    except Exception as e:
        return f"Groq API error: {e}"


**Transcript Chunking and Topic Tagging**

This code splits a YouTube transcript into manageable chunks based on character length or duration, aligning chunks with natural sentence breaks. It then uses the Groq API to generate a title and explanation for each chunk, returning timestamped topic summaries with cleaned text.

In [None]:
import re
import datetime

def format_timestamp(seconds):
    """Format a second offset as ``MM:SS``, or ``HH:MM:SS`` from one hour on.

    Fractional seconds are truncated and negative offsets are clamped to zero.
    The fields are computed arithmetically: slicing ``str(timedelta)`` would
    cut off the hour digit for offsets of an hour or more.
    """
    total = max(0, int(seconds))
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    if hours:
        return f"{hours:02d}:{minutes:02d}:{secs:02d}"
    return f"{minutes:02d}:{secs:02d}"

# A sentence ends at ., ! or ? followed by whitespace or the end of the text,
# so decimals such as "3.10" are not treated as sentence breaks.
_SENTENCE_END = re.compile(r'[.!?]+(?=\s|$)')


def _split_at_sentence_end(text, max_chars):
    """Split *text* into ``(kept, leftover)`` at a sentence boundary.

    The cut is placed at the last sentence end within *max_chars*; if the
    first sentence is already longer than that, it is kept whole. Text
    without any sentence end is returned unsplit.
    """
    ends = [m.end() for m in _SENTENCE_END.finditer(text)]
    if not ends:
        return text, ""
    fitting = [pos for pos in ends if pos <= max_chars]
    cut = fitting[-1] if fitting else ends[0]
    return text[:cut].strip(), text[cut:].strip()


def chunk_with_timestamps(transcript, max_chars=1200, max_duration=180):
    """Group transcript segments into timestamped chunks of text.

    Args:
        transcript: Iterable of segments exposing ``text``, ``start`` and
            ``duration`` attributes.
        max_chars: A chunk is closed once its text reaches this length.
        max_duration: ... or once it spans at least this many seconds.

    Returns:
        A list of ``{"start", "end", "text"}`` dicts. Chunks are cut at a
        sentence boundary where possible, and whatever follows the cut is
        carried into the next chunk, so no transcript text is dropped.
        Chunks with empty text are never emitted.
    """
    chunks = []
    current = {"start": None, "end": None, "text": ""}

    for seg in transcript:
        if current["start"] is None:
            current["start"] = seg.start

        current["end"] = seg.start + seg.duration
        current["text"] += seg.text.strip() + " "

        too_long = len(current["text"]) >= max_chars
        too_slow = current["end"] - current["start"] >= max_duration
        if not (too_long or too_slow):
            continue

        kept, leftover = _split_at_sentence_end(current["text"].strip(), max_chars)
        if kept:
            chunks.append(
                {"start": current["start"], "end": current["end"], "text": kept}
            )

        if leftover:
            # The carried-over words were spoken just before the boundary; the
            # boundary time is used as the start of the next chunk.
            boundary = current["end"]
            current = {"start": boundary, "end": boundary, "text": leftover + " "}
        else:
            current = {"start": None, "end": None, "text": ""}

    remaining = current["text"].strip()
    if remaining:
        chunks.append(
            {"start": current["start"], "end": current["end"], "text": remaining}
        )

    return chunks

def clean_and_bullet_text(text):
    """Strip markdown markers from *text* and return one ``- `` bullet per sentence.

    Runs of ``*``, ``_`` and backticks are removed, the remainder is split
    after ``.``, ``!`` or ``?`` followed by whitespace, and every non-empty
    piece becomes its own bullet line.
    """
    without_markup = re.sub(r'[\*\_`]+', '', text).strip()

    bullet_lines = []
    for piece in re.split(r'(?<=[.!?])\s+', without_markup):
        piece = piece.strip()
        if piece:
            bullet_lines.append(f"- {piece}")

    return "\n".join(bullet_lines)

def tag_chunks_with_topics(chunks, level):
    """Ask the LLM for a title and short explanation for every chunk.

    Args:
        chunks: Dicts with ``start``, ``end`` and ``text`` keys.
        level: Learner level, forwarded to ``ask_groq`` and used in the prompt.

    Returns:
        One dict per chunk with ``timestamp`` ("start–end"), ``title`` (first
        line of the reply, markdown markers removed), ``explanation`` (rest of
        the reply as bullet points) and ``full_text`` (the chunk's own text).
        An empty reply yields an empty title and explanation instead of
        raising.
    """
    tagged = []
    for chunk in chunks:
        start = format_timestamp(chunk["start"])
        end = format_timestamp(chunk["end"])
        timestamp = f"{start}–{end}"

        topic_prompt = (
            f"Give a 1-line title and a 1-line explanation of the following programming content "
            f"for a {level} learner:\n\n{chunk['text']}"
        )
        result = ask_groq(topic_prompt, level).strip()

        # First line is the title, everything after it the explanation.
        # partition() also copes with an empty or single-line reply.
        title, _, explanation = result.partition("\n")
        title = title.strip()

        # Clean title formatting
        clean_title = re.sub(r'[\*\_`]+', '', title)

        # Clean and convert explanation to bullet points
        clean_explanation = clean_and_bullet_text(explanation.strip())

        tagged.append({
            "timestamp": timestamp,
            "title": clean_title,
            "explanation": clean_explanation,
            "full_text": chunk["text"].strip()  # optional: keep raw text for reference
        })

    return tagged


**Quiz Parsing and Generation**

Parses raw quiz text into structured questions by extracting question text, options (A–D), and the correct answer using regular expressions.
Sends a quiz generation prompt to the Groq API using the video transcript and user level, then parses and returns the formatted quiz questions.

In [None]:
def parse_quiz(raw_text):
    """Parse LLM quiz text into a list of question dicts.

    Each returned dict has ``question`` (text after ``Qn:``), ``options``
    (letter -> text) and ``answer`` (upper-case letter). A block is skipped
    when the question, the options or the answer is missing, or when the
    answer letter is not one of the parsed options - callers index
    ``options`` with ``answer``.
    """
    questions = []
    for block in re.split(r'\n(?=Q\d+:)', raw_text.strip()):
        q_match = re.search(r'Q\d+:\s*(.+)', block)

        # Options must start their own line ("A. text" or "A) text"); this
        # keeps text such as "Answer: B. Because ..." from being read as
        # option B.
        options = {
            letter: text.strip()
            for letter, text in re.findall(
                r'^[ \t]*([A-D])[.)][ \t]*(.+)$', block, re.MULTILINE
            )
        }

        # \b rejects words that merely start with a-d ("Answer carefully").
        answer = re.search(r'Answer\s*[:\-]?\s*([A-D])\b', block, re.IGNORECASE)

        if not (q_match and options and answer):
            continue

        answer_key = answer.group(1).upper()
        if answer_key not in options:
            continue

        questions.append({
            "question": q_match.group(1).strip(),
            "options": options,
            "answer": answer_key
        })
    return questions


def generate_full_video_quiz(full_text, level):
    """Ask the LLM for a five-question multiple-choice quiz and parse the reply.

    Only the first 4000 characters of *full_text* are included in the prompt.
    The parsed questions are returned as produced by ``parse_quiz``.
    """
    transcript_excerpt = full_text[:4000]
    quiz_prompt = f"""
You are a programming tutor. From the following transcript, generate **exactly 5 MCQs**.
Each must have 4 options (A-D) with 1 correct answer. Use this format:

Q1: <question>
A. <option>
B. <option>
C. <option>
D. <option>
Answer: <A/B/C/D>

Transcript:
{transcript_excerpt}
"""
    return parse_quiz(ask_groq(quiz_prompt, level))


**Topic Explorer**

 It displays a user interface for exploring specific video topics by timestamp or title. When the user types a keyword or timestamp, it searches the tagged transcript chunks and displays the full explanation for the matching topic, storing the selected topic in a global variable.

In [None]:


global_tagged_chunks = {"data": [], "selected_topic": None, "level": None}

# Sign-off phrases removed from transcript text before display. They are
# applied with IGNORECASE | DOTALL, so each one drops everything from the
# phrase to the end of the text (for "for more": to the next newline or end).
_ENDING_PATTERNS = [
    r"(that's a wrap.*?)$",
    r"(thanks for watching.*?)$",
    r"(like and subscribe.*?)$",
    r"(keep practicing.*?)$",
    r"(visit our website.*?)$",
    r"(happy coding.*?)$",
    r"(for more.*?)(?:$|\n)",
    r"(don't forget.*?)$"
]


def _clean_transcript_excerpt(text):
    """Return *text* without sign-off phrases and without fragmentary sentences."""
    text = text.strip()

    # Step 1: Remove ending junk phrases
    for pattern in _ENDING_PATTERNS:
        text = re.sub(pattern, "", text, flags=re.IGNORECASE | re.DOTALL)

    # Step 2: Split into sentences
    raw_sentences = re.split(r'(?<=[.?!])\s+', text)

    # Step 3: Keep sentences of at least three words that end in punctuation
    clean_sentences = [
        s.strip() for s in raw_sentences
        if len(s.strip().split()) >= 3 and re.search(r'[.?!]$', s.strip())
    ]

    # Step 4: Join back for display
    return " ".join(clean_sentences).strip()


def show_topic_explorer(tagged):
    """Display a small search UI over the tagged transcript chunks.

    The user types a timestamp fragment or a title keyword; the first entry of
    *tagged* whose ``timestamp`` or ``title`` contains it (case-insensitively)
    is shown together with its cleaned transcript text and stored in
    ``global_tagged_chunks["selected_topic"]``. Typing ``no`` declines.
    """
    input_box = widgets.Text(
        placeholder="Type timestamp or topic keyword (or 'no')",
        layout=widgets.Layout(width='70%')
    )
    explore_btn = widgets.Button(description="Explore Topic", button_style='info')
    output_area = widgets.Output()

    def on_explore_click(_):
        with output_area:
            clear_output(wait=True)
            query = input_box.value.strip().lower()
            if query == "no":
                print("No topic selected for deep dive.")
                return
            if not query:
                # An empty string is contained in every string, so without
                # this guard an empty box would "match" the first topic.
                print("Please type a timestamp or topic keyword.")
                return

            topic = next(
                (
                    t for t in tagged
                    if query in t['timestamp'].lower() or query in t['title'].lower()
                ),
                None,
            )
            if topic is None:
                print("Topic not found. Try another timestamp or keyword.")
                return

            display(Markdown(f"### {topic['timestamp']} — {topic['title']}"))
            print(topic['explanation'])
            print("\nFull Explanation from Transcript:\n")

            cleaned_text = _clean_transcript_excerpt(topic['full_text'])
            if cleaned_text:
                print(cleaned_text)
            else:
                print("Explanation not available in complete form.")

            global_tagged_chunks["selected_topic"] = topic

    explore_btn.on_click(on_explore_click)
    display(Markdown("###  Explore a topic in more detail:"), input_box, explore_btn, output_area)


**YouTube Video Explainer & Quiz Generator**

This code allows users to input YouTube links, select their experience level, and receive a summarized explanation, topic breakdown, and quiz. It uses transcript analysis and LLM-powered prompts to generate educational insights.

In [None]:
import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output

# Initialize global store
# Reset with the same keys the store is first created with, so a lookup such
# as global_tagged_chunks["selected_topic"] never hits a missing key.
global_tagged_chunks = {"data": [], "selected_topic": None, "level": None}

# UI Widgets
video_url = widgets.Text(
    placeholder="Paste one or more YouTube video URLs separated by commas",
    layout=widgets.Layout(width='60%')
)

level_dropdown = widgets.Dropdown(
    options=[('Beginner', 'Beginner'), ('Intermediate', 'Intermediate'), ('Advanced', 'Advanced')],
    value='Intermediate',
    description='Experience Level:',
    style={'description_width': 'initial'}
)

submit_btn = widgets.Button(description="Explain", button_style='success')

display(Markdown("##  Paste YouTube Links and Select Level"))
display(video_url, level_dropdown, submit_btn)

# Submit Handler
# Keep a private reference to this cell's dropdown. The Code Debugger cell
# later rebinds the module-level name `level_dropdown` to its own widget, and
# on_submit resolves globals at click time, so it would otherwise read (and
# redisplay) the wrong widget.
_explainer_level_dropdown = level_dropdown


def _run_quiz(quiz_questions):
    """Display the quiz one question at a time and a scored review at the end."""
    quiz_index = 0
    score = 0
    user_answers = []
    quiz_output = widgets.Output()
    answer_box = widgets.Text(placeholder="Your answer (A/B/C/D)", layout=widgets.Layout(width='30%'))
    submit_answer = widgets.Button(description="Submit Answer", button_style="primary")

    def show_question(index, feedback=None):
        # Feedback for the previous answer is printed after the clear, so it
        # stays visible above the next question.
        quiz_output.clear_output()
        with quiz_output:
            if feedback:
                print(feedback)
            if index < len(quiz_questions):
                q = quiz_questions[index]
                print(f"\n Q{index+1}: {q['question']}")
                for k, v in q['options'].items():
                    print(f"  {k}. {v}")
            else:
                print(f"\n Quiz Completed!\n Your Score: {score}/{len(quiz_questions)}")
                print("\n Review Your Answers:")
                for idx, (q, user_ans) in enumerate(zip(quiz_questions, user_answers)):
                    correct_ans = q["answer"]
                    correctness = " Correct" if user_ans == correct_ans else f" Incorrect (Correct: {correct_ans})"
                    print(f"\nQ{idx + 1}: {q['question']}")
                    for opt_key, opt_text in q['options'].items():
                        print(f"  {opt_key}. {opt_text}")
                    print(f"Your Answer: {user_ans} — {correctness}")

    def on_answer_submit(_):
        nonlocal quiz_index, score
        user_ans = answer_box.value.strip().upper()
        if not user_ans or quiz_index >= len(quiz_questions):
            return
        current_q = quiz_questions[quiz_index]

        # Reject input that is not one of the offered letters without
        # consuming the question.
        if user_ans not in current_q['options']:
            show_question(quiz_index, f" Please answer with one of: {', '.join(current_q['options'])}")
            return

        user_answers.append(user_ans)
        if user_ans == current_q['answer']:
            feedback = " Correct!"
            score += 1
        else:
            # .get(): a question whose answer key is missing from its options
            # must not raise here, or answers and questions get out of step.
            correct_text = current_q['options'].get(current_q['answer'], "")
            feedback = f" Incorrect. Correct Answer: {current_q['answer']}: {correct_text}"
        quiz_index += 1
        answer_box.value = ""
        show_question(quiz_index, feedback)

    submit_answer.on_click(on_answer_submit)
    display(Markdown("###  Answer the Quiz"), answer_box, submit_answer, quiz_output)
    show_question(quiz_index)


def on_submit(btn):
    """Handle a click on "Explain": summarise, tag topics and start a quiz.

    For every comma-separated URL in the input box the transcript is fetched,
    summarised for the selected level and split into timestamped topics. With
    more than one processed video a unified summary is generated as well. The
    topic explorer and a quiz built from the combined transcripts follow.
    """
    clear_output(wait=True)  #  Clear previous output

    # Redisplay input widgets
    display(Markdown("##  Paste YouTube Links and Select Level"))
    display(video_url, _explainer_level_dropdown, submit_btn)

    urls = [u.strip() for u in video_url.value.strip().split(',') if u.strip()]
    user_level = _explainer_level_dropdown.value

    if not urls:
        print(" Please enter at least one valid YouTube URL.")
        return

    all_tagged = []
    transcripts = []      # full text of every successfully processed video
    all_summaries = []    # one summary per successfully processed video

    for number, url in enumerate(urls, start=1):
        video_id = extract_video_id(url)
        if not video_id:
            print(f" Error for video {number}: Invalid YouTube URL.")
            continue

        transcript_data, error = get_transcript_for_video(video_id)
        if error:
            print(f" Error for video {number}: {error}")
            continue

        print(f"\n Transcript fetched successfully for Video {number}!")
        full_text = " ".join([seg.text for seg in transcript_data])
        transcripts.append(full_text)

        # NOTE(review): the whole transcript is sent in one request; very long
        # videos may exceed the model's context window - confirm and truncate
        # or chunk if that turns out to be a problem.
        summary = ask_groq(
            f"Summarize this programming tutorial for a {user_level} learner:\n\n{full_text}",
            user_level
        )
        all_summaries.append(summary)

        print(f"\n SUMMARY for Video {number} ({user_level}):\n{summary}")

        chunks = chunk_with_timestamps(transcript_data)
        tagged = tag_chunks_with_topics(chunks, user_level)
        all_tagged.extend(tagged)

        print(f"\n TOPIC BREAKDOWN WITH TIMESTAMPS (Video {number}):\n")
        for t in tagged:
            print(f"{t['timestamp']} — {t['title']}\n{t['explanation']}\n")

    if not transcripts:
        print(" No valid videos processed.")
        return

    combined_text = "".join(text + "\n\n" for text in transcripts)

    # Unified summary only if multiple videos were successfully processed
    if len(all_summaries) > 1:
        print("\n Generating unified comparative summary from multiple videos...")
        merged_summary = ask_groq(
            f"""These are summaries of Python programming videos on the same topic.
Please generate a single explanation by selecting the best parts, avoiding overlaps or repetition.
Target level: {user_level}.\n\n""" + "\n\n".join(all_summaries),
            user_level
        )
        print(f"\n UNIFIED COMPARATIVE SUMMARY ({user_level}):\n{merged_summary}")

    # Store topic data for explorer and quiz
    global_tagged_chunks["data"] = all_tagged
    global_tagged_chunks["level"] = user_level

    show_topic_explorer(all_tagged)

    # Quiz Generation
    quiz_questions = generate_full_video_quiz(combined_text, user_level)
    if not quiz_questions:
        print(" Could not generate quiz.")
        return

    _run_quiz(quiz_questions)


# Bind the button at the end
submit_btn.on_click(on_submit)

##  Paste YouTube Links and Select Level

Text(value='https://youtu.be/Gf9wLsCJDqc?si=HbKNII0xlkxidF8H', layout=Layout(width='60%'), placeholder='Paste …

Dropdown(description='Experience Level:', index=1, options=(('Beginner', 'Beginner'), ('Intermediate', 'Interm…

Button(button_style='success', description='Explain', style=ButtonStyle())


 Transcript fetched successfully for Video 1!

 SUMMARY for Video 1 (Intermediate):
Here's a summary of the tutorial on Python variables for an intermediate learner:

**Key Takeaways:**

1. **Dynamic typing**: Python variables don't need to be declared with a specific type. They can hold different types of data, such as integers, strings, or floats.
2. **Assigning values**: Create a variable by assigning a value to it, e.g., `x = 5` or `y = 'John'`.
3. **Casting**: Use casting to specify the data type of a variable, e.g., `x = int(4.5)` to convert a float to an integer.
4. **Type checking**: Use the `type()` function to check the type of a variable, e.g., `type(x)` returns `str`.
5. **String declaration**: Strings can be declared with either single or double quotes, e.g., `'hello'` or `"hello"`.
6. **Case sensitivity**: Python variable names are case-sensitive, meaning `a` and `A` are different variables.

**Best Practices:**

1. Use meaningful variable names that describe their purpo

###  Explore a topic in more detail:

Text(value='', layout=Layout(width='70%'), placeholder="Type timestamp or topic keyword (or 'no')")

Button(button_style='info', description='Explore Topic', style=ButtonStyle())

Output()

###  Answer the Quiz

Text(value='', layout=Layout(width='30%'), placeholder='Your answer (A/B/C/D)')

Button(button_style='primary', description='Submit Answer', style=ButtonStyle())

Output()

**Code Debugger**

 Upload or paste Python code and get explanations or fixes using a language model. It customizes responses based on the user's selected skill level.

In [None]:
from IPython.display import display, Markdown, Code, clear_output
import ipywidgets as widgets

# --- UI Elements ---
# Widgets for the Code Debugger cell. They are module-level names that the
# handlers defined further down in this cell read at call time.

# Free-text area holding the code to analyse (typed, pasted or filled from an upload).
code_box = widgets.Textarea(
    value='',
    placeholder='Paste your Python code here...',
    description='Code:',
    layout=widgets.Layout(width='100%', height='200px')
)

# Single-file upload restricted to .py files.
upload = widgets.FileUpload(
    accept='.py',
    multiple=False,
    description='Upload .py File',
    style={'description_width': 'initial'}
)

# Optional custom instruction for the model.
prompt_box = widgets.Text(
    value='',
    placeholder='Optional: Explain, debug, optimize...',
    description='Prompt:',
    layout=widgets.Layout(width='100%')
)

# NOTE(review): this rebinds the module-level name `level_dropdown`, which the
# YouTube explainer cell also defines and reads inside its submit handler.
# After this cell has run, code that looks the name up at call time gets this
# widget instead - consider a distinct name here.
level_dropdown = widgets.Dropdown(
    options=['Beginner', 'Intermediate', 'Advanced'],
    value='Intermediate',
    description='Level:',
    style={'description_width': 'initial'}
)

run_button = widgets.Button(description="Run", button_style='success')
clear_button = widgets.Button(description="Clear", button_style='warning')

# `output` receives the model feedback; `markdown_output` holds the intro heading.
output = widgets.Output()
markdown_output = widgets.Output()

# --- Layout ---
# Heading, inputs, control row and result area stacked vertically.
ui = widgets.VBox([
    markdown_output,
    code_box,
    upload,
    prompt_box,
    widgets.HBox([level_dropdown, run_button, clear_button]),
    output
])
display(ui)

# --- Markdown Intro ---
# Rendered into its own Output widget so it can be cleared and redrawn later.
with markdown_output:
    display(Markdown("###  Paste Python code or upload a `.py` file for explanation/debugging"))

# --- File Upload Handler ---
def on_upload_change(change):
    """Copy the uploaded file's text into ``code_box`` and reset the uploader.

    Works with both shapes of ``FileUpload.value``: a dict keyed by filename
    (ipywidgets 7) and a tuple of per-file records (ipywidgets 8).
    """
    files = upload.value
    if not files:
        return

    first_file = next(iter(files.values())) if isinstance(files, dict) else files[0]

    # bytes() accepts both plain bytes and a memoryview; undecodable bytes are
    # replaced rather than aborting the upload.
    code_box.value = bytes(first_file['content']).decode('utf-8', errors='replace')

    # Reset upload
    if isinstance(files, dict):
        files.clear()
    else:
        # Re-triggers this observer with an empty value, which returns early.
        upload.value = ()

upload.observe(on_upload_change, names='value')

# --- Prompt Generator ---
def get_default_prompt(level):
    """Return the stock instruction for *level*, or a generic one if unknown."""
    stock_prompts = {
        "Beginner": "Explain this code in simple language, point out errors, and suggest improvements.",
        "Intermediate": "Debug this code, explain its purpose, and provide enhancements if needed.",
        "Advanced": "Perform an in-depth analysis of this code, optimize it, and address edge cases."
    }
    try:
        return stock_prompts[level]
    except KeyError:
        return "Explain this code and fix any errors."

def build_prompt(code, prompt, level):
    """Combine the instruction and the code into one LLM prompt.

    An empty *prompt* is replaced by the default instruction for *level*; the
    code follows in a fenced ``python`` block.
    """
    instruction = prompt if prompt else get_default_prompt(level)
    fenced_code = f"```python\n{code}\n```"
    return f"{instruction}\n\n{fenced_code}"

# --- Run Handler ---
def run_debugger(_):
    """Send the code in ``code_box`` to the model and show the feedback.

    Reads the code, the optional instruction and the level from the widgets,
    asks ``ask_groq`` for feedback and renders the code followed by a
    collapsible feedback panel inside ``output``.
    """
    output.clear_output()
    source_code = code_box.value.strip()
    instruction = prompt_box.value.strip()
    user_level = level_dropdown.value

    if not source_code:
        with output:
            print(" Please paste code or upload a .py file.")
        return

    llm_prompt = build_prompt(source_code, instruction, user_level)

    with output:
        print(" Asking CodeSensei...")
        response = ask_groq(llm_prompt, user_level)  # assumes Groq API wrapper is defined
        clear_output(wait=True)
        display(Markdown("####  Your Code"))
        display(Code(source_code, language='python'))

        # Feedback goes into its own Output, wrapped in a one-section accordion
        feedback_panel = widgets.Output()
        accordion = widgets.Accordion(children=[feedback_panel])
        with feedback_panel:
            display(Markdown(f"**Skill Level:** {user_level}"))
            print(response)
        accordion.set_title(0, " CodeSensei's Feedback")
        display(accordion)


# --- Clear Handler ---
def clear_ui(_):
    """Reset every input of the debugger UI and redraw the intro heading."""
    code_box.value = ""
    prompt_box.value = ""

    # FileUpload.value is a dict in ipywidgets 7 (cleared in place) and a
    # tuple in ipywidgets 8 (reset by assignment).
    if isinstance(upload.value, dict):
        upload.value.clear()
    else:
        upload.value = ()

    output.clear_output()
    markdown_output.clear_output()
    with markdown_output:
        display(Markdown("###  Paste Python code or upload a `.py` file for explanation/debugging"))

# --- Bind Events ---
run_button.on_click(run_debugger)
clear_button.on_click(clear_ui)


VBox(children=(Output(), Textarea(value='', description='Code:', layout=Layout(height='200px', width='100%'), …