In [None]:
import os
from openai import OpenAI
from dotenv import load_dotenv

In [5]:
# Load variables from a .env file; override=True lets them replace values already set in the environment.
load_dotenv(override=True)

# OpenAI SDK client pointed at OpenRouter's endpoint instead of the SDK's default host.
# NOTE(review): the key is read from OPENAI_API_KEY although the endpoint is OpenRouter —
# confirm that this variable actually holds an OpenRouter key.
openai = OpenAI(
    base_url="https://openrouter.ai/api/v1", api_key=os.getenv("OPENAI_API_KEY")
)

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi

def get_transcript(video_id: str, max_minutes: int) -> str:
    """
    Download a YouTube video's transcript and flatten its opening portion into one string.

    :param video_id: YouTube video ID (e.g. "dQw4w9WgXcQ")
    :param max_minutes: Cut-off in minutes; only snippets whose start time is
        at or before this mark are kept
    :return: Text of the kept, non-blank snippets joined by single spaces
    """
    snippets = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
    cutoff_seconds = max_minutes * 60

    kept_texts = []
    for snippet in snippets:
        # A snippet is kept or dropped as a whole, based on where it starts.
        within_window = snippet['start'] <= cutoff_seconds
        # Whitespace-only snippets add nothing to the joined text, so skip them.
        if within_window and snippet['text'].strip() != '':
            kept_texts.append(snippet['text'])

    return " ".join(kept_texts)

# Demo run: fetch the transcript snippets that start within the first 10 minutes
# of this video and print the joined text.
video_id = "-moW9jvvMr4" 
transcript_text = get_transcript(video_id, 10)

print(transcript_text)


When I was first learning to meditate, the instruction was to simply
pay attention to my breath, and when my mind wandered,
to bring it back. Sounded simple enough. Yet I'd sit on these silent retreats, sweating through T-shirts
in the middle of winter. I'd take naps every chance I got
because it was really hard work. Actually, it was exhausting. The instruction was simple enough but I was missing something
really important. So why is it so hard to pay attention? Well, studies show that even when we're really
trying to pay attention to something -- like maybe this talk -- at some point, about half of us
will drift off into a daydream, or have this urge
to check our Twitter feed. So what's going on here? It turns out that we're fighting one
of the most evolutionarily-conserved learning processes
currently known in science, one that's conserved back to the most basic
nervous systems known to man. This reward-based learning process is called positive
and negative reinforcement, and basica

In [43]:
# System prompt for the quiz generator.
# The output-shape rule must agree with the schema passed as response_format
# (QuizSet): a JSON object whose "questions" field holds the list of MCQs.
# Asking for a bare JSON array contradicts that schema and can make parsing fail.
system_prompt = """
You are an AI English Teaching Assistant specializing in grammar and vocabulary.  
Your task: process English subtitles from a YouTube conversation and create a quiz.  

Instructions:
1. Identify important **grammar structures** (tenses, prepositions, articles, etc.) and **vocabulary** (meaning, usage, collocations).  
2. Generate **multiple-choice questions (MCQs)** in this JSON format:
   {
     "question": "...",
     "options": ["A", "B", "C", "D"],
     "correct_answer": "A",
     "explanation": "..."
   }
3. Rules:
   - Use **natural English only** (no Vietnamese).  
   - Each MCQ must relate to the transcript.  
   - Focus only on **grammar and vocabulary** (no cultural/context questions).  
   - Explanations must be clear, like a teacher explaining to students.  
   - You MUST create **at least 10 MCQs** (preferably 12–15).  
   - Ensure the output is a **valid JSON object** with a single key "questions" whose value is the array of MCQs, matching the Pydantic model.  
"""


In [None]:
from pydantic import BaseModel
from typing import List, Literal

# One multiple-choice question of the generated quiz.
# NOTE(review): documented with comments rather than a class docstring, on the
# assumption that a docstring would be copied into the JSON schema — confirm.
class QuizItem(BaseModel):
    # The question text.
    question: str
    # Answer choices. The system prompt's example shows four entries, but the
    # length is not enforced by this model.
    options: List[str] 
    # Letter of the correct choice; only "A", "B", "C" or "D" is accepted.
    correct_answer: Literal["A", "B", "C", "D"]
    # Explanation of why the marked answer is correct.
    explanation: str

# Top-level container for a quiz; passed as response_format in chat().
class QuizSet(BaseModel):
    # The quiz questions.
    questions: List[QuizItem]

In [48]:
def chat(transcript_text) -> QuizSet:
    """
    Ask the model to turn a transcript into a quiz and return the parsed result.

    :param transcript_text: Transcript text sent as the user message
    :return: The ``parsed`` attribute of the first choice's message
        (NOTE(review): presumably None when the reply cannot be parsed into
        QuizSet — confirm against the SDK)
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": transcript_text},
    ]
    completion = openai.beta.chat.completions.parse(
        model="deepseek/deepseek-r1:free",
        messages=conversation,
        response_format=QuizSet,
    )
    first_choice = completion.choices[0]
    return first_choice.message.parsed


In [49]:
# Generate the quiz from the transcript fetched above (calls the remote model).
response = chat(transcript_text)

In [50]:
# Show the value returned by chat() as plain text.
print(response)

questions=[QuizItem(question='The instruction was to pay attention to my breath, and when my mind wandered, to ______.', options=['A. bring it back', 'B. let it wander', 'C. stop meditating', 'D. take a nap'], correct_answer='A', explanation="The correct answer is 'bring it back' as the transcript states the instruction was to bring the mind back when it wanders."), QuizItem(question='The speaker used ______ as an example of stress-triggered habits.', options=['A. exercising', 'B. drinking water', 'C. eating chocolate', 'D. reading books'], correct_answer='C', explanation='Eating chocolate is mentioned as a behavior learned through the reward process when feeling bad.'), QuizItem(question="What grammatical structure dominates in 'we've gone from learning to survive to literally killing ourselves'?", options=['A. Present perfect continuous', 'B. Past simple', 'C. Present perfect', 'D. Future perfect'], correct_answer='C', explanation="The phrase uses present perfect tense ('have gone') 

In [2]:
from dotenv import load_dotenv
import os
from ollama import Client

In [3]:
# Load variables from a .env file; override=True lets them replace values already set in the environment.
load_dotenv(override=True)

# Client for the hosted Ollama endpoint; the value of OLLAMA_API_KEY is sent
# unchanged as the Authorization header.
# NOTE(review): confirm whether the service expects a "Bearer " prefix on this value.
client = Client(
    host="https://ollama.com",
    headers={'Authorization': os.getenv("OLLAMA_API_KEY")}
)

# Single-turn conversation used as a smoke test.
messages = [
  {'role': 'user', 'content': 'Why is the sky blue?'},
]

# Get the response; streaming is optional — it is enabled here, so each part is printed as the loop receives it
for part in client.chat('gpt-oss:120b', messages=messages, stream=True):
    print(part['message']['content'], end='', flush=True)


The sky looks blue because of a phenomenon called **Rayleigh scattering**.

### How it works

1. **Sunlight is made of many colors.**  
   Sunlight (white light) is actually a mixture of all the visible wavelengths, from short‑wavelength blue/violet light (~400 nm) to long‑wavelength red light (~700 nm).

2. **The Earth's atmosphere is full of tiny particles.**  
   Molecules of nitrogen, oxygen, and other gases are much smaller than the wavelength of visible light.

3. **Shorter wavelengths scatter more.**  
   When light encounters these tiny particles, it gets redirected (scattered) in many directions. The amount of scattering is inversely proportional to the fourth power of the wavelength (∝ 1/λ⁴). That means:
   - Blue light (≈ 450 nm) is scattered about **10 times** more than red light (≈ 650 nm).
   - Violet light scatters even more, but our eyes are less sensitive to violet and some of it is absorbed by the upper atmosphere.

4. **We see the scattered light.**  
   From any poi