# Imports

In [1]:
%pip install -qU langchain-huggingface langchain-ollama langchain-core langchain-community langchain faiss-cpu json-repair transformers langchain-google-genai

[0mNote: you may need to restart the kernel to use updated packages.


In [1]:
import gdown

def download_drive(file_id, file_name, output_path= '/kaggle/working/'):
    """Download a Google Drive file with gdown and return the saved path.

    Args:
        file_id: Google Drive file id inserted into the download URL.
        file_name: Name given to the downloaded file.
        output_path: Directory the file is written into.

    Returns:
        The value returned by ``gdown.download`` for the saved file.
    """
    # Imported locally: this cell runs before the notebook's later `import os` cell,
    # so relying on the global name raised NameError on a fresh kernel.
    import os

    url = f"https://drive.google.com/uc?id={file_id}&export=download"
    destination = os.path.join(output_path, file_name)
    file = gdown.download(url, destination, quiet=True)

    print(f"File downloaded and saved as {file}")
    return file

In [2]:
# Import modules
from pydantic import BaseModel, Field
from typing import List
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline, ChatHuggingFace
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import AIMessage, SystemMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser, PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda
import json_repair
import re

2026-01-20 12:37:43.059420: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1768912663.283673     132 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768912663.338223     132 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# We can use the ranked ideas we already have

In [3]:
# Declare models
class MainIdea(BaseModel):
    # One main idea: a short name plus a detailed, full-sentence summary.
    name: str = Field(..., description="Main idea name")
    summary: str = Field(..., description="Detailed full-sentence summary explaining the concept, its relevance, any examples or applications, its connections to other ideas, and its role in understanding the material.")

class MainIdeas(BaseModel):
    # Container model: a list of MainIdea entries stored under the `MainIdeas` field.
    MainIdeas: List[MainIdea]

class Ranking(BaseModel):
    # List of integer ranks; per the field description, one rank per idea.
    # NOTE(review): not used by any cell visible here — presumably consumed by an earlier ranking step.
    ranking: List[int] = Field(..., description='Rank corresponding to each idea')

In [4]:
# Declare ranked_ideas
# Hard-coded list of MainIdea objects reused from an earlier run, so the ideas do not have to be
# regenerated here. The list order is what later cells report as "Idea Rank 1, 2, ...".
ranked_ideas = [
    MainIdea(name='Basic Forms of the Past Simple Tense (Active Voice)', summary="The Past Simple tense in the active voice is formed by using the subject followed by the past form (second form) of the verb for affirmative statements, such as 'Dalia travelled to Aswan a week ago.' For interrogative (questions), it uses 'Did + subject + infinitive,' as in 'Did Maher invite you to the party last Friday?' or 'Wh-word + did + subject + infinitive' for Wh-questions like 'What did you study last night?' The negative form follows the structure 'Subject + didn’t + infinitive,' exemplified by 'My son didn’t use mobile phones 10 years ago,' indicating a completed action that did not occur in the past."),
    MainIdea(name='Past Simple Passive Voice', summary="The Past Simple tense in the passive voice is constructed using the object of the action, followed by 'was' or 'were,' and then the past participle (P.P.) of the main verb, to describe an action that was performed on the object in the past, such as 'The pyramids were built by the ancient Egyptians.' This form emphasizes the recipient of the action rather than the performer."),
    MainIdea(name="Past Simple of the Verb 'To Be'", summary="The verb 'to be' has unique past simple forms: 'was' for singular subjects (I, he, she, it) and 'were' for plural subjects (you, we, they), which are used to describe states of being or conditions in the past. These forms are applicable in affirmative statements, negative constructions (e.g., 'I wasn't at school'), and questions (e.g., 'Were you at home?'), as illustrated by 'I was at school last Monday.'"),
    MainIdea(name="Expressing Past Possession with 'Had'", summary="Past possession is expressed using the verb 'had' (the past simple of 'to have'), which indicates ownership or possession in the past. For affirmative statements, the structure is 'Subject + had + object,' as in 'I had a nice bike when I was young,' while the negative form uses 'Subject + didn’t have + object,' such as 'I didn’t have a bike when I was young.'"),
    MainIdea(name='Core Uses of the Past Simple Tense', summary="The Past Simple tense is primarily used to describe actions that started and finished at a specific time in the past, often indicated by time expressions like 'yesterday' or 'a week ago,' as in 'It rained heavily yesterday.' It also narrates a sequence of completed past actions, recounting events in stories (e.g., 'Jehan wiped the table after dinner, took a shower and went to bed'), and provides specific details about events that occurred after an action described by the present perfect tense, such as 'I’ve hurt my leg. I fell off a ladder.' Additionally, it can describe past habits or repeated actions, sometimes using frequency adverbs (e.g., 'He always visited us when he was young') or the phrase 'used to' (e.g., 'When I was young, I used to play in the street'), and can indicate the duration of a past action using 'for,' as in 'My father went to a primary school for 6 years.'"),
    MainIdea(name='Hypothetical and Advanced Uses of the Past Simple Tense', summary="Beyond its primary function, the Past Simple tense is employed in several advanced and hypothetical constructions. It is used after 'would rather' with a subject to express a present preference (e.g., 'I’d rather you went to school today'), and forms the 'if' clause of the second conditional to describe unlikely or hypothetical present/future situations (e.g., 'If I discovered a new planet, I’d give it my name'). Furthermore, it follows 'wish' and 'if only' to express wishes or regrets about the present (e.g., 'I wish I lived near my school'), and is used with 'supposing/suppose' to imagine unlikely or unreal scenarios (e.g., 'Suppose you met the president, what would you ask him to do?'). It also appears after 'as if/as though' for comparisons (e.g., 'Ahmed treats us as if he were a king') and after 'It's time' or 'It's high time' to indicate that an action should happen immediately (e.g., 'It’s time you went to bed')."),
    MainIdea(name='Associated Time Expressions for the Past Simple Tense', summary="The Past Simple tense is frequently accompanied by specific time expressions that clearly indicate a completed action in the past, helping to establish the timeframe of the event. Common associated time expressions include 'yesterday,' 'last + period' (e.g., last week, last year), 'period + ago' (e.g., an hour ago, a month ago), 'in + year' (e.g., in 2002), 'the other day,' and 'a month ago,' all of which signal that the action occurred and concluded at a definite point in the past."),
    MainIdea(name='Interactions of Past Simple with Other Past Tenses', summary="The Past Simple tense often interacts with other past tenses to describe complex sequences of events. It is used for a short, completed action that interrupts an ongoing action described by the Past Continuous tense, as seen in 'While Ali was climbing the ladder, he fell down,' where 'fell down' is the interrupting action. Additionally, it describes the second event in a sequence when the first event has already been completed, with the first event typically expressed in the Past Perfect tense, such as 'After he had booked the ticket, he took the train,' illustrating a clear chronological order of past actions.")
    ]

## BUILD RETRIEVAL INDEX

In [70]:
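# Read the document
# The text is inlined below so the notebook runs without the source file.
# To load it from disk instead, uncomment:
# with open('/content/01 - u1_Past Simple.txt', 'r') as file:
#     doc = file.read()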

doc = '''Past Simple General Revision

Form:
Subj. + past form (second form) of the verb

Examples:
- Dalia travelled to Aswan a week ago.
- Yahya Haqqi wrote many interesting short stories.

Interrogative:
Did + subj. + inf.?/Wh-word + did + subj. + inf.?

Examples:
- Did Maher invite you to the party last Friday?
- What did you study last night?

Negative:
Subj. + didn’t + inf.

Examples:
- My son didn’t use mobile phones 10 years ago. 

Passive:
Obj. + was/were + P.P.

Examples:
- The pyramids were built by the ancient Egyptians.

Usage:
1-
 Explanation: An action that happened and finished in the past and is indicated by time expressions
 Example: It rained heavily yesterday.
2-
 Explanation: An action that happened and finished in the past without explicit time markers (historical past)
 Example: Graham Bell invented the telephone.
3-
 Explanation: To show a sequence of completed actions in the past
 Example: Jehan wiped the table after dinner, took a shower and went to bed.

 Explanation: Past habits
 Example: People in the past travelled on camels.
 Example: When I was on holiday, I went to the beach every day.
4-
 Explanation: We use "used to" for past habits; it is not used for single recent events that were not habits
 Example: When I was young, I used to play (played) in the street. ✓
 Example: He went to the sports club three times last week. ✓
5-
 Explanation: To give details of events after the present perfect
 Example: I’ve hurt my leg. I fell off a ladder.
6-
 Explanation: For narrating events in stories
 Example: Once, we saw a man in ... .
 Example: He sat down and ordered a coffee.
7-
 Explanation: The past simple is used with the following time expressions
 Example: Yesterday, last + period, period + ago, in 2002, the other day, a month ago, etc.

Note:
1-
 Explanation: Use the same frequency adverbs as with the present tense when appropriate
 Example: He always visited us when he was young.
2-
 Explanation: "For" can be used with the past simple if the action lasted a specified period in the past
 Example: My father went to a primary school for 6 years.
3-
 Explanation: Use "was/were" for the past of the verb "to be" in affirmative, negative and question forms
 Example: I was at school last Monday.
 Example: I wasn’t at school last Monday.
 Example: Were you at school last Monday?
4-
 Explanation: Use "had" in the past to indicate possession
 Example: I had a nice bike when I was young.
 Example: I didn’t have a bike when I was young.

Other uses:
 Explanation: The past simple is used in the following situations
1-
 Explanation: After "would rather" + past with a subject to express a present preference
 Example: I’d rather you went to school today.
2-
 Explanation: In the second conditional to express hypothetical or unlikely present/future situations
 Example: If I discovered a new planet, I’d give it my name.
3-
 Explanation: After "wish" and "if only" to express a wish or regret about the present
 Example: I wish I lived near my school.
4-
 Explanation: After "supposing/suppose" to imagine something unlikely or unreal
 Example: Suppose you met the president, what would you ask him to do?
5-
 Explanation: After "as if/as though" to compare or liken someone/something to someone/something else
 Example: Ahmed treats us as if he were a king.
6-
 Explanation: For a short/completed action that interrupts another ongoing action (past continuous)
 Example: While Ali was climbing the ladder, he fell down.
7-
 Explanation: For the second event when the first event is already completed (past perfect + past simple)
 Example: After he had booked the ticket, he took the train.
8-
 Explanation: After "It’s time/It’s high time" use the past simple to indicate an action that should happen now
 Example: It’s time you went to bed.'''
 
# Break the document into overlapping chunks (size 300, overlap 50) for indexing
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=300,
)
chunks = splitter.split_text(text=doc)

print(f"Document split into {len(chunks)} chunks")


Document split into 16 chunks


In [71]:
# Hugging Face embedding model used to vectorise the chunks when the FAISS store is built.
embedding_model_id = "Qwen/Qwen3-Embedding-0.6B"
embedding = HuggingFaceEmbeddings(model= embedding_model_id)

In [72]:
# Build the FAISS vector store from the chunk texts, embedding them with `embedding`.
vectorstore = FAISS.from_texts(chunks, embedding)

In [73]:
# MMR retriever returning the top 3 chunks per query.
# Search options must be passed as `search_kwargs`; under the previous `kwargs=` keyword they
# never reached the search call, so the default `k` was used instead of 3.
# No score threshold is passed: thresholds belong to the 'similarity_score_threshold'
# search type (as `score_threshold`), not to MMR.
retriever = vectorstore.as_retriever(search_type='mmr', search_kwargs={'k': 3})

In [74]:
# For every ranked idea, fetch its supporting chunks (queried by idea name) and echo them
passages = []
for idea in ranked_ideas:
    top_chunks = retriever.invoke(idea.name)
    passages.append(top_chunks)

    chunk_texts = [document.page_content for document in top_chunks]
    print('# Summary:\n', idea.summary)
    print('# Chunks')
    print('\n# '.join(chunk_texts))
    print('\n'*3)

# Summary:
 The Past Simple tense in the active voice is formed by using the subject followed by the past form (second form) of the verb for affirmative statements, such as 'Dalia travelled to Aswan a week ago.' For interrogative (questions), it uses 'Did + subject + infinitive,' as in 'Did Maher invite you to the party last Friday?' or 'Wh-word + did + subject + infinitive' for Wh-questions like 'What did you study last night?' The negative form follows the structure 'Subject + didn’t + infinitive,' exemplified by 'My son didn’t use mobile phones 10 years ago,' indicating a completed action that did not occur in the past.
# Chunks
Past Simple General Revision

Form:
Subj. + past form (second form) of the verb

Examples:
- Dalia travelled to Aswan a week ago.
- Yahya Haqqi wrote many interesting short stories.

Interrogative:
Did + subj. + inf.?/Wh-word + did + subj. + inf.?
# Usage:
1-
 Explanation: An action that happened and finished in the past and is indicated by time expressions


## QUESTION GENERATION

In [10]:
import gc
import torch

def clean_cache():
    """Run Python garbage collection, then empty PyTorch's CUDA cache."""
    for release in (gc.collect, torch.cuda.empty_cache):
        release()

In [11]:
import os
# Opt in to hf_transfer-based downloads from the Hugging Face Hub.
# NOTE(review): this presumably requires the `hf_transfer` package (not listed in the install
# cell) and may be read by huggingface_hub at import time, i.e. before this line runs — confirm.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
from huggingface_hub import snapshot_download


In [12]:
from kaggle_secrets import UserSecretsClient
# Pull API credentials from Kaggle secrets and expose them (plus the transformers
# log level) to downstream libraries through environment variables.
user_secrets = UserSecretsClient()
GOOGLE_API_KEY = user_secrets.get_secret("google")
HF_TOKEN = user_secrets.get_secret("huggingface")
os.environ.update({
    'GOOGLE_API_KEY': GOOGLE_API_KEY,
    'HF_TOKEN': HF_TOKEN,
    'TRANSFORMERS_VERBOSITY': 'info',
})

In [18]:
# Drop any previously loaded pipeline / chat model before loading a new one.
# A plain `del hf_pipe, llm` raised NameError on a fresh kernel, where neither name
# exists yet (they are first created in the next cell), so remove them only if present.
for _stale_name in ("hf_pipe", "llm"):
    globals().pop(_stale_name, None)
clean_cache()

In [19]:
# Initialize ChatLLM
# Load the model as a text-generation pipeline with sampling disabled, then wrap it as a chat model.
model_id = "Qwen/Qwen3-4B-Instruct-2507"
hf_pipe = HuggingFacePipeline.from_model_id(
    model_id=model_id,
    task="text-generation",
    device_map="auto",
    pipeline_kwargs={"do_sample": False},
)
llm = ChatHuggingFace(llm=hf_pipe)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cpu


In [59]:
from langchain_ollama import ChatOllama

# Chat model served by a remote Ollama instance.
# The ngrok tunnel address is short-lived, so it can be overridden through the
# OLLAMA_BASE_URL environment variable; the original address remains the fallback.
# NOTE: this rebinds `llm`, replacing the ChatHuggingFace model created in the earlier cell.
llm = ChatOllama(
    base_url=os.environ.get("OLLAMA_BASE_URL", "https://59eafcb41c6c.ngrok-free.app"),
    model="qwen2.5:7b-instruct-q6_K",
    temperature=0.7,
)

In [75]:
class MCQ(BaseModel):
    # One multiple-choice question: statement, correct answer, exactly three distractors
    # (enforced by min_length/max_length) and a short explanation.
    question_statement: str = Field(..., description="Short story with a missing word/idiom to be filled.")
    correct_answer: str = Field(..., description="Correct word/idiom completing the gap.")
    plausible_distractors: List[str] = Field(..., description="Three contextually appropriate distractors reflecting common misunderstandings; not contradictory or irrelevant.", min_length=3, max_length=3)
    explanation: str = Field(..., description="Why the correct answer fits; brief reasoning.")

class MultiMCQ(BaseModel):
    # Wrapper so a single model response can carry several MCQ items.
    questions: List[MCQ] = Field(..., description="List of MCQ Questions")

# Parser that validates model output against MultiMCQ; its format instructions are
# embedded in the question prompt.
q_parser = PydanticOutputParser(pydantic_object=MultiMCQ)

In [None]:
# Prompt layout: the system message carries the task instructions followed by the parser's
# format instructions; the human message carries the idea and its passages.
# Braces in the format instructions are doubled so the template treats them as literal
# text instead of input variables.
question_prompt = ChatPromptTemplate.from_messages([
    ("system",
        '\n'.join([
            "Instructions:",
            "Based on the given main idea and its relevant passages, create {num_questions} multiple-choice questions that require deep understanding, critical thinking, and detailed analysis.",
            # The trailing comma here was missing, which fused this instruction with the
            # next one ("...contentDo not use...") through implicit string concatenation.
            "The question should be a situation, quote or speech with a missing area, and the choices are possible fillings, the correct one is what fills the empty area according to the content",
            "Do not use the phrases 'main idea' or 'passages' in the question statement. Instead, directly address the content or concepts described.",
            "Provide four answer choices for each question:",
            "Note: The questions should be focused on one concept and not very long, DO NOT ask multiple questions in one.",
            q_parser.get_format_instructions().replace('{', '{{').replace('}', '}}'),
        ])),

    ("human",
        '\n'.join([
            "# Main Idea:",
            "{main_idea}",
            "# Passages:",
            "{passages}",
        ]))
])

# Prompt -> chat model -> structured MultiMCQ output
question_chain = question_prompt | llm | q_parser

In [77]:
# Generate `number_questions` MCQs for each ranked idea from its retrieved passages.
questions_per_idea = []
number_questions = 3

for i, (idea, psgs) in enumerate(zip(ranked_ideas, passages), 1):

    # Send the plain chunk text to the model. Previously the list of Document objects was
    # passed, so the prompt received their repr instead of this joined text.
    content = '\n'.join(psg.page_content for psg in psgs)
    q_output = question_chain.invoke(input={
        "num_questions": number_questions,
        "main_idea": idea.name,
        "passages": content,
    })

    print(f"\n=== Questions for Idea Rank {i} ===\n")
    display(q_output.model_dump())

    questions_per_idea.append(q_output)
    # NOTE(review): the loop stops after the first idea — presumably a trial run;
    # remove this break to generate questions for every idea.
    break


=== Questions for Idea Rank 1 ===



{'questions': [{'question_statement': 'Which of the following is a correct example of the Past Simple tense in active voice?',
   'correct_answer': 'Dalia travelled to Aswan a week ago.',
   'plausible_distractors': ['Yahya Haqqi wrote many interesting short stories.',
    'It rained heavily yesterday.',
    'I would rather you went to school today.'],
   'explanation': "The correct example is 'Dalia travelled to Aswan a week ago.' because it shows an action that happened and finished in the past, which is the definition of Past Simple tense in active voice."},
  {'question_statement': "What does the Past Simple tense indicate about actions in the sentence 'My father went to a primary school for 6 years.'?",
   'correct_answer': 'An action that happened and finished in the past without explicit time markers (historical past).',
   'plausible_distractors': ['An action that happened and finished in the past and is indicated by time expressions.',
    "After 'would rather' + past with a s