In [None]:
# Lab preparation: make sure the installed torch and torchtext versions are compatible with each other before running the cells below.

In [14]:
# Summary of Lab: 
# 1. Input Tutoring Session

# 2. Text Preprocessing with TorchText
# The session text is tokenized.
# Pretrained GloVe word embeddings are loaded and mean-pooled into a single session vector.

# 3. Using GPT-2 for Text Generation
# A prompt is created that includes the tutoring session and a request for practice exercises.
# The model generates a response based on the prompt using beam search.
# The result is printed.


In [15]:
# --------------------------
# 0. Import libraries
# --------------------------
import torch
from torchtext.vocab import GloVe
from torchtext.data.utils import get_tokenizer
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [16]:
# --------------------------
# 1. Sample Tutoring Session
# --------------------------
# Free-text tutor note for one session. The string deliberately begins and
# ends with a newline, so each sentence sits on its own line when the text is
# embedded in a larger template.
session_text = (
    "\n"
    "Today, we worked on solving quadratic equations using the quadratic formula.\n"
    "The student struggled with identifying the coefficients a, b, and c from the equation format.\n"
    "We practiced several examples and focused on improving accuracy with signs under the square root.\n"
)

In [17]:
# --------------------------
# 2. TorchText Preprocessing
# --------------------------
# Size of the GloVe vectors. Defined once so the vectors that are loaded and
# the zero-vector fallback below always have the same dimensionality.
EMBEDDING_DIM = 100

# Tokenize the session note with torchtext's "basic_english" tokenizer
tokenizer = get_tokenizer("basic_english")
tokens = tokenizer(session_text)

# Load pretrained GloVe embeddings
glove = GloVe(name='6B', dim=EMBEDDING_DIM)

# Filter to tokens with vectors in vocab
token_vectors = [glove[token] for token in tokens if token in glove.stoi]

# Combine vectors into a single embedding (mean-pooling over the token axis)
if token_vectors:
    session_embedding = torch.stack(token_vectors).mean(dim=0)
else:
    # No token was found in the GloVe vocabulary: fall back to a zero vector
    # of the same size so downstream code still sees a consistent shape.
    session_embedding = torch.zeros(EMBEDDING_DIM)

print(f"Session embedding vector shape: {session_embedding.shape}")  # EMBEDDING_DIM-dim vector


Session embedding vector shape: torch.Size([100])


In [18]:
# --------------------------
# 3. Use an LLM for Generation
# --------------------------
llm_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
llm_model = GPT2LMHeadModel.from_pretrained("gpt2")
llm_model.eval()  # evaluation mode: we only generate, never train

# Build the prompt: the session note, followed by the request, followed by a
# cue line the model is expected to continue from.
prompt = (
    "Tutoring session summary:\n"
    f"{session_text}\n"
    "Please suggest exercises to improve understanding of identifying the coefficients for quadratic equations.\n"
    "\n"
    "Summary and question:"
)

# Encode the prompt and generate
inputs = llm_tokenizer(prompt, return_tensors="pt")
with torch.no_grad():  # no gradients needed for inference
    outputs = llm_model.generate(
        **inputs,
        max_new_tokens=100,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=True,
        # Pass the pad token explicitly. Without it, generate() falls back to
        # the EOS token on its own and emits the
        # "Setting `pad_token_id` to `eos_token_id`" notice on every call.
        pad_token_id=llm_tokenizer.eos_token_id,
    )

# outputs[0] is the best beam; the decoded text contains the prompt followed by
# the model's continuation.
generated = llm_tokenizer.decode(outputs[0], skip_special_tokens=True)
print("\n--- GPT-2 Output ---\n")
print(generated)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--- GPT-2 Output ---

Tutoring session summary:

Today, we worked on solving quadratic equations using the quadratic formula.
The student struggled with identifying the coefficients a, b, and c from the equation format.
We practiced several examples and focused on improving accuracy with signs under the square root.

Please suggest exercises to improve understanding of identifying the coefficients for quadratic equations.

Summary and question: What is the best way to identify the coefficient for a given equation? What are the most common ways to do this? How can we improve the accuracy of the student's understanding? If you have any questions, please feel free to contact us.
