In [27]:
import warnings
warnings.filterwarnings('ignore')

In [1]:
import torch
from torchtext.vocab import GloVe
from torchtext.data.utils import get_tokenizer
from transformers import GPT2Tokenizer
from transformers import GPT2LMHeadModel


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/Users/linkedin/Library/Python/3.9/lib/python/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/li

In [2]:
import torch; print(torch.__version__)
import torchtext; print(torchtext.__version__)
import transformers
print(transformers.__version__)

2.2.0
0.17.0
4.36.2


In [3]:
# Step 1: Sample dataset
texts = [
    "The weather today is sunny and bright.",
    "Machine learning is transforming the world.",
    "PyTorch is a popular deep learning framework.",
]

In [4]:
# Step 3: Use GPT-2 (LLM) for text generation from a prompt
# Use Hugging Face tokenizer and model for generation
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt2_model.eval()



GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [5]:
# Prepare prompt
prompt = "Machine learning is amazing because"
inputs = gpt2_tokenizer(prompt, return_tensors="pt")

In [6]:
# Generate continuation
with torch.no_grad():
    outputs = gpt2_model.generate(
        inputs["input_ids"],
        max_new_tokens=50,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.8
    )

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [7]:
generated_text = gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)
print("\n💬 Generated Text:")
print(generated_text)


💬 Generated Text:
Machine learning is amazing because it is a very fast and very simple algorithm. It can compute new things in seconds. And the more things you learn, the faster and simpler it becomes.

One of the things we've been working on is the use of algorithms for learning


In [20]:
import torch
from torchtext.vocab import GloVe
from torchtext.data.utils import get_tokenizer
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# --------------------------
# 1. Sample Tutoring Session
# --------------------------
session_text = """
Today, we worked on solving quadratic equations using the quadratic formula.
The student struggled with identifying the coefficients a, b, and c from the equation format.
We practiced several examples and focused on improving accuracy with signs under the square root.
"""

# --------------------------
# 2. TorchText Preprocessing
# --------------------------
tokenizer = get_tokenizer("basic_english")
tokens = tokenizer(session_text)

# Load pretrained GloVe embeddings
glove = GloVe(name='6B', dim=100)

# Filter to tokens with vectors in vocab
token_vectors = [glove[token] for token in tokens if token in glove.stoi]

# Combine vectors into a single embedding (e.g., mean-pooling)
if token_vectors:
    session_embedding = torch.stack(token_vectors).mean(dim=0)
else:
    session_embedding = torch.zeros(100)  # fallback if empty

print(f"Session embedding vector shape: {session_embedding.shape}")  # 100-dim vector

# --------------------------
# 3. Use an LLM for Generation
# --------------------------
llm_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
llm_model = GPT2LMHeadModel.from_pretrained("gpt2")
llm_model.eval()

# Add prompt for the model
prompt = (
    "Tutoring session summary:\n"
    f"{session_text}\n\n"
    "Please summarize this session and suggest a next question for the student.\n"
    "Summary and question:"
)

# Encode the prompt and generate
inputs = llm_tokenizer(prompt, return_tensors="pt")
with torch.no_grad():
    outputs = llm_model.generate(
        **inputs,
        max_new_tokens=100,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=True
    )

generated = llm_tokenizer.decode(outputs[0], skip_special_tokens=True)
print("\n--- GPT-2 Output ---\n")
print(generated)


Session embedding vector shape: torch.Size([100])


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--- GPT-2 Output ---

Tutoring session summary:

Today, we worked on solving quadratic equations using the quadratic formula.
The student struggled with identifying the coefficients a, b, and c from the equation format.
We practiced several examples and focused on improving accuracy with signs under the square root.


Please summarize this session and suggest a next question for the student.
Summary and question: What is the best way to solve this problem? What can we do to improve the accuracy of the solution? How do we make sure that the problem is solved correctly? If you have any questions, please feel free to contact us.
