# Please run this notebook on Google Collab to not have dependencies issues.

### GPT-2 Model Training

In [13]:
!pip install transformers --quiet

import os
os.environ["WANDB_DISABLED"] = "true"

from transformers import (
    GPT2Tokenizer, GPT2LMHeadModel,
    DataCollatorForLanguageModeling,
    LineByLineTextDataset,
    Trainer, TrainingArguments
)
import pandas as pd


df = pd.read_csv("/content/physics_questions_500.csv")
df.fillna("N/A", inplace=True)

formatted_data = []
for _, row in df.iterrows():
    block = f"""[TOPIC: {row['Topic']}] [GRADE: {row['grade']}] [LEVEL: {row['StudentLevel']}] [DIFFICULTY: {row['Difficulty']}] [TYPE: {row['QuestionType']}] [COMPLEXITY: {row['QuestionComplexity']}]
[PREREQUISITES: {row['Prerequisites']}] [ESTIMATED_TIME: {row['EstimatedTime']}] [SUBJECT: {row['subject']}]

Q: {row['Question']}
A: {row['Answer']}

[EXPLANATION: {row['Explanation']}]
"""
    formatted_data.append(block)

with open("formatted_physics.txt", "w", encoding="utf-8") as f:
    f.write("\n\n".join(formatted_data))

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained("gpt2")

dataset = LineByLineTextDataset(
    tokenizer=tokenizer,
    file_path="formatted_physics.txt",
    block_size=128
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False
)

training_args = TrainingArguments(
    output_dir="./gpt2-physics",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    save_steps=250,
    save_total_limit=1,
    logging_steps=100,
    prediction_loss_only=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=data_collator
)

trainer.train()

trainer.save_model("./gpt2-physics")
tokenizer.save_pretrained("./gpt2-physics")
print("Training complete. Model saved!")

from transformers import GPT2LMHeadModel, GPT2Tokenizer

model = GPT2LMHeadModel.from_pretrained("./gpt2-physics")
tokenizer = GPT2Tokenizer.from_pretrained("./gpt2-physics")
model.eval()

prompt = "[TOPIC: Electrostatics] [GRADE: 12] [LEVEL: Advanced] [DIFFICULTY: 7]\nQ:"
input_ids = tokenizer.encode(prompt, return_tensors='pt')

generated = model.generate(
    input_ids,
    max_length=100,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=0.8,
    pad_token_id=tokenizer.eos_token_id
)

print("\nGenerated Physics Question:\n")
print(tokenizer.decode(generated[0], skip_special_tokens=True))


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Step,Training Loss
100,2.0324
200,1.5944
300,1.5213
400,1.2975
500,1.4197
600,1.3657
700,1.2007
800,1.271
900,1.2478
1000,1.057


✅ Training complete. Model saved!

📘 Generated Physics Question:

[TOPIC: Electrostatics] [GRADE: 12] [LEVEL: Advanced] [DIFFICULTY: 7]
Q: Calculate the electrostatic force between two point charges using Coulomb's Law, derived from the superposition of forces due to charges. [TYPE: Proof] [COMPLEXITY: 1.2] [COMPLEXITY: 1.1] [COMPLEXITY: 3.1] [COMPLEXITY: 2.8]


Download

In [14]:
!zip -r gpt2-physics.zip ./gpt2-physics
from google.colab import files
files.download("gpt2-physics.zip")


  adding: gpt2-physics/ (stored 0%)
  adding: gpt2-physics/model.safetensors (deflated 7%)
  adding: gpt2-physics/vocab.json (deflated 68%)
  adding: gpt2-physics/config.json (deflated 51%)
  adding: gpt2-physics/tokenizer_config.json (deflated 56%)
  adding: gpt2-physics/runs/ (stored 0%)
  adding: gpt2-physics/runs/Apr12_09-28-08_e45f0af0f25d/ (stored 0%)
  adding: gpt2-physics/runs/Apr12_09-28-08_e45f0af0f25d/events.out.tfevents.1744450089.e45f0af0f25d.240.1 (deflated 64%)
  adding: gpt2-physics/runs/Apr12_09-23-03_e45f0af0f25d/ (stored 0%)
  adding: gpt2-physics/runs/Apr12_09-23-03_e45f0af0f25d/events.out.tfevents.1744449787.e45f0af0f25d.240.0 (deflated 61%)
  adding: gpt2-physics/generation_config.json (deflated 24%)
  adding: gpt2-physics/checkpoint-3750/ (stored 0%)
  adding: gpt2-physics/checkpoint-3750/model.safetensors (deflated 7%)
  adding: gpt2-physics/checkpoint-3750/vocab.json (deflated 68%)
  adding: gpt2-physics/checkpoint-3750/config.json (deflated 51%)
  adding: gpt2

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Evaluation

### Evaluation of Gemini and GPT-2

#### GPt-2 Generation and Evaluation

In [15]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

model = GPT2LMHeadModel.from_pretrained("./gpt2-physics")
tokenizer = GPT2Tokenizer.from_pretrained("./gpt2-physics")
model.eval()

def get_loss_and_perplexity(text):
    inputs = tokenizer(text, return_tensors='pt')
    input_ids = inputs["input_ids"]

    with torch.no_grad():
        outputs = model(input_ids=input_ids, labels=input_ids)
        loss = outputs.loss
        total_log_likelihood = -loss.item() * input_ids.shape[1]
        perplexity = torch.exp(loss)

    print(f"\n Input Text: {text}")
    print(f"🔹 Token Count: {input_ids.shape[1]}")
    print(f"🔹 Cross-Entropy Loss: {loss.item():.4f}")
    print(f"🔹 Total Log-Likelihood: {total_log_likelihood:.4f}")
    print(f"🔹 Perplexity: {perplexity.item():.4f}")

get_loss_and_perplexity("Q: What is the unit of electric charge? A: Coulomb.")



📌 Input Text: Q: What is the unit of electric charge? A: Coulomb.
🔹 Token Count: 15
🔹 Cross-Entropy Loss: 2.0931
🔹 Total Log-Likelihood: -31.3962
🔹 Perplexity: 8.1099


In [16]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load fine-tuned model and tokenizer
model = GPT2LMHeadModel.from_pretrained("./gpt2-physics")
tokenizer = GPT2Tokenizer.from_pretrained("./gpt2-physics")
model.eval()

# Step 1: Generation prompt
prompt = "[TOPIC: Electrostatics] [GRADE: 12] [LEVEL: Intermediate] [DIFFICULTY: 5]\nQ:"
input_ids = tokenizer.encode(prompt, return_tensors='pt')

# Step 2: Generate continuation
output_ids = model.generate(
    input_ids,
    max_length=100,
    do_sample=True,
    temperature=0.9,
    top_k=50,
    top_p=0.95,
    pad_token_id=tokenizer.eos_token_id
)

generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print("📝 Generated Question:\n", generated_text)

# Step 3: Evaluate loss, log-likelihood, and perplexity of generated output
def evaluate_generated_text(text):
    inputs = tokenizer(text, return_tensors='pt')
    input_ids = inputs["input_ids"]

    with torch.no_grad():
        outputs = model(input_ids=input_ids, labels=input_ids)
        loss = outputs.loss
        total_log_likelihood = -loss.item() * input_ids.shape[1]
        perplexity = torch.exp(loss)

    print("\n📊 Evaluation Metrics:")
    print(f"🔹 Token Count: {input_ids.shape[1]}")
    print(f"🔹 Cross-Entropy Loss: {loss.item():.4f}")
    print(f"🔹 Total Log-Likelihood: {total_log_likelihood:.4f}")
    print(f"🔹 Perplexity: {perplexity.item():.4f}")

# Step 4: Evaluate
evaluate_generated_text(generated_text)


📝 Generated Question:
 [TOPIC: Electrostatics] [GRADE: 12] [LEVEL: Intermediate] [DIFFICULTY: 5]
Q: What is the electric field at a point P due to two charges q1 and q2? [PERC: N/A] [SUBJECT: Physics] [TYPE: General] [COMPLEXITY: 2.6] [COMPLEXITY: 2.1] [COMPLEXITY: 1.4] [COMPLE

📊 Evaluation Metrics:
🔹 Token Count: 100
🔹 Cross-Entropy Loss: 0.9971
🔹 Total Log-Likelihood: -99.7149
🔹 Perplexity: 2.7105


#### GPT-2 Evaluation of Gemini Created Question

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def compute_log_likelihood(text):
    inputs = tokenizer(text, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
        loss = outputs.loss
        log_likelihood = -loss.item() * inputs["input_ids"].shape[1]
        avg_log_likelihood = log_likelihood / inputs["input_ids"].shape[1]
    return {
        "log_likelihood": log_likelihood,
        "avg_log_likelihood": avg_log_likelihood,
        "perplexity": torch.exp(loss).item()
    }

gemini_mcq = """
Question:  A gas is compressed adiabatically from a volume of 10 L to 5 L. The initial temperature is 300 K. If the adiabatic exponent (γ) of the gas is 1.4, what is the final temperature of the gas?
A. 390 K
B. 424 K
C. 458 K
D. 492 K
Answer: B. 424 K
"""

result = compute_log_likelihood(gemini_mcq)
print("GPT-2 Evaluation of Gemini Output:")
print(f"Log-Likelihood: {result['log_likelihood']:.4f}")
print(f"Average Log-Likelihood per Token: {result['avg_log_likelihood']:.4f}")
print(f"Perplexity: {result['perplexity']:.2f}")
