In [7]:
!pip install torch transformers numpy



In [8]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cpu


In [9]:
model_name = "gpt2" 
print("Loading Tokenizer...")
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
print("Loading Model...")
model = GPT2LMHeadModel.from_pretrained(model_name)
model = model.to(device) 
model.eval()
print("Success! GPT-2 is loaded.")

Loading Tokenizer...
Loading Model...
Success! GPT-2 is loaded.


In [10]:
# ARCHITECTURE
print(model)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)


In [11]:
# 5. TEST GENERATION
input_text = "The quick brown fox"
inputs = tokenizer(input_text, return_tensors="pt").to(device)

print(f"Input Tokens: {inputs['input_ids']}")

# Ask the model to continue the sentence
with torch.no_grad(): # Disable gradient calculation to save memory
    outputs = model.generate(
        **inputs, 
        max_length=20, 
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id
    )

decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("\nOutput:", decoded_text)

Input Tokens: tensor([[  464,  2068,  7586, 21831]])

Output: The quick brown foxes are a great way to get a little bit of a kick out of your


In [13]:
# BLOCK 5: ROBUST CHAT (WITH REPETITION PENALTY)
# This version fixes the "AI is AI" loop problem.

def chat_with_model_robust():
    print("="*50)
    print("   BASELINE GPT-2 (UNCOMPRESSED)")
    print("   (Type 'exit' to stop)")
    print("="*50)

    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # STRICTER PRIMER: Forces Q&A format
    primer = """
Q: What is the capital of France?
A: The capital of France is Paris.

Q: What is 2 + 2?
A: The answer is 4.

Q: Who wrote Hamlet?
A: William Shakespeare.

"""

    while True:
        user_input = input("YOU: ")
        if user_input.lower() in ["exit", "quit"]:
            print("Ending chat...")
            break
        
        # Format input to match the primer
        full_prompt = f"{primer}Q: {user_input}\nA:"
        
        inputs = tokenizer(full_prompt, return_tensors="pt", padding=True).to(device)
        
        with torch.no_grad():
            output_ids = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                max_new_tokens=50,
                do_sample=True,
                top_k=50,
                top_p=0.90,             # Stricter (was 0.95)
                temperature=0.1,        # Lower temp = Less random/crazy
                repetition_penalty=1.2, # <--- THIS FIXES THE LOOPING
                no_repeat_ngram_size=2, # <--- PREVENTS "AI is AI" phrases
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.encode("\n")[0]
            )

        full_response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        
        # Extract answer
        # We split by "A:" and take the last part
        try:
            # Find the part after our specific prompt
            new_text = full_response[len(full_prompt):]
            # Stop at the next new line (if the model tries to write a new Q:)
            answer = new_text.split("\n")[0].strip()
            # If empty (sometimes happens), take the raw generation
            if not answer: 
                answer = new_text.strip()
        except:
            answer = "..."

        print(f"AI:  {answer}\n")

# Run the Fixed Chat
chat_with_model_robust()

   BASELINE GPT-2 (UNCOMPRESSED)
   (Type 'exit' to stop)


YOU:  


AI:  I am not sure if you know what a "Hammer" means, but it's probably an English word for something that was written in England and then translated into French by someone else who did so too (or maybe they were both writers). It



YOU:  What is Artificial Inteliigence ? 


AI:  It's a computer program that can read and write to any number, including numbers with no digits or letters in them (like "1" for example). This means it has an infinite amount more memory than most computers on earth! You could say this



YOU:  2+2 = ?


AI:  Yes, it's a question that has been asked many times before and I think we all know what to do with this one!The following article was originally published on May 18th 2015 at 9am by David Boulton in his blog "



YOU:  exit


Ending chat...
