In [26]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # or "distilgpt2"

# Decide the device first so the dtype can follow it. Half precision halves the
# weight memory on GPU, but half-precision CPU kernels are slow and, on older
# PyTorch builds, missing for some ops — so fall back to float32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# Load tokenizer and model without device_map; placement is done manually below.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=dtype,
).to(device)  # Manual device placement




In [None]:
# Prompt with STRICT instructions: one informal question, no answer.
input_text = """Generate exactly ONE question about the following topic. Do NOT answer it. Make it FUN and INFORMAL.
Topic: The person loves soccer, yesterday Manchester won to Athletic Club on the Europa League semifinals.
Question:"""

# Tokenize the prompt and move the tensors to the device the model lives on.
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

# Sampling is stochastic: seed the RNG so re-running this cell reproduces the
# same candidates instead of a different set every time.
torch.manual_seed(42)

# Generate with tight constraints
outputs = model.generate(
    **inputs,
    max_new_tokens=40,  # Strict limit to prevent rambling
    do_sample=True,
    temperature=0.7,    # Balanced creativity
    top_k=40,
    top_p=0.9,
    num_return_sequences=3,  # three candidate questions from the same prompt
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,  # reuse EOS as the padding token id
    repetition_penalty=1.2
)

# Extract JUST the question from each sampled sequence.
def _extract_question(generated_text):
    """Return the first question found in ``generated_text``, without the "?".

    Takes the first non-blank line, keeps everything before the first "?",
    and trims surrounding whitespace and stray quote characters. Returns an
    empty string when the text contains nothing usable.
    """
    non_blank = [line.strip() for line in generated_text.splitlines() if line.strip()]
    if not non_blank:
        return ""
    return non_blank[0].split("?")[0].strip(" \"'")


# generate() returns prompt + continuation for each sequence; decode only the
# newly generated tokens so the result does not depend on finding the literal
# "Question:" marker (which the model may emit again in its continuation).
prompt_len = inputs["input_ids"].shape[1]

for i, out in enumerate(outputs):
    continuation = tokenizer.decode(out[prompt_len:], skip_special_tokens=True)
    question = _extract_question(continuation)
    if question:
        print(f"Option {i+1}: {question}?")
    else:
        # The model produced only whitespace/punctuation; say so instead of printing a bare "?".
        print(f"Option {i+1}: (no question generated)")

Option 1: What was the final score?
Option 2: "What did the Manchester fan say after watching their team beat Athletic 3-1 in the second leg?
Option 3: What was the score of the match?
