In [None]:
import json
import random

qa_categories = {
    "math": [
        ("What is {a} + {b}?", lambda a, b: str(a + b)),
        ("What is {a} - {b}?", lambda a, b: str(a - b)),
        ("What is {a} * {b}?", lambda a, b: str(a * b)),
        ("What is the square of {a}?", lambda a: str(a ** 2)),
        ("What is the cube of {a}?", lambda a: str(a ** 3)),
    ],
    "gk": [
        ("What is the capital of India?", "New Delhi"),
        ("Who wrote Hamlet?", "William Shakespeare"),
        ("What is the currency of Japan?", "Yen"),
        ("Who discovered gravity?", "Isaac Newton"),
        ("What is the largest planet?", "Jupiter"),
    ],
    "science": [
        ("What gas do plants absorb?", "Carbon dioxide"),
        ("What is H2O commonly known as?", "Water"),
        ("What is the boiling point of water?", "100¬∞C"),
        ("Who invented the light bulb?", "Thomas Edison"),
        ("What organ pumps blood in the human body?", "Heart"),
    ],
    "grammar": [
        ("Is 'quickly' a noun or adverb?", "Adverb"),
        ("Give a synonym for 'smart'.", "Intelligent"),
        ("What is the opposite of 'happy'?", "Sad"),
        ("Is 'run' a verb in 'I run daily'?", "Yes"),
        ("What is the past tense of 'eat'?", "Ate"),
    ],
    "logic": [
        ("What comes after 2, 4, 6, 8?", "10"),
        ("Is every even number divisible by 2?", "Yes"),
        ("If A > B and B > C, is A > C?", "Yes"),
        ("If you have 4 apples and give away 2, how many are left?", "2"),
        ("Is 121 a square number?", "Yes")
    ]
}

qa_dataset = []
for _ in range(8000):  # Use 8000 entries
    category = random.choice(list(qa_categories.keys()))
    entry = random.choice(qa_categories[category])
    
    if callable(entry[1]):
        if "{a}" in entry[0] and "{b}" in entry[0]:
            a, b = random.randint(1, 50), random.randint(1, 50)
            question = entry[0].format(a=a, b=b)
            answer = entry[1](a, b)
        else:
            a = random.randint(1, 20)
            question = entry[0].format(a=a)
            answer = entry[1](a)
    else:
        question, answer = entry

    qa_dataset.append({"text": f"Q: {question}\nA: {answer}"})

# Save the dataset
with open("qa_decoder_dataset.json", "w", encoding="utf-8") as f:
    for item in qa_dataset:
        json.dump(item, f)
        f.write("\n")

print("Decoder-friendly dataset with 8000 entries saved as 'qa_decoder_dataset.json'")


In [None]:
import json

# Load the dataset
data = []
with open("qa_decoder_dataset.json", "r", encoding="utf-8") as f:
    for line in f:
        item = json.loads(line.strip())
        data.append(item["text"])

print(f"Loaded {len(data)} examples.")
print("Sample:", data[0])


In [None]:
import torch
import torch.nn as nn

class GPTStyleTransformer(nn.Module):
    def __init__(self, vocab_size, embed_dim=256, num_heads=4, num_layers=4, dropout=0.1, max_length=128):
        super(GPTStyleTransformer, self).__init__()
        self.token_embedding = nn.Embedding(vocab_size, embed_dim)
        self.pos_embedding = nn.Embedding(max_length, embed_dim)

        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dropout=dropout)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.output_layer = nn.Linear(embed_dim, vocab_size)
        self.max_length = max_length

    def forward(self, input_ids):
        device = input_ids.device
        seq_len = input_ids.size(1)

        positions = torch.arange(0, seq_len, device=device).unsqueeze(0)
        x = self.token_embedding(input_ids) + self.pos_embedding(positions)

        # Generate mask with correct shape based on input length
        causal_mask = nn.Transformer.generate_square_subsequent_mask(seq_len).to(device)

        x = self.transformer(x.transpose(0, 1), mask=causal_mask)  # shape: [seq_len, batch, embed]
        x = x.transpose(0, 1)  # back to [batch, seq_len, embed]

        return self.output_layer(x)


In [None]:
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader

# Load a tokenizer (distilgpt2 is a small GPT-compatible tokenizer)
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
tokenizer.pad_token = tokenizer.eos_token  # Required for padding support

class QADecoderDataset(Dataset):
    def __init__(self, texts, tokenizer, max_length=128):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.examples = []

        for text in texts:
            encoding = tokenizer(
                text,
                max_length=max_length,
                truncation=True,
                padding="max_length",
                return_tensors="pt"
            )
            input_ids = encoding["input_ids"].squeeze()
            attention_mask = encoding["attention_mask"].squeeze()
            self.examples.append((input_ids, attention_mask))

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        input_ids, attention_mask = self.examples[idx]
        labels = input_ids.clone()
        return input_ids, attention_mask, labels

# Prepare dataset and dataloader
dataset = QADecoderDataset(data, tokenizer)
loader = DataLoader(dataset, batch_size=8, shuffle=True)

print("Tokenizer and dataset ready.")


In [None]:
import torch
import torch.nn as nn
from tqdm import tqdm

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vocab_size = tokenizer.vocab_size

# Initialize model and optimizer
model = GPTStyleTransformer(vocab_size).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)

# Training loop
EPOCHS = 9

model.train()
for epoch in range(EPOCHS):
    total_loss = 0
    print(f"\nEpoch {epoch + 1}/{EPOCHS}")
    
    for input_ids, attention_mask, labels in tqdm(loader, desc="Training"):
        input_ids = input_ids.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(input_ids)

        loss = criterion(outputs.view(-1, vocab_size), labels.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_loss = total_loss / len(loader)
    print(f"Average Loss: {avg_loss:.4f}")


In [None]:
import torch
from torch.nn import functional as F

def generate_response(prompt, max_length=50, top_k=10):
    model.eval()
    input_text = prompt
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    input_ids = inputs["input_ids"]

    generated = input_ids.clone()

    for _ in range(max_length):
        with torch.no_grad():
            outputs = model(generated)
            next_token_logits = outputs[:, -1, :]

            probs = F.softmax(next_token_logits, dim=-1)
            top_k_probs, top_k_indices = torch.topk(probs, k=top_k, dim=-1)
            next_token = top_k_indices[0, torch.multinomial(top_k_probs[0], 1)].unsqueeze(0)

        generated = torch.cat((generated, next_token), dim=1)

        if next_token.item() == tokenizer.eos_token_id:
            break

    response = tokenizer.decode(generated[0][input_ids.shape[1]:], skip_special_tokens=True)
    return response.strip()


In [None]:
print("Response:", generate_response("Q: What is 7 + 5?\nA:"))



In [None]:
from transformers import GPT2LMHeadModel, AutoTokenizer

print("Loading model...")

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
tokenizer.pad_token = tokenizer.eos_token  # Use EOS token for padding

model = GPT2LMHeadModel.from_pretrained("distilgpt2")
model.resize_token_embeddings(len(tokenizer))
model.to(device)

print("Model and tokenizer loaded successfully.")



In [None]:
import json

# Load your cleaned Q/A dataset from before
with open("qa_decoder_dataset.json", "r", encoding="utf-8") as f:
    lines = [json.loads(line)["text"] for line in f]

# Join each Q/A into a single large training string
dataset_texts = "\n\n".join(lines)
print("Loaded and formatted dataset.")



In [None]:
class GPT2QADataset(Dataset):
    def __init__(self, text, tokenizer, max_length=1024):
        self.tokenizer = tokenizer
        self.inputs = []

        # Tokenize the whole dataset and flatten it
        tokens = tokenizer(text, return_tensors="pt", truncation=False)["input_ids"][0]

        # Split into 1024-token chunks
        for i in range(0, len(tokens) - max_length, max_length):
            chunk = tokens[i:i+max_length]
            self.inputs.append(chunk)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_ids = self.inputs[idx]
        return input_ids, input_ids  # (input, label) pair


In [None]:
train_dataset = GPT2QADataset(dataset_texts, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)

print("Dataset ready with", len(train_dataset), "samples.")


In [None]:
class GPT2QADataset(Dataset):
    def __init__(self, text, tokenizer, max_length=1024):
        self.tokenizer = tokenizer
        self.inputs = []

        # Tokenize the whole dataset and flatten it
        tokens = tokenizer(text, return_tensors="pt", truncation=False)["input_ids"][0]

        # Split into 1024-token chunks
        for i in range(0, len(tokens) - max_length, max_length):
            chunk = tokens[i:i+max_length]
            self.inputs.append(chunk)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_ids = self.inputs[idx]
        return input_ids, input_ids  # input = label


In [None]:
train_dataset = GPT2QADataset(dataset_texts, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)

print("Dataset ready with", len(train_dataset), "samples.")


In [122]:
from torch.optim import AdamW
import torch.nn as nn

optimizer = AdamW(model.parameters(), lr=5e-5)
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)

model.train()
print("Starting quick test loop...")

for step, (input_ids, labels) in enumerate(train_loader):
    if step > 3:  # Just 4 steps to test training
        break

    input_ids = input_ids.to(device)
    labels = labels.to(device)

    optimizer.zero_grad()
    outputs = model(input_ids, labels=labels)
    loss = outputs.loss

    print(f"Step {step} | Loss: {loss.item():.4f}")
    loss.backward()
    optimizer.step()


Starting quick test loop...
Step 0 | Loss: 0.4381
Step 1 | Loss: 0.4323
Step 2 | Loss: 0.4551
Step 3 | Loss: 0.3939


In [None]:
subset_loader = DataLoader(train_dataset[:20], batch_size=2, shuffle=True)
print("Subset DataLoader ready with 20 samples.")


In [None]:
from torch.utils.data import Subset

subset = Subset(train_dataset, list(range(20)))  # first 20 examples
subset_loader = DataLoader(subset, batch_size=2, shuffle=True)


In [None]:
model.train()
EPOCHS = 2

for epoch in range(EPOCHS):
    total_loss = 0
    print(f"\nEpoch {epoch + 1}/{EPOCHS}")

    for input_ids, labels in subset_loader:
        input_ids = input_ids.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, labels=labels)
        loss = outputs.loss

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_loss = total_loss / len(subset_loader)
    print(f"Average Loss: {avg_loss:.4f}")
    


In [None]:
def generate_response(prompt, max_new_tokens=50):
    model.eval()
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],  # <-- Fix added here
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_k=50,
            top_p=0.95
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()


In [112]:

print(generate_response("Q: Who wrote Hamlet?\nA:"))
print(generate_response("Q: What is the capital of India?\nA:"))
print(generate_response("Q: What is the boiling point of water?\nA:"))
print(generate_response("Q: Give a synonym for 'smart'.\nA:"))


Q: Who wrote Hamlet?
A: William Shakespeare
Q: What is the capital of India?
A: New Delhi
Q: What is the capital of India?
A: New Delhi
Q: Who wrote Hamlet?
A: William Shakespeare

Q: What
Q: What is the capital of India?
A: New Delhi

Q: What is the square of 2?
A: 64

Q: Is 'run' a verb in 'I run daily'?
A: Yes

Q: What is the past tense of 'eat'?
Q: What is the boiling point of water?
A: 100¬∞C
Q: What is the boiling point of water?
A: 100¬∞C
Q: What is the largest planet?
A: Jupiter
Q: Is 'run' a verb in 'I run daily'?
A
Q: Give a synonym for 'smart'.
A: Intelligent
Q: What is the cube of 9?
A: 256

Q: What is the capital of India?
A: New Delhi

Q: What is the largest planet?
A: Jupiter

Q: What


In [120]:
import os

# List all files and folders in the current working directory
print("Current Directory:", os.getcwd())
print("Contents:")
print(os.listdir())


Current Directory: C:\Users\ferna
Contents:
['-1.14-windows.xml', '.anaconda', '.cache', '.conda', '.condarc', '.continuum', '.idlerc', '.ipynb_checkpoints', '.ipython', '.jupyter', '.Ld9VirtualBox', '.node_repl_history', '.openjfx', '.vscode', '3D Objects', 'anaconda3', 'anaconda_projects', 'ansel', 'AppData', 'Application Data', 'bluej', 'BrawlhallaReplays', 'ceaser.py', 'cleaned_dataset.json', 'Contacts', 'Cookies', 'diffie hellman.py', 'Documents', 'Downloads', 'Favorites', 'from Crypto.py', 'generated_dataset.json', 'IntelGraphicsProfiles', 'Links', 'llama2_test.py', 'llm project 2.ipynb', 'llm project.ipynb', 'llm-env', 'Local Settings', 'Music', 'My Documents', 'NetHood', 'NTUSER.DAT', 'ntuser.dat.LOG1', 'ntuser.dat.LOG2', 'NTUSER.DAT{f591b2a5-fa68-11ef-a828-b301f62e9121}.TM.blf', 'NTUSER.DAT{f591b2a5-fa68-11ef-a828-b301f62e9121}.TMContainer00000000000000000001.regtrans-ms', 'NTUSER.DAT{f591b2a5-fa68-11ef-a828-b301f62e9121}.TMContainer00000000000000000002.regtrans-ms', 'ntuser.i

In [124]:
# Save the model to 'output/' directory
model.save_pretrained("output")

# Save the tokenizer to the same folder
tokenizer.save_pretrained("output")


('output\\tokenizer_config.json',
 'output\\special_tokens_map.json',
 'output\\vocab.json',
 'output\\merges.txt',
 'output\\added_tokens.json',
 'output\\tokenizer.json')

In [126]:
import os
print(os.listdir("output"))


['config.json', 'generation_config.json', 'merges.txt', 'model.safetensors', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer_config.json', 'vocab.json']


In [132]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load model and tokenizer from local fine-tuned folder
model = GPT2LMHeadModel.from_pretrained("output", local_files_only=True)
tokenizer = GPT2Tokenizer.from_pretrained("output", local_files_only=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()


easy_prompts = [
    "Q: What is 2 + 2?\nA:",
    "Q: What is the capital of India?\nA:",
    "Q: What is 5 - 3?\nA:",
    "Q: What is the opposite of hot?\nA:",
    "Q: Who wrote Hamlet?\nA:",
    "Q: What is the boiling point of water?\nA:",
    "Q: What comes after 3, 4, 5?\nA:",
    "Q: What color is the sky?\nA:",
    "Q: How many days are there in a week?\nA:",
    "Q: What is the capital of Japan?\nA:",
    "Q: What is 10 √∑ 2?\nA:",
    "Q: What is H2O commonly known as?\nA:",
    "Q: What is the plural of cat?\nA:",
    "Q: How many wheels does a car have?\nA:",
    "Q: What is 6 √ó 3?\nA:",
    "Q: What is the capital of France?\nA:",
    "Q: What is the past tense of eat?\nA:",
    "Q: What is the opposite of big?\nA:",
    "Q: How many legs does a spider have?\nA:",
    "Q: What is the color of a banana?\nA:"
]


def generate_response(prompt, max_length=50):
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=max_length,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_k=50
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response[len(prompt):].strip()

print("----- Easy Prompt Responses -----\n")
for i, prompt in enumerate(easy_prompts, 1):
    response = generate_response(prompt)
    print(f"{i}. {prompt.strip()}\n   Response: {response}\n")


----- Easy Prompt Responses -----

1. Q: What is 2 + 2?
A:
   Response: A: 2, 4

Q: What is the past tense of 'eat'?
A: Ate
Q: Is 'run' a verb in 'I run daily'?

2. Q: What is the capital of India?
A:
   Response: New Delhi

Q: What is the capital of India?
A: New Delhi

Q: If A > B and B > C, is A > C?
A

3. Q: What is 5 - 3?
A:
   Response: 4
Q: Who invented the light bulb?
A: William Shakespeare
Q: What is H2O commonly known as?
A: Water
Q: Who discovered gravity?

4. Q: What is the opposite of hot?
A:
   Response: Cool

Q: Is 'quickly' a noun or adverb?
A: Adverb

Q: If A > B and B > C, is A > C

5. Q: Who wrote Hamlet?
A:
   Response: William Shakespeare

Q: What is the currency of Japan?
A: Yen

Q: What is the square of 6?
A: 944
Q: Give a synonym

6. Q: What is the boiling point of water?
A:
   Response: 100¬∞C
Q: Is 'quickly' the currency of Japan?
A: Yen
Q: What is 34 * 2?
A: 44
Q: If

7. Q: What comes after 3, 4, 5?
A:
   Response: 10 + 19

Q: Who discovered gravity?
A: Isaac

In [134]:

expected_answers = [
    "4",
    "New Delhi",
    "2",
    "cold",
    "William Shakespeare",
    "100¬∞C",
    "6",
    "blue",
    "7",
    "Tokyo",
    "5",
    "water",
    "cats",
    "4",
    "18",
    "Paris",
    "ate",
    "small",
    "8",
    "yellow"
]


easy_prompts = [
    "Q: What is 2 + 2?\nA:",
    "Q: What is the capital of India?\nA:",
    "Q: What is 5 - 3?\nA:",
    "Q: What is the opposite of hot?\nA:",
    "Q: Who wrote Hamlet?\nA:",
    "Q: What is the boiling point of water?\nA:",
    "Q: What comes after 3, 4, 5?\nA:",
    "Q: What color is the sky?\nA:",
    "Q: How many days are there in a week?\nA:",
    "Q: What is the capital of Japan?\nA:",
    "Q: What is 10 √∑ 2?\nA:",
    "Q: What is H2O commonly known as?\nA:",
    "Q: What is the plural of cat?\nA:",
    "Q: How many wheels does a car have?\nA:",
    "Q: What is 6 √ó 3?\nA:",
    "Q: What is the capital of France?\nA:",
    "Q: What is the past tense of eat?\nA:",
    "Q: What is the opposite of big?\nA:",
    "Q: How many legs does a spider have?\nA:",
    "Q: What is the color of a banana?\nA:"
]

correct = 0

for i, prompt in enumerate(easy_prompts):
    response = generate_response(prompt).lower().strip()
    expected = expected_answers[i].lower().strip()

    if expected in response:
        correct += 1
    print(f"{i+1}. " if expected in response else f"{i+1}. ‚ùå", f"Expected: {expected} | Got: {response}")


accuracy = correct / len(expected_answers) * 100
print(f"\nOverall Accuracy: {accuracy:.2f}%")


1. ‚ùå Expected: 4 | Got: a + 2
q: who discovered gravity?
a: isaac newton

q: what is the cube of light?
a: 1028

q: is 121 a square
2.  Expected: new delhi | Got: new delhi
q: is 'run' a verb in 'i run daily'?
a: yes
q: what is the boiling point of water?
a: 100¬∞c
3.  Expected: 2 | Got: 5
q: what is the square of 12?
a: 9
q: what is the square of 5?
a: 622
q: what is 8 * 21?
4. ‚ùå Expected: cold | Got: water

q: what is the opposite of hot?
a: water

q: what is the cube of 10?
a: 361

q: what is the
5.  Expected: william shakespeare | Got: william shakespeare
q: is every even number divisible by 2?
a: yes
q: who wrote hamlet?
a: william shakespeare
q: if a > b and b
6.  Expected: 100¬∞c | Got: 100¬∞c

q: what is the square of 9?
a: 2

q: what is the boiling point of water?
a: 100¬∞c
7.  Expected: 6 | Got: 10
q: is every even number divisible by 2?
a: yes
q: what comes after 2, 4, 6?
a: 10

q
8. ‚ùå Expected: blue | Got: orange

q: what is the past tense of 'eat'?
a: ate

q: what is

In [136]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load the model and tokenizer
model = GPT2LMHeadModel.from_pretrained("output", local_files_only=True)
tokenizer = GPT2Tokenizer.from_pretrained("output", local_files_only=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Updated prompt generator (Greedy decoding, normalized, shorter length)
def generate_response(prompt, max_length=30):
    # Normalize format
    prompt = prompt.strip()
    if not prompt.startswith("Q:"):
        prompt = f"Q: {prompt}"
    if not prompt.endswith("A:"):
        prompt += "\nA:"
    
    input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=128).to(device)
    
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=input_ids.shape[1] + max_length,
            do_sample=False,  # greedy decoding
            pad_token_id=tokenizer.eos_token_id
        )
    
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    return decoded[len(prompt):].strip()


In [140]:
# Define simple test prompts and expected answers
test_prompts = [
    "What is the capital of India?",
    "What is the capital of Japan?",
    "What is the capital of France?",
    "What is H2O commonly known as?",
    "Who wrote Hamlet?",
    "What is the opposite of happy?",
    "What is the boiling point of water?",
    "What is the past tense of eat?",
    "Give a synonym for smart.",
    "What is the color of the sky?",
]

expected_answers = [
    "New Delhi",
    "Tokyo",
    "Paris",
    "Water",
    "William Shakespeare",
    "Sad",
    "100¬∞C",
    "Ate",
    "Intelligent",
    "Blue"
]

# Evaluation loop
correct = 0

print("----- Improved Model Accuracy Test -----\n")
for i, prompt in enumerate(test_prompts):
    response = generate_response(prompt).lower().strip()
    expected = expected_answers[i].lower().strip()
    
    # Soft match: expected keyword appears in model output
    is_correct = expected in response
    print(f"{i+1}. {'yes' if is_correct else 'no'} Prompt: {prompt}\n   Expected: {expected} | Response: {response}\n")
    
    if is_correct:
        correct += 1

accuracy = correct / len(expected_answers) * 100
print(f"\nFinal Accuracy: {accuracy:.2f}%")


----- Improved Model Accuracy Test -----

1. yes Prompt: What is the capital of India?
   Expected: new delhi | Response: new delhi
q: what is the capital of india?
a: new delhi
q: what is the capital of india?
a:

2. no Prompt: What is the capital of Japan?
   Expected: tokyo | Response: new delhi
q: what is the capital of india?
a: new delhi
q: what is the capital of india?
a:

3. no Prompt: What is the capital of France?
   Expected: paris | Response: new delhi
q: what is the capital of india?
a: new delhi
q: what is the capital of india?
a:

4. yes Prompt: What is H2O commonly known as?
   Expected: water | Response: water
q: what is the boiling point of water?
a: 100¬∞c

q: what is the boiling point of water?

5. yes Prompt: Who wrote Hamlet?
   Expected: william shakespeare | Response: william shakespeare
q: what is the capital of india?
a: new delhi
q: what is the capital of india?
a:

6. yes Prompt: What is the opposite of happy?
   Expected: sad | Response: sad
q: what is the 

In [146]:
def generate_response_instruction(prompt, max_length=30):
    # Match the 'Instruction/Response' format
    prompt = prompt.strip()
    if not prompt.startswith("Instruction:"):
        prompt = f"Instruction: {prompt}"
    if "Response:" not in prompt:
        prompt += "\nResponse:"
    
    input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=128).to(device)
    
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=input_ids.shape[1] + max_length,
            do_sample=False,  # Greedy decoding
            pad_token_id=tokenizer.eos_token_id
        )
    
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    return decoded.split("Response:")[-1].strip()

# Prompts
instruction_prompts = [
    "What is the capital of India?",
    "What is the capital of Japan?",
    "What is the capital of France?",
    "What is H2O commonly known as?",
    "Who wrote Hamlet?",
    "What is the opposite of happy?",
    "What is the boiling point of water?",
    "What is the past tense of eat?",
    "Give a synonym for smart.",
    "What is the color of the sky?"
]

expected_answers = [
    "New Delhi",
    "Tokyo",
    "Paris",
    "Water",
    "William Shakespeare",
    "Sad",
    "100¬∞C",
    "Ate",
    "Intelligent",
    "Blue"
]

# Accuracy Test
correct = 0
print("----- Instruction/Response Format Accuracy Test -----\n")
for i, prompt in enumerate(instruction_prompts):
    response = generate_response_instruction(prompt).lower().strip()
    expected = expected_answers[i].lower().strip()
    is_correct = expected in response
    print(f"{i+1}. {'yes' if is_correct else 'no'} Prompt: {prompt}\n   Expected: {expected} | Response: {response}\n")
    if is_correct:
        correct += 1

accuracy = correct / len(expected_answers) * 100
print(f"\nFinal Accuracy (Instruction/Response): {accuracy:.2f}%")


----- Instruction/Response Format Accuracy Test -----

1. yes Prompt: What is the capital of India?
   Expected: new delhi | Response: yes

q: what is the capital of india?
answer: new delhi

q: what is the capital of india?
answer

2. no Prompt: What is the capital of Japan?
   Expected: tokyo | Response: yes

q: what is the capital of india?
answer: new delhi

q: what is the capital of india?
answer

3. no Prompt: What is the capital of France?
   Expected: paris | Response: yes

q: what is the capital of india?
answer: new delhi

q: what is the capital of india?
answer

4. no Prompt: What is H2O commonly known as?
   Expected: water | Response: yes

q: what is the currency of japan?
a: yen

q: what is the currency of japan?
a:

5. no Prompt: Who wrote Hamlet?
   Expected: william shakespeare | Response: yes

q: what is the cube of 12?
a: 1728

q: what is the cube of 12?
a

6. yes Prompt: What is the opposite of happy?
   Expected: sad | Response: yes

q: what is the opposite of sad?

In [149]:
def generate_response(prompt, max_length=30):
    # Ensure Q/A format is consistent
    prompt = prompt.strip()
    if not prompt.startswith("Q:"):
        prompt = f"Q: {prompt}"
    if not prompt.endswith("A:"):
        prompt += "\nA:"

    input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=128).to(device)
    
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=input_ids.shape[1] + max_length,
            do_sample=False,  # Greedy decoding
            pad_token_id=tokenizer.eos_token_id
        )
    
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    return decoded[len(prompt):].strip()

# Your best-performing prompt list
test_prompts = [
    "What is the capital of India?",
    "What is the capital of Japan?",
    "What is the capital of France?",
    "What is H2O commonly known as?",
    "Who wrote Hamlet?",
    "What is the opposite of happy?",
    "What is the boiling point of water?",
    "What is the past tense of eat?",
    "Give a synonym for smart.",
    "What is the color of the sky?"
]

expected_answers = [
    "New Delhi",
    "Tokyo",
    "Paris",
    "Water",
    "William Shakespeare",
    "Sad",
    "100¬∞C",
    "Ate",
    "Intelligent",
    "Blue"
]

# Evaluation
correct = 0
print("----- Reverting to Q/A Format (Best Accuracy) -----\n")
for i, prompt in enumerate(test_prompts):
    response = generate_response(prompt).lower().strip()
    expected = expected_answers[i].lower().strip()
    is_correct = expected in response
    print(f"{i+1}. {'yes' if is_correct else 'no'} Prompt: {prompt}\n   Expected: {expected} | Response: {response}\n")
    if is_correct:
        correct += 1

accuracy = correct / len(expected_answers) * 100
print(f"\nüìä Final Accuracy (Q/A format): {accuracy:.2f}%")


----- Reverting to Q/A Format (Best Accuracy) -----

1. yes Prompt: What is the capital of India?
   Expected: new delhi | Response: new delhi
q: what is the capital of india?
a: new delhi
q: what is the capital of india?
a:

2. no Prompt: What is the capital of Japan?
   Expected: tokyo | Response: new delhi
q: what is the capital of india?
a: new delhi
q: what is the capital of india?
a:

3. no Prompt: What is the capital of France?
   Expected: paris | Response: new delhi
q: what is the capital of india?
a: new delhi
q: what is the capital of india?
a:

4. yes Prompt: What is H2O commonly known as?
   Expected: water | Response: water
q: what is the boiling point of water?
a: 100¬∞c

q: what is the boiling point of water?

5. yes Prompt: Who wrote Hamlet?
   Expected: william shakespeare | Response: william shakespeare
q: what is the capital of india?
a: new delhi
q: what is the capital of india?
a:

6. yes Prompt: What is the opposite of happy?
   Expected: sad | Response: sad
q: w