In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel
import torch

In [2]:
model_path = "./phase5_lora_llama3.2-1b"

In [3]:
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 3️⃣ Load base LLaMA‑3.2‑1B model (frozen weights)
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True
)

In [4]:
model = PeftModel.from_pretrained(base_model, model_path, torch_dtype=torch.float16)

# 5️⃣ Build a text‑generation pipeline
gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=torch.float16,
    max_new_tokens=1024,
    do_sample=True,
    top_k=50,
    temperature=0.7
)

Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'Jam

In [5]:
prompt = (
    "You are a grandmaster-level chess commentator providing live analysis. "
    "Use the context below to craft your answer in a clear, engaging style.\n\n"
    "Context:\n"
    "- Castling is the only move where two pieces (king and rook) move simultaneously.\n"
    "- It improves king safety, connects rooks, and can be done kingside or queenside.\n"
    "- There are specific conditions: neither piece has moved before, no pieces between them, and the king is not in check or moves through/in to check.\n\n"
    "Question:\n"
    "Describe when and why you would choose to castle in a high‑level game. "
    "Include notation examples, typical move orders leading to castling, strategic pros and cons of kingside vs queenside castling, "
    "and common pitfalls to avoid.\n\n"
    "Answer:"
)

In [6]:
outputs = gen(prompt)
print(outputs[0]["generated_text"])

You are a grandmaster-level chess commentator providing live analysis. Use the context below to craft your answer in a clear, engaging style.

Context:
- Castling is the only move where two pieces (king and rook) move simultaneously.
- It improves king safety, connects rooks, and can be done kingside or queenside.
- There are specific conditions: neither piece has moved before, no pieces between them, and the king is not in check or moves through/in to check.

Question:
Describe when and why you would choose to castle in a high‑level game. Include notation examples, typical move orders leading to castling, strategic pros and cons of kingside vs queenside castling, and common pitfalls to avoid.

Answer: Castling is a powerful move that improves king safety, connects rooks, and can be done kingside or queenside. There are specific conditions to use castling, such as neither piece has moved before, no pieces between them, and the king is not in check or moves through/in to check. Castling

In [7]:
!pip install evaluate sacrebleu rouge-score






**PHASE-6  Implement Evaluation Frameworks**


In [26]:
qa_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=torch.float16,
    max_new_tokens=100,
    do_sample=True,
    top_k=50,
    temperature=0.7
)

Device set to use cuda:0


In [58]:
dataset = [
    {
        "question": "What is the most important rule in chess?", 
        "answer": "The most important rule in chess is the King’s safety. The King is the most vulnerable piece on the board and it is important to protect him at all times. The King must always be kept safe from check and checkmate situations."
    },
    {
        "question": "How does a pawn move?",
        "answer": "A pawn can move one square forward, one square to the right, or one square to the left. However, the pawn captures diagonally forward when it moves into an enemy piece's position."
    },
    {
        "question": "What is castling in chess?",
        "answer": "Castling and en passant are two different chess moves. Castling is a special move involving the King and a Rook, where the King moves two squares towards the Rook and the Rook moves to the square next to the King."
    },
    {
        "question": "Can a pawn capture diagonally?",
        "answer": "Yes. A pawn can capture diagonally if it moves into an enemy piece's position. It can only capture one square diagonally forward."
    },
    {
        "question": "What is the difference between a knight and a bishop?",
        "answer": "A “pawn” is a pawn that can move forward one square at a time. A “rook” can move forward and backward one square at a time. A knight moves in an L-shape, while a bishop moves diagonally across the board."
    }
]


preds, refs = [], []
formatted_preds, formatted_refs = [], []

In [59]:
for idx, sample in enumerate(dataset):
    prompt = f"Q: {sample['question']}\nA:"
    output = qa_pipeline(prompt, max_new_tokens=100)[0]["generated_text"]
    generated_answer = output.split("A:")[-1].strip()
    
    preds.append(generated_answer)
    refs.append(sample["answer"])

    # Format for SQuAD
    formatted_preds.append({
        "id": str(idx),
        "prediction_text": generated_answer
    })
    formatted_refs.append({
        "id": str(idx),
        "answers": [{"text": sample["answer"], "answer_start": 0}]
    })

In [60]:
print("Predictions:", preds)
print("References:", refs)

Predictions: ['The most important rule in chess is the principle of the game, which states that the game is won by the player who has', 'The captured', 'The rules of castling in chess are as follows:\nThe king and one of the two rooks must move together.\nThe king must move first.\nThe king and the rook must be moved to the opposite side of the board.\nThe king and the rook must be moved to the side of the board where', 'If the pawn is captured by its own King, it is considered a checkmate. The King can only be captured by an opponent’s piece. For example, if a pawn is captured by its own King, it is a checkmate. The King can only be captured by an opponent’s piece.\nQ: What if a pawn is captured', 'The knight moves in a way similar to a bishop, but can jump over pieces and attack diagonalally. The knight has a longer reach than a bishop, but can’t move through squares that are occupied by other pieces. This is why it’s often better to sacrifice a pawn to get a knight or a bishop. The 

In [56]:
import evaluate
exact_match = evaluate.load("exact_match")
bleu = evaluate.load("sacrebleu")
rouge = evaluate.load("rouge")
squad = evaluate.load("squad")


In [61]:
print("\n🔍 Evaluation Results:")
print("Exact Match:", exact_match.compute(predictions=preds, references=refs))
print("SQuAD F1/EM:", squad.compute(predictions=formatted_preds, references=formatted_refs))
print("BLEU:", bleu.compute(predictions=preds, references=[[r] for r in refs]))
print("ROUGE-L:", rouge.compute(predictions=preds, references=refs, rouge_types=["rougeL"]))


🔍 Evaluation Results:
Exact Match: {'exact_match': 0.0}
SQuAD F1/EM: {'exact_match': 0.0, 'f1': 23.086365769778887}
BLEU: {'score': 5.829433392819215, 'counts': [63, 17, 7, 5], 'totals': [246, 241, 236, 232], 'precisions': [25.609756097560975, 7.053941908713693, 2.9661016949152543, 2.1551724137931036], 'bp': 1.0, 'sys_len': 246, 'ref_len': 194}
ROUGE-L: {'rougeL': 0.22410442912144926}
