In [1]:
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import re
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Loading test data
df = pd.read_csv("../data/test_dataset.csv")

In [None]:
# Fetching Model
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    dtype=torch.float16,
    device_map="auto"
)

Loading checkpoint shards: 100%|██████████| 3/3 [00:02<00:00,  1.49it/s]


In [None]:
def parse_ruling(text):
    """
    Parses the model-generated ruling into a structured decision and explanation.

    Parameters
    ----------
    text : str
        The raw ruling text produced by the model.

    Returns
    -------
    tuple[str, str]
        A pair consisting of:
        - decision : {"YES", "NO", "UNKNOWN"}
            The extracted decision label.
        - explanation : str
            The explanation text following the last "Explanation:" marker.
    """
    
    # Normalize whitespace
    cleaned = " ".join(text.split())

    # Extracts decision (YES/NO)
    decision_match = re.search(r"decision\s*[:\-]\s*(yes|no)", cleaned, re.IGNORECASE)
    decision = decision_match.group(1).upper() if decision_match else "UNKNOWN"

    # Finding all occurrences of "Explanation:"
    explanation_positions = [m.start() for m in re.finditer(r"explanation\s*[:\-]", cleaned, re.IGNORECASE)]

    if not explanation_positions:
        return decision, ""

    # Use only the last Explanation:
    last_pos = explanation_positions[-1]

    # Extract everything after the last Explanation:
    explanation = cleaned[last_pos:]
    explanation = re.sub(r"explanation\s*[:\-]\s*", "", explanation, flags=re.IGNORECASE).strip()

    return decision, explanation

In [None]:

def ask_baseline(game_state, query):
    """
    Queries the language model for a Yu-Gi-Oh! ruling and returns a
    structured interpretation of its output.

    Parameters
    ----------
    game_state : str
        A description of the current Yu-Gi-Oh! game state provided to
        the model.
    query : str
        The ruling question to be answered by the model.

    Returns
    -------
    dict
        A dictionary containing:
        - "model_decision" : {"YES", "NO", "UNKNOWN"}
            The parsed decision label extracted from the model output.
        - "model_explanation" : str
            The explanation associated with the decision.
        - "raw_output" : str
            The full, unparsed text generated by the model.
    """
    
    prompt = f"""
You are acting as a Yu-Gi-Oh! Judge.
Answer the ruling question based only on the game state and your internal knowledge.
Give a YES or NO answer and a short explanation.

### Game State
{game_state}

### Query
{query}

### Ruling Format
Decision: <YES/NO>
Explanation: <short explanation>
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.0,
        do_sample=False
    )
    text = tokenizer.decode(output[0], skip_special_tokens=True)

    # Parse the model output
    decision, explanation = parse_ruling(text)

    return {
        "model_decision": decision,
        "model_explanation": explanation,
        "raw_output": text
    }

In [None]:
# Generate a Ruling & a Explanation for each scenario
for idx, row in df.iterrows():
    result = ask_baseline(row["game_state"], row["query"])
    df.at[idx, "baseline_decision"] = result["model_decision"]
    df.at[idx, "baseline_explanation"] = result["model_explanation"]
    df.at[idx, "baseline_raw"] = result["raw_output"]

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for ope

In [9]:
# Saving the results
df.to_csv("../results/yugioh_rulings_baseline.csv", index=False)