In [None]:
# Temperature experiments
import json
import os
import re
import time

import pandas as pd
from huggingface_hub import InferenceClient

# Hugging Face API token, read from the HF_TOKEN environment variable.
# Never commit a token to source control: anyone who can read the file can use
# it, and a token that has been committed must be revoked and regenerated.
# When HF_TOKEN is unset this is None and is passed through to InferenceClient
# as-is (NOTE(review): huggingface_hub is documented to fall back to the locally
# saved login token in that case — confirm for the installed version).
hf_token = os.environ.get("HF_TOKEN")

def main(model):
    """Run the chain-of-thought poker prompts against ``model`` at several temperatures.

    For each temperature in 0.0, 0.2, ..., 1.0 every example of the postflop
    sample set is sent to the Hugging Face Inference API, the reply is split
    into reasoning and final answer, and one CSV per temperature is written to
    the working directory as ``Poker_<model_name>_temp_<temperature>.csv``
    (dots removed from both the model name and the temperature, e.g. ``temp_04``).

    Args:
        model: Hugging Face model id such as ``"org/name"``; only the part after
            the last ``/`` is used in the output file name.

    Raises:
        OSError: if one of the sample JSON files cannot be opened.
        json.JSONDecodeError: if a sample file is not valid JSON.
    """
    model_name = model.split("/")[-1].replace(".", "")

    # One client serves the whole run; building a new one per request is
    # needless work and hides configuration errors behind per-example "Error" rows.
    client_hf = InferenceClient(api_key=hf_token)

    def call_hf(model, messages, temperature=0.0):
        """Send ``messages`` to ``model`` via chat completion and return the reply text.

        Returns an empty string when the API returns no message content.
        """
        response = client_hf.chat_completion(
            model=model,
            messages=messages,
            max_tokens=1024,
            temperature=temperature
        )
        return response.choices[0].message.content or ""

    # Set the system message
    system_message = {
        "role": "system",
        "content": """
            You are an expert at Texas Holdem Poker. Your job is to do step-by-step reasoning about a game scenario and then provide a final answer as 'Final Answer:'.
        """
    }

    def get_optimal_action(text, temperature=0.0):
        """Wrap ``text`` as the user turn after the system message and query the model."""
        user_message = {
            "role": "user",
            "content": text
        }
        messages = [system_message, user_message]
        return call_hf(model, messages, temperature)

    def parse_output(output_text):
        """Split a model reply into ``(reasoning, final_answer)``.

        The reply is cut at the first "Final Answer:" (case-sensitive) or,
        failing that, at the first "optimal action:" (case-insensitive).
        ``final_answer`` is the first line of what follows the marker, up to
        the next occurrence of the marker. When no marker is present, or
        nothing follows it (for example a reply truncated by the token limit),
        ``final_answer`` is the empty string instead of raising.
        """
        if not output_text:
            return "", ""

        if "Final Answer:" in output_text:
            parts = output_text.split("Final Answer:")
        elif "optimal action:" in output_text.lower():
            parts = re.split(r"optimal action:", output_text, flags=re.IGNORECASE)
        else:
            return output_text, ""

        if len(parts) < 2:
            # The lowercase check matched but the regex did not split; treat as unparsed.
            return output_text, ""

        answer_lines = parts[1].strip().splitlines()
        final_answer = answer_lines[0] if answer_lines else ""
        return parts[0].strip(), final_answer

    # Load datasets
    with open("Postflop 100 Sample.json", "r", encoding="utf-8") as f:
        postflop_test_set = json.load(f)
    # NOTE(review): the preflop set is loaded but not evaluated below; kept so
    # the run still validates that the file exists, as the original did.
    with open("Preflop 100 Sample.json", "r", encoding="utf-8") as f:
        preflop_test_set = json.load(f)
    all_examples = postflop_test_set

    # CoT prompt setup
    extra_instruction = '''
    You are an expert in 6-handed No Limit Texas Holdem. Your job is to analyze a game scenario and decide on the optimal action.

    Think through your answer step-by-step and then output exactly one sentence starting with "Final Answer:"
    '''

    total = len(all_examples)

    # Run across multiple temperatures
    for temperature in [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]:
        print(f"\n Running experiment at temperature: {temperature}")
        results = []

        for idx, example in enumerate(all_examples, start=1):
            instruction = example.get("instruction", "")
            # The dataset prompt forbids explanations; drop that so the CoT
            # instruction prepended below is not contradicted.
            prompt_text = instruction.replace("Do not explain your answer.", "").strip()
            prompt_text = extra_instruction + prompt_text

            print(f"→ [{temperature}] Example {idx}/{total}")

            try:
                output = get_optimal_action(prompt_text, temperature)
            except Exception as e:
                # Best-effort: one failed request must not abort the whole sweep.
                print(f"Error on example {idx}: {e}")
                output = final_answer = "Error"
            else:
                # Parsing never raises, so a successful reply is never
                # overwritten with "Error".
                _, final_answer = parse_output(output)

            results.append({
                "instruction": instruction,
                "prompt": prompt_text,
                "ground_truth": example.get("output", ""),
                # The "reasoning" column holds the full raw reply, not just the
                # text before the marker.
                "reasoning": output,
                "final_answer": final_answer
            })

            time.sleep(1)  # Respect rate limits

        # Save output to CSV
        out_path = f"Poker_{model_name}_temp_{str(temperature).replace('.', '')}.csv"
        pd.DataFrame(results).to_csv(out_path, index=False)
        print(f"Saved: {out_path}")

# Run it for Llama (or replace with another text-generation model id).
# Guarded so that importing this file does not start a long run of API calls;
# the guard is still true when executed as a script or in a notebook cell.
if __name__ == "__main__":
    main("meta-llama/Llama-3.1-8B-Instruct")


🔁 Running experiment at temperature: 0.4
→ [0.4] Example 1/100
→ [0.4] Example 2/100
→ [0.4] Example 3/100
→ [0.4] Example 4/100
→ [0.4] Example 5/100
→ [0.4] Example 6/100
→ [0.4] Example 7/100
→ [0.4] Example 8/100
→ [0.4] Example 9/100
→ [0.4] Example 10/100
→ [0.4] Example 11/100
→ [0.4] Example 12/100
→ [0.4] Example 13/100
→ [0.4] Example 14/100
→ [0.4] Example 15/100
→ [0.4] Example 16/100
→ [0.4] Example 17/100
→ [0.4] Example 18/100
→ [0.4] Example 19/100
→ [0.4] Example 20/100
→ [0.4] Example 21/100
→ [0.4] Example 22/100
→ [0.4] Example 23/100
→ [0.4] Example 24/100
→ [0.4] Example 25/100
→ [0.4] Example 26/100
→ [0.4] Example 27/100
→ [0.4] Example 28/100
→ [0.4] Example 29/100
→ [0.4] Example 30/100
→ [0.4] Example 31/100
→ [0.4] Example 32/100
→ [0.4] Example 33/100
→ [0.4] Example 34/100
→ [0.4] Example 35/100
→ [0.4] Example 36/100
→ [0.4] Example 37/100
→ [0.4] Example 38/100
→ [0.4] Example 39/100
→ [0.4] Example 40/100
→ [0.4] Example 41/100
→ [0.4] Example 42/100
→