In [None]:
import os
import json
import pandas as pd
import time

from huggingface_hub import InferenceClient

# Hugging Face API token, read from the HF_TOKEN environment variable so the
# secret never lives in the source file (export HF_TOKEN=... before running).
# When the variable is unset this is None and is handed to InferenceClient
# as-is; presumably huggingface_hub then falls back to a locally cached
# login -- confirm against the library documentation.
# NOTE: any token that was ever committed in plain text should be revoked and
# regenerated, since it remains visible in version-control history.
hf_token = os.environ.get("HF_TOKEN")

# ---------------------------------------------------------------------------
# Prompt fragments
#
# NOTE: the leading indentation and trailing whitespace inside these literals
# are part of the text that is sent to the model and written to the "prompt"
# CSV column. They are kept exactly as they were so that results stay
# comparable with earlier runs.
# ---------------------------------------------------------------------------

# Seconds to pause after every request.
_REQUEST_DELAY_SECONDS = 1

# Sentence removed from the dataset instruction in every chain-of-thought run.
_NO_EXPLANATION_SENTENCE = "Do not explain your answer."

# Marker that separates the model's reasoning from its final action.
_FINAL_ANSWER_MARKER = "Final Answer:"

# System message shared by every request.
_SYSTEM_MESSAGE = {
    "role": "system",
    "content": """
            You are an expert at Texas Holdem Poker. Output your optimal action to a game scenario with 'Final Answer:'.
        """
}

# Plain "think step-by-step" instruction (straight quotes around the marker).
_COT_INSTRUCTION = '''
    You are an expert in 6-handed No Limit Texas Holdem. Your job is to analyze a game scenario and decide on the optimal action.

    Think through your answer step-by-step and then output exactly one sentence starting with "Final Answer:"
    '''

# Seven-step reasoning scaffold shared by the three "structure" runs.
_STRUCTURED_STEPS = '''
    You are an expert in 6-handed No Limit Texas Holdem. Your job is to analyze a game scenario and decide on the optimal action. Follow these steps exactly:

    1. **Assess your own hand strength:** Evaluate your hand in context—consider its absolute value (e.g., pocket aces versus a speculative connector) as well as its relative strength given the situation. Understand whether your hand is a premium holding or a drawing hand.
    2. **Analyze Opponents’ Tendencies and Positions:** Observe opponents’ behaviors, betting patterns, and positions at the table. Elite players take note of who is aggressive, passive, or prone to bluffing. This helps in estimating the range of hands an opponent might be holding.
    3. **Consider Board Texture and Community Cards:** Evaluate the board’s potential: assess how it interacts with both your hand and the likely ranges of your opponents. Look for potential draws, dangerous coordinated cards, and how the community cards could change the dynamics of the hand.
    4. **Calculate Pot Odds and Implied Odds:** Compare the current size of the pot to the cost of calling. This mathematical analysis—balancing the risk with the potential reward—guides whether continuing in the hand is justified, especially when drawing.
    5. **Estimate Opponents’ Ranges:** Based on the betting action and the observed tendencies, narrow down the likely range of hands your opponents could have. Consider both strong hands and potential bluffs, adjusting your strategy accordingly.
    6. **Evaluate Risk versus Reward (Game Theory Considerations):** Reflect on the risk of losing additional chips against the potential reward if you win. Factor in tournament dynamics or cash game implications, your table image, and future strategic positioning.
    7. **Optimal Action:** Synthesize all the gathered information—your hand strength, opponent reads, board texture, pot odds, and risk assessment—to choose the optimal move (bet, raise, call, or fold). Output exactly one sentence starting with “Final Answer:”
'''

# Worked BTN scenario (answer only, no reasoning) used by two of the runs.
_BTN_WORKED_EXAMPLE = '''    **Game Scenario:**

    The small blind is 0.5 chips and the big blind is 1 chip. All players started with 100 chips.

    In this hand, your position is BTN, and your holding is [King of Heart and Three of Heart].

    Preflop, BTN raises to 2.5 chips, and BB calls. All other players folded.

    The flop comes: Ten of Hearts, Three of Spades, Two of Diamonds. BB leads with a 4-chip bet, and BTN calls.

    The turn is the Five of Diamonds. BB checks.

    You currently hold two pair: Kings and Threes with a Ten kicker.

    The pot size is now 13.0 chips.

    You are BTN and it's your action.

    Final Answer: check
    '''

# Scaffold only.
_STRUCTURE_INSTRUCTION = _STRUCTURED_STEPS + "    "

# Step-by-step instruction followed by the BTN example.
_EXAMPLE_INSTRUCTION = (
    # The first prompt line ends with a trailing space; it is written with
    # explicit escapes so that an editor cannot strip it silently.
    "\n    You are an expert in 6-handed No Limit Texas Holdem. Your job is to analyze a game scenario and decide on the optimal action. \n"
    '''
    Think through your answer step-by-step and then output exactly one sentence starting with “Final Answer:”

    -------
    Here is a game scenario example:

'''
    + _BTN_WORKED_EXAMPLE
)

# Scaffold followed by the BTN example.
_STRUCTURE_EXAMPLE_1_INSTRUCTION = _STRUCTURED_STEPS + '''
    ---

''' + _BTN_WORKED_EXAMPLE

# Scaffold followed by an HJ example that also demonstrates the seven steps.
_STRUCTURE_EXAMPLE_2_INSTRUCTION = _STRUCTURED_STEPS + '''
    ---

    **Game Scenario:**

    The blinds are 0.5/1 chip. Everyone started with 100 chips.

    You are in the Hijack (HJ) position holding [Ace of Spades and Jack of Spades].

    Preflop, HJ raises to 2.5 chips. Only the Big Blind (BB) calls.

    The flop comes: Nine of Spades, Four of Hearts, Two of Spades.

    BB checks, HJ bets 2 chips, BB calls.

    The turn is the Seven of Diamonds. BB checks again.

    The current pot is 9 chips.

    You hold two overcards and a nut flush draw with [As Js].

    ---

    **Step-by-step Reasoning:**

    1. **Hand Strength:** Ace-high with a strong draw — two overcards and the nut flush draw — excellent equity but no showdown value yet.
    2. **Opponent Behavior:** BB has check-called twice. Likely holding a medium-strength hand, such as a weak 9x or pocket 5s–8s.
    3. **Board Texture:** The turn (7♦) doesn’t change much. We’re unlikely to push out hands that beat us, and our bet may fold out only hands we already beat.
    4. **Pot Odds:** Our hand has excellent equity. A check lets us see the river for free while keeping pot control.
    5. **Opponent Range:** Their range likely contains marginal made hands or spade draws. A bet here won’t fold out most of that range.
    6. **Risk vs Reward:** Betting could build a big pot with only ace-high and limited fold equity. A check lets us realize our equity efficiently.
    7. **Optimal Action:** Checking is the best line. It controls pot size, preserves our draw, and gives us a chance to reevaluate on the river.

    Final Answer: check
    '''

# One entry per evaluation run, executed in this order:
#   (CSV file-name suffix, instruction prefix or None, prefix/scenario separator)
# A prefix of None means the dataset instruction is sent untouched.
_RUNS = (
    ("base", None, ""),
    ("unconstrained", _COT_INSTRUCTION, "\n"),
    ("unconstrained_structure", _STRUCTURE_INSTRUCTION, ""),
    ("unconstrained_example", _EXAMPLE_INSTRUCTION, ""),
    ("unconstrained_structure_example_1", _STRUCTURE_EXAMPLE_1_INSTRUCTION, ""),
    ("unconstrained_structure_example_2", _STRUCTURE_EXAMPLE_2_INSTRUCTION, ""),
)


def _parse_output(output_text):
    """Split a model response into ``(reasoning, final_answer)``.

    ``reasoning`` is the stripped text before the first "Final Answer:"
    marker and ``final_answer`` is the first line of the text that follows it.
    Without a marker the whole response is the reasoning and the answer is "".
    A marker with nothing after it also yields "" instead of raising
    ``IndexError``.
    """
    if _FINAL_ANSWER_MARKER not in output_text:
        return output_text, ""
    parts = output_text.split(_FINAL_ANSWER_MARKER)
    answer_lines = parts[1].strip().splitlines()
    return parts[0].strip(), (answer_lines[0] if answer_lines else "")


def _build_prompt(instruction, prefix, separator):
    """Return the user prompt for one example of one run."""
    if prefix is None:
        return instruction
    scenario = instruction.replace(_NO_EXPLANATION_SENTENCE, "").strip()
    return prefix + separator + scenario


def _load_examples(directory):
    """Load the post-flop and pre-flop test sets (in that order) as one list."""
    examples = []
    for filename in ("Postflop 100 Sample.json", "Preflop 100 Sample.json"):
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(os.path.join(directory, filename), "r", encoding="utf-8") as f:
            examples += json.load(f)
    return examples


def _evaluate(ask, examples, prefix, separator):
    """Query the model once per example and return the list of result rows."""
    results = []
    total = len(examples)
    for idx, example in enumerate(examples):
        instruction = example.get("instruction", "")
        prompt_text = _build_prompt(instruction, prefix, separator)

        print(f"Processing example {idx+1}/{total}")

        try:
            output = ask(prompt_text)
            _, final_answer = _parse_output(output)
        except Exception as e:
            # Best effort: one failed request must not abort the whole run.
            print(f"Error processing example {idx+1}: {e}")
            # Reset `output` too; otherwise the row would carry the previous
            # example's response (or raise NameError on the first example).
            output, final_answer = "Error", "Error"

        results.append({
            "instruction": instruction,
            "prompt": prompt_text,
            "ground_truth": example.get("output", ""),
            # This column holds the complete raw response, not only the text
            # that precedes the "Final Answer:" marker.
            "reasoning": output,
            "final_answer": final_answer,
        })

        time.sleep(_REQUEST_DELAY_SECONDS)
    return results


def main(model):
    """Evaluate one Hugging Face chat model on the poker test sets.

    Reads "Postflop 100 Sample.json" and "Preflop 100 Sample.json" from the
    current working directory, then runs every prompting variant listed in
    ``_RUNS`` over all examples. Each variant is written to its own file,
    ``Poker_<model_name>_<suffix>.csv``, in the current working directory.

    Args:
        model: Model repository id, e.g. "meta-llama/Llama-3.1-8B-Instruct".
            The part after the last "/" with dots removed is used in the
            output file names.

    Raises:
        OSError / json.JSONDecodeError: if a test-set file is missing or
            malformed. Failures of individual requests are logged and recorded
            as "Error" rows instead of being raised.
    """
    model_name = model.split("/")[-1].replace(".", "")

    # One client for the whole evaluation instead of one per request.
    client_hf = InferenceClient(api_key=hf_token)

    def ask(text, temperature=0.0):
        """Send one user prompt (with the system message) and return the reply text."""
        response = client_hf.chat_completion(
            model=model,
            messages=[_SYSTEM_MESSAGE, {"role": "user", "content": text}],
            max_tokens=1024,
            temperature=temperature,
        )
        return response.choices[0].message.content

    all_examples = _load_examples(os.getcwd())

    for suffix, prefix, separator in _RUNS:
        results = _evaluate(ask, all_examples, prefix, separator)

        csv_filename = f"Poker_{model_name}_{suffix}.csv"
        pd.DataFrame(results).to_csv(csv_filename, index=False)
        print(f"Saved outputs to {csv_filename}")

In [None]:
# Hugging Face Hub repository ids of the chat models to evaluate, in run order.
models = ["meta-llama/Llama-3.1-8B-Instruct", "meta-llama/Llama-3.2-3B-Instruct"]

# Launch the evaluation only when this file is executed directly; each model
# is processed in turn by main().
if __name__ == "__main__":
    for model in models:
        main(model)

Processing example 1/200
Processing example 2/200
Processing example 3/200
Processing example 4/200
Processing example 5/200
Processing example 6/200
Processing example 7/200
Processing example 8/200
Processing example 9/200
Processing example 10/200
Processing example 11/200
Processing example 12/200
Processing example 13/200
Processing example 14/200
Processing example 15/200
Processing example 16/200
Processing example 17/200
Processing example 18/200
Processing example 19/200
Processing example 20/200
Processing example 21/200
Processing example 22/200
Processing example 23/200
Processing example 24/200
Processing example 25/200
Processing example 26/200
Processing example 27/200
Processing example 28/200
Processing example 29/200
Processing example 30/200
Processing example 31/200
Processing example 32/200
Processing example 33/200
Processing example 34/200
Processing example 35/200
Processing example 36/200
Processing example 37/200
Processing example 38/200
Processing example 39

KeyboardInterrupt: 