In [None]:
%%capture
# Install dependencies
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29 peft trl triton
!pip install --no-deps cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
!pip install --no-deps unsloth

In [None]:
import os
import json
import pandas as pd
import time
import torch

# --- Unsloth model loading code ---
from unsloth import FastLanguageModel
from transformers import TextStreamer

# Mount Google Drive and set working directory
# NOTE(review): `google.colab` is presumably only importable inside a Colab runtime, so this
# cell ties the notebook to Colab.
from google.colab import drive
# Makes the user's Drive visible under /content/drive (the cell output reports the mount point).
drive.mount('/content/drive')
# Base folder for inputs; the data-loading cell joins its relative "Data/..." paths onto it.
cwd = '/content/drive/MyDrive'

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


    PyTorch 2.5.1+cu121 with CUDA 1201 (you have 2.6.0+cu124)
    Python  3.11.11 (you have 3.11.11)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


🦥 Unsloth Zoo will now patch everything to make training faster!
Mounted at /content/drive


In [None]:
# Model configuration
pretrained_model_name = 'weber50432/lora-Llama-3.2-3B-Instruct'
max_seq_length = 2048  # Adjust as needed; unsloth supports RoPE Scaling internally
dtype = None           # Use None for auto-detection (or set to Float16/BFloat16 as needed)
load_in_4bit = True    # 4-bit quantization to reduce memory usage
seed = 3407            # Arbitrary fixed value; change it to draw a different set of samples

# Every generation cell below samples (do_sample=True). Seeding once here makes a
# Restart & Run All on the same runtime repeatable (up to GPU nondeterminism).
torch.manual_seed(seed)

# Load the pretrained model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=pretrained_model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# The inference cells tokenize whole batches with padding=True. A decoder-only model continues
# from the last position of each row, so pad tokens must sit on the left; set this explicitly
# instead of relying on whatever default the loader leaves behind.
tokenizer.padding_side = "left"

# Switch the model to Unsloth's inference mode. The trailing semicolon stops the notebook from
# echoing the full module tree of the returned model as cell output.
FastLanguageModel.for_inference(model);

==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.50.0.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json:   0%|          | 0.00/21.9k [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.37G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.06G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

weber50432/lora-Llama-3.2-3B-Instruct does not have a padding token! Will use pad_token = <|finetune_right_pad_id|>.


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 3072, padding_idx=128004)
    (layers): ModuleList(
      (0-27): 28 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (k_proj): Linear4bit(in_features=3072, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=3072, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=3072, out_features=8192, bias=False)
          (up_proj): Linear4bit(in_features=3072, out_features=8192, bias=False)
          (down_proj): Linear4bit(in_features=8192, out_features=3072, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm

In [None]:
def get_optimal_action(prompt_text):
    """
    Samples a short continuation for a single prompt and returns it as text.

    Uses the notebook-level `tokenizer` and `model`. At most 10 new tokens are sampled
    (nucleus sampling, top_p=0.95), stopping early at the tokenizer's EOS token.

    Args:
        prompt_text: The prompt string to send to the model.

    Returns:
        The first returned sequence decoded with special tokens removed.
        NOTE(review): the whole returned sequence is decoded, so for a decoder-only model the
        string presumably starts with the prompt itself - confirm before comparing it to labels.
    """
    encoded_prompt = tokenizer(prompt_text, return_tensors="pt").to(model.device)
    sampling_options = dict(
        max_new_tokens=10,
        do_sample=True,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Inference only: no gradients are needed.
    with torch.no_grad():
        sequences = model.generate(**encoded_prompt, **sampling_options)
    first_sequence = sequences[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

def parse_output(output_text):
    """
    Parses the model output to separate chain-of-thought reasoning and the final answer.

    The LAST "Final Answer:" marker is the one that counts. The prompts in this notebook
    themselves contain the marker (in the instruction sentence and in few-shot examples), so
    when the text still includes the prompt, the first occurrence is prompt text rather than
    the model's verdict.

    Args:
        output_text: Text produced by the model (optionally still prefixed by the prompt).

    Returns:
        A `(reasoning, final_answer)` tuple of strings.
        - Marker present: `reasoning` is everything before the last marker, stripped;
          `final_answer` is the first line after it, stripped, or "" when nothing follows
          the marker (e.g. generation was cut off right after it).
        - Marker absent: `(output_text, "")`, with the text returned unchanged.
    """
    marker = "Final Answer:"
    pieces = output_text.split(marker)
    if len(pieces) == 1:
        # No marker at all: treat the whole text as reasoning.
        return output_text, ""

    # Re-join everything before the last marker so earlier occurrences stay in the reasoning.
    reasoning = marker.join(pieces[:-1]).strip()
    # splitlines() on an empty string yields [], so guard before indexing.
    answer_lines = pieces[-1].strip().splitlines()
    final_answer = answer_lines[0].strip() if answer_lines else ""
    return reasoning, final_answer

# --- Data Preparation ---

# Both evaluation samples are JSON files stored under Data/ inside the mounted Drive folder.
postflop_path = os.path.join(cwd, "Data/postflop_100_sample.json")
preflop_path = os.path.join(cwd, "Data/preflop_100_sample.json")

with open(postflop_path, "r") as postflop_file:
    postflop_test_set = json.load(postflop_file)

with open(preflop_path, "r") as preflop_file:
    preflop_test_set = json.load(preflop_file)

# One evaluation list: postflop examples first, then preflop.
all_examples = postflop_test_set + preflop_test_set

In [None]:
# --- Processing the Examples ---
# Baseline run: each instruction is sent to the model unchanged, in batches, with a short
# generation budget. One row per example is written to a CSV.

# Initialise a list to store results
results = []

batch_size = 4
for i in range(0, len(all_examples), batch_size):
    batch = all_examples[i:i+batch_size]
    prompts = [ex["instruction"].strip() for ex in batch]
    print(f"Processing example {i}/{len(all_examples)}")
    inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=True,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id
        )

    # For a decoder-only model generate() returns the (padded) prompt tokens followed by the
    # new tokens. Decode only the new ones so the stored text and the parsed answer come from
    # the model rather than from the instruction that was fed in.
    prompt_length = inputs["input_ids"].shape[1]
    outputs = tokenizer.batch_decode(output_ids[:, prompt_length:], skip_special_tokens=True)
    for ex, output in zip(batch, outputs):
        results.append({
            "instruction": ex["instruction"],
            "ground_truth": ex["output"],
            # Full generated text is kept; only the parsed answer is split out.
            "reasoning": output,
            "final_answer": parse_output(output)[1]
        })

# --- Saving Results ---

# Create a DataFrame from the results and save to CSV
# (relative path: the file lands in the runtime's current directory, not on Drive).
df = pd.DataFrame(results)
csv_filename = "Poker_Llama-32_base.csv"
df.to_csv(csv_filename, index=False)
print(f"Saved outputs to {csv_filename}")

Processing example 0/200
Processing example 4/200
Processing example 8/200
Processing example 12/200
Processing example 16/200
Processing example 20/200
Processing example 24/200
Processing example 28/200
Processing example 32/200
Processing example 36/200
Processing example 40/200
Processing example 44/200
Processing example 48/200
Processing example 52/200
Processing example 56/200
Processing example 60/200
Processing example 64/200
Processing example 68/200
Processing example 72/200
Processing example 76/200
Processing example 80/200
Processing example 84/200
Processing example 88/200
Processing example 92/200
Processing example 96/200
Processing example 100/200
Processing example 104/200
Processing example 108/200
Processing example 112/200
Processing example 116/200
Processing example 120/200
Processing example 124/200
Processing example 128/200
Processing example 132/200
Processing example 136/200
Processing example 140/200
Processing example 144/200
Processing example 148/200
Pr

In [None]:
# --- Extra Instructions for the Model ---

# Prompt variant "unconstrained": a role statement plus a request for step-by-step reasoning
# that ends in a single "Final Answer:" sentence. No reasoning steps or worked example.
# NOTE(review): the literal itself contains the text "Final Answer:", so any parsing done on
# prompt + completion will encounter the marker inside the prompt first.
extra_instruction = '''
You are an expert in 6-handed No Limit Texas Holdem. Your job is to analyze a game scenario and decide on the optimal action.

Think through your answer step-by-step and then output exactly one sentence starting with "Final Answer:"
'''

# --- Processing the Examples ---
# Each prompt is `extra_instruction` followed by the example's instruction, with the
# "Do not explain your answer." sentence removed so the model is free to reason.

# Initialise a list to store results
results = []

batch_size = 8
for i in range(0, len(all_examples), batch_size):
    batch = all_examples[i:i+batch_size]
    prompts = [extra_instruction + "\n" + ex["instruction"].replace("Do not explain your answer.", "").strip() for ex in batch]
    print(f"Processing example {i}/{len(all_examples)}")
    inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=1024,
            do_sample=True,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id
        )

    # For a decoder-only model generate() returns the (padded) prompt tokens followed by the
    # new tokens. The prompt contains the text "Final Answer:" itself, so decoding the whole
    # sequence makes the parser pick up prompt text. Decode only the newly generated tokens.
    prompt_length = inputs["input_ids"].shape[1]
    outputs = tokenizer.batch_decode(output_ids[:, prompt_length:], skip_special_tokens=True)
    for ex, output in zip(batch, outputs):
        results.append({
            "instruction": ex["instruction"],
            "ground_truth": ex["output"],
            # Full generated text is kept; only the parsed answer is split out.
            "reasoning": output,
            "final_answer": parse_output(output)[1]
        })

# --- Saving Results ---

# Create a DataFrame from the results and save to CSV
# (relative path: the file lands in the runtime's current directory, not on Drive).
df = pd.DataFrame(results)
csv_filename = "Poker_Llama-32_unconstrained.csv"
df.to_csv(csv_filename, index=False)
print(f"Saved outputs to {csv_filename}")

Processing example 0/200
Processing example 8/200
Processing example 16/200
Processing example 24/200
Processing example 32/200
Processing example 40/200
Processing example 48/200
Processing example 56/200
Processing example 64/200
Processing example 72/200
Processing example 80/200
Processing example 88/200
Processing example 96/200
Processing example 104/200
Processing example 112/200
Processing example 120/200
Processing example 128/200
Processing example 136/200
Processing example 144/200
Processing example 152/200
Processing example 160/200
Processing example 168/200
Processing example 176/200
Processing example 184/200
Processing example 192/200
Saved outputs to Poker_Llama-32_unconstrained.csv


In [None]:
# --- Extra Instructions for the Model ---

# Prompt variant "structure": the role statement followed by a fixed seven-step reasoning
# checklist; step 7 asks for a single sentence starting with “Final Answer:”. No worked example.
# NOTE(review): the literal contains the text "Final Answer:" (inside curly quotes), so any
# parsing done on prompt + completion will encounter the marker inside the prompt first.
extra_instruction = '''
You are an expert in 6-handed No Limit Texas Holdem. Your job is to analyze a game scenario and decide on the optimal action. Follow these steps exactly:

1. **Assess your own hand strength:** Evaluate your hand in context—consider its absolute value (e.g., pocket aces versus a speculative connector) as well as its relative strength given the situation. Understand whether your hand is a premium holding or a drawing hand.
2. **Analyze Opponents’ Tendencies and Positions:** Observe opponents’ behaviors, betting patterns, and positions at the table. Elite players take note of who is aggressive, passive, or prone to bluffing. This helps in estimating the range of hands an opponent might be holding.
3. **Consider Board Texture and Community Cards:** Evaluate the board’s potential: assess how it interacts with both your hand and the likely ranges of your opponents. Look for potential draws, dangerous coordinated cards, and how the community cards could change the dynamics of the hand.
4. **Calculate Pot Odds and Implied Odds:** Compare the current size of the pot to the cost of calling. This mathematical analysis—balancing the risk with the potential reward—guides whether continuing in the hand is justified, especially when drawing.
5. **Estimate Opponents’ Ranges:** Based on the betting action and the observed tendencies, narrow down the likely range of hands your opponents could have. Consider both strong hands and potential bluffs, adjusting your strategy accordingly.
6. **Evaluate Risk versus Reward (Game Theory Considerations):** Reflect on the risk of losing additional chips against the potential reward if you win. Factor in tournament dynamics or cash game implications, your table image, and future strategic positioning.
7. **Optimal Action:** Synthesize all the gathered information—your hand strength, opponent reads, board texture, pot odds, and risk assessment—to choose the optimal move (bet, raise, call, or fold). Output exactly one sentence starting with “Final Answer:”
'''

# --- Processing the Examples ---
# Each prompt is `extra_instruction` followed by the example's instruction, with the
# "Do not explain your answer." sentence removed so the model is free to reason.
batch_size = 8

# Initialise a list to store results
results = []

for i in range(0, len(all_examples), batch_size):
    batch = all_examples[i:i+batch_size]
    prompts = [extra_instruction + "\n" + ex["instruction"].replace("Do not explain your answer.", "").strip() for ex in batch]
    print(f"Processing example {i}/{len(all_examples)}")
    inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=1024,
            do_sample=True,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id
        )

    # For a decoder-only model generate() returns the (padded) prompt tokens followed by the
    # new tokens. The prompt contains the text "Final Answer:" itself, so decoding the whole
    # sequence makes the parser pick up prompt text. Decode only the newly generated tokens.
    prompt_length = inputs["input_ids"].shape[1]
    outputs = tokenizer.batch_decode(output_ids[:, prompt_length:], skip_special_tokens=True)
    for ex, output in zip(batch, outputs):
        results.append({
            "instruction": ex["instruction"],
            "ground_truth": ex["output"],
            # Full generated text is kept; only the parsed answer is split out.
            "reasoning": output,
            "final_answer": parse_output(output)[1]
        })

# --- Saving Results ---

# Create a DataFrame from the results and save to CSV
# (relative path: the file lands in the runtime's current directory, not on Drive).
df = pd.DataFrame(results)
# NOTE(review): "sctructure" looks like a typo for "structure"; the name is kept as-is because
# other code may already read the file under this name.
csv_filename = "Poker_Llama-32_unconstrained_sctructure.csv"
df.to_csv(csv_filename, index=False)
print(f"Saved outputs to {csv_filename}")

Processing example 0/200
Processing example 8/200
Processing example 16/200
Processing example 24/200
Processing example 32/200
Processing example 40/200
Processing example 48/200
Processing example 56/200
Processing example 64/200
Processing example 72/200
Processing example 80/200
Processing example 88/200
Processing example 96/200
Processing example 104/200
Processing example 112/200
Processing example 120/200
Processing example 128/200
Processing example 136/200
Processing example 144/200
Processing example 152/200
Processing example 160/200
Processing example 168/200
Processing example 176/200
Processing example 184/200
Processing example 192/200
Saved outputs to Poker_Llama-32_unconstrained_sctructure.csv


In [None]:
# --- Extra Instructions for the Model ---

# Prompt variant "example": the role statement, a request for step-by-step reasoning, and one
# worked scenario that ends directly in "Final Answer: check" (no reasoning is shown for it).
# NOTE(review): the literal contains "Final Answer:" twice (instruction sentence and example),
# so any parsing done on prompt + completion will encounter the marker inside the prompt first.
# NOTE(review): the example says "two pair: Kings and Threes", but King-Three on a
# Ten-Three-Two-Five board is a single pair of threes - confirm whether this wording is copied
# from the dataset before correcting it.
extra_instruction = '''
You are an expert in 6-handed No Limit Texas Holdem. Your job is to analyze a game scenario and decide on the optimal action.

Think through your answer step-by-step and then output exactly one sentence starting with “Final Answer:”

-------
Here is a game scenario example:

**Game Scenario:**

The small blind is 0.5 chips and the big blind is 1 chip. All players started with 100 chips.

In this hand, your position is BTN, and your holding is [King of Heart and Three of Heart].

Preflop, BTN raises to 2.5 chips, and BB calls. All other players folded.

The flop comes: Ten of Hearts, Three of Spades, Two of Diamonds. BB leads with a 4-chip bet, and BTN calls.

The turn is the Five of Diamonds. BB checks.

You currently hold two pair: Kings and Threes with a Ten kicker.

The pot size is now 13.0 chips.

You are BTN and it's your action.

Final Answer: check
'''

# --- Processing the Examples ---
# Each prompt is `extra_instruction` followed by the example's instruction, with the
# "Do not explain your answer." sentence removed so the model is free to reason.

# Initialise a list to store results
results = []

# Set explicitly so this cell does not depend on a value left behind by whichever cell
# happened to run before it.
batch_size = 8
for i in range(0, len(all_examples), batch_size):
    batch = all_examples[i:i+batch_size]
    prompts = [extra_instruction + "\n" + ex["instruction"].replace("Do not explain your answer.", "").strip() for ex in batch]
    print(f"Processing example {i}/{len(all_examples)}")
    inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=1024,
            do_sample=True,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id
        )

    # For a decoder-only model generate() returns the (padded) prompt tokens followed by the
    # new tokens. The prompt contains "Final Answer:" (including the example's
    # "Final Answer: check"), so decoding the whole sequence makes the parser pick up prompt
    # text. Decode only the newly generated tokens.
    prompt_length = inputs["input_ids"].shape[1]
    outputs = tokenizer.batch_decode(output_ids[:, prompt_length:], skip_special_tokens=True)
    for ex, output in zip(batch, outputs):
        results.append({
            "instruction": ex["instruction"],
            "ground_truth": ex["output"],
            # Full generated text is kept; only the parsed answer is split out.
            "reasoning": output,
            "final_answer": parse_output(output)[1]
        })

# --- Saving Results ---

# Create a DataFrame from the results and save to CSV
# (relative path: the file lands in the runtime's current directory, not on Drive).
df = pd.DataFrame(results)
csv_filename = "Poker_Llama-32_unconstrained_example.csv"
df.to_csv(csv_filename, index=False)
print(f"Saved outputs to {csv_filename}")

Processing example 0/200
Processing example 8/200
Processing example 16/200
Processing example 24/200
Processing example 32/200
Processing example 40/200
Processing example 48/200
Processing example 56/200
Processing example 64/200
Processing example 72/200
Processing example 80/200
Processing example 88/200
Processing example 96/200
Processing example 104/200
Processing example 112/200
Processing example 120/200
Processing example 128/200
Processing example 136/200
Processing example 144/200
Processing example 152/200
Processing example 160/200
Processing example 168/200
Processing example 176/200
Processing example 184/200
Processing example 192/200
Saved outputs to Poker_Llama-32_unconstrained_example.csv


In [None]:
# --- Extra Instructions for the Model ---

# Prompt variant "structure_example_1": the seven-step reasoning checklist followed by one
# worked scenario that ends directly in "Final Answer: check" (no reasoning is shown for it).
# NOTE(review): the literal contains "Final Answer:" twice (step 7 and the example), so any
# parsing done on prompt + completion will encounter the marker inside the prompt first.
# NOTE(review): the example says "two pair: Kings and Threes", but King-Three on a
# Ten-Three-Two-Five board is a single pair of threes - confirm whether this wording is copied
# from the dataset before correcting it.
extra_instruction = '''
You are an expert in 6-handed No Limit Texas Holdem. Your job is to analyze a game scenario and decide on the optimal action. Follow these steps exactly:

1. **Assess your own hand strength:** Evaluate your hand in context—consider its absolute value (e.g., pocket aces versus a speculative connector) as well as its relative strength given the situation. Understand whether your hand is a premium holding or a drawing hand.
2. **Analyze Opponents’ Tendencies and Positions:** Observe opponents’ behaviors, betting patterns, and positions at the table. Elite players take note of who is aggressive, passive, or prone to bluffing. This helps in estimating the range of hands an opponent might be holding.
3. **Consider Board Texture and Community Cards:** Evaluate the board’s potential: assess how it interacts with both your hand and the likely ranges of your opponents. Look for potential draws, dangerous coordinated cards, and how the community cards could change the dynamics of the hand.
4. **Calculate Pot Odds and Implied Odds:** Compare the current size of the pot to the cost of calling. This mathematical analysis—balancing the risk with the potential reward—guides whether continuing in the hand is justified, especially when drawing.
5. **Estimate Opponents’ Ranges:** Based on the betting action and the observed tendencies, narrow down the likely range of hands your opponents could have. Consider both strong hands and potential bluffs, adjusting your strategy accordingly.
6. **Evaluate Risk versus Reward (Game Theory Considerations):** Reflect on the risk of losing additional chips against the potential reward if you win. Factor in tournament dynamics or cash game implications, your table image, and future strategic positioning.
7. **Optimal Action:** Synthesize all the gathered information—your hand strength, opponent reads, board texture, pot odds, and risk assessment—to choose the optimal move (bet, raise, call, or fold). Output exactly one sentence starting with “Final Answer:”

---

**Game Scenario:**

The small blind is 0.5 chips and the big blind is 1 chip. All players started with 100 chips.

In this hand, your position is BTN, and your holding is [King of Heart and Three of Heart].

Preflop, BTN raises to 2.5 chips, and BB calls. All other players folded.

The flop comes: Ten of Hearts, Three of Spades, Two of Diamonds. BB leads with a 4-chip bet, and BTN calls.

The turn is the Five of Diamonds. BB checks.

You currently hold two pair: Kings and Threes with a Ten kicker.

The pot size is now 13.0 chips.

You are BTN and it's your action.

Final Answer: check
'''

# --- Processing the Examples ---
# Each prompt is `extra_instruction` followed by the example's instruction, with the
# "Do not explain your answer." sentence removed so the model is free to reason.

# Initialise a list to store results
results = []
batch_size = 8
for i in range(0, len(all_examples), batch_size):
    batch = all_examples[i:i+batch_size]
    prompts = [extra_instruction + "\n" + ex["instruction"].replace("Do not explain your answer.", "").strip() for ex in batch]
    print(f"Processing example {i}/{len(all_examples)}")
    inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=1024,
            do_sample=True,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id
        )

    # For a decoder-only model generate() returns the (padded) prompt tokens followed by the
    # new tokens. The prompt contains "Final Answer:" (including the example's
    # "Final Answer: check"), so decoding the whole sequence makes the parser pick up prompt
    # text. Decode only the newly generated tokens.
    prompt_length = inputs["input_ids"].shape[1]
    outputs = tokenizer.batch_decode(output_ids[:, prompt_length:], skip_special_tokens=True)
    for ex, output in zip(batch, outputs):
        results.append({
            "instruction": ex["instruction"],
            "ground_truth": ex["output"],
            # Full generated text is kept; only the parsed answer is split out.
            "reasoning": output,
            "final_answer": parse_output(output)[1]
        })

# --- Saving Results ---

# Create a DataFrame from the results and save to CSV
# (relative path: the file lands in the runtime's current directory, not on Drive).
df = pd.DataFrame(results)
csv_filename = "Poker_Llama-32_unconstrained_structure_example_1.csv"
df.to_csv(csv_filename, index=False)
print(f"Saved outputs to {csv_filename}")

Processing example 0/200
Processing example 8/200
Processing example 16/200
Processing example 24/200
Processing example 32/200
Processing example 40/200
Processing example 48/200
Processing example 56/200
Processing example 64/200
Processing example 72/200
Processing example 80/200
Processing example 88/200
Processing example 96/200
Processing example 104/200
Processing example 112/200
Processing example 120/200
Processing example 128/200
Processing example 136/200
Processing example 144/200
Processing example 152/200
Processing example 160/200
Processing example 168/200
Processing example 176/200
Processing example 184/200
Processing example 192/200
Saved outputs to Poker_Llama-32_unconstrained_structure_example_1.csv


In [None]:
# --- Extra Instructions for the Model ---

# Prompt variant "structure_example_2": the seven-step reasoning checklist followed by one
# worked scenario that walks through all seven steps and ends in "Final Answer: bet".
# NOTE(review): the literal contains "Final Answer:" twice (step 7 and the example), so any
# parsing done on prompt + completion will encounter the marker inside the prompt first.
extra_instruction = '''
You are an expert in 6-handed No Limit Texas Holdem. Your job is to analyze a game scenario and decide on the optimal action. Follow these steps exactly:

1. **Assess your own hand strength:** Evaluate your hand in context—consider its absolute value (e.g., pocket aces versus a speculative connector) as well as its relative strength given the situation. Understand whether your hand is a premium holding or a drawing hand.
2. **Analyze Opponents’ Tendencies and Positions:** Observe opponents’ behaviors, betting patterns, and positions at the table. Elite players take note of who is aggressive, passive, or prone to bluffing. This helps in estimating the range of hands an opponent might be holding.
3. **Consider Board Texture and Community Cards:** Evaluate the board’s potential: assess how it interacts with both your hand and the likely ranges of your opponents. Look for potential draws, dangerous coordinated cards, and how the community cards could change the dynamics of the hand.
4. **Calculate Pot Odds and Implied Odds:** Compare the current size of the pot to the cost of calling. This mathematical analysis—balancing the risk with the potential reward—guides whether continuing in the hand is justified, especially when drawing.
5. **Estimate Opponents’ Ranges:** Based on the betting action and the observed tendencies, narrow down the likely range of hands your opponents could have. Consider both strong hands and potential bluffs, adjusting your strategy accordingly.
6. **Evaluate Risk versus Reward (Game Theory Considerations):** Reflect on the risk of losing additional chips against the potential reward if you win. Factor in tournament dynamics or cash game implications, your table image, and future strategic positioning.
7. **Optimal Action:** Synthesize all the gathered information—your hand strength, opponent reads, board texture, pot odds, and risk assessment—to choose the optimal move (bet, raise, call, or fold). Output exactly one sentence starting with “Final Answer:”

---

**Game Scenario:**

The blinds are 0.5/1 chip. Everyone started with 100 chips.

You are in the Hijack (HJ) position holding [Ace of Spades and Jack of Spades].

Preflop, HJ raises to 2.5 chips. Only the Big Blind (BB) calls.

The flop comes: Nine of Spades, Four of Hearts, Two of Spades.

BB checks, HJ bets 2 chips, BB calls.

The turn is the Seven of Diamonds. BB checks again.

The current pot is 9 chips.

You hold two overcards and a nut flush draw with [As Js].

---

**Step-by-step Reasoning:**

1. **Hand Strength:** We have two overcards to the board plus the nut flush draw, giving us strong equity even if behind.
2. **Opponent Behavior:** The BB has called twice and checked twice, indicating a likely marginal made hand or a draw—perhaps a weak nine, a pocket pair, or a spade draw.
3. **Board Texture:** The board is relatively dry with some draw potential. The presence of two spades enhances our implied odds.
4. **Pot Odds:** The pot is 9 chips. A bet of 6 chips or less gives favorable odds for our draw. If we choose to bet, we can apply fold equity too.
5. **Opponent Range:** Likely hands include A9, 88, 66, T9, or suited connectors like 65s or T8s. We may fold out better hands by applying pressure.
6. **Risk vs Reward:** A semi-bluff here has high fold equity and equity when called. We can credibly represent overpairs or strong made hands.
7. **Optimal Action:** This is an ideal spot for a turn semi-bluff. We apply pressure and build the pot for when we hit the flush, while folding out weak pairs or draws.

Final Answer: bet
'''

# --- Processing the Examples ---
# Each prompt is `extra_instruction` followed by the example's instruction, with the
# "Do not explain your answer." sentence removed so the model is free to reason.

# Initialise a list to store results
results = []
batch_size = 8
for i in range(0, len(all_examples), batch_size):
    batch = all_examples[i:i+batch_size]
    prompts = [extra_instruction + "\n" + ex["instruction"].replace("Do not explain your answer.", "").strip() for ex in batch]
    print(f"Processing example {i}/{len(all_examples)}")
    inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=1024,
            do_sample=True,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id
        )

    # For a decoder-only model generate() returns the (padded) prompt tokens followed by the
    # new tokens. The prompt contains "Final Answer:" (including the example's
    # "Final Answer: bet"), so decoding the whole sequence makes the parser pick up prompt
    # text. Decode only the newly generated tokens.
    prompt_length = inputs["input_ids"].shape[1]
    outputs = tokenizer.batch_decode(output_ids[:, prompt_length:], skip_special_tokens=True)
    for ex, output in zip(batch, outputs):
        results.append({
            "instruction": ex["instruction"],
            "ground_truth": ex["output"],
            # Full generated text is kept; only the parsed answer is split out.
            "reasoning": output,
            "final_answer": parse_output(output)[1]
        })

# --- Saving Results ---

# Create a DataFrame from the results and save to CSV
# (relative path: the file lands in the runtime's current directory, not on Drive).
df = pd.DataFrame(results)
csv_filename = "Poker_Llama-32_unconstrained_structure_example_2.csv"
df.to_csv(csv_filename, index=False)
print(f"Saved outputs to {csv_filename}")

Processing example 0/200
Processing example 8/200
Processing example 16/200
Processing example 24/200
Processing example 32/200
Processing example 40/200
Processing example 48/200
Processing example 56/200
Processing example 64/200
Processing example 72/200
Processing example 80/200
Processing example 88/200
Processing example 96/200
Processing example 104/200
Processing example 112/200
Processing example 120/200
Processing example 128/200
Processing example 136/200
Processing example 144/200
Processing example 152/200
Processing example 160/200
Processing example 168/200
Processing example 176/200
Processing example 184/200
Processing example 192/200
Saved outputs to Poker_Llama-32_unconstrained_structure_example_2.csv
