In [None]:
import pandas as pd
import os
import re
from glob import glob

# Poker actions the parser recognises, listed in lookup-priority order
valid_actions = [
    "raise",
    "call",
    "check",
    "fold",
    "bet",
]


# Parse a poker action out of a free-text answer
def extract_action(text):
    """Return the first entry of ``valid_actions`` found in ``text``.

    Matching is case-insensitive and anchored on word boundaries, so "Raise"
    matches while "raises" or "calling" do not. Candidates are tried in the
    order they appear in ``valid_actions``; when the text mentions several
    actions, the one listed earliest wins regardless of where it occurs in
    the text.

    Args:
        text: The raw answer. Anything that is not a ``str`` (e.g. ``None`` or
            a float NaN coming from pandas) is treated as unparsable.

    Returns:
        The matched action string, or ``None`` when nothing matches.
    """
    if not isinstance(text, str):
        return None

    # Lower-case once up front instead of once per candidate action.
    lowered = text.lower()
    return next(
        (action for action in valid_actions if re.search(rf'\b{action}\b', lowered)),
        None,
    )

# Evaluation function
def evaluate_file_actions(file_path):
    """Print how often each valid action is parsed from a CSV's "action" column.

    The file is split by row position: the first ``len(df) // 2`` rows are
    reported under the label "Postflop" and the remaining rows under
    "Preflop" (with an odd row count the extra row falls into "Preflop").
    NOTE(review): this assumes the file is laid out postflop-first in two
    equally sized halves -- confirm against whatever writes these files.

    Args:
        file_path: Path to a CSV file that contains an "action" column.

    Returns:
        None. All results are written to stdout.
    """
    df = pd.read_csv(file_path)
    midpoint = len(df) // 2

    def analyze_actions(subset_df, label):
        """Print per-action and unparsable shares for one slice of the file."""
        parsed_actions = subset_df["action"].apply(extract_action)
        total = len(subset_df)

        def share_of_rows(n):
            # An empty slice reports 0 rather than dividing by zero.
            if total > 0:
                return n / total
            return 0

        print(f"\n {label} Analysis ({total} rows):")
        for action in valid_actions:
            count = (parsed_actions == action).sum()
            pct = share_of_rows(count)
            print(f"   • '{action}' found: {pct:.2%} ({count})")

        # extract_action yields None for answers it cannot parse; those show
        # up as missing values in the parsed series.
        unparsable_count = parsed_actions.isna().sum()
        unparsable_pct = share_of_rows(unparsable_count)
        print(f"   • Unparsable answers: {unparsable_pct:.2%} ({unparsable_count})")

    print(f"\n Evaluating file: {os.path.basename(file_path)}")
    analyze_actions(df.iloc[:midpoint], "Postflop")
    analyze_actions(df.iloc[midpoint:], "Preflop")

# Collect the result files matching the Budget Forcing pattern, in sorted order
csv_files = glob("Experiments/processed_final/Budget Forcing/Poker_*_*.csv")
csv_files.sort()

# Print the parsed-action statistics for every file found
for csv_file in csv_files:
    evaluate_file_actions(csv_file)


📂 Evaluating file: Poker_GPT4_budget_1024.csv

🔍 Postflop Analysis (200 rows):
   • 'raise' found: 14.50% (29)
   • 'call' found: 21.50% (43)
   • 'check' found: 13.00% (26)
   • 'fold' found: 27.00% (54)
   • 'bet' found: 12.50% (25)
   • Unparsable answers: 11.50% (23)

🔍 Preflop Analysis (200 rows):
   • 'raise' found: 12.00% (24)
   • 'call' found: 21.00% (42)
   • 'check' found: 11.50% (23)
   • 'fold' found: 27.50% (55)
   • 'bet' found: 10.50% (21)
   • Unparsable answers: 17.50% (35)

📂 Evaluating file: Poker_GPT4_budget_2048.csv

🔍 Postflop Analysis (100 rows):
   • 'raise' found: 6.00% (6)
   • 'call' found: 11.00% (11)
   • 'check' found: 10.00% (10)
   • 'fold' found: 35.00% (35)
   • 'bet' found: 22.00% (22)
   • Unparsable answers: 16.00% (16)

🔍 Preflop Analysis (100 rows):
   • 'raise' found: 18.00% (18)
   • 'call' found: 24.00% (24)
   • 'check' found: 19.00% (19)
   • 'fold' found: 19.00% (19)
   • 'bet' found: 0.00% (0)
   • Unparsable answers: 20.00% (20)

📂 Evalu

In [10]:
import pandas as pd
import os
from glob import glob

# Define valid poker actions
valid_actions = ["raise", "call", "check", "fold", "bet"]

# Function to count ground_truth actions
def count_ground_truth_actions(file_path):
    """Print and return how often each valid action appears in "ground_truth".

    The CSV is split by row position: the first ``len(df) // 2`` rows are
    reported as "Postflop" and the remaining rows as "Preflop" (with an odd
    row count the extra row falls into "Preflop").
    NOTE(review): this assumes the file is laid out postflop-first in two
    equally sized halves -- confirm against whatever writes these files.

    Labels are compared case-insensitively and with surrounding whitespace
    ignored. Values that are not in ``valid_actions`` (including missing
    values) are left out of the counts.

    Args:
        file_path: Path to a CSV file that contains a "ground_truth" column.

    Returns:
        A ``(postflop_counts, preflop_counts)`` tuple. Each element is a dict
        mapping every entry of ``valid_actions`` to a plain ``int`` count.

    Raises:
        KeyError: If the CSV has no "ground_truth" column.
    """
    df = pd.read_csv(file_path)
    midpoint = len(df) // 2
    postflop_df = df.iloc[:midpoint]
    preflop_df = df.iloc[midpoint:]

    def count_actions(subset_df, label):
        """Print the per-action counts for one slice and return them as a dict."""
        # Cast to str before using the .str accessor: a column with no text in
        # it (e.g. entirely empty) is read as float NaN, and .str raises
        # AttributeError on non-string data. Stripping keeps padded labels
        # such as " fold " from being silently dropped.
        labels = subset_df["ground_truth"].astype(str).str.strip().str.lower()
        counts = labels.value_counts()
        # int(...) turns numpy scalars into plain ints for the returned dict.
        valid_counts = {action: int(counts.get(action, 0)) for action in valid_actions}
        total = sum(valid_counts.values())

        print(f"\n🔍 {label} Ground Truth Counts ({len(subset_df)} rows):")
        for action in valid_actions:
            print(f"   • {action:<5}: {valid_counts[action]}")
        print(f"   • Total valid actions: {total}")

        return valid_counts

    print(f"\n📂 Evaluating file: {os.path.basename(file_path)}")
    postflop_counts = count_actions(postflop_df, "Postflop")
    preflop_counts = count_actions(preflop_df, "Preflop")

    return postflop_counts, preflop_counts

# Collect the result files matching the CoT pattern, in sorted order
csv_files = glob("Experiments/processed_final/CoT/Poker_*_*.csv")
csv_files.sort()

# Print the ground-truth action counts for every file found
for csv_file in csv_files:
    count_ground_truth_actions(csv_file)


📂 Evaluating file: Poker_GPT4_base.csv

🔍 Postflop Ground Truth Counts (100 rows):
   • raise: 15
   • call : 25
   • check: 25
   • fold : 25
   • bet  : 10
   • Total valid actions: 100

🔍 Preflop Ground Truth Counts (100 rows):
   • raise: 25
   • call : 25
   • check: 25
   • fold : 25
   • bet  : 0
   • Total valid actions: 100

📂 Evaluating file: Poker_GPT4_unconstrained.csv

🔍 Postflop Ground Truth Counts (100 rows):
   • raise: 15
   • call : 25
   • check: 25
   • fold : 25
   • bet  : 10
   • Total valid actions: 100

🔍 Preflop Ground Truth Counts (100 rows):
   • raise: 25
   • call : 25
   • check: 25
   • fold : 25
   • bet  : 0
   • Total valid actions: 100

📂 Evaluating file: Poker_GPT4_unconstrained_example.csv

🔍 Postflop Ground Truth Counts (100 rows):
   • raise: 15
   • call : 25
   • check: 25
   • fold : 25
   • bet  : 10
   • Total valid actions: 100

🔍 Preflop Ground Truth Counts (100 rows):
   • raise: 25
   • call : 25
   • check: 25
   • fold : 25
   • bet  