In [None]:
import pandas as pd
import os
import re
from glob import glob

# Valid Poker actions
valid_actions = ["raise", "bet", "check", "call", "fold"]


def extract_first_action(text):
    """Return the first valid poker action mentioned in *text*.

    The text is lower-cased and scanned one whitespace-separated word at a
    time. For each word two kinds of candidates are tried, in this order:

    1. the word with every non-word character removed
       (``"**Fold**"`` -> ``"fold"``, ``"call."`` -> ``"call"``);
    2. each run of word characters inside the word, so that actions glued
       to other text by punctuation are still found
       (``"raise(20)"`` -> ``"raise"``, ``"check/call"`` -> ``"check"``).

    Args:
        text: Free-form answer text. Any non-string value (for example
            ``None`` or a float NaN) is treated as "no answer".

    Returns:
        The matching entry of ``valid_actions``, or ``""`` when *text* is
        not a string or contains no valid action.
    """
    if not isinstance(text, str):
        return ""
    for word in text.lower().split():
        # The collapsed form comes first so a word that matched before this
        # function learned to look at sub-tokens still yields the same hit.
        candidates = [re.sub(r"[^\w]", "", word), *re.findall(r"\w+", word)]
        for candidate in candidates:
            if candidate in valid_actions:
                return candidate
    return ""

def clean_ground_truth(gt):
    """Reduce a ground-truth label to its bare action (e.g. "raise 20" -> "raise").

    The label is stripped of surrounding whitespace and lower-cased once,
    and that normalised form is used for both the prefix test and the
    fallback, so a label such as ``" Raise 20"`` is recognised as ``"raise"``.

    Args:
        gt: Raw ground-truth value. Any non-string value (for example a
            float NaN) is treated as missing.

    Returns:
        The first entry of ``valid_actions`` that the normalised label
        starts with; the normalised label itself when none matches; ``""``
        when *gt* is not a string.
    """
    if not isinstance(gt, str):
        return ""
    normalized = gt.strip().lower()
    for action in valid_actions:
        # Prefix match: anything following the action word (bet size etc.)
        # is discarded.
        if normalized.startswith(action):
            return action
    return normalized

# Directory that receives the processed CSVs. It is created up front so the
# df.to_csv() call in the loop below cannot fail on a missing folder.
output_dir = "Experiments/Processed"
os.makedirs(output_dir, exist_ok=True)

# Automatically find all relevant CSV files
file_paths = sorted(glob("Experiments/Unprocessed/CoT/Fine-tuned/Poker_*Base*.csv"))

# Captures everything that follows "your optimal action is:" up to the end
# of the text (DOTALL lets the capture span line breaks). Compiled once
# instead of once per row.
optimal_action_pattern = re.compile(
    r"(?:your optimal action is)\s*:\s*(.*)", re.IGNORECASE | re.DOTALL
)


def get_final_answer(row):
    """Pick the answer text for one row.

    When the row's ``reasoning`` text contains "your optimal action is:",
    the text after that marker is returned (stripped) and takes precedence
    over the stored ``final_answer``. Otherwise the existing
    ``final_answer`` value is returned unchanged.

    NOTE(review): the comment this replaces said the reasoning is consulted
    only "if final_answer is empty"; the code has always preferred the
    reasoning match, and that behaviour is kept here. Confirm which of the
    two is intended.

    Args:
        row: Any object with a dict-style ``.get`` (a DataFrame row when
            called through ``df.apply(..., axis=1)``).

    Returns:
        The extracted answer text, or the row's ``final_answer`` value
        (``""`` when that key is absent).
    """
    reasoning = row.get("reasoning", "")
    # Empty CSV cells come back from pandas as float NaN, not str; such rows
    # simply have no reasoning to search.
    if isinstance(reasoning, str):
        match = optimal_action_pattern.search(reasoning)
        if match:
            return match.group(1).strip()
    return row.get("final_answer", "")


# Process each file
for file_path in file_paths:
    df = pd.read_csv(file_path)

    # Prefer the answer stated in the reasoning, else keep final_answer
    df["final_answer"] = df.apply(get_final_answer, axis=1)

    # Clean ground truth actions
    df["ground_truth"] = df["ground_truth"].apply(clean_ground_truth)

    # Extract first valid poker action from final_answer
    df["action"] = df["final_answer"].apply(extract_first_action)

    # Save the cleaned version for review
    output_path = os.path.join(output_dir, os.path.basename(file_path))
    df.to_csv(output_path, index=False)
    print(f"Processed and saved: {output_path}")

Processed and saved: Experiments/Processed/Poker_Llama-31-8B-Instruct-FT_Base.csv
Processed and saved: Experiments/Processed/Poker_Llama-32-3B-Instruct-FT_Base.csv
