In [4]:
import pandas as pd
import re
from pathlib import Path

def clean_verdict(text):
    """Reduce a raw verdict cell to the bare label ``A``, ``B`` or ``Tie``.

    Parameters
    ----------
    text : Any
        Raw cell value. Missing values (as judged by ``pd.isna``) are
        returned unchanged.

    Returns
    -------
    The string ``"A"``, ``"B"`` or ``"Tie"`` when such a label is found in
    ``str(text)``; otherwise the original ``text`` object, untouched.

    Notes
    -----
    ``A`` / ``B`` are only accepted as standalone tokens, i.e. not directly
    adjacent to another ASCII letter. Without that restriction the first
    capital A or B of any word would be taken as the verdict
    ("Assistant B" -> "A", "Both are bad" -> "B"). Quotes, brackets, digits,
    underscores and punctuation around the label are fine.

    ``Tie`` only needs to start a word, so "Tied" still yields "Tie".

    Known limitation: a sentence that begins with the article "A"
    (e.g. "A tie") is still read as verdict ``A``.
    """
    if pd.isna(text):
        return text

    # Lookarounds instead of \b so that "_" and digits count as separators
    # ("Model_A" -> "A") while letters do not ("Answer" is not "A").
    pattern = r'(?<![A-Za-z])([AB](?![A-Za-z])|Tie)'
    match = re.search(pattern, str(text))

    # Fall back to the untouched input when no label is present.
    return match.group(1) if match else text

# Walk every CSV in the working directory and normalise its 'verdict'
# column, writing the result back over the same file.
for csv_file in Path('.').glob('*.csv'):
    print(f"Processing {csv_file.name}...")

    try:
        df = pd.read_csv(csv_file)

        # Files without a verdict column are reported and left as they are on disk.
        if 'verdict' not in df.columns:
            print(f"No 'verdict' column in {csv_file.name}")
            continue

        df['verdict'] = df['verdict'].apply(clean_verdict)
        # index=False: do not add the DataFrame index as an extra column.
        df.to_csv(csv_file, index=False)
        print(f"Successfully cleaned {csv_file.name}")

    except Exception as e:
        # Best effort: report the failure and carry on with the next file.
        print(f"Error processing {csv_file.name}: {str(e)}")

print("All files processed!")

Processing qwen3-0.6b_vs_llama4-400b.csv...
Successfully cleaned qwen3-0.6b_vs_llama4-400b.csv
Processing qwen3-latest_vs_gemma3-27b.csv...
Successfully cleaned qwen3-latest_vs_gemma3-27b.csv
Processing qwen3-0.6b_vs_gemma3-27b.csv...
Successfully cleaned qwen3-0.6b_vs_gemma3-27b.csv
Processing gemma3-27b_vs_mistral-small3.1-latest.csv...
Successfully cleaned gemma3-27b_vs_mistral-small3.1-latest.csv
Processing qwen2.5-latest_vs_mistral-small3.1-latest.csv...
Successfully cleaned qwen2.5-latest_vs_mistral-small3.1-latest.csv
Processing qwen3-latest_vs_qwen2.5-latest.csv...
Successfully cleaned qwen3-latest_vs_qwen2.5-latest.csv
Processing qwen3-latest_vs_qwen3-0.6b.csv...
Successfully cleaned qwen3-latest_vs_qwen3-0.6b.csv
Processing deepseek-r1-70b-alt_vs_mistral-small3.1-latest.csv...
Successfully cleaned deepseek-r1-70b-alt_vs_mistral-small3.1-latest.csv
Processing deepseek-r1-70b-alt_vs_gemma3-27b.csv...
Successfully cleaned deepseek-r1-70b-alt_vs_gemma3-27b.csv
Processing qwen3-0.

In [5]:
import pandas as pd
import re
from pathlib import Path

def clean_verdict(text):
    """Normalise a raw verdict cell to ``"A"``, ``"B"``, ``"Tie"`` or ``"unknown"``.

    Parameters
    ----------
    text : Any
        Raw cell value; it is converted with ``str`` and stripped before
        matching.

    Returns
    -------
    str
        ``"A"`` / ``"B"`` / ``"Tie"`` when a label is recognised, otherwise
        ``"unknown"`` (this includes missing values and empty or
        whitespace-only strings).

    Notes
    -----
    Matching runs in two passes so that ordinary words are not mistaken for
    labels (an unanchored case-insensitive search would turn "It's a tie"
    into "A" and "Both are bad" into "B"):

    1. A standalone upper-case ``A`` / ``B`` (no ASCII letter on either
       side), or the word ``tie`` in any letter case at the start of a word.
    2. Only if pass 1 finds nothing: a standalone ``a`` / ``b`` in any case,
       which covers bare lower-case answers such as ``"b"``.

    Known limitation: text that begins with the capital article "A"
    (e.g. "A tie") is still read as verdict ``A``.
    """
    if pd.isna(text):
        return "unknown"

    text = str(text).strip()

    # Lookarounds rather than \b so "_" and digits act as separators
    # ("Model_A" -> "A") while letters do not ("Answer" is not "A").
    strict = r'(?<![A-Za-z])([AB](?![A-Za-z])|[Tt][Ii][Ee])'
    relaxed = r'(?<![A-Za-z])([AaBb])(?![A-Za-z])'

    match = re.search(strict, text) or re.search(relaxed, text)
    if match is None:
        return "unknown"

    label = match.group(1)
    # Canonical spelling: "Tie" is title-cased, A/B are upper-cased.
    return "Tie" if label.lower() == "tie" else label.upper()

# Normalise the 'verdict' column of every CSV in the working directory,
# overwrite the file, and print the resulting label distribution.
for csv_file in Path('.').glob('*.csv'):
    print(f"Processing {csv_file.name}...")

    try:
        df = pd.read_csv(csv_file)

        # Nothing to clean: report it and leave the file as it is on disk.
        if 'verdict' not in df.columns:
            print(f"⚠️ No 'verdict' column in {csv_file.name}")
            continue

        df['verdict'] = df['verdict'].apply(clean_verdict)
        # index=False: do not add the DataFrame index as an extra column.
        df.to_csv(csv_file, index=False)
        print(f"✅ Cleaned: {csv_file.name}")

        # Quick sanity check of what the cleaned column now contains.
        print("Sample verdicts after cleaning:")
        print(df['verdict'].value_counts())

    except Exception as e:
        # Best effort: report the failure and carry on with the next file.
        print(f"❌ Error processing {csv_file.name}: {str(e)}")

print("\nAll files processed! 🎉")

Processing qwen3-0.6b_vs_llama4-400b.csv...
✅ Cleaned: qwen3-0.6b_vs_llama4-400b.csv
Sample verdicts after cleaning:
verdict
B      335
A      170
Tie    107
Name: count, dtype: int64
Processing qwen3-latest_vs_gemma3-27b.csv...
✅ Cleaned: qwen3-latest_vs_gemma3-27b.csv
Sample verdicts after cleaning:
verdict
A      307
B      286
Tie     19
Name: count, dtype: int64
Processing qwen3-0.6b_vs_gemma3-27b.csv...
✅ Cleaned: qwen3-0.6b_vs_gemma3-27b.csv
Sample verdicts after cleaning:
verdict
B      524
A       85
Tie      3
Name: count, dtype: int64
Processing gemma3-27b_vs_mistral-small3.1-latest.csv...
✅ Cleaned: gemma3-27b_vs_mistral-small3.1-latest.csv
Sample verdicts after cleaning:
verdict
A      447
B      116
Tie     49
Name: count, dtype: int64
Processing qwen2.5-latest_vs_mistral-small3.1-latest.csv...
✅ Cleaned: qwen2.5-latest_vs_mistral-small3.1-latest.csv
Sample verdicts after cleaning:
verdict
B      316
A      227
Tie     69
Name: count, dtype: int64
Processing qwen3-latest_

In [6]:
import pandas as pd
import re
from pathlib import Path

def clean_verdict(text):
    """Map a raw verdict cell onto ``"A"``, ``"B"``, ``"Tie"`` or ``"unknown"``.

    Parameters
    ----------
    text : Any
        Raw cell value; stringified and stripped before matching.

    Returns
    -------
    str
        The recognised label, or ``"unknown"`` for missing values and for
        text in which no label can be found (including the empty string).

    Notes
    -----
    A plain case-insensitive search for ``A|B|Tie`` matches the first
    'a' or 'b' of any word ("It's a tie" -> "A", "Assistant B" -> "A").
    To avoid that, labels must be standalone tokens and are looked up in
    two passes:

    1. Upper-case ``A`` / ``B`` with no ASCII letter on either side, or
       ``tie`` (any case) at the start of a word.
    2. Fallback when pass 1 finds nothing: a standalone ``a`` / ``b`` in
       any case, so a bare ``"b"`` still becomes ``"B"``.

    Known limitation: text that starts with the capital article "A"
    (e.g. "A tie") is still read as verdict ``A``.
    """
    if pd.isna(text):
        return "unknown"

    text = str(text).strip()

    # Letter-based lookarounds (not \b) so "Model_A" or "A1" still count
    # as a standalone "A", while "Answer" does not.
    strict_pattern = r'(?<![A-Za-z])([AB](?![A-Za-z])|[Tt][Ii][Ee])'
    relaxed_pattern = r'(?<![A-Za-z])([AaBb])(?![A-Za-z])'

    match = re.search(strict_pattern, text) or re.search(relaxed_pattern, text)
    if not match:
        return "unknown"

    verdict = match.group(1)
    return "Tie" if verdict.lower() == "tie" else verdict.upper()

# Process all CSV files in current directory: optionally trim leading rows,
# clean the 'verdict' column, and overwrite each file with the result.
#
# NOTE(review): the notebook does not say why long files lose their first
# rows or where 288 / 900 come from - confirm before reusing on new data.
ROWS_TO_DROP = 288        # number of leading rows removed from long files
MIN_ROWS_FOR_TRIM = 900   # only files with at least this many rows are trimmed

for csv_file in Path('.').glob('*.csv'):
    print(f"\nProcessing {csv_file.name}...")

    try:
        df = pd.read_csv(csv_file)
        original_rows = len(df)
        print(f"Original rows: {original_rows}")

        # Delete the leading rows of long files.
        if original_rows >= MIN_ROWS_FOR_TRIM:
            # .copy() makes the slice an independent frame, so the column
            # assignment below does not raise SettingWithCopyWarning.
            df = df.iloc[ROWS_TO_DROP:].copy()
            print(f"Removed first {ROWS_TO_DROP} rows. New row count: {len(df)}")

        # Clean verdict column if exists
        if 'verdict' in df.columns:
            df['verdict'] = df['verdict'].apply(clean_verdict)
            print("Verdict column cleaned")
        else:
            print("No 'verdict' column found")

        # Save processed file (written even when there is no verdict column,
        # because rows may have been trimmed above).
        df.to_csv(csv_file, index=False)
        print(f"Successfully processed and saved {csv_file.name}")
        print(f"Final row count: {len(df)}")

    except Exception as e:
        # Best effort: report the failure and carry on with the next file.
        print(f"Error processing {csv_file.name}: {str(e)}")

print("\nAll files processed! 🎉")


Processing qwen3-0.6b_vs_llama4-400b.csv...
Original rows: 612
Verdict column cleaned
Successfully processed and saved qwen3-0.6b_vs_llama4-400b.csv
Final row count: 612

Processing qwen3-latest_vs_gemma3-27b.csv...
Original rows: 612
Verdict column cleaned
Successfully processed and saved qwen3-latest_vs_gemma3-27b.csv
Final row count: 612

Processing qwen3-0.6b_vs_gemma3-27b.csv...
Original rows: 612
Verdict column cleaned
Successfully processed and saved qwen3-0.6b_vs_gemma3-27b.csv
Final row count: 612

Processing gemma3-27b_vs_mistral-small3.1-latest.csv...
Original rows: 612
Verdict column cleaned
Successfully processed and saved gemma3-27b_vs_mistral-small3.1-latest.csv
Final row count: 612

Processing qwen2.5-latest_vs_mistral-small3.1-latest.csv...
Original rows: 612
Verdict column cleaned
Successfully processed and saved qwen2.5-latest_vs_mistral-small3.1-latest.csv
Final row count: 612

Processing qwen3-latest_vs_qwen2.5-latest.csv...
Original rows: 612
Verdict column clean