In [1]:
# Setup: imports and environment
import os
from dotenv import load_dotenv
import pandas as pd
from tqdm import tqdm

# Locate the project's .env by searching the current working directory and then
# each parent directory, instead of changing into a hard-coded, machine-specific
# absolute path. The working directory is deliberately left unchanged so that
# the relative paths used later in this notebook (e.g. "../data/raw") keep
# resolving relative to the directory the notebook was started from.
from dotenv import find_dotenv

dotenv_path = find_dotenv(usecwd=True)  # returns "" when no .env is found
if dotenv_path:
    # Load API keys from the discovered .env file
    load_dotenv(dotenv_path)
else:
    print("Warning: no .env file found in the working directory or its parents")

# Verify that your keys loaded correctly (True = found).
# Only a boolean is printed so the key values never end up in the cell output.
for label, env_var in (
    ("OpenAI key:", "OPENAI_API_KEY"),
    ("Anthropic key loaded:", "ANTHROPIC_API_KEY"),
    ("Gemini key:", "GEMINI_API_KEY"),
    ("Cohere key:", "COHERE_API_KEY"),
):
    print(label, bool(os.getenv(env_var)))


OpenAI key: True
Anthropic key loaded: False
Gemini key: True
Cohere key: True


In [2]:
import os, pandas as pd

# Create the raw-data folder if it is not there yet
os.makedirs("../data/raw", exist_ok=True)

# Small, balanced starter set (12 rows: 4 varieties × 3 tasks).
# Each record is (id, variety, task, text); dicts are built from these below.
_fields = ["id", "variety", "task", "text"]
_records = [
    # AAVE
    ("aave_01", "AAVE", "paraphrase", "He finna go to the store. You sliding?"),
    ("aave_02", "AAVE", "explain", "Ion think that plan gon’ work."),
    ("aave_03", "AAVE", "continue", "We was tryna finish that yesterday"),
    # Spanglish
    ("span_01", "Spanglish", "paraphrase", "Vamos later, it’s muy close to la tienda."),
    ("span_02", "Spanglish", "explain", "No entiendo bien, pero I think it’s fine."),
    ("span_03", "Spanglish", "continue", "We can meet en el parque, like at 5."),
    # British English
    ("br_01", "BrEng", "paraphrase", "Put it in the lorry outside the flat."),
    ("br_02", "BrEng", "explain", "Take the lift, not the stairs, to the first floor."),
    ("br_03", "BrEng", "continue", "We’re off on holiday next week, fancy it?"),
    # Standard English
    ("std_01", "StdEng", "paraphrase", "He is about to head out. Are you coming?"),
    ("std_02", "StdEng", "explain", "Please explain this in simple terms."),
    ("std_03", "StdEng", "continue", "We should wrap this up and send it."),
]
rows = [dict(zip(_fields, record)) for record in _records]

# Persist the stimuli table (without the index column) and preview every row
stimuli = pd.DataFrame(rows, columns=_fields)
stimuli_csv = "../data/raw/stimuli.csv"
stimuli.to_csv(stimuli_csv, index=False)
print("Wrote:", os.path.abspath(stimuli_csv))
stimuli.head(12)


Wrote: /Users/jase/data/raw/stimuli.csv


Unnamed: 0,id,variety,task,text
0,aave_01,AAVE,paraphrase,He finna go to the store. You sliding?
1,aave_02,AAVE,explain,Ion think that plan gon’ work.
2,aave_03,AAVE,continue,We was tryna finish that yesterday
3,span_01,Spanglish,paraphrase,"Vamos later, it’s muy close to la tienda."
4,span_02,Spanglish,explain,"No entiendo bien, pero I think it’s fine."
5,span_03,Spanglish,continue,"We can meet en el parque, like at 5."
6,br_01,BrEng,paraphrase,Put it in the lorry outside the flat.
7,br_02,BrEng,explain,"Take the lift, not the stairs, to the first fl..."
8,br_03,BrEng,continue,"We’re off on holiday next week, fancy it?"
9,std_01,StdEng,paraphrase,He is about to head out. Are you coming?


In [3]:
import pandas as pd, os
# Sanity check: confirm the stimuli CSV is on disk, then preview its first rows
stimuli_csv_path = "../data/raw/stimuli.csv"
csv_present = os.path.exists(stimuli_csv_path)
print("Exists?", csv_present)
pd.read_csv(stimuli_csv_path).head()

Exists? True


Unnamed: 0,id,variety,task,text
0,aave_01,AAVE,paraphrase,He finna go to the store. You sliding?
1,aave_02,AAVE,explain,Ion think that plan gon’ work.
2,aave_03,AAVE,continue,We was tryna finish that yesterday
3,span_01,Spanglish,paraphrase,"Vamos later, it’s muy close to la tienda."
4,span_02,Spanglish,explain,"No entiendo bien, pero I think it’s fine."


In [4]:
import google.generativeai as genai

# Hand the Gemini SDK the API key read from the environment
gemini_api_key = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=gemini_api_key)

# Model handle used by query_gemini below; use a model you actually have access to
gemini_model_name = "models/gemini-2.5-flash"
model = genai.GenerativeModel(gemini_model_name)

def query_gemini(prompt):
    """Ask the module-level Gemini ``model`` for a completion of ``prompt``.

    Returns the ``text`` attribute of the generated response. This is a
    best-effort call: if generating the content or reading its text raises
    any ``Exception``, the error is printed and ``None`` is returned instead
    of propagating the failure.
    """
    try:
        reply = model.generate_content(prompt)
        # Reading .text stays inside the try so a failure here is handled too
        answer = reply.text
    except Exception as err:
        print("Error:", err)
        answer = None
    return answer





In [5]:
pip install -U google-generativeai

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [6]:
# Print the name of every model the configured API key can see, one per line
for available_model in genai.list_models():
    model_name = available_model.name
    print(model_name)

E0000 00:00:1759274931.808653  222222 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


models/embedding-gecko-001
models/gemini-2.5-pro-preview-03-25
models/gemini-2.5-flash-preview-05-20
models/gemini-2.5-flash
models/gemini-2.5-flash-lite-preview-06-17
models/gemini-2.5-pro-preview-05-06
models/gemini-2.5-pro-preview-06-05
models/gemini-2.5-pro
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-preview-image-generation
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/gemini-2.5-flash-preview-tts
models/gemini-2.5-pro-preview-tts
models/learnlm-2.0-flash-experimental
models/gemma-3-1b-it
models/gemma-3-4b-it
models/gemma-3-12b-it
models/gemma-3-27b-it
models/gemma-3n-e4b-it
mo

In [7]:
# One-off smoke test: send a single AAVE paraphrase prompt and show the raw reply
smoke_prompt = "Paraphrase this sentence without changing its meaning: He finna go to the store."
query_gemini(smoke_prompt)

E0000 00:00:1759274932.056034  222222 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


'Here are several ways to paraphrase "He finna go to the store" without changing its meaning, ranging from informal to more formal:\n\n1.  **He is going to go to the store.** (Most common and direct)\n2.  **He is about to go to the store.** (Emphasizes immediacy)\n3.  **He intends to go to the store.**\n4.  **He plans to go to the store.**\n5.  **He will go to the store.**\n6.  **He\'s preparing to go to the store.**\n7.  **He\'s fixing to go to the store.** (Still informal, but clarifies the "finna")'

In [8]:
from tqdm import tqdm
import pandas as pd

# Reload the stimuli from disk and collect one Gemini response per row.
stimuli = pd.read_csv("../data/raw/stimuli.csv")
gemini_csv = "../data/raw/gemini_responses.csv"

responses = []

# NOTE(review): the same prompt wording is sent for every row, including rows
# whose task is "explain" — confirm this is intended.
for _, stim in tqdm(stimuli.iterrows(), total=len(stimuli)):
    # query_gemini returns None on failure; that value is stored as-is
    reply = query_gemini(
        f"Paraphrase or continue this text in the same dialectal style: {stim.text}"
    )
    responses.append(
        dict(
            id=stim.id,
            variety=stim.variety,
            task=stim.task,
            input_text=stim.text,
            output_text=reply,
        )
    )

# Save all responses (without the index column) and preview the first rows
df = pd.DataFrame(responses)
df.to_csv(gemini_csv, index=False)
print("✅ Saved:", os.path.abspath(gemini_csv))
df.head()


100%|██████████| 12/12 [02:06<00:00, 10.57s/it]

✅ Saved: /Users/jase/data/raw/gemini_responses.csv





Unnamed: 0,id,variety,task,input_text,output_text
0,aave_01,AAVE,paraphrase,He finna go to the store. You sliding?,Here are a few ways to paraphrase or continue ...
1,aave_02,AAVE,explain,Ion think that plan gon’ work.,Here are a few ways to paraphrase or continue ...
2,aave_03,AAVE,continue,We was tryna finish that yesterday,"Okay, here are a few ways to paraphrase or con..."
3,span_01,Spanglish,paraphrase,"Vamos later, it’s muy close to la tienda.",This is a great example of Spanglish! Here are...
4,span_02,Spanglish,explain,"No entiendo bien, pero I think it’s fine.","Here are a few options, both paraphrasing and ..."


In [9]:
# Test Anthropic Claude adapter with debug agent
import sys
import os

# Make the project's src/ directory importable.
# The original heuristic stays the first choice (a working directory ending in
# "notebooks" -> ../src, otherwise ./src); the other location is tried as a
# fallback when the first one does not exist. The entry is stored as an
# absolute path so it keeps working if the working directory changes later.
current_dir = os.getcwd()
if current_dir.endswith('notebooks'):
    src_candidates = [os.path.join('..', 'src'), 'src']
else:
    src_candidates = ['src', os.path.join('..', 'src')]

src_path = next(
    (os.path.abspath(candidate) for candidate in src_candidates if os.path.isdir(candidate)),
    os.path.abspath(src_candidates[0]),  # nothing exists: fall back to the heuristic choice
)

if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Import debug agent and test
from debug_agent import AnthropicDebugAgent

print("🧪 Running Claude smoke test with debug agent...")
debug_agent = AnthropicDebugAgent()

# Stays None unless the smoke test actually runs, so the final expression of
# this cell is always defined.
claude_df = None

# Set up environment
if debug_agent.setup_environment():
    print("✅ Environment setup successful")

    # Import query function
    from adapters.anthropic_adapter import query_claude

    # Smoke test with 2 rows
    stimuli_small = stimuli.head(2)
    claude_responses = []

    for i, row in tqdm(stimuli_small.iterrows(), total=len(stimuli_small)):
        prompt = f"Paraphrase or continue this text in the same dialectal style: {row.text}"
        # Only the API call is guarded, and exactly one record is appended per
        # stimulus. Previously the success record was appended inside the try
        # block, so a failure while printing the preview added a second
        # "ERROR" record for the same id.
        try:
            output = query_claude(prompt)
        except Exception as e:
            print(f"❌ {row.id}: {e}")
            output = f"ERROR: {e}"
        else:
            # str() keeps the preview safe if the adapter returns a non-string
            print(f"✅ {row.id}: {str(output)[:50]}...")

        claude_responses.append({
            "id": row.id,
            "variety": row.variety,
            "task": row.task,
            "input_text": row.text,
            "output_text": output
        })

    claude_df = pd.DataFrame(claude_responses)
    claude_df.to_csv("../data/raw/claude_smoke.csv", index=False)
    print("✅ Saved Claude smoke test:", os.path.abspath("../data/raw/claude_smoke.csv"))
else:
    print("❌ Environment setup failed. Check API key and configuration.")

# A bare expression is only rendered when it is the last top-level statement of
# the cell; inside the if-body above it was silently discarded.
claude_df


ModuleNotFoundError: No module named 'debug_agent'