# CoT Rationales Explorer
Quickly browse the CoT rationales used for @exps_logistic training/testing. Set `CSV_PATH` to any input CSV (canonical or res.csv-style) and run the cells.


In [1]:
import pandas as pd
from pathlib import Path

# Point to any CSV used by exps_logistic (canonical format or res.csv from exps_performance)
CSV_PATH = Path(
    "/nlpgpu/data/terry/ToolProj/src/exps_logistic/results/bert_code_tfidf.csv"
)

# For performance, cap rows
N_ROWS = 2000



In [2]:
def load_any_csv(path: Path, n_rows: int = N_ROWS) -> pd.DataFrame:
    df = pd.read_csv(path, nrows=n_rows)
    # Normalize column names to expected ones
    colmap = {
        "answer": "answer",
        "question": "prompt",
        "rationale": "rationale",
        "nl_reasoning": "rationale",
        "code_answer": "rationale",
        "rep": "rep",
    }
    df = df.rename(columns={k: v for k, v in colmap.items() if k in df.columns})
    # best-effort rep inference
    if "rep" not in df.columns:
        if "code_answer" in colmap:
            df["rep"] = "code"
        else:
            df["rep"] = "nl"
    # drop rows with empty rationale
    df["rationale"] = df.get("rationale", "").astype(str)
    df = df[df["rationale"].str.len() > 0].reset_index(drop=True)
    return df


df = load_any_csv(CSV_PATH)
df.head()


Unnamed: 0,rationale,kind,digits,prompt,true_label,pred_label,neglogp_true_nat,rep
0,,gcp,0,,gcp|d0|bNA,ksp|d0|bNA,2.03967,code
1,60.0,gsm8k,0,"In a dance class of 20 students, 20% enrolled ...",gsm8k|d0|bNA,gsm8k|d0|bNA,2.080556,code
2,26.0,gsm8k,0,Kyle bought last year's best-selling book for ...,gsm8k|d0|bNA,gsm8k|d0|bNA,2.080556,code
3,1.0,lcs,8,Compute the length of the Longest Common Subse...,lcs|d8|b27,edp|d0|bNA,2.613136,code
4,,gcp,0,,gcp|d0|bNA,ksp|d0|bNA,2.03967,code


In [3]:
# Basic stats
print(f"Rows: {len(df):,}")
print(df.columns.tolist())
print(df.groupby('rep')['rationale'].count())

# Length distribution
lengths = df['rationale'].str.len()
print(lengths.describe())


Rows: 76
['rationale', 'kind', 'digits', 'prompt', 'true_label', 'pred_label', 'neglogp_true_nat', 'rep']
rep
code    76
Name: rationale, dtype: int64
count    76.000000
mean     11.565789
std      21.597121
min       1.000000
25%       1.000000
50%       3.000000
75%       3.500000
max      73.000000
Name: rationale, dtype: float64


In [4]:
# Peek random samples per rep/kind
kind_col = 'kind' if 'kind' in df.columns else None
for rep, df_rep in df.groupby('rep'):
    print(f"\n=== {rep} samples ===")
    sample = df_rep.sample(n=min(3, len(df_rep)), random_state=0)
    cols_show = ['rationale', 'prompt']
    if kind_col:
        cols_show.append(kind_col)
    display(sample[cols_show])



=== code samples ===


Unnamed: 0,rationale,prompt,kind
52,540.0,James decides to run 3 sprints 3 times a week....,gsm8k
57,,,spp
22,0.0,,edp


In [5]:
# Optional: filter by kind/rep and inspect
FILTER_KIND = None  # e.g., 'gsm8k'
FILTER_REP = None   # 'nl' or 'code'

view = df
if FILTER_KIND and 'kind' in df.columns:
    view = view[view['kind'] == FILTER_KIND]
if FILTER_REP:
    view = view[view['rep'] == FILTER_REP]

print(f"Showing {len(view)} rows after filters")
display(view[['rationale', 'prompt']].head(5))


Showing 76 rows after filters


Unnamed: 0,rationale,prompt
0,,
1,60.0,"In a dance class of 20 students, 20% enrolled ..."
2,26.0,Kyle bought last year's best-selling book for ...
3,1.0,Compute the length of the Longest Common Subse...
4,,


Notes:
- Set `CSV_PATH` to any @exps_logistic input or saved preds CSV.
- Handles canonical columns and res.csv-style (`nl_reasoning`, `code_answer`).
- Uses random samples per rep for quick eyeballing; tweak `N_ROWS` for larger slices.
