In [25]:
import pandas as pd 
import os 

from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset

import seaborn as sns

from datasets import load_dataset
import numpy as np

In [26]:
# Show the kernel's current working directory.
pwd

'/home/cn356/oliver/research/perturb-r'

In [28]:
# Move to the project directory so the relative "./results/..." paths used in later cells resolve.
# NOTE(review): this is a hard-coded absolute path; it only works on the machine that has it.
cd /home/cn356/oliver/research/perturb-r

/home/cn356/oliver/research/perturb-r


# Utils

In [21]:
def chunking(model_response, granularity=20):
    """Split the thinking part of a response into paragraph-aligned chunks.

    Only the text before the first ``</think>`` marker is considered. It is
    split on blank lines into paragraphs; a paragraph is "long" when it has
    more than ``granularity`` whitespace-separated words.

    Two passes are applied:

    1. Every run of consecutive short paragraphs is glued together (with a
       blank line between them) into a single piece; long paragraphs are kept
       as they are.
    2. Each piece that is still short is appended to the chunk before it. A
       long piece always starts a new chunk, and so does the very first piece.

    Args:
        model_response: Full model output as a string.
        granularity: Word-count threshold separating short from long text.

    Returns:
        List of chunk strings, in their original order.
    """
    separator = '\n\n'

    def is_long(text):
        # Same size test in both passes: strictly more words than the threshold.
        return len(text.split()) > granularity

    reasoning = model_response.split('</think>')[0]

    # Pass 1: collapse each run of short paragraphs into one piece.
    pieces = []
    pending = []
    for paragraph in reasoning.split(separator):
        if is_long(paragraph):
            if pending:
                pieces.append(separator.join(pending))
                pending = []
            pieces.append(paragraph)
        else:
            pending.append(paragraph)
    if pending:
        pieces.append(separator.join(pending))

    # Pass 2: attach short pieces to the chunk that precedes them.
    super_chunks = []
    for piece in pieces:
        if super_chunks and not is_long(piece):
            super_chunks[-1] = super_chunks[-1] + separator + piece
        else:
            super_chunks.append(piece)

    return super_chunks

# Reasoning Results

In [5]:
# Directory holding the reasoning result pickles, relative to the current working directory.
root = "./results/hendrycks_math/sample200/reasoning"
os.listdir(root)

['QwQ-32B.pickle',
 'QwQ-32B_correct.pickle',
 'Qwen-R1-Distill-32B.pickle',
 'Qwen-R1-Distill-32B_correct.pickle']

In [6]:
# Print the mean of the 'correct' column for every result file in `root`,
# skipping any file whose name contains 'correct'.
# os.listdir returns names in arbitrary order, so sort them to keep the
# printed report stable from run to run.
for fname in sorted(os.listdir(root)):
    if 'correct' in fname:
        continue
    print(fname)
    # NOTE: unpickling can execute arbitrary code; only load trusted result files.
    df = pd.read_pickle(os.path.join(root, fname))
    print(f"Accuracy: {df['correct'].mean()}")

QwQ-32B.pickle
Accuracy: 0.97
Qwen-R1-Distill-32B.pickle
Accuracy: 0.955


In [7]:
# Load the QwQ-32B result table for closer inspection.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle(os.path.join(root, "QwQ-32B.pickle"))

In [17]:
# Take the "response" entry at key 0 as a sample.
# (assumes the frame has a default integer index, so key 0 is the first row — TODO confirm)
response = df["response"][0]

In [23]:
# Number of chunks the sample response yields at the default granularity.
len(chunking(response))

32

# Digit Corruption Results

In [42]:
def group_analysis (df: pd.DataFrame, cutoffs: list, col: str, stats: str = None):
    """Bucket rows by the smallest cutoff exceeding ``df[col]`` and summarise each bucket.

    A value ``v`` is assigned to the smallest cutoff ``c`` with ``v < c``;
    values that are not below any cutoff (including NaN) go to the ``inf``
    bucket.

    NOTE: a later cell in this notebook defines another ``group_analysis``
    whose ``stats`` argument is required; whichever cell ran last wins.

    Args:
        df: Frame to analyse. A ``'token_count_groups'`` column holding the
            bucket of every row is written to it in place.
        cutoffs: Bucket upper bounds, in any order. The list is not modified.
        col: Column whose values are compared against the cutoffs.
        stats: Column averaged per bucket. Defaults to ``col``, which is what
            the three-argument form has always averaged.

    Returns:
        dict with two Series: ``"corrupting token counts"`` (rows per bucket)
        and ``"accuracy"`` (per-bucket mean of ``stats``).
    """
    # Work on a sorted copy so the first cutoff that exceeds a value is the smallest one.
    ordered_cutoffs = sorted(cutoffs)
    if stats is None:
        stats = col

    def group(value):
        for cutoff in ordered_cutoffs:
            if value < cutoff:
                return cutoff
        # Reached only after every cutoff was tried. The built-in float is used
        # because the np.float alias no longer exists in current NumPy.
        return float("inf")

    df['token_count_groups'] = df[col].apply(group)

    return {"corrupting token counts": df['token_count_groups'].value_counts(),
            "accuracy": df.groupby('token_count_groups')[stats].mean()}

### Corrupt answer digit backwards

In [172]:
# Load the answer-digit corruption results (continuethinking=False) for Qwen2.5-Math-1.5B-DrGRPO-Zero.
# NOTE(review): hard-coded absolute path; unpickling can execute arbitrary code, so only load trusted files.
rl_df = pd.read_pickle("/share/goyal/lio/reasoning/eval/hendrycks_math/sample200/digit_corruption/Qwen2.5-Math-1.5B-DrGRPO-Zero_type=answer_digit_continuethinking=False.pickle")

In [173]:
# NOTE(review): `corruption_group_analysis` is not defined in the visible part of this notebook;
# confirm it is defined (or imported) before this cell runs on a fresh kernel.
corruption_group_analysis(rl_df, 'percentile', 'still_correct')

{'corruption percentage': pctl_group
 0.50    31
 1.00    27
 0.25    21
 0.75    19
 0.10    17
 Name: count, dtype: int64,
 'accuracy': pctl_group
 0.10    0.882353
 0.25    0.857143
 0.50    0.741935
 0.75    0.684211
 1.00    0.740741
 Name: still_correct, dtype: float64}

In [149]:
# Inspect one post-corruption response (the entry at key 6).
print(rl_df['post_corruption_response'][6])

 I started by recalling the definition of a complex number, which is \( z = x + yi \), where \( x \) and \( y \) are real numbers, and \( i \) is the imaginary unit. The conjugate of \( z \), denoted \( \bar{z} \), is \( x - yi \). 

I then substituted \( z = x + yi \) and \( \bar{z} = x - yi \) into the given equation \( 3z + 5i\bar{z} = 1 - 8i \). Expanding both terms, I got \( 3x + 3yi + 5ix - 5y = (3x + 5y) + (3y + 5x)i \). 

Setting the real and imaginary parts equal to the corresponding parts on the right-hand side, I obtained the system of linear equations: \( 3x + 5y = 1 \) and \( 5x + 3y = -8 \). 

To solve this system, I used the method of elimination. I multiplied the first equation by 5 and the second equation by 3 to align the coefficients of \( x \), resulting in the equations \( 12x + 16y = 5 \) and \( 12x + 9y = -25 \). Subtracting the second equation from the first, I found \( 7y = 28 \), leading to \( y = 5 \). Substituting \( y = 5 \) back into the first equation, I 

In [91]:
# Rebind `rl_df` to the answer-digit corruption results (continuethinking=False) for R1-Distill-Qwen-1.5B.
# NOTE(review): hard-coded absolute path; unpickling can execute arbitrary code, so only load trusted files.
rl_df = pd.read_pickle("/share/goyal/lio/reasoning/eval/hendrycks_math/sample200/digit_corruption/R1-Distill-Qwen-1.5B_type=answer_digit_continuethinking=False.pickle")

In [92]:
# Summarise 'still_correct' against 'percentile' for the frame currently bound to `rl_df`.
# NOTE(review): `corruption_group_analysis` is not defined in the visible part of this notebook.
corruption_group_analysis(rl_df, 'percentile', 'still_correct')

{'corruption percentage': pctl_group
 0.50    298
 0.25    290
 1.00    153
 0.75    151
 0.10    143
 Name: count, dtype: int64,
 'accuracy': pctl_group
 0.10    0.335664
 0.25    0.082759
 0.50    0.097315
 0.75    0.079470
 1.00    0.104575
 Name: still_correct, dtype: float64}

### Corrupt all digits forwards

In [174]:
# Load the midway corruption results (continuethinking=True) for Qwen2.5-Math-1.5B-DrGRPO-Zero.
# NOTE(review): the variable is called `sft_df`, but the file name refers to a DrGRPO-Zero model — confirm the intended checkpoint.
# NOTE(review): hard-coded absolute path; unpickling can execute arbitrary code, so only load trusted files.
sft_df = pd.read_pickle("/share/goyal/lio/reasoning/eval/hendrycks_math/sample200/digit_corruption/Qwen2.5-Math-1.5B-DrGRPO-Zero_type=midway_continuethinking=True.pickle")

In [175]:
# Summarise 'still_correct' against 'percentile' for `sft_df`.
# NOTE(review): `corruption_group_analysis` is not defined in the visible part of this notebook.
corruption_group_analysis(sft_df, 'percentile', 'still_correct')

{'corruption percentage': pctl_group
 1.00    137
 0.50     71
 0.75     44
 0.25     27
 0.10      2
 Name: count, dtype: int64,
 'accuracy': pctl_group
 0.10    0.500000
 0.25    0.407407
 0.50    0.183099
 0.75    0.136364
 1.00    0.138686
 Name: still_correct, dtype: float64}

In [152]:
# Rebind `rl_df` to the midway corruption results (continuethinking=True) for the
# Qwen2.5-Math-1.5B sky-math8k r1-continue sft64steps-rl300steps checkpoint.
# NOTE(review): hard-coded absolute path; unpickling can execute arbitrary code, so only load trusted files.
rl_df = pd.read_pickle("/share/goyal/lio/reasoning/eval/hendrycks_math/sample200/digit_corruption/Qwen2.5-Math-1.5B-sky-math8k-r1-continue-sft64steps-rl300steps_type=midway_continuethinking=True.pickle")

In [153]:
# Summarise 'still_correct' against 'percentile' for the frame currently bound to `rl_df`.
# NOTE(review): `corruption_group_analysis` is not defined in the visible part of this notebook.
corruption_group_analysis(rl_df, 'percentile', 'still_correct')

{'corruption percentage': pctl_group
 1.00    106
 0.50     79
 0.75     65
 0.25     43
 0.10      4
 Name: count, dtype: int64,
 'accuracy': pctl_group
 0.10    0.250000
 0.25    0.302326
 0.50    0.113924
 0.75    0.107692
 1.00    0.216981
 Name: still_correct, dtype: float64}

# Distraction

## Full distraction

In [49]:
def if_answer (response):
    """Return True when a boxed answer can be extracted from ``response``.

    Extraction is delegated to ``last_boxed_only_string`` and ``remove_boxed``,
    which are expected to be defined elsewhere in the notebook. Any ordinary
    exception they raise is taken to mean "no answer" and yields False.

    Args:
        response: Model output to inspect.

    Returns:
        True if the helpers complete without raising, False otherwise.

    Raises:
        NameError: if a helper (or a name it uses) is undefined. This is a
            setup problem, not a missing answer, so it is not swallowed.
    """
    try:
        remove_boxed(last_boxed_only_string(response))
    except NameError:
        # Without this, a missing helper would silently mark every response as unanswered.
        raise
    except Exception:
        # Deliberately broad, but unlike a bare `except` it lets
        # KeyboardInterrupt and SystemExit propagate.
        return False
    return True
    
def group_analysis (df: pd.DataFrame, cutoffs: list, col: str, stats: str):
    """Bucket rows by the smallest cutoff exceeding ``df[col]`` and average ``stats`` per bucket.

    A value ``v`` is assigned to the smallest cutoff ``c`` with ``v < c``;
    values that are not below any cutoff (including NaN) go to the ``inf``
    bucket.

    Args:
        df: Frame to analyse. A ``'token_count_groups'`` column holding the
            bucket of every row is written to it in place.
        cutoffs: Bucket upper bounds, in any order. The list is not modified.
        col: Column whose values are compared against the cutoffs.
        stats: Column averaged within each bucket.

    Returns:
        dict with two Series: ``"corrupting token counts"`` (rows per bucket)
        and ``"accuracy"`` (per-bucket mean of ``stats``).
    """
    # Sorted copy: keeps the caller's list untouched and guarantees that the
    # first cutoff exceeding a value is the smallest such cutoff.
    ordered_cutoffs = sorted(cutoffs)

    def group(value):
        for cutoff in ordered_cutoffs:
            if value < cutoff:
                return cutoff
        # Only fall back to the open-ended bucket after every cutoff was tried.
        return float("inf")

    df['token_count_groups'] = df[col].apply(group)

    return {"corrupting token counts": df['token_count_groups'].value_counts(),
            "accuracy": df.groupby('token_count_groups')[stats].mean()}

In [31]:
# Directory holding the distraction results, relative to the current working directory.
# NOTE: this rebinds `root`, which an earlier cell pointed at the reasoning results.
root = './results/hendrycks_math/sample200/distract_thinking'
os.listdir(root)

['R1-Distill-Qwen-1.5B.pickle', 'R1-Distill-Qwen-7B.pickle']

In [32]:
# Load the full-distraction results and report accuracy per 'actual_fraction' group,
# scored first against the original solution and then against the distractor solution.
# NOTE(review): 'full_distract_qwen32b-200sample.pickle' does not appear in the directory
# listing recorded for `root` in the preceding cell — confirm the file location.
# NOTE(review): `corruption_group_analysis` is not defined in the visible part of this notebook.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle(os.path.join(root, 'full_distract_qwen32b-200sample.pickle'))

print("accuracy according to original solution: \n", corruption_group_analysis(df, 'actual_fraction', 'original_correct'))
print('*' * 50, '\n\n')
print("accuracy according to distractor solution: \n", corruption_group_analysis(df, 'actual_fraction', 'distractor_correct'))

NameError: name 'if_answer' is not defined

In [48]:
# Peek at the "num_distract_tokens" value stored at key 0.
df["num_distract_tokens"][0]

94

In [50]:
# Bucket boundaries passed to group_analysis; rows are grouped on "num_distract_tokens"
# and the "original_correct" column is averaged within each group.
cutoffs = [512, 1024, 2048, 4096]
group_analysis(df, cutoffs, "num_distract_tokens", "original_correct")

TypeError: 'NoneType' object is not iterable