In [4]:
import json
import os

# Directory holding the artifacts of problem 330 (correct base solution,
# DeepSeek-R1-Distill-Llama-8B rollouts sampled at temperature 0.6 / top_p 0.95)
problem_dir = "math-rollouts/deepseek-r1-distill-llama-8b/temperature_0.6_top_p_0.95/correct_base_solution/problem_330"

# File with the problem statement and its ground-truth solution/answer
chunk_filename = "problem.json"

# Full path to problem.json
chunk_path = os.path.join(problem_dir, chunk_filename)

try:
    # JSON text is UTF-8; pass the encoding explicitly so the read does not
    # depend on the platform's default locale encoding.
    with open(chunk_path, 'r', encoding='utf-8') as f:
        chunk_data = json.load(f)

    print(f"Content of {chunk_filename}:")
    # Pretty-print the JSON object for better readability
    print(json.dumps(chunk_data, indent=2))

# On either failure only a message is printed: chunk_data is NOT assigned by
# this cell, so any cell that reads it afterwards will fail or see a stale value.
except FileNotFoundError:
    print(f"Error: The file {chunk_path} was not found.")
except json.JSONDecodeError:
    print(f"Error: The file {chunk_path} is not a valid JSON file.")

Content of problem.json:
{
  "problem": "Compute\n\n$3(1+3(1+3(1+3(1+3(1+3(1+3(1+3(1+3(1+3)))))))))$",
  "level": "Level 5",
  "type": "Algebra",
  "gt_solution": "Not to be tricked by the excess of parentheses, we rewrite the expression as a geometric series: \\[3+3^2+3^3+\\cdots +3^9 +3^{10}.\\]Now the sum can be computed as $\\frac{3^{11}-3}{3-1}=\\boxed{88572}.$",
  "gt_answer": "88572",
  "nickname": "Nested Multiplication"
}


In [5]:
# Keep the problem statement under its own name: chunk_data is rebound to the
# contents of chunks_labeled.json by the next loading cell.
problem = chunk_data["problem"]

In [6]:
import json
import os

# Directory holding the artifacts of problem 330 (correct base solution,
# DeepSeek-R1-Distill-Llama-8B rollouts sampled at temperature 0.6 / top_p 0.95)
problem_dir = "math-rollouts/deepseek-r1-distill-llama-8b/temperature_0.6_top_p_0.95/correct_base_solution/problem_330"

# File with the labeled chunks of the base solution (one JSON object per chunk)
chunk_filename = "chunks_labeled.json"

# Full path to chunks_labeled.json
chunk_path = os.path.join(problem_dir, chunk_filename)

# How many entries to echo into the cell output; the full list is long and
# printing all of it buries the rest of the notebook.
preview_count = 2

try:
    # JSON text is UTF-8; pass the encoding explicitly so the read does not
    # depend on the platform's default locale encoding.
    with open(chunk_path, 'r', encoding='utf-8') as f:
        # NOTE: this rebinds chunk_data, which previously held problem.json
        chunk_data = json.load(f)

    print(f"Content of {chunk_filename}:")
    if isinstance(chunk_data, list):
        print(f"{len(chunk_data)} entries; showing the first {min(preview_count, len(chunk_data))}")
        preview = chunk_data[:preview_count]
    else:
        # Unexpected top-level type: show it unchanged rather than slicing
        preview = chunk_data
    # Pretty-print the preview for better readability
    print(json.dumps(preview, indent=2))

# On either failure only a message is printed: chunk_data keeps whatever value
# it had before this cell ran.
except FileNotFoundError:
    print(f"Error: The file {chunk_path} was not found.")
except json.JSONDecodeError:
    print(f"Error: The file {chunk_path} is not a valid JSON file.")

Content of chunks_labeled.json:
[
  {
    "chunk": "Alright, so I've got this math problem here that I need to solve.",
    "chunk_idx": 0,
    "function_tags": [
      "problem_setup"
    ],
    "depends_on": [],
    "accuracy": 0.21428571428571427,
    "resampling_importance_accuracy": 0.3157142857142857,
    "resampling_importance_kl": 9.539380788051231,
    "counterfactual_importance_accuracy": -0.3871428571428572,
    "counterfactual_importance_kl": 10.809324976891892,
    "forced_importance_accuracy": 0.0,
    "forced_importance_kl": 14.21827053440187,
    "different_trajectories_fraction": 0.8571428571428571,
    "overdeterminedness": 0.18367346938775508,
    "summary": "solve math problem"
  },
  {
    "chunk": "It's a nested expression with a bunch of threes and parentheses.",
    "chunk_idx": 1,
    "function_tags": [
      "problem_setup"
    ],
    "depends_on": [],
    "accuracy": 0.53,
    "resampling_importance_accuracy": -0.030000000000000027,
    "resampling_importance

In [5]:
# Text of the first chunk. Assumes chunk_data currently holds the list loaded
# from chunks_labeled.json (not the problem.json dict) -- confirm cell order.
sentence = chunk_data[0]["chunk"]

In [7]:
# Text of every labeled chunk, in the order the entries appear in chunk_data
allsentences = [entry["chunk"] for entry in chunk_data]

In [8]:
# Bare expression: shows the problem statement string as the cell output
problem

'Compute\n\n$3(1+3(1+3(1+3(1+3(1+3(1+3(1+3(1+3(1+3)))))))))$'

In [9]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig, AutoModelForCausalLM, pipeline

import torch


# Hugging Face checkpoint identifier; swap in any other suitable model here
model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
mname = model_name  # second name bound to the same identifier

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Decoder-only tokenizers often ship without a padding token; when none is
# set, register the end-of-sequence token as the pad token.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the causal-LM checkpoint with bfloat16 weights; device_map="auto" leaves
# device placement to the library.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")

The following generation flags are not valid and may be ignored: ['output_attentions']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['output_attentions']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.14it/s]


In [8]:
# Plan: loop over all the chunks, append each one to the current text, send it through multiple rollouts, and measure its counterfactual importance.

In [9]:
import torch
import torch.nn.functional as F
import numpy as np

In [15]:
def cosine_similarity(a, b):
    """
    Cosine similarity between each row of `a` and the single vector `b`.

    a: array of shape (n_samples, n_features)
    b: array of shape (1, n_features) or (n_features,)
    Returns: array of shape (n_samples,)

    A small constant (1e-8) is added to the denominator so all-zero rows
    produce 0 instead of a division-by-zero.
    """
    reference = b.reshape(-1)
    row_lengths = np.linalg.norm(a, axis=1, keepdims=True).flatten()
    denominators = row_lengths * np.linalg.norm(b) + 1e-8
    return np.dot(a, reference) / denominators

In [16]:
# Helper: get model output probabilities
def get_probs(model, inputs):
    """
    Run `model` on `inputs` without gradients and return probabilities as a
    NumPy array.

    model:  callable accepting the entries of `inputs` as keyword arguments
            and returning an object with a `.logits` tensor.
    inputs: mapping of keyword arguments for the model (e.g. tokenizer output).
    Returns: softmax over the last logits axis, with size-1 axes squeezed out.
             When the logits are 3-D (batch, position, vocab), as a causal LM
             produces, only the final position is kept, so a single input
             yields one vocab-sized vector instead of one row per position.
             2-D logits (batch, classes) are used as they are.
    """
    with torch.no_grad():
        logits = model(**inputs).logits
    if logits.dim() == 3:
        # Per-position logits: the distribution that follows the whole input
        # is the one at the last position.
        logits = logits[:, -1, :]
    # Cast to float32 first: NumPy has no bfloat16 dtype, so .numpy() raises
    # on the output of a model loaded with torch_dtype=torch.bfloat16.
    return torch.softmax(logits.float(), dim=-1).cpu().numpy().squeeze()

In [18]:
# Show which device the model reports being on
print(model.device)

cpu


In [17]:
# Counterfactual-importance sweep: for every chunk, compare the model's output
# distribution on the full text with its distribution when that chunk is removed.
# Assumes allsentences is a list of chunks/sentences and problem is the prompt.
num_rollouts = 100        # number of rows in the rollout matrix (denominator of the counts below)
cos_sim_threshold = 0.8   # rollouts with cosine similarity above this count as "similar"


def _final_position_probs(text):
    """Tokenize `text`, run the model once and return a 1-D probability vector.

    get_probs may return one row per input position; np.atleast_2d(...)[-1]
    keeps the final row in that case and is a no-op for a 1-D vector.
    """
    encoded = tokenizer(text, return_tensors="pt")
    return np.atleast_2d(get_probs(model, encoded))[-1]


results = []

# 1. Context WITH the chunk (original). Re-inserting chunk i at position i
#    always reproduces the full sentence list, so this text -- and therefore
#    its forward pass -- is the same for every i. Compute it once.
context_with = problem + " " + " ".join(allsentences)
probs_with = _final_position_probs(context_with)
pred_with = probs_with.argmax()

for i, chunk in enumerate(allsentences):
    print(f"Processing chunk {i+1}/{len(allsentences)}")

    # 2. Build the context WITHOUT the chunk (counterfactual)
    context_without = problem + " " + " ".join(allsentences[:i] + allsentences[i+1:])

    # 3. Rollouts. No sampling is involved, so every rollout of the same text
    #    is the same deterministic forward pass: run it once and repeat the
    #    row instead of calling the model num_rollouts times.
    #    If rollouts are ever randomized, generate them individually here.
    probs_without = _final_position_probs(context_without)
    rollout_probs = np.tile(probs_without, (num_rollouts, 1))  # shape: (num_rollouts, vocab_size)

    # 4. Cosine similarity to original
    cos_sims = cosine_similarity(rollout_probs, probs_with.reshape(1, -1)).flatten()
    similar_mask = cos_sims > cos_sim_threshold
    not_similar_mask = ~similar_mask

    # 5. Average probabilities of the dissimilar rollouts and compare to the original
    if not_similar_mask.any():
        probs_not_similar = rollout_probs[not_similar_mask].mean(axis=0)
        # F.kl_div(input=log q, target=p) evaluates KL(p || q); here p is the
        # original distribution and q the averaged counterfactual one.
        # reduction='sum' is required for 1-D inputs: 'batchmean' divides by
        # input.size(0), which would be the vocabulary size, not a batch size.
        kl = F.kl_div(
            torch.log(torch.tensor(probs_not_similar + 1e-8)),
            torch.tensor(probs_with + 1e-8),
            reduction='sum',
        ).item()
    else:
        # No rollout differed enough from the original to measure a divergence
        kl = None

    # (Optional) Fraction of rollouts whose top prediction differs from the original's
    pred_rollouts = rollout_probs.argmax(axis=1)
    accuracy_change = float(np.mean(pred_rollouts != pred_with))

    results.append({
        "chunk_index": i,
        "chunk_text": chunk,
        "counterfactual_importance_kl": kl,
        "accuracy_change": accuracy_change,
        "num_not_similar": int(not_similar_mask.sum()),
        "num_similar": int(similar_mask.sum())
    })

# Print or save results
for r in results:
    print(r)

Processing chunk 1/170


KeyboardInterrupt: 

In [None]:
import json

# Persist the per-chunk results list as indented JSON in the working directory
results_path = "counterfactual_importance_results_control.json"
with open(results_path, "w") as out_file:
    json.dump(results, out_file, indent=2)
print(f"Results saved to {results_path}")