<a href="https://colab.research.google.com/github/Reusezer/Neuron-activations/blob/main/weight_Qwen8B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers accelerate --quiet

import os
import torch
import pandas as pd
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from accelerate import init_empty_weights, infer_auto_device_map # load_checkpoint_in_model is not needed with the new approach

# Checkpoint to inspect and the directory that receives any weights the
# device map sends to disk.
model_name = "Qwen/Qwen3-8B"
offload_dir = "offload"
os.makedirs(offload_dir, exist_ok=True)

# Step 1: Load the config. It is used to build the weightless model below and
# is passed again to the real load.
config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)

# Instantiate the architecture without allocating parameter storage so the
# device map can be inferred from module sizes alone.
with init_empty_weights():
    dummy_model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)

# Ask the model which module classes must stay on a single device instead of
# hard-coding a name: "QWenBlock" belongs to the first-generation Qwen remote
# code and matches nothing in Qwen3. The fallback is the decoder-layer class
# name used by the transformers Qwen3 implementation.
no_split_module_classes = (
    getattr(dummy_model, "_no_split_modules", None) or ["Qwen3DecoderLayer"]
)

# Keep as much as fits within the CPU budget; whatever does not fit is mapped
# to "disk". `dtype` is set to the dtype used for the real load so the size
# estimate matches what will actually be held in memory.
device_map = infer_auto_device_map(
    dummy_model,
    max_memory={"cpu": "32GiB"},
    no_split_module_classes=no_split_module_classes,
    dtype=torch.float16,
)
del dummy_model  # only needed for the size estimate

# Step 2: Load the checkpoint in one call. The keyword arguments are gathered
# first so the placement/offload settings can be read at a glance:
#   - config:         the config object loaded earlier
#   - device_map:     the inferred module-to-device placement
#   - offload_folder: where weights mapped to disk are written
#   - torch_dtype:    load the weights as float16
load_kwargs = dict(
    config=config,
    device_map=device_map,
    offload_folder=offload_dir,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)

# Step 3: Load the tokenizer published with the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)


# %%
# Load the CSV that lists the neurons to extract. The loop below reads its
# `layer` and `neuron_index` columns and converts each value with int().
df = pd.read_csv("Qwen8B-Crows-bias.csv")

weight_vectors = []

# Walk the two needed columns directly rather than df.iterrows(), which
# builds a full Series for every row.
for layer_value, neuron_value in zip(df["layer"], df["neuron_index"]):
    layer = int(layer_value)
    neuron_idx = int(neuron_value)

    down_proj_weight = model.model.layers[layer].mlp.down_proj.weight

    # Accelerate materialises disk-offloaded parameters only around a forward
    # call; plain attribute access returns a "meta" placeholder with no data,
    # and copying it to CPU fails. Stop with an actionable message instead.
    if down_proj_weight.device.type == "meta":
        raise RuntimeError(
            f"Layer {layer} down_proj weights are offloaded (meta tensor) and "
            "cannot be read directly. Raise the CPU memory budget so the "
            "layer stays in RAM, or load the weights from the offload folder."
        )

    # Take column `neuron_idx` of the projection matrix as a plain Python
    # list. Assumes down_proj is an nn.Linear, whose weight is laid out as
    # (out_features, in_features) -- TODO confirm for this architecture.
    weight_vec = down_proj_weight[:, neuron_idx].detach().cpu().tolist()
    weight_vectors.append(weight_vec)

# Attach the extracted vectors to the original rows and write the result.
# When nothing was collected, only a warning is printed and no file is written.
if not weight_vectors:
    print("⚠️ No weight vectors were extracted. Check your CSV and loop logic.")
else:
    # The column count is taken from the first vector.
    weight_dim = len(weight_vectors[0])
    column_labels = list(map("neuron_weight_{}".format, range(weight_dim)))
    weight_df = pd.DataFrame(weight_vectors, columns=column_labels)

    # Side-by-side concatenation: original columns first, weights after.
    df_combined = pd.concat([df, weight_df], axis="columns")

    # NOTE(review): the file name says "Qwen3B" although the model loaded in
    # this script is Qwen/Qwen3-8B -- confirm the intended output name.
    df_combined.to_csv("Qwen3B-Crows-with-weights.csv", index=False)

    print("✅ Saved: Qwen3B-Crows-with-weights.csv")