<a href="https://colab.research.google.com/github/Reusezer/Neuron-activations/blob/main/deepseek_ai_DeepSeek_R1_Distill_Llama_8B_weight.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required packages
!pip install transformers accelerate --quiet

import os
import torch
import pandas as pd
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from accelerate import init_empty_weights, infer_auto_device_map

# -----------------------
# Step 1: Set model name and prepare environment
# -----------------------
model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# Directory passed to from_pretrained as offload_folder in Step 4.
offload_dir = "offload"
os.makedirs(offload_dir, exist_ok=True)

# -----------------------
# Step 2: Load config to inspect structure
# -----------------------
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint
# to run locally; confirm it is actually required for this model.
config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)

# -----------------------
# Step 3: Infer device map with empty model
# -----------------------
# Build the architecture without allocating real weights so the placement can
# be planned before anything is loaded.
with init_empty_weights():
    dummy_model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)

device_map = infer_auto_device_map(
    dummy_model,
    max_memory={"cpu": "32GiB"},
    no_split_module_classes=["LlamaDecoderLayer"],  # Use class from DeepSeek architecture
    # Size the plan with the same dtype the weights are loaded in (Step 4);
    # otherwise the estimate follows the empty model's parameter dtype, which
    # can differ from the real footprint.
    dtype=torch.float16,
)
# The empty model was only needed to compute the placement.
del dummy_model

# -----------------------
# Step 4: Load model with inferred device map
# -----------------------
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=config,
    device_map=device_map,
    offload_folder=offload_dir,
    torch_dtype=torch.float16,  # keep in sync with dtype= in Step 3
    trust_remote_code=True
)

# -----------------------
# Step 5: Load tokenizer
# -----------------------
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# -----------------------
# Step 6: Load CSV file
# -----------------------
# Each row must provide 'layer' and 'neuron_index'; together they select one
# column of that layer's mlp.down_proj weight matrix.
df = pd.read_csv("/content/deepseek-ai:DeepSeek-R1-Distill-Llama-8B.csv")

decoder_layers = model.model.layers
# Length of every extracted vector (number of rows of down_proj.weight).
# Resolved once, up front, so the failure placeholder never has to index a
# layer that may itself be the cause of the failure.
# Assumes all layers share this dimension -- TODO confirm for other models.
weight_dim = decoder_layers[0].mlp.down_proj.weight.shape[0]

weight_vectors = []
extracted_count = 0

for idx, row in df.iterrows():
    # Best-effort per row: parsing and lookup failures are reported and
    # replaced by a placeholder so one bad row does not abort the whole run.
    try:
        layer = int(row['layer'])
        neuron_idx = int(row['neuron_index'])
        # Access and retrieve neuron weight vector
        weight_vec = decoder_layers[layer].mlp.down_proj.weight[:, neuron_idx].detach().cpu().tolist()
    except Exception as e:
        # Report the raw cell values: the parsed ints may not exist if
        # parsing is what failed.
        print(f"❗ Error at row {idx} (layer {row.get('layer')}, neuron {row.get('neuron_index')}): {e}")
        weight_vectors.append([None] * weight_dim)  # Placeholder
    else:
        weight_vectors.append(weight_vec)
        extracted_count += 1

# -----------------------
# Step 7: Save results
# -----------------------
# Save as long as at least one row was extracted; failed rows keep their
# placeholder (empty) cells instead of discarding the successful ones.
if extracted_count:
    column_labels = [f"neuron_weight_{i}" for i in range(weight_dim)]
    # Reuse df's index so the column-wise concat lines rows up by label.
    weight_df = pd.DataFrame(weight_vectors, columns=column_labels, index=df.index)
    df_combined = pd.concat([df, weight_df], axis=1)
    df_combined.to_csv("deepseek-ai-DeepSeek-R1-Distill-Llama-8B-with-weights.csv", index=False)
    print("✅ Saved: deepseek-ai-DeepSeek-R1-Distill-Llama-8B-with-weights.csv")
else:
    print("⚠️ No valid weight vectors were extracted.")
