In [1]:
!pip install torch transformers datasets numpy scikit-learn matplotlib pyyaml huggingface-cli



In [2]:
!pip install accelerate bitsandbytes hf-transfer



In [3]:
import os

# Enable hf_transfer BEFORE anything imports huggingface_hub (transformers and
# datasets both do). The hub library reads this variable when it is imported,
# so assigning it after those imports leaves accelerated downloads disabled.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import sys
import sqlite3
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import yaml
from datasets import load_dataset
from tqdm import tqdm
import accelerate
import bitsandbytes as bnb
import json
import multiprocessing
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Setup: where activations are stored, which model is probed, and which
# dataset supplies the prompts.
DATABASE_PATH = './activations.db'
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
DATASET_NAME = "Doctor-Shotgun/capybara-sharegpt"

# 4-bit NF4 quantization with bfloat16 compute, passed to from_pretrained().
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize database
def setup_database(db_path=None):
    """Create the ``activations`` table if it does not already exist.

    The table has three columns: ``layer`` (TEXT), ``neuron`` (INTEGER) and
    ``activation`` (REAL). Calling this repeatedly is harmless because the
    statement uses ``CREATE TABLE IF NOT EXISTS``.

    Args:
        db_path: Path of the SQLite file to initialise. When omitted, the
            module-level ``DATABASE_PATH`` is used.
    """
    conn = sqlite3.connect(DATABASE_PATH if db_path is None else db_path)
    try:
        conn.execute(
            '''CREATE TABLE IF NOT EXISTS activations (layer TEXT, neuron INTEGER, activation REAL)'''
        )
        conn.commit()
    finally:
        # Always release the handle, even when the DDL statement fails.
        conn.close()

# Record activations into database
def record_activations_to_db(activations, db_path=None):
    """Append a batch of activation rows to the ``activations`` table.

    Args:
        activations: Iterable of ``(layer, neuron, activation)`` tuples, one
            per row to insert. An empty iterable inserts nothing.
        db_path: Path of the SQLite file to write to. When omitted, the
            module-level ``DATABASE_PATH`` is used.

    Raises:
        sqlite3.Error: If the insert fails (for example, the table is
            missing). Nothing is committed in that case.
    """
    conn = sqlite3.connect(DATABASE_PATH if db_path is None else db_path)
    try:
        conn.executemany(
            "INSERT INTO activations (layer, neuron, activation) VALUES (?, ?, ?)",
            activations,
        )
        conn.commit()
    finally:
        # Close on every path so a failed insert does not leak the handle.
        conn.close()

# Load model and tokenizer.
# Only the main process needs them. analyze_activations() uses a 'spawn' pool,
# and when this file runs as a script each spawned worker re-imports it; an
# unguarded load would make every worker fetch and load its own copy of the
# model even though workers only run SQLite queries and tensor statistics.
if multiprocessing.parent_process() is None:
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, quantization_config=bnb_config, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model.eval()

def process_dataset_and_record_activations(dataset):
    """Run each training conversation through the model and store activations.

    For every example in ``dataset["train"]`` the ``value`` fields of its
    ``conversations`` turns are joined with spaces, tokenized, and passed
    through the module-level ``model`` with ``output_hidden_states=True``.
    For each entry of the returned ``hidden_states`` the values at batch
    index 0, token position 0 are stored as
    ``("layer_<idx>", neuron_idx, activation)`` rows through
    ``record_activations_to_db``. Rows are flushed once at least 10000 are
    pending and once more at the end.

    NOTE(review): only token position 0 is recorded. In a causal LM that
    position presumably sees just the first token (often a BOS marker), so
    the stored values may barely vary between examples — confirm this is the
    intended position.

    Args:
        dataset: Mapping with a ``"train"`` split whose examples carry a
            ``"conversations"`` list of dicts with a ``"value"`` key.
    """
    pending = []
    train_split = dataset["train"]
    # The context manager closes the bar even if a forward pass raises.
    with tqdm(total=len(train_split), desc="Processing dataset", unit="example") as progress_bar:
        for example in train_split:
            text = " ".join(turn["value"] for turn in example["conversations"])
            # The tokenizer returns CPU tensors; move them to the device the
            # model's first parameters live on before the forward pass.
            inputs = tokenizer(text, return_tensors="pt", truncation=True).to(model.device)
            with torch.no_grad():
                outputs = model(**inputs, output_hidden_states=True)
            for layer_idx, layer_activations in enumerate(outputs.hidden_states):
                layer_name = f"layer_{layer_idx}"
                # One device-to-host copy per layer instead of one .item()
                # synchronisation per neuron; the float values are unchanged.
                values = layer_activations[0, 0].float().cpu().tolist()
                pending.extend(
                    (layer_name, neuron_idx, value)
                    for neuron_idx, value in enumerate(values)
                )
            if len(pending) >= 10000:
                record_activations_to_db(pending)
                pending = []
            progress_bar.update(1)
    if pending:
        record_activations_to_db(pending)

# Analyze activations
import torch
import multiprocessing
from tqdm import tqdm
import sqlite3
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def analyze_layer(layer, batch_size, num_bins, device):
    """Decide whether one layer's stored activations mark it as beneficial.

    All ``activation`` values recorded for ``layer`` are read from the SQLite
    database in chunks of ``batch_size`` rows, converted to absolute values,
    and z-scored with their own mean and standard deviation. The layer is
    selected when at least 5% of the z-scored values are greater than or equal
    to ``proportion_threshold``, where ``proportion_threshold`` is the index
    of the first histogram bin at which the cumulative share reaches 0.95,
    divided by ``num_bins``.

    NOTE(review): ``proportion_threshold`` is a fraction of the histogram
    range while the values it is compared against are z-scores; the two are
    in different units. Behaviour is kept as-is — confirm the intended test.

    Args:
        layer: Layer label as stored in the database, e.g. ``"layer_7"``.
        batch_size: Number of rows fetched from the cursor per chunk.
        num_bins: Number of histogram bins over ``[0, max(|activation|)]``.
        device: ``torch.device`` on which the statistics are computed.

    Returns:
        The integer after the underscore in ``layer`` when the layer is
        selected, otherwise ``None``. ``None`` is also returned when the layer
        has no rows or when any error occurs (the error is logged).
    """
    conn = None
    try:
        conn = sqlite3.connect(DATABASE_PATH, timeout=60)  # wait up to 60 s on a locked database
        c = conn.cursor()

        c.execute("SELECT COUNT(*) FROM activations WHERE layer = ?", (layer,))
        total_rows = c.fetchone()[0]
        if total_rows == 0:
            # Nothing to analyse; report it plainly rather than via a
            # downstream tensor error.
            logger.warning("No activations stored for layer %s", layer)
            return None
        num_batches = (total_rows + batch_size - 1) // batch_size

        # Stream one result set in chunks. LIMIT/OFFSET paging would rescan
        # the skipped rows for every page and, without ORDER BY, has no
        # guaranteed row order between pages.
        c.execute("SELECT activation FROM activations WHERE layer = ?", (layer,))
        layer_activations = []
        for _ in tqdm(range(num_batches), desc=f"Processing layer {layer}", unit="batch"):
            layer_activations.extend(row[0] for row in c.fetchmany(batch_size))

        activations_tensor = torch.tensor(layer_activations, dtype=torch.float32).to(device)
        abs_activations = torch.abs(activations_tensor)
        mean_abs_activation = torch.mean(abs_activations)
        std_abs_activation = torch.std(abs_activations)

        # Cumulative histogram of |activation|; find the first bin at which
        # 95% of the values are covered.
        hist = torch.histc(abs_activations, bins=num_bins, min=0, max=abs_activations.max())
        cum_hist = torch.cumsum(hist, dim=0) / abs_activations.numel()
        proportion_threshold = (cum_hist >= 0.95).nonzero(as_tuple=True)[0][0].item() / num_bins

        normalized_activations = (abs_activations - mean_abs_activation) / std_abs_activation
        high_activations = normalized_activations[normalized_activations >= proportion_threshold]
        proportion = high_activations.numel() / normalized_activations.numel()

        if proportion >= 0.05:
            return int(layer.split('_')[1])
        return None
    except Exception as e:
        # Best effort: a failing layer is logged and skipped so the other
        # workers can still finish.
        logger.exception(f"Error processing layer {layer}: {str(e)}")
        return None
    finally:
        if conn:
            conn.close()

def analyze_activations(chunk_size=100000, num_bins=100, num_processes=35):
    """Analyze every recorded layer in parallel and return the selected ones.

    The distinct ``layer`` labels are read from the database and each one is
    handed to ``analyze_layer`` in a 'spawn' process pool.

    NOTE(review): with the 'spawn' start method the workers must be able to
    import ``analyze_layer``; this presumably fails when the function is
    defined only inside an interactive notebook session — confirm how this is
    meant to be run.

    Args:
        chunk_size: Rows fetched per chunk inside each worker (passed to
            ``analyze_layer`` as ``batch_size``).
        num_bins: Histogram bin count passed to ``analyze_layer``.
        num_processes: Upper bound on worker processes; the pool never gets
            more workers than there are layers. ``None`` lets
            ``multiprocessing`` choose.

    Returns:
        Sorted list of unique integer layer indices for which
        ``analyze_layer`` returned a value; empty when nothing was recorded.
    """
    from functools import partial

    conn = sqlite3.connect(DATABASE_PATH)
    try:
        layers = [
            row[0]
            for row in conn.execute("SELECT DISTINCT layer FROM activations ORDER BY layer")
        ]
    finally:
        conn.close()

    if not layers:
        return []

    # Build the device once instead of once per layer.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    worker = partial(analyze_layer, batch_size=chunk_size, num_bins=num_bins, device=device)
    pool_size = num_processes if num_processes is None else min(num_processes, len(layers))

    with multiprocessing.get_context('spawn').Pool(processes=pool_size) as pool:
        # imap yields results as they complete (in input order), so the bar
        # reflects real progress; starmap would only return once all layers
        # were already finished.
        results = list(tqdm(
            pool.imap(worker, layers),
            total=len(layers),
            desc="Analyzing layers"
        ))

    return sorted({layer for layer in results if layer is not None})

# Generate configuration based on analysis
def generate_config(beneficial_layers, step=4, model_name=None):
    """Build a passthrough merge configuration from the selected layers.

    ``beneficial_layers`` is cut into consecutive chunks of ``step`` entries
    (the last chunk may be shorter). Each chunk becomes one slice whose
    ``layer_range`` is ``[first entry, last entry]`` of that chunk.

    NOTE(review): the chunks follow list position, not layer contiguity, so a
    chunk such as ``[1, 2, 10, 11]`` yields the range ``[1, 11]``. Also confirm
    whether the consuming tool treats the range end as inclusive; if it is
    exclusive, the last layer of every slice is dropped.

    Args:
        beneficial_layers: Sequence of integer layer indices, normally sorted.
        step: Number of entries grouped into one slice. Must be >= 1.
        model_name: Model identifier written into every slice. When omitted,
            the module-level ``MODEL_NAME`` is used.

    Returns:
        Dict with ``dtype`` ``"bfloat16"``, ``merge_method`` ``"passthrough"``
        and a ``slices`` list (empty when ``beneficial_layers`` is empty).

    Raises:
        ValueError: If ``step`` is smaller than 1.
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    source_model = MODEL_NAME if model_name is None else model_name
    # Every chunk produced below holds at least one element, so no emptiness
    # check is needed before indexing its first and last entries.
    chunks = (
        beneficial_layers[start:start + step]
        for start in range(0, len(beneficial_layers), step)
    )
    return {
        "dtype": "bfloat16",
        "merge_method": "passthrough",
        "slices": [
            {
                "sources": [{
                    "model": source_model,
                    "layer_range": [chunk[0], chunk[-1]],
                }]
            }
            for chunk in chunks
        ],
    }

# Main execution flow
def main():
    """Run the whole pipeline from activation recording to YAML output.

    In order: create the SQLite table, load ``DATASET_NAME``, record
    activations for it, analyze them to obtain the selected layer indices
    (printed to stdout), build the merge configuration, and write it to
    ``model_config.yaml`` in the current working directory.
    """
    print("Setting up database...")
    setup_database()

    print("Loading dataset...")
    corpus = load_dataset(DATASET_NAME)

    print("Processing dataset and recording activations...")
    process_dataset_and_record_activations(corpus)

    print("Analyzing activations...")
    selected_layers = analyze_activations()
    print(selected_layers)

    print("Generating configuration...")
    merge_config = generate_config(selected_layers)

    # Block-style YAML (no inline flow collections) for readability.
    with open('model_config.yaml', 'w') as config_file:
        yaml.dump(merge_config, config_file, default_flow_style=False)
    print("Configuration generated and saved to model_config.yaml.")


# Entry-point guard; it also keeps 'spawn' workers from re-running the pipeline.
if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm
Downloading shards:   0%|          | 0/3 [00:29<?, ?it/s]


KeyboardInterrupt: 