LLM控制面板

In [1]:
from model import *
from IPython.display import display

model_list = {
    "gpt2": "/home/cs/yangyuchen/guoyiqiu/my_models/gpt2",
    "llama_13b": "/home/cs/yangyuchen/yushengliao/Medical_LLM/llama-13b",
    "llama-2-7b-chat": "/home/cs/yangyuchen/yushengliao/Medical_LLM/llama-2-7b-chat-hugging",
    "llama-2-13b-chat": "/home/cs/yangyuchen/guoyiqiu/my_models/Llama-2-13b-chat-ms",
    "vicuna_7b": "/home/cs/yangyuchen/yushengliao/Medical_LLM/vicuna-7b/",
    "internlm-chat-7b": "/home/cs/yangyuchen/yushengliao/Medical_LLM/internlm-chat-7b",
    "internlm-chat-20b": "/home/cs/yangyuchen/guoyiqiu/my_models/internlm-chat-20b",
    "vicuna-33b-v1.3": "/home/cs/yangyuchen/guoyiqiu/my_models/models--lmsys--vicuna-33b-v1.3/snapshots/ef8d6becf883fb3ce52e3706885f761819477ab4",
}

panel = LLMPanel(model_list, chat_template=INTERNLM_TEMPLATE)
display(panel)

LLMPanel(children=(HBox(children=(Dropdown(description='Model:', options=(('gpt2', '/home/cs/yangyuchen/guoyiq…

You are attempting to use Flash Attention 2.0 with a model initialized on CPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

LAT on honesty

In [28]:
import torch
import pyecharts.options as opts
from pyecharts.charts import Bar, Line, Scatter
import json
from model import *
from sklearn.decomposition import PCA
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import numpy as np
import random

random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)
    

def calcu_score(neuron_act, reading_vectors, mean_vector, std_vector):
    """
    reading_vectors: [layer, hidden_size]
    mean_vector: [layer * hidden_size]
    std_vector: [layer * hidden_size]
    """
    neuron_act = neuron_act.transpose(0,1) # [seq_len, layer, hidden_size]
    seq_len, layer, hidden_size = neuron_act.shape
    neuron_act = neuron_act.reshape(seq_len, layer * hidden_size) # [seq_len, layer * hidden_size]
    neuron_act = (neuron_act - mean_vector) / std_vector
    neuron_act = neuron_act.reshape(seq_len, layer, hidden_size) # [seq_len, layer, hidden_size]
    scores = (neuron_act * reading_vectors.repeat(seq_len, 1, 1)).sum(-1) # [seq_len, layer]
    return scores.mean()


def compute_reading_vectors(neuron_acts):
    diff = []
    for (act1,act2) in neuron_acts:
        l = min(act1.shape[1],act2.shape[1])
        act1 = act1[:,:l,:]
        act2 = act2[:,:l,:]
        diff.append(act1 - act2)
    diff = torch.cat(diff, dim=1).transpose(0,1) # [sum(seq_len), layer, hidden_size]
    sample_size,n_layer,hidden_size = diff.shape
    diff = diff.reshape(sample_size, -1).numpy() # [sample_size, layer * hidden_size]
    mean_vector = np.mean(diff, axis=0)
    std_vector = np.std(diff, axis=0)
    diff = (diff - mean_vector) / std_vector
    pca = PCA(n_components=1)
    pca.fit(diff)
    reading_vectors = pca.components_[0].reshape(n_layer, hidden_size) # [layer, hidden_size]
    # print('reading_vectors: ', reading_vectors.shape)
    reading_vectors = torch.from_numpy(reading_vectors)
    mean_vector = torch.from_numpy(mean_vector)
    std_vector = torch.from_numpy(std_vector)
    return reading_vectors, mean_vector, std_vector, pca.explained_variance_ratio_[0]


def collect_neuron_acts(mt, stimuli_dst, capture_window, local_bsz=32):
    hook_configs = [LLMHookerConfig(module_name='block', layer=l) for l in range(mt.n_layer)]
    neuron_acts = []
    data_bsz = local_bsz // 2
    stimuli_dst = [stimuli_dst[i:i+data_bsz] for i in range(0, len(stimuli_dst), data_bsz)]
    for batch_pairs in tqdm(stimuli_dst):
        pairs = []
        for pair in batch_pairs:
            pairs += pair
        prompt_lens = [len(mt.tok(s['input'])['input_ids']) for s in pairs]
        seq_lens = [len(mt.tok(s['input']+s['output'])['input_ids']) for s in pairs]
        with PaddingSide(mt.tok, 'right'):
            input_ids = mt.tok([s['input']+s['output'] for s in pairs], return_tensors='pt', padding=True)['input_ids']
        with torch.no_grad(), LLMHooker(mt, hook_configs) as hooker:
            mt.model(input_ids=input_ids.to(mt.model.device))
            sentences_repr = torch.stack([h.outputs[0] for h in hooker.hooks]).transpose(0,1) # [bsz, layer, seq_len, hidden_size]
        batch_neuron_acts = []
        for i,repr in enumerate(sentences_repr):
            prompt_len = prompt_lens[i]
            seq_len = seq_lens[i]
            start = prompt_len + capture_window[0] if capture_window[0] >= 0 else seq_len - capture_window[0]
            end = prompt_len + capture_window[1] if capture_window[1] >= 0 else seq_len - capture_window[1] + 1
            batch_neuron_acts.append(repr[:,start:end,:])
        batch_neuron_acts = [[batch_neuron_acts[i],batch_neuron_acts[i+1]] for i in range(0, len(batch_neuron_acts), 2)]
        neuron_acts.extend(batch_neuron_acts)
    return neuron_acts # [layer, window_size, hidden_size]


def full_pipeline(mt, train_dst, test_dst, capture_window, compare_window, local_bsz=64):
    print('collecting train neuron_acts')
    neuron_acts = collect_neuron_acts(mt, train_dst, capture_window=capture_window,local_bsz=local_bsz)
    print('computing reading_vectors')
    reading_vectors, mean_vector, std_vector, importance = compute_reading_vectors(neuron_acts)
    print('collecting test neuron_acts')
    test_neuron_acts = collect_neuron_acts(mt, test_dst, capture_window=compare_window,local_bsz=local_bsz)
    print('calculating scores')
    scores = [[calcu_score(tna, reading_vectors, mean_vector, std_vector),calcu_score(fna, reading_vectors, mean_vector, std_vector)] for (tna, fna) in test_neuron_acts]
    mean_diff = np.mean([s[1]-s[0] for s in scores])
    acc = sum([1 for s in scores if s[0]<s[1]])/len(scores)
    return acc, mean_diff, importance



tf_dst = json.load(open("data/true_false_dataset.json"))
random.shuffle(tf_dst)

# IID_Hard
reserved_topic = "Medical"
train_dst_iid_hard = [[td,fd] for (td,fd) in tf_dst if td['topic']!= "Medical"]
test_dst_iid_hard = [[td,fd] for (td,fd) in tf_dst if td['topic']== "Medical"]
# IID_Weak
train_dst_iid_weak = tf_dst[:len(train_dst_iid_hard)]
test_dst_iid_weak = tf_dst[len(train_dst_iid_hard):]
# OOD
reserved_topic = "Medical"
prompt = "USER:Tell me a fact.\nAssistant:"
train_dst_ood = [[td,fd] for (td,fd) in tf_dst if td['topic']!= "Medical"]
test_dst_ood = [[dict(input=prompt,output=td['output'],topic=td['topic'],label=True),
                 dict(input=prompt,output=fd['output'],topic=fd['topic'],label=False)] 
                for (td,fd) in tf_dst if td['topic']== "Medical"]

config = {
    "mt": panel.mt,
    "train_dst": train_dst_iid_hard,
    "test_dst": test_dst_iid_hard,
    "capture_window": (0,-1),
    "compare_window": (0,-1),
    "local_bsz": 64,
}

acc, mean_diff, importance = full_pipeline(**config)
print(f"importance: {importance:.4f}\nacc: {acc:.4f},\nmean_diff: {mean_diff:.4f}")

collecting train neuron_acts


  0%|          | 0/17 [00:00<?, ?it/s]

computing reading_vectors
collecting test neuron_acts


  0%|          | 0/4 [00:00<?, ?it/s]

calculating scores
importance: 0.0322
acc: 0.0408,
mean_diff: -0.3358
