In [82]:
# 设置环境变量
import os
import sys
sys.path.append('D:\ComputerScience\Research\PRADA\sparse_autoencoder')
# os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
# 导入库
import torch
import blobfile as bf
from experiments.utils import *
import pandas as pd
import matplotlib.pyplot as plt
from transformers import GPT2LMHeadModel, AutoTokenizer, GPT2Tokenizer, GPT2Config, set_seed, GPT2Model

In [11]:
# 下载AutoEncoder
position = "resid_post_mlp"
layer_index = 6
download_autoencoder(position, layer_index=layer_index, size=32)
download_autoencoder(position, layer_index=layer_index, size=128)

Downloading SAE from: az://openaipublic/sparse-autoencoder/gpt2-small/resid_post_mlp_v5_32k/autoencoders/6.pt
State dictionary saved to model/gpt2_sae/sae_state_32k_layer_6.pt
Downloading SAE from: az://openaipublic/sparse-autoencoder/gpt2-small/resid_post_mlp_v5_128k/autoencoders/6.pt
State dictionary saved to model/gpt2_sae/sae_state_128k_layer_6.pt


In [141]:
def feature_steering(autoencoder,x: torch.Tensor, feature_indices: list[int], feature_values: list[float]) -> torch.Tensor:
    assert len(feature_indices) == len(feature_values), "Feature indices and values must have the same length."
    # feature_values = [max(min(value, 10), -10) for value in feature_values]

    with torch.no_grad():
        # 获取原始特征表示和信息
        latents, info = autoencoder.encode(x)
        # 修改特征表示
        for index, value in zip(feature_indices, feature_values):
            print("original:", latents[:, index])
            if value > 0:
                latents[:, index] *= value
            else:
                latents[:, index] = latents[:, index] / abs(value)
            print("Modified:", latents[:, index])
            print(f"Feature {index} modified with {'+' if value >= 0 else ''}{value}")
        # 使用修改后的特征表示通过解码器生成重构输出
        modified_output = autoencoder.decode(latents, info)
    return modified_output

def calculate_error(input_tensor, reconstructed_activations) -> torch.Tensor:
    # 计算误差
    error = input_tensor - reconstructed_activations
    # 可以选择使用不同的误差度量方式，这里使用均方误差（MSE）
    normalized_mse = (reconstructed_activations - input_tensor).pow(2).sum(dim=1) / (input_tensor).pow(2).sum(dim=1)
    return normalized_mse, error


def compare_activations(tensor1, tensor2):
    difference = tensor1 - tensor2
    print("Difference between tensors:\n", difference)

    # 计算差异的统计信息
    mean_diff = torch.mean(difference)
    std_diff = torch.std(difference)
    print(f"Mean difference: {mean_diff.item()}")
    print(f"Standard deviation of difference: {std_diff.item()}")

    # 可视化差异
    difference_np = difference.numpy()
    plt.figure(figsize=(20, 5))
    plt.imshow(difference_np, cmap='coolwarm', aspect='auto')
    plt.colorbar(label='Difference')
    plt.title('Difference between Reconstructed Activations and Modified Output')
    plt.xlabel('Feature Index')
    plt.ylabel('Sample Index')
    plt.show()

def chat_with_gpt2_logits(model, tokens_id, tokenizer):
    model.eval()
    
    with torch.no_grad():
        outputs = model(tokens_id)
        logits = outputs.logits
        # 使用torch.argmax选出概率最高的token ids
        predicted_ids = torch.argmax(logits, dim=-1)
        # 解码生成的token ids
        response = tokenizer.decode(predicted_ids[0], skip_special_tokens=True)
        
    return response, logits

def chat_with_gpt2_top_k_candidates(model, tokens_id, tokenizer, top_k=10):
    model.eval()
    
    with torch.no_grad():
        outputs = model(tokens_id)
        logits = outputs.logits
        
        # 选择每个时间步上概率最高的top_k个token的logits
        top_k_logits, top_k_indices = torch.topk(logits, k=top_k, dim=-1)
        
        # 解码每个token的索引以获取token字符串
        top_k_tokens = [
            [tokenizer.decode([idx]) for idx in indices[0]] for indices in top_k_indices
        ]

    for step in range(logits.shape[1]):  # 限制打印至最多前10个token
            print(f"Step {step + 1}:")
            for i in range(top_k):
                token = tokenizer.decode([top_k_indices[0, step, i]])
                logit = top_k_logits[0, step, i].item()
                print(f"  Candidate {i + 1}: {token} (Logit: {logit})")
            print("\n")
        
    return top_k_tokens, top_k_logits

In [15]:
model, auto_tokenizer, device = load_model_hf("gpt2")
layer_index = 6
autoencoder = load_autoencoder_from_local(layer_index, device, 128)
set_seed(123)

# Activation Reconstruction

In [1]:
"""
prompt = "Are you introverted?"
feature_indices = [53912]
feature_values = [10] 
tokens_id, tokens_str, activation_cache = process_input_hf(model, auto_tokenizer, prompt)
print("Tokens ID (AutoTokenizer):", tokens_id)
print("Tokens String (AutoTokenizer):", tokens_str)
print(len(activation_cache))
activation = get_activation_hf(activation_cache, layer_index)
print(f"resid_post_mlp for layer {layer_index}:", activation.shape if activation is not None else "None")
print(activation)

latent_activations, recon_activations = encode_decode(autoencoder, activation)
mse_error, error = calculate_error(activation, recon_activations)

modified_recon_activations = feature_steering(autoencoder, activation, feature_indices, feature_values)
print("orginal modified_recon_activations:", modified_recon_activations)
print(modified_recon_activations.shape)

modified_recon_activations_new = modified_recon_activations + error
print("modified_recon_activations + error:", modified_recon_activations_new)

mse_error_after, error_after = calculate_error(activation, modified_recon_activations_new)
print(error_after)
print(mse_error_after)
"""

'\nprompt = "Are you introverted?"\nfeature_indices = [53912]\nfeature_values = [10] \ntokens_id, tokens_str, activation_cache = process_input_hf(model, auto_tokenizer, prompt)\nprint("Tokens ID (AutoTokenizer):", tokens_id)\nprint("Tokens String (AutoTokenizer):", tokens_str)\nprint(len(activation_cache))\nactivation = get_activation_hf(activation_cache, layer_index)\nprint(f"resid_post_mlp for layer {layer_index}:", activation.shape if activation is not None else "None")\nprint(activation)\n\nlatent_activations, recon_activations = encode_decode(autoencoder, activation)\nmse_error, error = calculate_error(activation, recon_activations)\n\nmodified_recon_activations = feature_steering(autoencoder, activation, feature_indices, feature_values)\nprint("orginal modified_recon_activations:", modified_recon_activations)\nprint(modified_recon_activations.shape)\n\nmodified_recon_activations_new = modified_recon_activations + error\nprint("modified_recon_activations + error:", modified_recon_

In [16]:
# 提取原始第六层
prompt = "Are you introverted?"
tokens_id, tokens_str, activation_cache = process_input_hf(model, auto_tokenizer, prompt)
print("Tokens ID (AutoTokenizer):", tokens_id)
print("Tokens String (AutoTokenizer):", tokens_str)
print(len(activation_cache))
activation = get_activation_hf(activation_cache, layer_index)
print(f"resid_post_mlp for layer {layer_index}:", activation.shape if activation is not None else "None")
print(activation)
modified_recon_activations_new = activation
mse_error_after, error_after = calculate_error(activation, modified_recon_activations_new)
print(error_after)
print(mse_error_after)

input_id tensor([[ 8491,   345, 18951, 13658,    30]])
Tokens ID (AutoTokenizer): tensor([[ 8491,   345, 18951, 13658,    30]])
Tokens String (AutoTokenizer): ['Are', 'Ġyou', 'Ġintro', 'verted', '?']
13
resid_post_mlp for layer 6: torch.Size([5, 768])
tensor([[ 0.9184,  0.1396,  0.4812,  ..., -1.7562, -0.2046,  0.3966],
        [-2.3731,  0.7637, -1.3836,  ..., -1.3632, -2.3260,  0.6030],
        [ 2.1773, -3.8999, -2.5548,  ..., -4.0262,  0.6047, -0.9181],
        [ 3.8940,  0.6625, -2.7102,  ..., -0.2636,  4.2834,  0.9351],
        [ 0.0863,  0.7447, -0.9693,  ..., -0.6241, -1.3438,  3.7102]])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
tensor([0., 0., 0., 0., 0.])


### Original Output

In [17]:
def chat_with_gpt2(model, tokens_id):
    with torch.no_grad():
        outputs = model.generate(tokens_id, max_length=20, pad_token_id=auto_tokenizer.eos_token_id)
    response = auto_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

response = chat_with_gpt2(model, tokens_id)
response_l, logits = chat_with_gpt2_logits(model, tokens_id, auto_tokenizer)
chat_with_gpt2_top_k_candidates(model, tokens_id, auto_tokenizer)
print("Original Output:", response)
print("logits type Output:", response_l)

input_id tensor([[ 8491,   345, 18951, 13658,    30]])
input_id tensor([[198]])
input_id tensor([[198]])
input_id tensor([[40]])
input_id tensor([[1101]])
input_id tensor([[407]])
input_id tensor([[13]])
input_id tensor([[314]])
input_id tensor([[1101]])
input_id tensor([[407]])
input_id tensor([[257]])
input_id tensor([[1263]])
input_id tensor([[18951]])
input_id tensor([[1851]])
input_id tensor([[13]])
input_id tensor([[ 8491,   345, 18951, 13658,    30]])
input_id tensor([[ 8491,   345, 18951, 13658,    30]])
Step 1:
  Candidate 1:  the (Logit: -29.919225692749023)
  Candidate 2:  a (Logit: -30.52708625793457)
  Candidate 3:  to (Logit: -30.872114181518555)
  Candidate 4: , (Logit: -31.00540542602539)
  Candidate 5: 
 (Logit: -31.133556365966797)
  Candidate 6:  you (Logit: -31.3023681640625)
  Candidate 7: . (Logit: -31.325265884399414)
  Candidate 8:  in (Logit: -31.418594360351562)
  Candidate 9:  that (Logit: -31.46228790283203)
  Candidate 10:  it (Logit: -31.574438095092773)



### Controlled Output

In [28]:
mse_error_after, error_after = calculate_error(activation, modified_recon_activations_new)
print(mse_error_after)
modified_activations = modified_recon_activations_new.unsqueeze(0)
print(modified_activations.shape)
print(modified_activations)
print(tokens_id)
print(tokens_str)

tensor([0., 0., 0., 0., 0.])
torch.Size([1, 5, 768])
tensor([[[ 0.9184,  0.1396,  0.4812,  ..., -1.7562, -0.2046,  0.3966],
         [-2.3731,  0.7637, -1.3836,  ..., -1.3632, -2.3260,  0.6030],
         [ 2.1773, -3.8999, -2.5548,  ..., -4.0262,  0.6047, -0.9181],
         [ 3.8940,  0.6625, -2.7102,  ..., -0.2636,  4.2834,  0.9351],
         [ 0.0863,  0.7447, -0.9693,  ..., -0.6241, -1.3438,  3.7102]]])
tensor([[ 8491,   345, 18951, 13658,    30]])
['Are', 'Ġyou', 'Ġintro', 'verted', '?']


In [19]:
from transformers.modeling_outputs import CausalLMOutputWithPast
from torch import nn
class ModifiedGPT2Model(GPT2LMHeadModel):
    def __init__(self, config):
        super().__init__(config)
        self.modified_output = None  # 用于存储第六层的原始输出
        self.temp = GPT2Model(config=config)
        self.ln_f = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_epsilon)
        
    def set_modified_output(self, output, layer_to_modify=6):
        self.modified_output = output
        self.layer_to_modify = layer_to_modify


    def forward(self, input_ids, output_hidden_states=True, **kwargs):
        print("input_shape:", input_ids)
        output = self.transformer(input_ids, output_hidden_states=True, **kwargs)
        # output = super().forward(input_ids, output_hidden_states=True, **kwargs)
        hidden_states = output[0]
        logits_before = self.lm_head(hidden_states)
        print("original final state:", hidden_states)
        print("before shape:", logits_before.shape)
        print("before:", logits_before)
        # 继续通过剩余的层
        if self.modified_output is not None:
            # 从指定层开始使用提供的激活值进行修改
            modified_states = self.modified_output
            for i in range(self.layer_to_modify, len(self.transformer.h)):
                print(f"Layer {i} original activation:", output[2][i])
                layer_module = self.transformer.h[i]
                print(f"Layer {i} modified activation:", modified_states)
                layer_outputs = layer_module(modified_states, attention_mask=None)  
                modified_states = layer_outputs[0]
                print(f"Layer {i+1} pre-view activation:", modified_states)
            # 将最终输出设置为最后一层修改后的输出
            hidden_states = modified_states
            print("final state original after ln_f: ", output[2][12] )
            print("final state modified before ln_f: ", hidden_states)
            hidden_states = self.ln_f(hidden_states)
            print("final state modified after ln_f: ", hidden_states)
            # print("error:",calculate_error(output[2][12], hidden_states))
        # 输出最后一层的hidden state给LM头
        logits = self.lm_head(hidden_states)
        #print("after shape:", logits.shape)
        #print("after:", logits)
        #mse, error = calculate_error(logits, logits_before)
        #print("error:",error)
        return CausalLMOutputWithPast(logits=logits) 

In [50]:
"""
set_seed(123)
custom_model = ModifiedGPT2Model(model.config)
custom_model.load_state_dict(model.state_dict())  # 复制权重
"""
set_seed(123)
custom_model = ModifiedGPT2Model.from_pretrained('gpt2', output_hidden_states=True)
custom_model.set_modified_output(modified_activations)
inputs = auto_tokenizer.encode("Are you introverted?", return_tensors="pt")
generated_text_ids = custom_model.generate(inputs, max_length=10, pad_token_id=auto_tokenizer.eos_token_id)
generated_text = auto_tokenizer.decode(generated_text_ids[0], skip_special_tokens=True)

print(generated_text)

Some weights of ModifiedGPT2Model were not initialized from the model checkpoint at gpt2 and are newly initialized: ['temp.h.0.attn.c_attn.bias', 'temp.h.0.attn.c_attn.weight', 'temp.h.0.attn.c_proj.bias', 'temp.h.0.attn.c_proj.weight', 'temp.h.0.ln_1.bias', 'temp.h.0.ln_1.weight', 'temp.h.0.ln_2.bias', 'temp.h.0.ln_2.weight', 'temp.h.0.mlp.c_fc.bias', 'temp.h.0.mlp.c_fc.weight', 'temp.h.0.mlp.c_proj.bias', 'temp.h.0.mlp.c_proj.weight', 'temp.h.1.attn.c_attn.bias', 'temp.h.1.attn.c_attn.weight', 'temp.h.1.attn.c_proj.bias', 'temp.h.1.attn.c_proj.weight', 'temp.h.1.ln_1.bias', 'temp.h.1.ln_1.weight', 'temp.h.1.ln_2.bias', 'temp.h.1.ln_2.weight', 'temp.h.1.mlp.c_fc.bias', 'temp.h.1.mlp.c_fc.weight', 'temp.h.1.mlp.c_proj.bias', 'temp.h.1.mlp.c_proj.weight', 'temp.h.10.attn.c_attn.bias', 'temp.h.10.attn.c_attn.weight', 'temp.h.10.attn.c_proj.bias', 'temp.h.10.attn.c_proj.weight', 'temp.h.10.ln_1.bias', 'temp.h.10.ln_1.weight', 'temp.h.10.ln_2.bias', 'temp.h.10.ln_2.weight', 'temp.h.10.mlp.

input_shape: tensor([[ 8491,   345, 18951, 13658,    30]])
original final state: tensor([[[-0.0275,  0.1067, -0.3269,  ..., -0.2302, -0.0401,  0.0264],
         [-0.0572,  0.1988, -0.3297,  ..., -0.1360, -0.3159,  0.2592],
         [ 0.3898, -1.1184,  0.0847,  ...,  0.0786, -0.1922, -0.0928],
         [ 0.3843, -0.0618, -0.9933,  ...,  0.2799,  0.7343, -0.2728],
         [ 0.3454, -0.2649, -0.2760,  ..., -0.1063,  0.1444,  0.0511]]])
before shape: torch.Size([1, 5, 50257])
before: tensor([[[ -33.8917,  -33.5870,  -36.9429,  ...,  -41.3181,  -40.5135,
           -34.1144],
         [-127.1116, -127.4170, -132.3704,  ..., -134.7474, -132.0798,
          -128.8874],
         [ -62.7075,  -63.1996,  -66.4456,  ...,  -74.8977,  -70.6860,
           -66.3801],
         [ -80.0835,  -80.9817,  -85.6755,  ...,  -92.5178,  -91.1599,
           -84.1941],
         [-132.9957, -133.6038, -134.3203,  ..., -143.4922, -143.1438,
          -128.2048]]])
Layer 6 original activation: tensor([[[ 0.9184,

In [None]:
prompt = "Are you introverted?"
tokens_id, tokens_str, activation_cache = process_input_hf(model, auto_tokenizer, prompt)
print("Tokens ID (AutoTokenizer):", tokens_id)
print("Tokens String (AutoTokenizer):", tokens_str)
print(len(activation_cache))
activation = get_activation_hf(activation_cache, layer_index)
print(f"resid_post_mlp for layer {layer_index}:", activation.shape if activation is not None else "None")
print(activation)
modified_recon_activations_new = activation
mse_error_after, error_after = calculate_error(activation, modified_recon_activations_new)
print(error_after)
print(mse_error_after)

In [130]:
from transformers.modeling_outputs import CausalLMOutputWithPast, CausalLMOutputWithCrossAttentions
class ModifiedGPT2Model(GPT2LMHeadModel):
    def __init__(self, config, layer_to_modify=6, feature_indices = [53912], feature_values = [10]):
        super().__init__(config)
        self.modified_output = None  # 用于存储第六层的原始输出
        self.ln_f = GPT2Model(config=config).from_pretrained('gpt2').ln_f
        self.transformer = GPT2Model(config=config).from_pretrained('gpt2')
        self.lm_head = GPT2LMHeadModel(config=config).from_pretrained('gpt2').lm_head
        self.feature_indices = feature_indices
        self.feature_values = feature_values
        self.layer_to_modify = layer_to_modify

    
    def set_modified_output(self, hidden_list):
        activation = hidden_list[self.layer_to_modify][0]
        # print("activation shape", activation.shape)
        latent_activations, recon_activations = encode_decode(autoencoder, activation)
        mse_error, error = calculate_error(activation, recon_activations)
        modified_recon_activations = feature_steering(autoencoder, activation, self.feature_indices, self.feature_values)
        modified_recon_activations_new = modified_recon_activations + error
        self.modified_output = modified_recon_activations_new.unsqueeze(0)
        # print(self.modified_output.shape)
    
    
    def set_modified_output_1(self, hidden_list):
        activation = hidden_list[self.layer_to_modify][0]
        #print("activation shape", activation.shape)
        modified_recon_activations_new = activation
        self.modified_output = modified_recon_activations_new.unsqueeze(0)
        #print("modified activation shape", self.modified_output.shape)

    def forward(self, input_ids=None, labels=None, use_cache=False, output_hidden_states=True, return_dict=None, **kwargs):
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        # print("input_id", input_ids)
        tf_output = self.transformer(input_ids=input_ids, use_cache=False, output_hidden_states=output_hidden_states, return_dict=return_dict, **kwargs)
        # print(len(tf_output))
        final_hidden_states = tf_output[0]
        logits_before = self.lm_head(final_hidden_states)
        # print("logits before:", logits_before)
        hidden_states_list = tf_output[1]

        self.set_modified_output(hidden_states_list)
        # 继续通过剩余的层
        if self.modified_output is not None:
            # 从指定层开始使用提供的激活值进行修改
            modified_states = self.modified_output
            for i in range(self.layer_to_modify, len(self.transformer.h)):
                layer_module = self.transformer.h[i]
                layer_outputs = layer_module(modified_states)  
                modified_states = layer_outputs[0]
            # 将最终输出设置为最后一层修改后的输出
            final_hidden_states = modified_states
            final_hidden_states = self.ln_f(final_hidden_states)
        if self.model_parallel:
            torch.cuda.set_device(self.transformer.first_device)
            final_hidden_states = final_hidden_states.to(self.lm_head.weight.device)
        # 输出最后一层的hidden state给LM头
        #print("hiddent state before", tf_output[0])
        #print("hiddent state after", final_hidden_states)
        lm_logits = self.lm_head(final_hidden_states)
        # print("logits after:", lm_logits)

        loss = None
        if labels is not None:
            # move labels to correct device to enable model parallelism
            labels = labels.to(lm_logits.device)
            # Shift so that tokens < n predict n
            shift_logits = lm_logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            # Flatten the tokens
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        if not return_dict:
            output = (lm_logits,) + tf_output[1:]
            return ((loss,) + output) if loss is not None else output

        return CausalLMOutputWithCrossAttentions(
            loss=loss,
            logits=lm_logits,
            past_key_values=tf_output.past_key_values,
            hidden_states=final_hidden_states,
            attentions=tf_output.attentions,
            cross_attentions=tf_output.cross_attentions,
        )

In [152]:

set_seed(123)
feature_indices = [53912] 
feature_values = [5]
custom_model = ModifiedGPT2Model.from_pretrained('gpt2', output_hidden_states=True, layer_to_modify=6, feature_indices = feature_indices, feature_values = feature_values)
inputs = auto_tokenizer.encode("Are you introverted?", return_tensors="pt")

generated_text_ids = custom_model.generate(inputs, max_length=50, pad_token_id=auto_tokenizer.eos_token_id)
generated_text = auto_tokenizer.decode(generated_text_ids[0], skip_special_tokens=True)

print(generated_text)

original: tensor([0.0000, 0.0000, 4.2954, 7.9834, 0.0000])
Modified: tensor([ 0.0000,  0.0000, 21.4770, 39.9168,  0.0000])
Feature 53912 modified with +5
original: tensor([0.0000, 0.0000, 4.2954, 7.9834, 0.0000, 0.6225])
Modified: tensor([ 0.0000,  0.0000, 21.4770, 39.9168,  0.0000,  3.1124])
Feature 53912 modified with +5
original: tensor([0.0000, 0.0000, 4.2954, 7.9834, 0.0000, 0.6225, 0.6823])
Modified: tensor([ 0.0000,  0.0000, 21.4770, 39.9168,  0.0000,  3.1124,  3.4116])
Feature 53912 modified with +5
original: tensor([0.0000, 0.0000, 4.2954, 7.9834, 0.0000, 0.6225, 0.6823, 0.0000])
Modified: tensor([ 0.0000,  0.0000, 21.4770, 39.9168,  0.0000,  3.1124,  3.4116,  0.0000])
Feature 53912 modified with +5
original: tensor([0.0000, 0.0000, 4.2954, 7.9834, 0.0000, 0.6225, 0.6823, 0.0000, 0.0000])
Modified: tensor([ 0.0000,  0.0000, 21.4770, 39.9168,  0.0000,  3.1124,  3.4116,  0.0000,
         0.0000])
Feature 53912 modified with +5
original: tensor([0.0000, 0.0000, 4.2954, 7.9834, 0.

In [31]:
generated_text_l, logits = chat_with_gpt2_logits(custom_model, inputs, auto_tokenizer)
chat_with_gpt2_top_k_candidates(custom_model, inputs, auto_tokenizer)
print("logits type Steered Output:", generated_text_l)

input_id tensor([[ 8491,   345, 18951, 13658,    30]])
input_id tensor([[ 8491,   345, 18951, 13658,    30]])
Step 1:
  Candidate 1:  the (Logit: -29.919227600097656)
  Candidate 2:  a (Logit: -30.527088165283203)
  Candidate 3:  to (Logit: -30.872114181518555)
  Candidate 4: , (Logit: -31.005409240722656)
  Candidate 5: 
 (Logit: -31.133554458618164)
  Candidate 6:  you (Logit: -31.302366256713867)
  Candidate 7: . (Logit: -31.325267791748047)
  Candidate 8:  in (Logit: -31.41859245300293)
  Candidate 9:  that (Logit: -31.46228790283203)
  Candidate 10:  it (Logit: -31.574438095092773)


Step 2:
  Candidate 1:  a (Logit: -119.24162292480469)
  Candidate 2:  sure (Logit: -119.3289794921875)
  Candidate 3:  going (Logit: -119.415283203125)
  Candidate 4:  ready (Logit: -119.50483703613281)
  Candidate 5:  looking (Logit: -120.21624755859375)
  Candidate 6:  still (Logit: -120.22654724121094)
  Candidate 7:  interested (Logit: -120.45963287353516)
  Candidate 8:  using (Logit: -120.46376