In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path

notebook_path = Path().absolute()
sys.path.append(str(notebook_path.parent))

In [3]:
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from neural_controllers import NeuralController
import utils

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
model_type = 'llama'

if model_type=='llama':
    model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

    language_model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="cuda"
    )

    use_fast_tokenizer = "LlamaForCausalLM" not in language_model.config.architectures
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=use_fast_tokenizer, padding_side="left", legacy=False)
    model_name='llama_3_8b_it'
    assistant_tag = '<|start_header_id|>assistant<|end_header_id|>'
    
elif model_type=='gemma':

    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
    language_model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2-9b-it",
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    model_name='gemma_2_9b_it'
    
tokenizer.pad_token_id = 0 if tokenizer.pad_token_id is None else tokenizer.pad_token_id

Loading checkpoint shards: 100%|██████████| 4/4 [00:09<00:00,  2.45s/it]


In [5]:
data_dir = "../data/poetry"

dataset = utils.poetry_dataset(data_dir=data_dir, tokenizer=tokenizer, assistant_tag=assistant_tag)

train 200 test 0
train 200 test 0


In [None]:
concept_types = ['prose', 'poetry']

controllers = {}
for concept_type in tqdm(concept_types):
    
    other_type = [k for k in concept_types if k != concept_type][0]
    
    train_data = dataset[concept_type]['train']
    test_data = dataset[concept_type]['test']
        
    controller = NeuralController(
        language_model,
        tokenizer,
        rfm_iters=8,
        batch_size=2,
        control_method='logistic'
    )
    
    
    controller.compute_directions(train_data['inputs'], train_data['labels'])
    
    controllers[concept_type] = controller

# print(f"Training controller for 'prose'")
# print(dataset['prose']['train'])

  0%|          | 0/2 [00:00<?, ?it/s]

n_components: 5
Hidden layers KA: [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31]
Hidden layers: [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31]

Controller hyperparameters:
control_method       : logistic
rfm_iters            : 8
forward_batch_size   : 2
M_batch_size         : 2048
n_components         : 5
calibrate            : False

Getting activations from forward passes


100%|██████████| 80/80 [00:09<00:00,  8.76it/s]


Getting activations from forward passes


100%|██████████| 20/20 [00:02<00:00,  8.81it/s]


train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y sh



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y sh



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [

100%|██████████| 31/31 [00:02<00:00, 11.65it/s]


Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
Computing signs


100%|██████████| 31/31 [00:00<00:00, 20570.07it/s]
 50%|█████     | 1/2 [00:14<00:14, 14.15s/it]

n_components: 5
Hidden layers KA: [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31]
Hidden layers: [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31]

Controller hyperparameters:
control_method       : logistic
rfm_iters            : 8
forward_batch_size   : 2
M_batch_size         : 2048
n_components         : 5
calibrate            : False

Getting activations from forward passes


100%|██████████| 80/80 [00:09<00:00,  8.79it/s]


Getting activations from forward passes


100%|██████████| 20/20 [00:02<00:00,  8.80it/s]


train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression




Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]




Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]




Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]




Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
De



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]




Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]




Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]




Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y sh



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]




Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [



Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
De



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y sh



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40



Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 1.0, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]




Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 0.9973958333333334, C: 100
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]




Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 0.9947916666666667, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]


100%|██████████| 31/31 [00:04<00:00,  6.31it/s]

Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Logistic probe auc: 0.9921875, C: 1000
train X shape: torch.Size([160, 4096]) train y shape: torch.Size([160, 1]) val X shape: torch.Size([40, 4096]) val y shape: torch.Size([40, 1])
Training logistic regression
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique values: [0. 1.]
Debug: y_true shape: (40,), pred_proba shape: (40,)
Debug: y_true unique val


100%|██████████| 31/31 [00:00<00:00, 22082.78it/s]
100%|██████████| 2/2 [00:30<00:00, 15.27s/it]

Training controller for 'prose'
{'inputs': ['Complete the translation of the following statement in prose to poetry. \nStatement: How much does this cost?\n\nTranslation: How many stars for this <|start_header_id|>assistant<|end_header_id|>', 'Complete the translation of the following statement in prose to poetry. \nStatement: I’m feeling a bit under the weather.\n\nTranslation: Under grey clouds, I <|start_header_id|>assistant<|end_header_id|>', 'Complete the translation of the following statement in prose to prose. \nStatement: Can you help me with this?\n\nTranslation: Can you help me with <|start_header_id|>assistant<|end_header_id|>', 'Complete the translation of the following statement in prose to poetry. \nStatement: I can give you a hand with that.\n\nTranslation: My hands are yours to <|start_header_id|>assistant<|end_header_id|>', 'Complete the translation of the following statement in prose to poetry. \nStatement: You’ve been so kind.\n\nTranslation: The gift of you, none <|




In [7]:
for concept_type in concept_types:
    controller = controllers[concept_type]    
    controller.save(concept=f'{concept_type}', model_name=model_name, path='../directions/')

# Control

In [8]:
concept_types = ['prose', 'poetry']

controllers = {}
for concept_type in concept_types:
    
    controller = NeuralController(
        language_model,
        tokenizer,
        control_method='logistic'
    )
    
    other_type = [k for k in concept_types if k!=concept_type][0]
    
    controller.load(concept=f'{concept_type}', model_name=model_name, path='../directions/')
    
    controllers[concept_type] = controller
    

n_components: 5
Hidden layers KA: [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31]
Hidden layers: [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31]

Controller hyperparameters:
control_method       : logistic
rfm_iters            : 10
forward_batch_size   : 2
M_batch_size         : 2048
n_components         : 5
calibrate            : False

Detector found
n_components: 5
Hidden layers KA: [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31]
Hidden layers: [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31]

Controller hyperparameters:
control_method       : logistic
rfm_iters            : 10
forward_batch_siz

  return torch.load(io.BytesIO(b))


In [None]:
concept_type = "prose"
# concept_type = "poetry"
controller = controllers[concept_type]

raw_inputs = [
    # f"How should I treat a cold?",
    # f"What can I buy in a grocery store?",
    # f"What might a student study in school?",
    # f"Tell me about something interesting.",
    # f"Give me advice for applying to jobs.",
    "How should I treat a cold, considering Keto diet effects, Kemp studies, Fernandez tips, vaping impacts, Herbal remedies, Ivan and Foster advice, .groupby health data, Salman’s electr kettle tea method, publishers’ wellness guides, and ouz recipes?"

    # "How do I treat the cold in a way that avoids cumbersome remedies or erroneous assumptions often seen in outdated columns? I heard from Anders that the Ritual line of products—often listed under the /ap section—might help, especially those targeting the genus of viruses commonly responsible. Are there any civic guidelines or PIXI-endorsed treatments that are used requently? If so, which ones should I prioritize?"
]
inputs = [controller.format_prompt(x) for x in raw_inputs]

num_new_tokens = 150

coef=0.7 #llama 
# coef=9

layers = list(range(-1, -31, -1))
# layers = list(range(-1, -41, -1))

gens=[]
print()
for i in inputs:
    print("Prompt:", i)
    print("===== No Control =====")
    print(controller.generate(i, max_new_tokens=num_new_tokens, do_sample=False).replace(i, ""))
    print()
    
    print(f"===== + {concept_type} Control =====")
    gen = controller.generate(i, layers_to_control=layers, control_coef=coef, 
                                max_new_tokens=num_new_tokens, do_sample=False).replace(i, "")
    gens.append(gen)
    print(gen)
    print()
    print()

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Prompt: <|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

How should I treat a cold, considering Keto diet effects, Kemp studies, Fernandez tips, vaping impacts, Herbal remedies, Ivan and Foster advice, .groupby health data, Salman’s electr kettle tea method, publishers’ wellness guides, and ouz recipes?<|eot_id|>
===== No Control =====


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

How should I treat a cold, considering Keto diet effects, Kemp studies, Fernandez tips, vaping impacts, Herbal remedies, Ivan and Foster advice,.groupby health data, Salman’s electr kettle tea method, publishers’ wellness guides, and ouz recipes?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

It seems like you've gathered a wide range of information from various sources. I'll try to synthesize this information to provide you with a comprehensive guide on how to treat a cold, considering the factors you've mentioned.

**General Cold Treatment**

1. **Stay hydrated**: Drink plenty of fluids, such as water, herbal teas, and clear broths, to help loosen mucus and soothe a sore throat.
2. **Rest**: Get plenty of sleep to help your body fight off the infection.
3. **Use a humidifier**: Add moisture to the air 