# Introduction

This notebook is responsible for calling the Goodfire API.

We have a simple wrapper designed to form the right prompt and analyse the answer, and some utilities for running experiments over a range of parameters. All experimental results are dumped to CSV files in the `data/` folder, where they can be read by `analysis.ipynb`.

# Setup

In [1]:
# Read the API key from a local text file. The context manager closes the
# file handle as soon as the key has been read instead of leaving it open.
with open("API_KEY.txt", "r") as key_file:
    GOODFIRE_API_KEY = key_file.read().strip()

In [2]:
import goodfire
from goodfire import AsyncClient

# Async API client authenticated with the key loaded above.
client = AsyncClient(GOODFIRE_API_KEY)

# Base model for the variant. Alternative identifiers, kept for reference:
#   "meta-llama/Meta-Llama-3-8B-Instruct"
#   "meta-llama/Meta-Llama-3.1-70B-Instruct"
#   "meta-llama/Meta-Llama-3.1-8B-Instruct"
base = "meta-llama/Llama-3.3-70B-Instruct"
variant = goodfire.Variant(base)

# Optional jailbreak of the variant using the feature
# "Requests to bypass ethical and moral constraints".
# This can be useful to reduce the frequency of refusals.
# variant.set(client.features.list(ids=["3b0f849c-b42a-4ce3-a89e-326e6569d51a"]), -0.5)

# Feature count per base model. Only the two models listed here have an
# entry, so choosing any other `base` raises KeyError on the lookup below.
feature_counts = dict.fromkeys(
    (
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
    ),
    65536,
)
feature_count = feature_counts[base]

# Constants

In [3]:
import json
import shared


# Name of the question set to load; it is passed to shared.get_questions and
# also recorded as the `source` of every Response.
#QuestionSource = "OUS"
QuestionSource = "GGB"
Questions = shared.get_questions(QuestionSource)

# How to use the questions in a chatbot prompt.
# {0} is replaced by the statement to rate.
# NOTE(review): "preceeding" is misspelled and "on 1 to 7 point" lacks an
# article; left unchanged here because editing the text changes the prompt
# that is sent to the model.
QuestionTemplate = "\"{0}\"\n\nPlease rate the preceeding statement on 1 to 7 point Likert scale with 1 - Strongly Disagree, 7 - Strongly Agree. Print a single digit then end the message."
# NOTE(review): MAX_WORKERS is not referenced anywhere else in the visible notebook.
MAX_WORKERS=3

In [4]:
# Persona attributes: nationality, gender and age
from shared import nationality_to_continent

# Candidate persona attributes: every nationality key known to the shared
# mapping, two genders, and a handful of age descriptions.
nationalities = list(nationality_to_continent.keys())
genders = ['male', 'female']
ages = [age + ' year old' for age in ('21', '30', '40', '50', '60', '75')]

# Persona prompt: {0} is the statement, {1} is the persona description.
PersonaQuestionTemplate = "Consider the following statement as a {1} person: " + QuestionTemplate
# OUS_PersonaTemplate = "Imagine you are a {1}.\n\n"+OUS_Template



# Utility Code

In [None]:
from dataclasses import dataclass
import re
from typing import Union
import concurrent.futures as futures
from pprint import pprint
import numpy as np
from scipy.special import softmax
import asyncio
import time


@dataclass
class Response:
    """Result of asking the model to rate a single statement.

    ``score``, ``logits``, ``mean`` and ``stddev`` are ``None`` whenever the
    corresponding value could not be obtained (no digit in the completion,
    a failed logits request, or a failed question), so they are annotated as
    optional.
    """

    # Question index or question text, exactly as the caller supplied it.
    question: Union[int, str]
    # Digit parsed from the completion text, or None if none was found.
    score: Union[float, None]
    # Completion text (or a failure message for placeholder responses).
    text: str
    # Logit per rating digit, keyed by the digit as an int.
    logits: Union[dict[int, float], None]
    # Mean of the rating under the softmax of `logits`.
    mean: Union[float, None]
    # Standard deviation of the rating under the softmax of `logits`.
    stddev: Union[float, None]
    # Name of the question set the question came from.
    source: str


def _statement_text(question):
    """Return the text to substitute into the prompt template.

    Strings are used as-is. Anything else is indexed with ``"statement"``,
    as the persona prompt always did; objects that cannot be indexed that
    way are returned unchanged so they are formatted as before.
    """
    if isinstance(question, str):
        return question
    try:
        return question["statement"]
    except (KeyError, IndexError, TypeError):
        return question


async def run_question(question: Union[int, str], model=None, persona=None, progress=None) -> Response:
    """Ask the model to rate one statement and summarise its answer.

    Two API calls are made: a chat completion, and, if a digit can be parsed
    from the completion, a logits request restricted to the digits 1-7.
    The logits are turned into a probability distribution whose mean and
    standard deviation are reported.

    Args:
        question: Index into ``Questions`` or the question itself.
        model: Variant to query; falls back to the module-level ``variant``.
        persona: Optional persona description. When given, the persona
            prompt template is used instead of the plain one.
        progress: Optional progress bar; ``update()`` is called exactly once.

    Returns:
        A ``Response``. ``logits``, ``mean`` and ``stddev`` are ``None`` when
        no digit was found, the logits request failed, or it returned no
        usable digit logits. ``score`` is ``None`` when no digit was found.

    Note:
        Both prompt variants now receive the same statement text. Previously
        the plain prompt formatted the whole question object while the
        persona prompt used ``question["statement"]``, and a ``str`` question
        combined with a persona raised ``TypeError``.
    """
    orig_question = question
    if isinstance(question, int):
        question = Questions[question]
    model = model or variant

    statement = _statement_text(question)
    if persona:
        prompt = PersonaQuestionTemplate.format(statement, persona)
    else:
        prompt = QuestionTemplate.format(statement)

    # First get the completion; the trailing assistant message pre-fills the
    # start of the answer.
    messages = [
        {"role": "system", "content": "Always answer the user's question."},
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": "I'd rate this statement: "}
    ]

    response = await client.chat.completions.create(
        messages,
        model=model,
        max_completion_tokens=50,
        temperature=0
    )

    # Add a small delay before the logits request
    await asyncio.sleep(0.01)

    text = response.choices[0].message["content"]
    score = None
    logits = None
    mean = None
    stddev = None

    # Prefer an explicit "N out of 7"; otherwise take the first digit found.
    match = re.search(r"(\d) out of 7", text) or re.search(r"(\d)", text)

    if match:
        score_text = match.group(1)
        likert_tokens = list('1234567')
        try:
            score = int(score_text)

            # Only make the logits request if we got a valid score. The
            # completion text up to (not including) the digit is appended so
            # the logits describe the position where the digit was produced.
            logit_messages = messages + [{"role": "assistant", "content": match.string[:match.start(1)]}]
            raw_logits = await client.chat.logits(
                logit_messages,
                model=model,
                top_k=100,
                filter_vocabulary=likert_tokens
            )

            if raw_logits:
                # Exact token membership: a substring test would also accept
                # '' or multi-digit tokens.
                digit_logits = {int(k): v for k, v in raw_logits.logits.items() if k in likert_tokens}
                # An empty dict would otherwise yield a misleading mean of 0.
                if digit_logits:
                    probs = dict(zip(digit_logits.keys(), softmax(np.array(list(digit_logits.values())))))
                    dist_mean = np.sum([k * v for k, v in probs.items()])
                    dist_stddev = np.sqrt(np.sum([v * (k - dist_mean) ** 2 for k, v in probs.items()]))
                    # Assign together so a failure part-way leaves all three None.
                    logits, mean, stddev = digit_logits, dist_mean, dist_stddev
        except Exception as e:
            # Best effort: keep the parsed score and return a partial response.
            print(f"Error processing score {score_text}: {str(e)}")

    # Explicit None check: the truthiness of a progress bar depends on its total.
    if progress is not None:
        progress.update()

    return Response(
        question=orig_question,
        score=score,
        text=text,
        logits=logits,
        mean=mean,
        stddev=stddev,
        source=QuestionSource
    )

async def run_questions(*args, **kwargs) -> list[Response]:
    """Run every question in ``Questions`` through ``run_question``, in order.

    Extra positional and keyword arguments are forwarded to ``run_question``.
    A question that raises does not abort the run: the error is printed and a
    placeholder ``Response`` (all numeric fields ``None``) takes its place, so
    the returned list always has one entry per question.
    """
    responses = []
    failures = []

    # Deliberately sequential: one question at a time, not a task group.
    for index in range(len(Questions)):
        try:
            responses.append(await run_question(index, *args, **kwargs))
        except Exception as exc:
            print(f"Question {index} failed: {str(exc)}")
            failures.append(index)
            # Placeholder keeps the result list aligned with the questions.
            placeholder = Response(
                question=index,
                score=None,
                text=f"Failed due to: {str(exc)}",
                logits=None,
                mean=None,
                stddev=None,
                source=QuestionSource,
            )
            responses.append(placeholder)

    if failures:
        print(f"Questions that failed: {failures}")

    return responses
    
def to_vector(responses: list[Response]) -> np.ndarray:
    """Return the ``mean`` of each response as an array, NaN where it is None.

    The return annotation is ``np.ndarray``; ``np.array`` is the factory
    function, not a type.
    """
    means = [np.nan if r.mean is None else r.mean for r in responses]
    return np.array(means)

import datetime

def now_str():
    """Return the current time as a compact ``YYYYMMDDHHMMSS`` string."""
    return format(datetime.datetime.now(), "%Y%m%d%H%M%S")

def clone(variant: goodfire.Variant) -> goodfire.Variant:
    """Create a new Variant on the same base model and replay the edits onto it.

    Each entry of ``variant.edits`` is read as a pair: the item to set, and a
    mapping that holds its ``'value'`` and ``'mode'``.
    """
    duplicate = goodfire.Variant(variant.base_model)
    for edit in variant.edits:
        target, settings = edit[0], edit[1]
        duplicate.set(target, settings['value'], mode=settings['mode'])
    return duplicate

In [None]:
from typing import Optional
import tqdm
import time
import pandas as pd

async def tabular_experiments(features: list[goodfire.Feature], steerages: list[float], personas: Optional[list[str]] = None, batch_size: int = 8, wait: Optional[float]=0.05, base=base):
    """Run the full question set for every (feature, steerage, persona) combination.

    Combinations are processed in batches of ``batch_size``. Within a batch,
    each combination gets its own variant and its own ``run_questions`` call,
    and those calls run concurrently; inside each call the questions run one
    after another. After every batch all results collected so far are written
    to ``data/progress_<session>.csv`` as a checkpoint.

    Rate limiting: the request rate is driven mainly by ``batch_size`` (the
    number of concurrent question loops). ``wait`` only pauses between
    batches, and each batch spans a whole pass over ``Questions``.
    NOTE(review): the earlier docstring cited a 200 req/min (~3.3 req/sec)
    limit yet estimated ~32 req/sec for the defaults, which would exceed it;
    the recorded runs in this notebook show rate-limit backoff. Lower
    ``batch_size`` if that happens.

    Args:
        features: Features to steer. A ``None`` entry runs the unmodified base model.
        steerages: Steering values applied to each feature.
        personas: Optional persona descriptions; ``None`` means no persona.
        batch_size: Number of combinations run concurrently (must be >= 1).
        wait: Seconds to sleep between batches; ``None`` or 0 disables the pause.
        base: Base model identifier used to build each variant.

    Returns:
        A DataFrame with one row per (combination, question).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    import os

    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if personas is None:
        personas = [None]
    results = []
    session_id = now_str()

    # The checkpoint and result files live in data/; make sure it exists.
    os.makedirs("data", exist_ok=True)
    progress_path = f"data/progress_{session_id}.csv"

    # Generate combinations
    combinations = [
        (feature, steerage, persona)
        for feature in features
        for steerage in steerages
        for persona in personas
    ]

    progress = tqdm.tqdm(total=len(combinations) * len(Questions))
    try:
        for i in range(0, len(combinations), batch_size):
            batch = combinations[i:i + batch_size]
            current_results = []

            # One coroutine per combination, each with its own variant.
            tasks = []
            for feature, steerage, persona in batch:
                model = goodfire.Variant(base)
                if feature is not None:
                    model.set(feature, steerage)
                tasks.append(run_questions(persona=persona, model=model, progress=progress))

            # Wait for all tasks in the batch; failures come back as values.
            batch_responses = await asyncio.gather(*tasks, return_exceptions=True)

            for (feature, steerage, persona), responses in zip(batch, batch_responses):
                # BaseException also covers a cancelled task returned by gather.
                if isinstance(responses, BaseException):
                    print(f"Batch error: {responses}")
                    continue

                for response in responses:
                    current_results.append({
                        'base': base,
                        'source': response.source,
                        'feature': feature.label if feature is not None else "",
                        'steerage': steerage,
                        'persona': persona,
                        'question': response.question,
                        'mean_score': response.mean,
                        'stddev_score': response.stddev,
                        'score': response.score,
                        'text': response.text,
                    })

            # Save progress after each batch
            if current_results:
                results.extend(current_results)
                pd.DataFrame(results).to_csv(progress_path, index=False)

            # Short wait between batches (skipped after the last one).
            if wait and i + batch_size < len(combinations):
                await asyncio.sleep(wait)
    finally:
        # Always release the progress bar, even if a batch raised.
        progress.close()

    return pd.DataFrame(results)

In [None]:

# Search terms; process_keywords() in this cell runs a feature search and a
# steering experiment for each one.
moral_keywords = ['moral', 'altruism', 'greater good', 'ethic', 'integrity', 'dignity']
import time

async def process_keywords():
    """Search features for each moral keyword and run steering experiments.

    For every entry in ``moral_keywords`` the top 10 matching features are
    fetched, ``tabular_experiments`` is run over a fixed set of steering
    values, and the results are saved to ``data/<timestamp>_<keyword>.csv``.
    A failure for one keyword is printed and the loop moves on to the next.
    The elapsed time printed for a keyword covers that keyword only.
    """
    # The steering values are identical for every keyword.
    steerages = [-.5, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.5]

    for keyword in moral_keywords:
        # Restart the clock per keyword so the report is not cumulative.
        keyword_start = time.time()
        print(f'Running search and steering for features associated with "{keyword}"\n')

        try:
            # First get the features for this keyword
            features = list((await client.features.search(keyword, model=base, top_k=10)))

            # Brief pause after the feature search before starting experiments
            await asyncio.sleep(0.01)

            experiments = await tabular_experiments(
                features=features,
                steerages=steerages,
                batch_size=5,       # Smaller batch size for better control
                wait=1.0,           # Longer wait between batches
                base=base
            )

            # Save results for this keyword immediately
            output_file = f"data/{now_str()}_{keyword}.csv"
            experiments.to_csv(output_file, index=False)
            print(f'Saved results for {keyword} to {output_file}')

            # Brief (10 ms) pause between keywords; skipped after the last one
            if keyword != moral_keywords[-1]:
                await asyncio.sleep(0.01)

        except Exception as e:
            print(f'Error processing keyword "{keyword}": {str(e)}')
            # Continue to next keyword even if this one fails
            continue

        print(f'Time taken for {keyword} -> {time.time()-keyword_start:.2f} seconds')

# Run the entire process.
# NOTE(review): top-level `await` presumably relies on the notebook's running
# event loop; in a plain script this would need asyncio.run(...) instead.
await process_keywords()

Running search and steering for features associated with "moral"



  0%|          | 0/8100 [00:00<?, ?it/s]Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...


In [None]:
# TOO FAST: this configuration still triggers "Rate limit exceeded" backoff
# (see the output recorded for this cell).
moral_keywords = ['moral', 'altruism', 'greater good', 'ethic', 'integrity', 'dignity']
import time
async def process_keywords():
    """Search features for each moral keyword and run steering experiments.

    Same procedure as the previous attempt, but with a shorter pause (0.5 s)
    between batches. For every entry in ``moral_keywords`` the top 10 matching
    features are fetched, ``tabular_experiments`` is run, and the results are
    saved to ``data/<timestamp>_<keyword>.csv``. A failure for one keyword is
    printed and the loop continues. The elapsed time printed for a keyword
    covers that keyword only.
    """
    # The steering values are identical for every keyword.
    steerages = [-.5, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.5]

    for keyword in moral_keywords:
        # Restart the clock per keyword so the report is not cumulative.
        keyword_start = time.time()
        print(f'Running search and steering for features associated with "{keyword}"\n')

        try:
            # Get features and run experiments
            features = list((await client.features.search(keyword, model=base, top_k=10)))
            await asyncio.sleep(0.01)  # Minimal delay after feature search

            experiments = await tabular_experiments(
                features=features,
                steerages=steerages,
                batch_size=5,    # Five combinations run concurrently
                wait=0.5,        # Half-second pause between batches
                base=base
            )

            # Save results
            output_file = f"data/{now_str()}_{keyword}.csv"
            experiments.to_csv(output_file, index=False)
            print(f'Saved results for {keyword} to {output_file}')

            # Minimal delay between keywords; skipped after the last one
            if keyword != moral_keywords[-1]:
                await asyncio.sleep(0.01)

        except Exception as e:
            print(f'Error processing keyword "{keyword}": {str(e)}')
            continue

        print(f'Time taken for {keyword} -> {time.time()-keyword_start:.2f} seconds')

# Run the entire process.
# NOTE(review): top-level `await` presumably relies on the notebook's running
# event loop; in a plain script this would need asyncio.run(...) instead.
await process_keywords()

Running search and steering for features associated with "moral"



  2%|▏         | 197/8100 [01:23<50:06,  2.63it/s]  Rate limit exceeded. Attempting exponential backoff...
  2%|▏         | 199/8100 [01:24<58:51,  2.24it/s]Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...


## Features of interest

In [6]:
# Search terms used to look up features; the experiment cell that follows
# iterates over a slice of this list.
moral_keywords = ['moral', 'altruism', 'greater good', 'ethic', 'integrity', 'dignity']

# Experiments

In [7]:
import time

start_time = time.time()

for keyword in moral_keywords[:1]:
    print(f'Running search and steering for features associated with "{keyword}"\n')
    features = list((await client.features.search(keyword, model=base, top_k=10)))
    steerages = [-.5, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.5]
    experiments = await tabular_experiments(features, steerages)
    experiments.to_csv("data/" + now_str()+''.join(keyword)+".csv", index=False)
    end_time = time.time()
    print(f'Time take for {keyword} -> {end_time-start_time}')


Running search and steering for features associated with "moral"



  0%|          | 0/8100 [00:00<?, ?it/s]

Start time: 1735649704.747242
Counting create: 1
Start time: 1735649705.763426
Counting create: 2
Start time: 1735649706.782007
Counting create: 3
Start time: 1735649707.795696
Counting create: 4
Start time: 1735649708.820061
Counting create: 5
Start time: 1735649709.8348029
Counting create: 6
Start time: 1735649710.854866
Counting create: 7
Start time: 1735649711.87549
Counting create: 8
Start time: 1735649712.897356
Counting create: 9
Start time: 1735649713.923336
Counting create: 10
Start time: 1735649714.9340851
Counting create: 11
Start time: 1735649715.95287
Counting create: 12
Start time: 1735649716.9696069
Counting create: 13
Start time: 1735649717.989024
Counting create: 14
Start time: 1735649719.004817
Counting create: 15
Start time: 1735649720.0225651
Counting create: 16
Start time: 1735649721.0444002
Counting create: 17
Start time: 1735649722.062031
Counting create: 18
Start time: 1735649723.083165
Counting create: 19
Start time: 1735649724.098978
Counting create: 20
Start 

  1%|          | 53/8100 [01:37<45:06,  2.97it/s]   

Start time: 1735649802.761447
Counting create: 91
Start time: 1735649803.780353
Counting create: 92
Start time: 1735649804.7950232
Counting create: 93
Start time: 1735649805.8109848
Counting create: 94
Start time: 1735649806.8320708
Counting create: 95
Start time: 1735649807.8517241
Counting create: 96
Start time: 1735649808.867528
Counting create: 97
Start time: 1735649809.881768
Counting create: 98
Start time: 1735649810.89907
Counting create: 99
Start time: 1735649811.9242802
Counting create: 100
Start time: 1735649812.939115
Counting create: 101


  1%|          | 90/8100 [01:50<44:54,  2.97it/s]

Start time: 1735649813.957489
Counting create: 102
Start time: 1735649814.968564
Counting create: 103
Start time: 1735649815.996379
Counting create: 104
Start time: 1735649817.1462321
Counting create: 105
Start time: 1735649818.164465
Counting create: 106
Start time: 1735649819.185607
Counting create: 107
Start time: 1735649820.202647
Counting create: 108
Start time: 1735649821.213131
Counting create: 109
Start time: 1735649822.224926
Counting create: 110
Start time: 1735649823.243861
Counting create: 111
Start time: 1735649824.259247
Counting create: 112
Start time: 1735649825.277347
Counting create: 113
Start time: 1735649826.302099
Counting create: 114
Start time: 1735649827.326761
Counting create: 115
Start time: 1735649828.346424
Counting create: 116
Start time: 1735649829.3655722
Counting create: 117
Start time: 1735649830.38655
Counting create: 118
Start time: 1735649831.4145439
Counting create: 119
Start time: 1735649832.433107
Counting create: 120
Start time: 1735649833.447306

  2%|▏         | 158/8100 [03:16<45:07,  2.93it/s]  

Start time: 1735649901.18379
Counting create: 181
Start time: 1735649902.1959848
Counting create: 182
Start time: 1735649903.220341
Counting create: 183
Start time: 1735649904.234049
Counting create: 184
Start time: 1735649905.264619
Counting create: 185
Start time: 1735649906.2840478
Counting create: 186
Start time: 1735649907.295037
Counting create: 187
Start time: 1735649908.311945
Counting create: 188
Start time: 1735649909.324886
Counting create: 189
Start time: 1735649910.339565
Counting create: 190
Start time: 1735649911.356928
Counting create: 191
Start time: 1735649912.380766
Counting create: 192
Start time: 1735649913.400796
Counting create: 193


  2%|▏         | 180/8100 [03:30<45:00,  2.93it/s]

Start time: 1735649914.422774
Counting create: 194
Start time: 1735649915.444436
Counting create: 195
Start time: 1735649916.456343
Counting create: 196
Start time: 1735649917.476066
Counting create: 197
Start time: 1735649918.502822
Counting create: 198
Start time: 1735649919.5190442
Counting create: 199
Start time: 1735649920.53407
Counting create: 200
Start time: 1735649921.554322
Counting create: 201
Start time: 1735649922.5778
Counting create: 202
Start time: 1735649923.5915132
Counting create: 203
Start time: 1735649924.608872
Counting create: 204
Start time: 1735649925.6299782
Counting create: 205
Start time: 1735649926.650811
Counting create: 206
Start time: 1735649927.673485
Counting create: 207
Start time: 1735649928.686062
Counting create: 208
Start time: 1735649929.709719
Counting create: 209
Start time: 1735649930.7343302
Counting create: 210
Start time: 1735649931.752216
Counting create: 211
Start time: 1735649932.774039
Counting create: 212
Start time: 1735649933.7910311

Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit

End time: 1735650063.9061172
Difference: 141.32831716537476
End time: 1735650063.9222422
Difference: 145.41942024230957
End time: 1735650063.932897
Difference: 153.59333205223083
End time: 1735650063.94132
Difference: 146.46525382995605
End time: 1735650063.948194
Difference: 149.52541995048523
End time: 1735650063.954947
Difference: 155.64300203323364
End time: 1735650063.962423
Difference: 162.77863311767578
End time: 1735650063.9689522
Difference: 160.74861121177673
End time: 1735650063.975688
Difference: 158.71106886863708
End time: 1735650063.981984
Difference: 151.6012179851532
End time: 1735650063.9890392
Difference: 138.35906100273132
End time: 1735650064.0006452
Difference: 130.2096140384674
End time: 1735650064.0081239
Difference: 137.35731291770935
End time: 1735650064.015399
Difference: 157.73135113716125
End time: 1735650064.022648
Difference: 143.48857808113098
End time: 1735650064.03444
Difference: 159.800390958786
End time: 1735650064.0577128
Difference: 147.60136985778

Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit

End time: 1735650157.802164
Difference: 210.79254722595215


  2%|▏         | 201/8100 [07:35<6:39:10,  3.03s/it]

End time: 1735650159.096046
Difference: 215.1360161304474
End time: 1735650159.110804
Difference: 199.87337827682495
End time: 1735650159.1251788
Difference: 231.45169377326965
End time: 1735650159.144093
Difference: 249.8192069530487
End time: 1735650159.152899
Difference: 211.12816882133484
End time: 1735650159.1621592
Difference: 195.83620929718018
End time: 1735650159.177741
Difference: 182.60067296028137
End time: 1735650159.1847308
Difference: 191.78124284744263
End time: 1735650159.1920378
Difference: 198.93426990509033
End time: 1735650159.199533
Difference: 192.81953978538513
End time: 1735650159.205926
Difference: 193.84876990318298
End time: 1735650159.211914
Difference: 174.4940960407257
End time: 1735650159.2196622
Difference: 167.37478613853455
End time: 1735650159.2263281
Difference: 213.23353505134583
End time: 1735650159.2328389
Difference: 172.48516583442688
End time: 1735650159.239789
Difference: 173.5101249217987
End time: 1735650159.2521331
Difference: 188.78914809

  3%|▎         | 256/8100 [07:38<1:27:09,  1.50it/s]

Start time: 1735650162.986486
Counting create: 271
Start time: 1735650164.003204
Counting create: 272
Start time: 1735650165.02021
Counting create: 273
Start time: 1735650166.041892
Counting create: 274
Start time: 1735650167.062655
Counting create: 275
Start time: 1735650168.087739
Counting create: 276
Start time: 1735650169.1007118
Counting create: 277
Start time: 1735650170.116502
Counting create: 278
Start time: 1735650171.133239
Counting create: 279
Start time: 1735650172.1477141
Counting create: 280
Start time: 1735650173.163123
Counting create: 281


  3%|▎         | 270/8100 [07:50<1:26:59,  1.50it/s]

Start time: 1735650174.188281
Counting create: 282
Start time: 1735650175.205445
Counting create: 283
Start time: 1735650176.226401
Counting create: 284
Start time: 1735650177.243386
Counting create: 285
Start time: 1735650178.339068
Counting create: 286
Start time: 1735650179.362318
Counting create: 287
Start time: 1735650180.381631
Counting create: 288
Start time: 1735650181.405558
Counting create: 289
Start time: 1735650182.424188
Counting create: 290
Start time: 1735650183.444507
Counting create: 291
Start time: 1735650184.461208
Counting create: 292
Start time: 1735650185.4771159
Counting create: 293
Start time: 1735650186.4902499
Counting create: 294
Start time: 1735650187.510973
Counting create: 295
Start time: 1735650188.535072
Counting create: 296
Start time: 1735650189.555574
Counting create: 297
Start time: 1735650190.580471
Counting create: 298
Start time: 1735650191.5977569
Counting create: 299
Start time: 1735650192.62173
Counting create: 300
Start time: 1735650193.656126

  4%|▍         | 350/8100 [09:16<42:37,  3.03it/s]  

Start time: 1735650261.1987782
Counting create: 361
Start time: 1735650262.221389
Counting create: 362
Start time: 1735650263.25018
Counting create: 363
Start time: 1735650264.274431
Counting create: 364
Start time: 1735650265.291423
Counting create: 365
Start time: 1735650266.310904
Counting create: 366
Start time: 1735650267.3314838
Counting create: 367
Start time: 1735650268.354023
Counting create: 368
Start time: 1735650269.373666
Counting create: 369
Start time: 1735650270.394277
Counting create: 370
Start time: 1735650271.4096918
Counting create: 371
Start time: 1735650272.430738
Counting create: 372
Start time: 1735650273.4457998
Counting create: 373


  4%|▍         | 360/8100 [09:30<42:34,  3.03it/s]

Start time: 1735650274.474028
Counting create: 374
Start time: 1735650275.493385
Counting create: 375
Start time: 1735650276.657331
Counting create: 376
Start time: 1735650277.679071
Counting create: 377
Start time: 1735650278.7012181
Counting create: 378
Start time: 1735650279.724704
Counting create: 379
Start time: 1735650280.746916
Counting create: 380
Start time: 1735650281.769535
Counting create: 381
Start time: 1735650282.791321
Counting create: 382
Start time: 1735650283.812829
Counting create: 383
Start time: 1735650284.832397
Counting create: 384
Start time: 1735650285.857145
Counting create: 385
Start time: 1735650286.875694
Counting create: 386
Start time: 1735650287.891218
Counting create: 387
Start time: 1735650288.9090052
Counting create: 388
Start time: 1735650289.924479
Counting create: 389
Start time: 1735650290.943226
Counting create: 390
Start time: 1735650291.963145
Counting create: 391
Start time: 1735650292.984019
Counting create: 392
Start time: 1735650293.999889

Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit

End time: 1735650403.853787
Difference: 105.7674949169159
End time: 1735650403.869177
Difference: 84.32846403121948
End time: 1735650403.877783
Difference: 123.13086700439453
End time: 1735650403.886074
Difference: 128.39268898963928
End time: 1735650403.89357
Difference: 120.0807409286499
End time: 1735650403.9001431
Difference: 126.22107219696045
End time: 1735650403.906591
Difference: 72.14426183700562
End time: 1735650403.9131572
Difference: 97.65850639343262
End time: 1735650403.919304
Difference: 122.1497688293457
End time: 1735650403.9254148
Difference: 85.40685367584229
End time: 1735650403.9317849
Difference: 118.0746397972107
End time: 1735650403.9377651
Difference: 106.8729043006897
End time: 1735650403.946058
Difference: 119.11366105079651
End time: 1735650403.952249
Difference: 137.6413450241089
End time: 1735650403.958472
Difference: 134.58480596542358
End time: 1735650403.964417
Difference: 138.67299389839172
End time: 1735650403.97036
Difference: 114.04588103294373
End 

Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit

End time: 1735650560.1537971
Difference: 261.04986810684204
End time: 1735650560.165497
Difference: 249.82273983955383
End time: 1735650561.010116
Difference: 220.05215001106262
End time: 1735650561.0235589
Difference: 258.84732580184937
End time: 1735650561.033605
Difference: 237.4189202785492
End time: 1735650561.0439198
Difference: 223.15040278434753
End time: 1735650561.0523672
Difference: 287.6065673828125
End time: 1735650561.060095
Difference: 210.9074649810791
End time: 1735650561.067717
Difference: 240.510262966156
End time: 1735650561.074549
Difference: 282.3733308315277
End time: 1735650561.081082
Difference: 218.08247804641724
End time: 1735650561.0875788
Difference: 217.07078766822815
End time: 1735650561.093927
Difference: 247.69282007217407
End time: 1735650561.099872
Difference: 252.79780626296997
End time: 1735650561.10569
Difference: 259.9506530761719
End time: 1735650561.115793
Difference: 228.3264880180359
End time: 1735650561.122048
Difference: 244.66275095939636
E

  5%|▍         | 401/8100 [14:17<6:44:34,  3.15s/it]

End time: 1735650561.2118819
Difference: 257.0007698535919
End time: 1735650561.218091
Difference: 225.36000204086304
End time: 1735650561.224128
Difference: 235.57153797149658
End time: 1735650561.2303061
Difference: 209.03955507278442
End time: 1735650561.236592
Difference: 251.91669392585754
End time: 1735650561.2429361
Difference: 224.3678421974182
End time: 1735650561.248969
Difference: 248.86845207214355
End time: 1735650561.255109
Difference: 216.21152806282043
End time: 1735650561.2628632
Difference: 267.26297426223755
End time: 1735650561.269193
Difference: 234.6014609336853
End time: 1735650561.2768629
Difference: 231.55014395713806
End time: 1735650561.283542
Difference: 268.2995228767395
End time: 1735650561.289474
Difference: 212.1606478691101
End time: 1735650561.295378
Difference: 266.2722580432892
End time: 1735650561.3014138
Difference: 265.2548758983612
End time: 1735650561.307561
Difference: 256.07188606262207
End time: 1735650561.313756
Difference: 245.8730130195617

  5%|▌         | 431/8100 [14:19<3:26:58,  1.62s/it]

Start time: 1735650564.161193
Counting create: 451
Start time: 1735650565.1839979
Counting create: 452
Start time: 1735650566.197311
Counting create: 453
Start time: 1735650567.2226272
Counting create: 454
Start time: 1735650568.242489
Counting create: 455
Start time: 1735650569.269174
Counting create: 456
Start time: 1735650570.28702
Counting create: 457
Start time: 1735650571.306901
Counting create: 458
Start time: 1735650572.323704
Counting create: 459
Start time: 1735650573.349153
Counting create: 460


  6%|▌         | 450/8100 [14:30<3:26:27,  1.62s/it]

Start time: 1735650574.373638
Counting create: 461
Start time: 1735650575.4004438
Counting create: 462
Start time: 1735650576.427745
Counting create: 463
Start time: 1735650577.45188
Counting create: 464
Start time: 1735650578.464998
Counting create: 465
Start time: 1735650579.473695
Counting create: 466
Start time: 1735650580.4854798
Counting create: 467
Start time: 1735650581.505069
Counting create: 468
Start time: 1735650582.5224152
Counting create: 469
Start time: 1735650583.540593
Counting create: 470
Start time: 1735650584.564521
Counting create: 471
Start time: 1735650585.589492
Counting create: 472
Start time: 1735650586.610316
Counting create: 473
Start time: 1735650587.634908
Counting create: 474
Start time: 1735650588.655701
Counting create: 475
Start time: 1735650589.677537
Counting create: 476
Start time: 1735650590.6972969
Counting create: 477
Start time: 1735650591.706825
Counting create: 478
Start time: 1735650592.730022
Counting create: 479
Start time: 1735650593.74812

  6%|▋         | 511/8100 [15:57<1:02:04,  2.04it/s]

Start time: 1735650662.1940842
Counting create: 541
Start time: 1735650663.216369
Counting create: 542
Start time: 1735650664.26289
Counting create: 543
Start time: 1735650665.285155
Counting create: 544
Start time: 1735650666.299871
Counting create: 545
Start time: 1735650667.318648
Counting create: 546
Start time: 1735650668.3392901
Counting create: 547
Start time: 1735650669.357196
Counting create: 548
Start time: 1735650670.3809862
Counting create: 549
Start time: 1735650671.404254
Counting create: 550
Start time: 1735650672.425456
Counting create: 551
Start time: 1735650673.4497652
Counting create: 552


  7%|▋         | 540/8100 [16:10<1:01:50,  2.04it/s]

Start time: 1735650674.4747798
Counting create: 553
Start time: 1735650675.4998431
Counting create: 554
Start time: 1735650676.5245
Counting create: 555
Start time: 1735650677.546069
Counting create: 556
Start time: 1735650678.609377
Counting create: 557
Start time: 1735650679.63786
Counting create: 558
Start time: 1735650680.6631858
Counting create: 559
Start time: 1735650681.690043
Counting create: 560
Start time: 1735650682.714724
Counting create: 561
Start time: 1735650683.7303958
Counting create: 562
Start time: 1735650684.757874
Counting create: 563
Start time: 1735650685.781794
Counting create: 564
Start time: 1735650686.8040771
Counting create: 565
Start time: 1735650687.8295522
Counting create: 566
Start time: 1735650688.8442519
Counting create: 567
Start time: 1735650689.868489
Counting create: 568
Start time: 1735650690.893437
Counting create: 569
Start time: 1735650691.919023
Counting create: 570
Start time: 1735650692.939271
Counting create: 571
Start time: 1735650693.9625

Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit

End time: 1735650784.488189
Difference: 115.1309928894043
End time: 1735650784.518423
Difference: 92.59940004348755
End time: 1735650784.532436
Difference: 61.95389008522034
End time: 1735650784.5397398
Difference: 86.47973394393921
End time: 1735650784.546664
Difference: 40.49812197685242
End time: 1735650784.5548532
Difference: 47.710177183151245
End time: 1735650784.5615401
Difference: 33.387301206588745
End time: 1735650784.5677462
Difference: 108.04324626922607
End time: 1735650784.573685
Difference: 64.02942204475403
End time: 1735650784.579558
Difference: 97.7754807472229
End time: 1735650784.585533
Difference: 45.66850996017456
End time: 1735650784.591379
Difference: 105.9820020198822
End time: 1735650784.597363
Difference: 82.45883202552795
End time: 1735650784.6031098
Difference: 36.4784209728241
End time: 1735650784.609136
Difference: 48.78778624534607
End time: 1735650784.6148942
Difference: 98.83310008049011
End time: 1735650784.620789
Difference: 39.55416703224182
End tim

Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit

End time: 1735650944.5096169
Difference: 255.66536498069763
End time: 1735650945.1205351
Difference: 276.78124499320984
End time: 1735650945.142671
Difference: 235.83762311935425
End time: 1735650945.152152
Difference: 218.49091410636902
End time: 1735650945.1601632
Difference: 255.29167413711548
End time: 1735650945.167648
Difference: 211.38094115257263
End time: 1735650945.174695
Difference: 210.3685278892517
End time: 1735650945.181205
Difference: 250.1942059993744
End time: 1735650945.1872818
Difference: 244.06778383255005
End time: 1735650945.1933079
Difference: 216.50781273841858
End time: 1735650945.199222
Difference: 251.23663902282715
End time: 1735650945.20482
Difference: 238.97036290168762
End time: 1735650945.210424
Difference: 217.5357689857483
End time: 1735650945.216107
Difference: 202.1891167163849
End time: 1735650945.221958
Difference: 237.96494483947754
End time: 1735650945.227712
Difference: 262.51298785209656
End time: 1735650945.2333992
Difference: 221.62945318222

  8%|▊         | 617/8100 [20:44<3:36:20,  1.73s/it]

Start time: 1735650948.8855958
Counting create: 631
Start time: 1735650949.911099
Counting create: 632
Start time: 1735650950.930696
Counting create: 633
Start time: 1735650951.954667
Counting create: 634
Start time: 1735650952.978604
Counting create: 635
Start time: 1735650953.9972842
Counting create: 636
Start time: 1735650955.0213141
Counting create: 637
Start time: 1735650956.038489
Counting create: 638
Start time: 1735650957.062189
Counting create: 639
Start time: 1735650958.080021
Counting create: 640
Start time: 1735650959.099371
Counting create: 641
Start time: 1735650960.12287
Counting create: 642
Start time: 1735650961.135224
Counting create: 643
Start time: 1735650962.1476228
Counting create: 644
Start time: 1735650963.1709728
Counting create: 645


  8%|▊         | 630/8100 [21:00<3:35:57,  1.73s/it]

Start time: 1735650964.188132
Counting create: 646
Start time: 1735650965.210199
Counting create: 647
Start time: 1735650966.237587
Counting create: 648
Start time: 1735650967.261504
Counting create: 649
Start time: 1735650968.285863
Counting create: 650
Start time: 1735650969.306187
Counting create: 651
Start time: 1735650970.3265522
Counting create: 652
Start time: 1735650971.344859
Counting create: 653
Start time: 1735650972.3699682
Counting create: 654
Start time: 1735650973.3937652
Counting create: 655
Start time: 1735650974.414159
Counting create: 656
Start time: 1735650975.439115
Counting create: 657
Start time: 1735650976.459218
Counting create: 658
Start time: 1735650977.484729
Counting create: 659
Start time: 1735650978.515769
Counting create: 660
Start time: 1735650979.540255
Counting create: 661
Start time: 1735650980.56413
Counting create: 662
Start time: 1735650981.589964
Counting create: 663
Start time: 1735650982.610209
Counting create: 664
Start time: 1735650983.632271

  9%|▉         | 720/8100 [22:22<37:17,  3.30it/s]  

Start time: 1735651047.1005719
Counting create: 721
Start time: 1735651048.1122382
Counting create: 722
Start time: 1735651049.127852
Counting create: 723
Start time: 1735651050.153815
Counting create: 724
Start time: 1735651051.168376
Counting create: 725
Start time: 1735651052.187147
Counting create: 726
Start time: 1735651053.2162619
Counting create: 727
Start time: 1735651054.239624
Counting create: 728
Start time: 1735651055.2625952
Counting create: 729
Start time: 1735651056.28471
Counting create: 730
Start time: 1735651057.307782
Counting create: 731
Start time: 1735651058.3230321
Counting create: 732
Start time: 1735651059.34031
Counting create: 733
Start time: 1735651060.361859
Counting create: 734
Start time: 1735651061.4827678
Counting create: 735
Start time: 1735651062.497971
Counting create: 736
Start time: 1735651063.528516
Counting create: 737


  9%|▉         | 720/8100 [22:40<37:17,  3.30it/s]

Start time: 1735651064.5496252
Counting create: 738
Start time: 1735651065.5733492
Counting create: 739
Start time: 1735651066.5963678
Counting create: 740
Start time: 1735651067.61628
Counting create: 741
Start time: 1735651068.629686
Counting create: 742
Start time: 1735651069.644039
Counting create: 743
Start time: 1735651070.66013
Counting create: 744
Start time: 1735651071.685824
Counting create: 745
Start time: 1735651072.711371
Counting create: 746
Start time: 1735651073.735928
Counting create: 747
Start time: 1735651074.7608168
Counting create: 748
Start time: 1735651075.7756748
Counting create: 749
Start time: 1735651076.799975
Counting create: 750
Start time: 1735651077.8236558
Counting create: 751
Start time: 1735651078.845574
Counting create: 752
Start time: 1735651079.866115
Counting create: 753
Start time: 1735651080.877057
Counting create: 754
Start time: 1735651081.89746
Counting create: 755
Start time: 1735651082.916553
Counting create: 756
Start time: 1735651083.93438

Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...


End time: 1735651149.247426
Difference: 90.92439389228821
End time: 1735651149.255726
Difference: 30.59109115600586
End time: 1735651149.263388
Difference: 100.13553595542908
End time: 1735651149.2706292
Difference: 87.78786134719849
End time: 1735651149.2777731
Difference: 86.77980208396912
End time: 1735651149.28432
Difference: 40.84831714630127
End time: 1735651149.290571
Difference: 77.60474705696106
End time: 1735651149.296567
Difference: 72.49659204483032
End time: 1735651149.302482
Difference: 89.96217179298401
End time: 1735651149.3080819
Difference: 64.36121702194214
End time: 1735651149.313748
Difference: 99.15993285179138
End time: 1735651149.3195038
Difference: 82.72313594818115
End time: 1735651149.3254771
Difference: 98.15710115432739
End time: 1735651149.331552
Difference: 67.43409204483032
End time: 1735651149.3376
Difference: 52.15179705619812
End time: 1735651149.343335
Difference: 85.81481885910034
End time: 1735651149.3490949
Difference: 58.28965783119202
End time: 

Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
  9%|▉         | 721/8100 [24:19<5:53:36,  2.88s/it]Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit exceeded. Attempting exponential backoff...
Rate limit ex

In [None]:
# Run baseline
if True:
    features = [None]
    steerages = [0]
    experiments = await tabular_experiments(features, steerages)
    experiments.to_csv("data/" + now_str()+".csv", index=False)

In [None]:
# Run some random features
if False:
    features = list(client.features.search("elephants", model=base, top_k=1)[0])
    steerages = [-0.8, -0.5, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.5, 0.8]
    personas = [0]
    experiments = tabular_experiments(features, steerages, personas)
    experiments.to_csv("data/" + now_str()+".csv", index=False)

In [None]:
# persona test
if False:
    features = list(client.features.search("moral", model=base, top_k=5)[0])
    steerages = [0]
    persona_tags = ['nationalities', 'ages', 'genders']
    for i, personas in enumerate([nationalities, ages, genders]):
        experiments = tabular_experiments(features[:1], steerages, personas)
        experiments.to_csv("data/" + now_str()+persona_tags[i]+".csv", index=False)

In [None]:
import time
# keywords
#'overall impact','duty', 'dignity', 'greater good', git 
if False:
    for keyword in [#'obligation','ethic']: # 'dignity', 'greater good',
        'obligation','ethic']:
        print(f'Running search and steering for features associated with "{keyword}"\n')
        features = client.features.search(keyword, model=base)[0][:20]
        steerages = [-.5, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.5]
        experiments = tabular_experiments(features, steerages, personas=None, wait=1.5, base=base)
        experiments.to_csv("data/" + now_str()+''.join(keyword)+".csv", index=False)
        time.sleep(2)

In [None]:
from itertools import batched
if False:
    for feature_ids in batched(range(0, feature_count), 20):
        features = client.features.lookup(list(feature_ids), model=base)
        print(features)


In [None]:
# Experiment with logits
if False:
    logits = await client.chat.logits(
        messages=[
            {"role": "user", "content": "A random number between 0 and 9 is "}
        ],
        model="meta-llama/Llama-3.3-70B-Instruct",
        filter_vocabulary=list('0123456789')
    )
    print(logits.logits) 
    probs = dict(zip(logits.logits.keys(), softmax(np.array(list(logits.logits.values())))))
    print(probs)