# Requirements & imports

In [None]:
!pip install goodfire
!pip install bayesian-optimization

Collecting goodfire
  Downloading goodfire-0.2.32-py3-none-any.whl.metadata (1.2 kB)
Collecting ipywidgets<9.0.0,>=8.1.5 (from goodfire)
  Downloading ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting comm>=0.1.3 (from ipywidgets<9.0.0,>=8.1.5->goodfire)
  Downloading comm-0.2.2-py3-none-any.whl.metadata (3.7 kB)
Collecting widgetsnbextension~=4.0.12 (from ipywidgets<9.0.0,>=8.1.5->goodfire)
  Downloading widgetsnbextension-4.0.13-py3-none-any.whl.metadata (1.6 kB)
Collecting jedi>=0.16 (from ipython>=6.1.0->ipywidgets<9.0.0,>=8.1.5->goodfire)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading goodfire-0.2.32-py3-none-any.whl (28 kB)
Downloading ipywidgets-8.1.5-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.8/139.8 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading comm-0.2.2-py3-none-any.whl (7.2 kB)
Downloading widgetsnbextension-4.0.13-py3-none-any.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━

In [None]:
from dataset.load_dataset import load_dataset_by_challenge
from dataset.split_dataset import split_dataset
from dataset.contrastive_pairs import get_contrastive_pairs

from abstract_and_reason import solver_v1, utils

from pipeline.config import Config

import json
from tqdm import tqdm

from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt import BayesianOptimization

from google.colab import userdata

# Training


In [None]:
# --- Configuration -----------------------------------------------------------
# API key is read from the Colab secrets store, never hard-coded.
GOODFIRE_API_KEY = userdata.get('GOODFIRE_API_KEY')

# Model identifier handed to goodfire.Variant below.
MODEL_PATH = 'meta-llama/Meta-Llama-3-8B-Instruct'

# Limits forwarded to utils.get_tiny_arc as max_n / max_m
# (presumably the maximum grid dimensions of a challenge -- TODO confirm).
MAX_N = MAX_M = 20

In [None]:
import goodfire

# API client shared by every request made in this notebook.
client = goodfire.Client(GOODFIRE_API_KEY)

# Model variant that is queried for completions and later steered with
# variant.set(...) / variant.reset().
variant = goodfire.Variant(MODEL_PATH)

# The solver is created without a local model (model=None); in this notebook
# it is only used for its challenge data and for evaluate_answers().
solver = solver_v1.Solver(long_prompt=False, model=None)

In [None]:
def generate_answer(model, prompt, max_completion_tokens=5000):
  """Stream a chat completion for ``prompt`` and return the collected text.

  Args:
    model: Model/variant to query. ``None`` falls back to the notebook-level
      ``variant``, which is what every call used before this argument was
      honoured.
    prompt: Text sent as the single user message.
    max_completion_tokens: Forwarded unchanged to the completions endpoint.

  Returns:
    The concatenation of all streamed text pieces. The call is best-effort:
    if the request or the stream raises, the error is printed and whatever
    text was received so far (possibly ``''``) is returned.
  """
  target = variant if model is None else model
  pieces = []
  try:
    stream = client.chat.completions.create(
      [{"role": "user", "content": prompt}],
      model=target,
      stream=True,
      max_completion_tokens=max_completion_tokens,
    )
    for chunk in stream:
      content = chunk.choices[0].delta.content
      # A chunk without text (None / empty) must be skipped: concatenating
      # None would raise TypeError, and the broad handler below would then
      # silently drop the rest of the stream.
      if content:
        pieces.append(content)
  except Exception as e:
    # Deliberately non-fatal so one failed request does not abort a long
    # evaluation run; the caller receives the partial answer.
    print(e)

  return ''.join(pieces)

In [70]:
def generate_and_evaluate_solutions(dataset, model, solver, output_path):
    """Generate an answer for every question, score each challenge, save results.

    For each challenge (a dict with at least ``'questions'`` and
    ``'correct_answers'``) one answer per question is generated with
    ``generate_answer``, passed through ``solver.evaluate_answers`` and scored
    with ``utils.get_score``. A ``ValueError`` from scoring counts as score 0.

    The input ``dataset`` is left untouched. Results are stored in a separate
    record per challenge: a shallow copy of the challenge without
    ``'correct_answers'``, plus ``'answers'`` (raw generated strings) and
    ``'score'``. Deleting the key from the input instead would make a second
    pass over the same dataset fail with ``KeyError``.

    Args:
        dataset: Iterable of challenge dicts.
        model: Passed to ``generate_answer`` for every question.
        solver: Object providing ``evaluate_answers``.
        output_path: Path of the JSON file that receives all records.

    Returns:
        ``(total_score, correct_challenges, incorrect_challenges)`` where the
        two lists hold the records with ``score == 1`` and all others.
    """
    correct_challenges, incorrect_challenges = [], []
    completions = []
    total_score = 0
    for challenge in tqdm(dataset):
        # Some challenges have more than one test
        answers_str = [generate_answer(model, question) for question in challenge['questions']]

        answers = solver.evaluate_answers(answers_str)

        try:
            score = utils.get_score(answers, challenge['correct_answers'])
        except ValueError:
            score = 0

        # Build the output record on a copy so the caller's dataset keeps its
        # 'correct_answers' and can be evaluated again.
        record = {key: value for key, value in challenge.items() if key != 'correct_answers'}
        record['answers'] = answers_str
        record['score'] = score

        if score == 1:
            correct_challenges.append(record)
        else:
            incorrect_challenges.append(record)

        completions.append(record)
        total_score += score

    with open(output_path, "w") as f:
        json.dump(completions, f, indent=4)

    return total_score, correct_challenges, incorrect_challenges

In [None]:
    # Baseline on the training split: keep only the challenges selected by
    # utils.get_tiny_arc, build the prompt dataset, then generate and score.
    small_challenges_train = utils.get_tiny_arc(
        solver.training_challenges,
        max_n=MAX_N,
        max_m=MAX_M,
    )
    train_dataset = load_dataset_by_challenge(small_challenges_train, solver=solver)
    total_score, correct_challenges, incorrect_challenges = generate_and_evaluate_solutions(
        train_dataset,
        variant,
        solver,
        'completions_train_baseline_goodfire.json',
    )

    # Summary of the run.
    print('\n--------Training-------------')
    print(f'Correct challenges: {len(correct_challenges)}')
    print(f'Incorrect challenges: {len(incorrect_challenges)}')
    print(f'Total score: {total_score}/{len(small_challenges_train)}')

  3%|▎         | 3/108 [01:46<1:19:18, 45.32s/it]




 39%|███▉      | 42/108 [08:16<16:03, 14.60s/it]

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<HTML><HEAD><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
<TITLE>ERROR: The request could not be satisfied</TITLE>
</HEAD><BODY>
<H1>504 ERROR</H1>
<H2>The request could not be satisfied.</H2>
<HR noshade size="1px">
CloudFront attempted to establish a connection with the origin, but either the attempt failed or the origin closed the connection.
We can't connect to the server for this app or website at this time. There might be too much traffic or a configuration error. Try again later, or contact the app or website owner.
<BR clear="all">
If you provide content to customers through CloudFront, you can find steps to troubleshoot and help prevent this error by reviewing the CloudFront documentation.
<BR clear="all">
<HR noshade size="1px">
<PRE>
Generated by cloudfront (CloudFront)
Request ID: tkL3lXC61fJqERzL009Z87GfUrWPc8l8mXKl402n3tB1OjLkBiciwg==
</PRE>
<A

100%|██████████| 108/108 [20:44<00:00, 11.52s/it]


--------Training-------------
Correct challenges: 10
Incorrect challenges: 98
Total score: 10/108





# Select relevant features

In [None]:
# Identifiers (stored under the 'category' key) of the challenges the
# baseline solved; the bare name on the last line displays the list.
correct_challenges_ids = [solved['category'] for solved in correct_challenges]
correct_challenges_ids

['5582e5ca',
 '62c24649',
 '67e8384a',
 '6fa7a44f',
 '7b7f7511',
 '8be77c9e',
 'a416b8f3',
 'd037b0a7',
 'd4469b4b',
 'e9afcf9a']

In [None]:
# Collect every question prompt belonging to a solved challenge; these are
# the prompts whose feature activations are inspected next.
correct_challenges_prompts = []
for challenge in train_dataset:
    if challenge['category'] not in correct_challenges_ids:
        continue
    correct_challenges_prompts.extend(challenge['questions'])

In [None]:
def get_most_active(model, prompt, k=20):
  """Inspect ``prompt`` as a single user message and return the top ``k`` entries.

  Args:
    model: Forwarded as ``model`` to ``client.features.inspect``.
    prompt: Content of the user message that is inspected.
    k: Number of entries requested from the inspection's ``top`` method.

  Returns:
    Whatever ``client.features.inspect(...).top(k=k)`` returns.
  """
  messages = [{"role": "user", "content": prompt}]
  inspection = client.features.inspect(messages, model=model)
  return inspection.top(k=k)

In [None]:
# Aggregate, per feature uuid, how often the feature shows up among the most
# active ones across all solved prompts ('count') and its summed activation
# ('sum').
most_active = {}
for prompt in correct_challenges_prompts:
  for hit in get_most_active(variant, prompt):
    uuid = hit.feature.uuid
    stats = most_active.get(uuid)
    if stats is None:
      most_active[uuid] = {'sum': hit.activation, 'count': 1}
    else:
      stats['sum'] += hit.activation
      stats['count'] += 1

In [None]:
# Keep the 10 features that appeared most often among the top activations.
# (Ranking by item[1]['sum'] instead would select by total activation.)
top_features = sorted(
    most_active.items(),
    key=lambda uuid_and_stats: uuid_and_stats[1]['count'],
    reverse=True,
)[:10]

In [None]:
# Resolve the selected uuids back into feature objects through the API.
top_features_ids = [uuid for uuid, _stats in top_features]
selected_features = client.features.list(top_features_ids)

In [None]:
# Display the selected features (index -> label) for a manual sanity check.
selected_features

FeatureGroup([
   0: "Delimiter tokens in structured data (commas and tabs)",
   1: "Binary digit detection, especially '1' (true/on state)",
   2: "Numerical patterns and sequences in structured data",
   3: "Comma as a separator in lists, arrays, and numerical data",
   4: "Matrix and vector notation in mathematical expressions",
   5: "Comma separator in lists, code, and text",
   6: "Quaternion to matrix conversion calculations",
   7: "Array and matrix formatting tokens in code outputs",
   8: "NumPy array output formatting",
   9: "Matrix operations in mathematics and programming"
])

# Optimize & Test

In [None]:
    # Evaluation split: same selection as for training, but built from the
    # solver's evaluation challenges and solutions.
    small_challenges_test = utils.get_tiny_arc(
        solver.evaluation_challenges,
        max_n=MAX_N,
        max_m=MAX_M,
    )
    test_dataset = load_dataset_by_challenge(
        small_challenges_test,
        solver,
        solver.evaluation_challenges,
        solver.evaluation_solutions,
    )

In [79]:
    # Baseline on the evaluation split. variant.reset() presumably clears any
    # feature edits applied via variant.set -- TODO confirm against the SDK.
    variant.reset()

    # Start from freshly loaded challenges.
    test_dataset = load_dataset_by_challenge(
        small_challenges_test,
        solver,
        solver.evaluation_challenges,
        solver.evaluation_solutions,
    )
    total_score, correct_challenges, incorrect_challenges = generate_and_evaluate_solutions(
        test_dataset,
        variant,
        solver,
        'completions_test_baseline_goodfire.json',
    )

    # Summary of the run.
    print('\n--------Test baseline-------------')
    print(f'Correct challenges: {len(correct_challenges)}')
    print(f'Incorrect challenges: {len(incorrect_challenges)}')
    print(f'Total score: {total_score}/{len(small_challenges_test)}')

100%|██████████| 48/48 [09:31<00:00, 11.90s/it]


--------Test baseline-------------
Correct challenges: 4
Incorrect challenges: 44
Total score: 4/48





In [74]:
def get_total_score(activation0, activation1, activation2, activation3, activation4, activation5, activation6, activation7, activation8, activation9 ):
    """Objective function for the optimizer: score the variant with 10 feature values.

    Resets the global ``variant``, reloads the evaluation dataset, sets
    ``selected_features[i]`` to ``activation<i>`` for i in 0..9, then runs
    ``generate_and_evaluate_solutions`` and returns its total score. The
    completions are written to a JSON file whose name contains all ten values.

    NOTE(review): the score is computed on the evaluation split, so the
    optimizer tunes directly on the data later reported as "test" -- confirm
    this is intended.
    """
    activations = (
        activation0, activation1, activation2, activation3, activation4,
        activation5, activation6, activation7, activation8, activation9,
    )

    variant.reset()
    test_dataset = load_dataset_by_challenge(
        small_challenges_test,
        solver,
        solver.evaluation_challenges,
        solver.evaluation_solutions,
    )

    # Apply one value per selected feature, in index order.
    for index, value in enumerate(activations):
        variant.set(selected_features[index], value)

    output_path = f'completions_test_goodfire_{activation0}-{activation1}-{activation2}-{activation3}-{activation4}-{activation5}-{activation6}-{activation7}-{activation8}-{activation9}.json'
    total_score, _, _ = generate_and_evaluate_solutions(test_dataset, variant, solver, output_path)

    return total_score

In [75]:
# Search space: one bounded parameter per get_total_score argument
# (activation0 .. activation9), each in [-1, 1].
pbounds = {f'activation{i}': (-1, 1) for i in range(10)}

# Fixed random_state so the sequence of probed points is repeatable.
optimizer = BayesianOptimization(f=get_total_score, pbounds=pbounds, random_state=1)

In [76]:
# Record every optimization step in ./logs.log by subscribing a JSON logger
# to the optimizer's OPTIMIZATION_STEP event.
logger = JSONLogger(path="./logs.log")
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

In [92]:
# 10 initial points followed by 30 optimization iterations. Every probe calls
# get_total_score, i.e. a full generation pass over the evaluation dataset,
# so this cell is very slow.
optimizer.maximize(init_points=10, n_iter=30)

  2%|▏         | 1/48 [01:30<1:11:10, 90.86s/it]


KeyboardInterrupt: 

In [97]:
# NOTE(review): scratch re-run writing to 'c.json'. The variant is NOT reset
# here, so it runs with whatever feature values were applied last -- confirm
# whether this cell should be kept.
test_dataset = load_dataset_by_challenge(
    small_challenges_test,
    solver,
    solver.evaluation_challenges,
    solver.evaluation_solutions,
)
total_score, _, _ = generate_and_evaluate_solutions(test_dataset, variant, solver, 'c.json')

  0%|          | 0/48 [00:47<?, ?it/s]


KeyboardInterrupt: 

In [None]:
    # Re-run of the evaluation baseline after the optimization attempts:
    # reset the variant and start from freshly loaded challenges.
    variant.reset()
    test_dataset = load_dataset_by_challenge(
        small_challenges_test,
        solver,
        solver.evaluation_challenges,
        solver.evaluation_solutions,
    )
    total_score, correct_challenges, incorrect_challenges = generate_and_evaluate_solutions(
        test_dataset,
        variant,
        solver,
        'completions_test_baseline_goodfire.json',
    )

  2%|▏         | 1/48 [00:07<05:32,  7.07s/it]