# Setup (just run)

In [1]:
# Colab-specific setup

# !git clone https://github.com/AISC-Steering-LLMs/Steering-LLMs
# !pwd
# repo_path = '/content/repository/'


In [2]:
# Imports
import os
import pandas as pd
import main
from omegaconf import DictConfig, OmegaConf
import yaml
from hydra import initialize
from hydra.core.global_hydra import GlobalHydra
from hydra.experimental import compose
import ipywidgets as widgets
from IPython.display import display


  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count


In [3]:
# Initialize Hydra for configuration management
GlobalHydra.instance().clear()  # Clear any previous Hydra instance
initialize(config_path=".", job_name="experiment")


The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path=".", job_name="experiment")


hydra.initialize()

## Helper Functions

In [4]:
def load_yaml_config(file_path):
    """Load a YAML configuration file.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The document parsed by ``yaml.safe_load``. An empty file, which
        ``safe_load`` parses to ``None``, yields an empty dict instead so that
        callers iterating over ``.items()`` do not fail.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Read as UTF-8 explicitly; the default text encoding of open() depends on
    # the platform locale and can mis-decode non-ASCII settings.
    with open(file_path, 'r', encoding='utf-8') as file:
        loaded = yaml.safe_load(file)
    return {} if loaded is None else loaded
    
def create_form(config):
    """Build a vertical stack of input widgets, one per configuration entry.

    Every key/value pair in ``config`` becomes a widget labelled with the key
    and pre-filled with the value: booleans get a checkbox, integers an integer
    box, floats a float box, and anything else a plain text box.

    Args:
        config: Mapping of setting names to their current values.

    Returns:
        A ``widgets.VBox`` holding the widgets in the mapping's iteration order.
    """
    fields = []
    for name, current in config.items():
        # bool is tested first because bool is a subclass of int
        if isinstance(current, bool):
            field_cls = widgets.Checkbox
        elif isinstance(current, int):
            field_cls = widgets.IntText
        elif isinstance(current, float):
            field_cls = widgets.FloatText
        else:
            field_cls = widgets.Text

        field = field_cls(value=current, description=name)
        # Full-width input, with the label sized to its own text
        field.layout = widgets.Layout(width='100%')
        field.style.description_width = 'initial'
        fields.append(field)
    return widgets.VBox(fields)

def update_config_and_save(btn, form, config_path='config.yaml'):
    """Write the values currently shown in the form to a YAML file.

    Args:
        btn: The clicked button, as passed by the click callback. Unused.
        form: Container whose ``children`` are widgets exposing ``description``
            (used as the setting name) and ``value``.
        config_path: Destination file. Defaults to ``'config.yaml'``, the path
            that was previously hard-coded.

    Raises:
        OSError: If the destination file cannot be opened for writing.
    """
    updated_config = {widget.description: widget.value for widget in form.children}
    with open(config_path, 'w', encoding='utf-8') as file:
        # sort_keys=False keeps the settings in form order; the default would
        # rewrite the file with its keys sorted alphabetically on every save.
        yaml.safe_dump(updated_config, file, sort_keys=False)
    print("Configuration updated and saved.")


# Experiment Setup

## Load existing configuration and edit if needed

In [5]:
# Load configuration and create interactive form
config = load_yaml_config('config.yaml')
form = create_form(config)
display(form)

# Create a button to save the configuration, pass the form to the event handler
save_button = widgets.Button(description="Save Configuration")
save_button.on_click(lambda btn: update_config_and_save(btn, form))
display(save_button)

VBox(children=(Text(value='Adding Hydra library for experiment configuration.', description='experiment_notes'…

Button(description='Save Configuration', style=ButtonStyle())

In [6]:
# Compose the final configuration from Hydra
cfg = compose(config_name="config")

# Load inputs and create output directories
prompts_dict = main.csv_to_dictionary(cfg.prompts_sheet)
experiment_base_dir, images_dir = main.create_output_directories()

# Save configurations and prompts
main.write_experiment_parameters(cfg, prompts_dict, experiment_base_dir)




## Model Initialization and Data Processing

In [7]:
# Initialize the model and populate the data
model = main.load_model(cfg)
activations_cache = main.populate_data(prompts_dict)

# Compute activations and add hidden states
main.compute_activations(model, activations_cache)
main.add_numpy_hidden_states(activations_cache)

Loaded pretrained model gpt2-small into HookedTransformer


## Visualization

In [None]:
# Generate and display visualizations
main.tsne_plot(activations_cache, images_dir)
main.pca_plot(activations_cache, images_dir)
main.raster_plot(activations_cache, images_dir)

## Save Results

In [None]:
# Save the activations cache if required by the configuration
if cfg.write_cache:
    main.save_activations_cache(activations_cache, experiment_base_dir)