### Imports

In [12]:
from getting_examples import *
import pprint
from predict_activations import *
from utils import *

### Run an experiment

In [14]:
with open(f'feats.json', 'r') as file:
    feature_data = json.load(file)

results = run_experiments(
    num_features=3, 
    feature_data=feature_data,
    test_pos=3, # Experiment with
    test_neg=3, # Experiment with
    show_pos=0, # Experiment with
    show_neg=0, # Experiment with
    neg_type='others', # Experiment with
    binary_class=True, # Experiment with
    show_max_token=True, # Experiment with
    num_completions=1, # Experiment with
    debug=False, 
    randomize_pos=False, 
    seed=42,
)

# the run_experiments function automatically saves results to results/exp_{timestamp}.json
pprint.pprint(results)



{'hyperparameters': {'binary_class': True,
                     'debug': False,
                     'neg_type': 'others',
                     'num_completions': 1,
                     'randomize_pos': False,
                     'seed': 42,
                     'show_max_token': True,
                     'show_neg': 0,
                     'show_pos': 0,
                     'test_neg': 3,
                     'test_pos': 3},
 'num_features': 3,
 'results': [{'description': 'timestamps or dates in a specific format',
              'feature_index': 521,
              'gpt_predictions': [(1, 0.0),
                                  (1, 0.0),
                                  (1, 0.0),
                                  (0, 0.0),
                                  (0, 0.0),
                                  (0, 0.0)],
              'highest_activation': 51.19043350219727,
              'show_sentences': [],
              'test_sentences': [{'max_token': ' posted',
                       

### Load a past result file

In [15]:
json_data = load_json_results('results/exp_1715322008.339694.json')
pprint.pprint(json_data)

{'hyperparameters': {'binary_class': True,
                     'debug': False,
                     'neg_type': 'others',
                     'num_completions': 1,
                     'randomize_pos': True,
                     'seed': 42,
                     'show_max_token': False,
                     'show_neg': 0,
                     'show_pos': 0,
                     'test_neg': 3,
                     'test_pos': 3},
 'num_features': 1,
 'results': [{'description': 'phrases related to direct confrontation or '
                             'comparison',
              'feature_index': 3111,
              'gpt_predictions': [[1, 0.0],
                                  [1, 0.0],
                                  [1, 1.0],
                                  [0, 0.0],
                                  [0, 0.0],
                                  [0, 0.0]],
              'highest_activation': 44.37568664550781,
              'show_sentences': [],
              'test_sentences': [{'

### Get a simpler idea by printing the json tree (or just by opening a results file)

In [16]:
print_json_tree(json_data)


{
hyperparameters
    {
    test_pos
    test_neg
    show_pos
    show_neg
    binary_class
    neg_type
    show_max_token
    num_completions
    debug
    randomize_pos
    seed
    }

num_features
results []
        {
        feature_index
        gpt_predictions []
                .
                .
                .
            .
            .
            .
        description
        test_sentences []
                {
                max_value
                max_value_token_index
                sentence_string
                max_token
                tokens []
                    .
                    .
                    .
                values []
                    .
                    .
                    .
                }

            .
            .
            .
        show_sentences []
        highest_activation
        }

    .
    .
    .
timestamp
}


### Do analysis on loaded json_data

In [None]:
#TODO

In [3]:
save_json_results(fetch_feature_data(1), 'feat1.json')




### Older things

In [4]:
def get_predictions(feature_num):
    predictions = predict_activations(feature_num, test_number=10, show_examples=8)
    return predictions

def run():
    data = get_predictions(991) #806
    # for i in range(len(all_data)):
        # data = all_data[i]
        # print(feature_nums[i])
    print()
    pprint.pprint(data)
    custom = custom_accuracy(data)

    print(custom)

    # custom = [custom_accuracy(data, eps = 0.1) for data in all_data]

for _ in range(1):
    run()

TypeError: predict_activations() got an unexpected keyword argument 'test_number'

In [None]:
import matplotlib.pyplot as plt

### Losses
def mse(data, normalize = False):
    values = ([((elem[0]-elem[1])/(elem[0] if normalize else 1))**2 for elem in data])
    return sum(values)/len(values)

def nll_variant(data, eps = 1e-1):
    values = ([np.log((min(elem) + eps)/(max(elem) + eps)) for elem in data])
    return -sum(values)/len(values)

def l1(data, normalize = True, eps = 0.1):
    values = ([((eps + abs(elem[0]-elem[1]))/((max(elem) if normalize else 1) + eps))  for elem in data])
    return sum(values)/len(values)

### Plots
def plot_mses_cdf(mses):
    # Plotting the Mean Squared Errors (MSE) for each dataset
    mses_sorted = np.sort(mses)
    cdf = np.arange(1, len(mses_sorted)+1) / len(mses_sorted)
    plt.plot(mses_sorted, cdf)
    plt.title('Cumulative Distribution Function of MSEs')
    plt.xlabel('MSE')
    plt.ylabel('CDF')
    plt.grid(True)
    plt.show()

def plot_probability_distribution(data, bins='auto', density=True, title = "Default Title"):
    """
    Plots the probability distribution of the given data using a histogram.

    Parameters:
    - data (list or numpy array): The floating point numbers whose distribution you want to plot.
    - bins (int, sequence or str, optional): The method for calculating histogram bins. Default is 'auto'.
    - density (bool, optional): If True, the histogram is normalized to form a probability density,
                                i.e., the area under the histogram will sum to 1. Default is True.
    """
    # Calculate the histogram
    counts, bin_edges = np.histogram(data, bins=bins, density=density)

    # Calculate bin centers
    bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    # Plotting the histogram
    plt.figure(figsize=(8, 6))
    plt.bar(bin_centers, counts*np.diff(bin_edges), align='center', width=np.diff(bin_edges), edgecolor='black', alpha=0.7)
    plt.xlabel('Value')
    plt.ylabel('Probability Density')
    plt.title('Probability Distribution of Data')
    plt.title(title)
    plt.grid(True)
    plt.show()

def analyze_data(all_data):
    mses = [mse(data, normalize = False) for data in all_data]
    nlls = [nll_variant(data) for data in all_data]
    l1s = [l1(data, normalize = True) for data in all_data]

    print('l1s', sorted(l1s))
    plot_probability_distribution(mses, title = "Distribution of MSEs")
    plot_probability_distribution(nlls, title = "Distribution of NLL variant")
    plot_probability_distribution(l1s, title = "Distribution of l1s variant")

In [None]:
feature_nums = [806]#random.sample(range(0, 1000), 10)

def get_predictions(feature_num):
    predictions = predict_activations(feature_num, test_number=10, show_examples=8)
    return predictions


with concurrent.futures.ThreadPoolExecutor() as executor:
    all_data = list(executor.map(get_predictions, feature_nums))

mses = [mse(data, normalize = False) for data in all_data]
nlls = [nll_variant(data) for data in all_data]
l1s = [l1(data, normalize = True) for data in all_data]

# print('l1s', sorted(l1s))
plot_probability_distribution(mses, title = "Distribution of MSEs")
plot_probability_distribution(nlls, title = "Distribution of NLL variant")
plot_probability_distribution(l1s, title = "Distribution of l1s variant")

NameError: name 'concurrent' is not defined

In [None]:
def custom_accuracy(data):
    eps = max([elem[0] for elem in data]) / 10
    values = []
    for elem in data:
        true, pred = elem
        ## Add eps to avoid zero case
        true, pred = true + eps, pred + eps
        # Scale values
        true, pred = true ** 0.75, pred ** 0.75
        # Calculate difference
        difference = abs(true - pred)
        # Take ratio
        error = difference / max(true, pred)
        
        accuracy = 1 - error
        values.append(accuracy)
    return sum(values)/len(values)

In [None]:
feature_nums

NameError: name 'feature_nums' is not defined

In [None]:
def run():
    data = get_predictions(806)
    # for i in range(len(all_data)):
        # data = all_data[i]
        # print(feature_nums[i])
    pprint.pprint(data)
    custom = custom_accuracy(data)
    print(custom)

    # custom = [custom_accuracy(data, eps = 0.1) for data in all_data]

for _ in range(1):
    run()

# plot_probability_distribution(custom, title = "Distribution of custom accuracy")

('You are evaluating an english description of an autoencoder feature. The '
 'description should correspond to sentences which result in high activation. '
 'The english description of the feature is: " past tense verbs"\n'
 'Here are 8 examples of sentences and their corresponding activations:\n'
 ' Example: " economy\'s cooled off enough, but it wasn\'t always so. Back in '
 'the mid", Activation: 19.96\n'
 'Example: " NL<|endoftext|>," Watts said.ĊĊRubio\'s disclosure sheds new '
 'light on his", Activation: 0.00\n'
 'Example: " in their NL<|endoftext|>," Watts said.ĊĊRubio\'s disclosure sheds '
 'new light", Activation: 0.00\n'
 'Example: "ĊĊRubio\'s disclosure sheds new light on his comments in October, '
 'when he", Activation: 0.00\n'
 'Example: " be sure to add a great feel and glitz to any game. These '
 'wonderful futuristic", Activation: 0.00\n'
 'Example: " their NL<|endoftext|>," Watts said.ĊĊRubio\'s disclosure sheds '
 'new light on", Activation: 0.00\n'
 'Example: " of

In [None]:
analyze_data(all_data)

TypeError: 'int' object is not subscriptable