In [1]:
%run ./Preamble.ipynb

# Model names to analyse, as returned by PM.check_for_existing_patterns("activations").
all_models = PM.check_for_existing_patterns("activations")
# Uncomment one of the lines below to restrict the run to a specific model set instead:
# all_models=PM.gpt2xl_models
# all_models=PM.gpt2_models
# all_models=['gpt2','gpt2-untrained_1','gpt2-untrained_1_weight_config_all']
print(all_models)

## Load preprocessed data.
# ys_6 is indexed by "POS_7_id" further down to obtain one category id per sample;
# Xss_6 is iterated as model -> layer -> activation array whose axis 0 is indexed by sample.
# NOTE(review): the trailing `# , use_cache=False)` fragments presumably mark an optional
# argument that bypasses a cache in PM -- confirm against PM before relying on it.
ys_6 = PM.load_ys(compress_to_POS_6=True, v=0) # , use_cache=False)
Xss_6 = PM.load_Xss(all_models, compress_to_POS_6=True, v=0) # , use_cache=False)
 

['gpt2-xl', 'gpt2-xl-untrained_1', 'gpt2', 'gpt2-untrained_1', 'gpt2-untrained_2', 'gpt2-untrained_3', 'gpt2-untrained_4', 'gpt2-untrained_5', 'gpt2-untrained_6', 'gpt2-untrained_7', 'gpt2-untrained_8', 'gpt2-untrained_9', 'gpt2-untrained_1_weight_config_all', 'gpt2-untrained_2_weight_config_all', 'gpt2-untrained_3_weight_config_all', 'gpt2-untrained_4_weight_config_all', 'gpt2-untrained_5_weight_config_all', 'gpt2-untrained_6_weight_config_all', 'gpt2-untrained_7_weight_config_all', 'gpt2-untrained_8_weight_config_all', 'gpt2-untrained_9_weight_config_all']


loading mydatadict:   0%|          | 0/22 [00:00<?, ?it/s]

loading models:   0%|          | 0/21 [00:00<?, ?it/s]

In [2]:
# Used below as the key into PM.directories and as the first argument of
# PM.load_data / PM.save_data for every intermediate result of this notebook.
exp = "exp2" # experiment folder code

In [3]:
from scipy.stats import zscore

def create_splits(n1=10, n2=8, n3=5, seed=1):
    """
    Create randomized, disjoint position splits for training, validation, and test sets.

    A random permutation of ``range(n1 + n2 + n3)`` is cut into three consecutive
    pieces of sizes ``n1``, ``n2`` and ``n3``; each piece is then sorted.

    Args:
        n1 (int): Number of training samples.
        n2 (int): Number of validation samples.
        n3 (int): Number of test samples.
        seed (int, optional): Seed for ``np.random.default_rng``. Defaults to 1,
            which reproduces the previously hard-coded behaviour.

    Returns:
        list: Three sorted 1-D integer arrays (train, valid, test) that together
        contain every position in ``range(n1 + n2 + n3)`` exactly once.
    """
    rng = np.random.default_rng(seed)
    n = n1 + n2 + n3
    # Cut points (n1, n1 + n2) yield pieces of length n1, n2 and n3 respectively.
    splits = np.split(rng.permutation(n), (n1, n1 + n2))
    return [np.sort(x) for x in splits]

def generate_tvt_inds(cat_inds, tvt_splits):
    """
    Generate indices for training, validation, and test splits based on categories.

    For every category, ``sum(len(split) for split in tvt_splits)`` indices are
    drawn without replacement from that category's index pool. Each split in
    ``tvt_splits`` is then used as a set of positions into that draw.

    Args:
        cat_inds (list): One sequence of candidate sample indices per category.
        tvt_splits (list): Position arrays (e.g. from ``create_splits``), one per
            split; positions refer to the per-category draw, not to the dataset.

    Returns:
        list: One entry per split (in the order of ``tvt_splits``); each entry is
        a list with one sorted index array per category. A real list is returned
        (not a one-shot generator) so the result can be indexed and re-iterated.

    Raises:
        ValueError: Propagated from ``Generator.choice`` when a category has fewer
            candidates than the total number of requested samples.
    """
    # Loop-invariant: total number of samples needed per category.
    n_needed = sum(len(split) for split in tvt_splits)

    choices_inds = []
    for inds in cat_inds:
        # The generator is deliberately re-created with the same seed for every
        # category, exactly as before, so previously drawn indices are reproduced.
        rng = np.random.default_rng(1)
        choices_inds.append(rng.choice(inds, n_needed, replace=False))

    return [
        [np.sort(chosen.take(split, axis=0)) for chosen in choices_inds]
        for split in tvt_splits
    ]

def reduce_Xss_6(Xss_6, X_inds_set):
    """
    Restrict every layer's activations to the selected rows, one block per index set.

    Args:
        Xss_6 (dict): Nested mapping model -> layer -> activation array whose
            axis 0 is indexed by the entries of ``X_inds_set``.
        X_inds_set (list): Index collections; each one produces its own block.

    Returns:
        dict: Same model -> layer nesting; every leaf is a list holding, for each
        index collection in order, the selected rows transposed (selected rows
        become columns).
    """
    return {
        model: {
            layer: [activations.take(picked, axis=0).T for picked in X_inds_set]
            for layer, activations in layers.items()
        }
        for model, layers in Xss_6.items()
    }

def def_value():
    """Return a new defaultdict whose missing keys are filled with a fresh list of six ``None`` slots."""
    return defaultdict(lambda: [None] * 6)

# Prepare data splits and reduce activations.
# The guard is falsy once Xss_6 has been emptied at the end of this branch, so
# re-running the cell in the same kernel skips the reduction and keeps Xss_6_all.
if len(list(Xss_6.keys())):
    num_train = 200
    num_valid = 100
    num_testt = 75
    num_all = num_train + num_valid + num_testt

    # Random, disjoint train/valid/test positions within each category's draw
    tvt_splits = create_splits(num_train, num_valid, num_testt)

    # For each of the 6 categories: indices of the samples whose POS_7_id equals it
    cat_inds = [[i for i, v in enumerate(ys_6["POS_7_id"] == x) if v] for x in range(6)]

    # Per split: one sorted index array per category
    X_inds_train, X_inds_valid, X_inds_test = generate_tvt_inds(cat_inds, tvt_splits)

    # Lay the selected samples out per category as [train | valid | test].
    # The normalisation step below recovers the splits purely by column position
    # (first num_train columns, next num_valid, remaining num_testt), so the union
    # must NOT be sorted: sorting would discard the random split and make "train"
    # simply the num_train lowest indices of each category.
    # NOTE(review): results cached from a run with the previous (sorted) ordering
    # were computed on different splits and should be regenerated.
    all_inds = [np.concatenate([x, y, z]) for x, y, z in zip(X_inds_train, X_inds_valid, X_inds_test)]
    Xss_6_all = reduce_Xss_6(Xss_6, all_inds)

    # Empty the original activations dictionary in place; only the reduced copy is used below
    for k in list(Xss_6.keys()):
        del Xss_6[k]


# Normalize activations and split them into train, validation, and test sets
if len(list(Xss_6_all.keys())):
    # model -> layer -> list of 6 per-category matrices (rows follow axis 1 of the
    # loaded activations, columns are samples)
    Xss_6_train = defaultdict(def_value)
    Xss_6_valid = defaultdict(def_value)
    Xss_6_test = defaultdict(def_value)

    for model, Xs_6 in Xss_6_all.items():
        for layer, X_6 in Xs_6.items():
            # Concatenate the 6 category blocks side by side: 6 * num_all columns
            X_6_concat = np.concatenate(X_6, axis=1)
            # z-score every row over all categories and all three splits together
            zscored = zscore(X_6_concat, axis=1)

            for i in range(6):
                # Columns belonging to category i
                i1, i2 = num_all * i, num_all * (i + 1)
                zscored_part = zscored[:, i1:i2]
                # Positional slices match the [train | valid | test] layout built above
                Xss_6_train[model][layer][i] = zscored_part[:, :num_train]
                Xss_6_valid[model][layer][i] = zscored_part[:, num_train:num_train + num_valid]
                Xss_6_test[model][layer][i] = zscored_part[:, num_train + num_valid:]



In [4]:
def get_master_activations_df(split_names=["Xss_6_train", "Xss_6_valid", "Xss_6_test"], splits=[Xss_6_train, Xss_6_valid, Xss_6_test]):
    """
    Flatten the nested activation splits into one long DataFrame.

    Each row holds one activation vector, identified by split name, model, layer,
    category position and row position inside that category's matrix. The five
    identifying fields are kept as columns and are also used as a MultiIndex.

    Args:
        split_names (list): Label stored in ``split_name`` for each entry of ``splits``.
        splits (list): Nested mappings model -> layer -> sequence of per-category
            matrices, paired positionally with ``split_names``. The defaults are
            bound to the module-level split dictionaries when the function is defined.

    Returns:
        pd.DataFrame: One row per (split_name, model, layer, cat_ind, neuron_id),
        with the activation vector in the ``activations`` column.
    """
    key_columns = ["split_name", "model", "layer", "cat_ind", "neuron_id"]

    records = []
    for split_name, split in zip(split_names, splits):
        for model, layers in tqdm(split.items()):
            for layer, per_category in layers.items():
                records.extend(
                    [split_name, model, layer, cat_ind, neuron_id, activations]
                    for cat_ind, category_matrix in enumerate(per_category)
                    for neuron_id, activations in enumerate(category_matrix)
                )

    frame = pd.DataFrame(records, columns=key_columns + ["activations"])
    # Index by the same five keys that stay available as ordinary columns.
    key_tuples = list(frame[key_columns].itertuples(index=False, name=None))
    row_index = pd.MultiIndex.from_tuples(key_tuples, names=tuple(key_columns))
    return frame.set_index(row_index)

# Build the master activations table from the default splits and pickle it
# into this experiment's directory.
master_activations_df = get_master_activations_df()
master_activations_df.to_pickle(PM.directories[exp] / "master_activations_df_quicksave.pkl")


  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

In [5]:
from scipy.stats import ranksums

def calc_all_rank_sums(Xss_6_train, use_cache=True):
    """
    Run pairwise rank-sum tests between the six categories for every neuron.

    For each neuron only the pairs that involve its highest-mean category are
    actually tested with ``scipy.stats.ranksums``; every other pair is recorded
    as ``r = 0, p = 1``. Results are cached per (model, layer) through
    ``PM.load_data`` / ``PM.save_data``.

    Args:
        Xss_6_train (dict): model -> layer -> sequence of six per-category
            matrices, each indexable by neuron.
        use_cache (bool, optional): When true, a layer whose cached table can be
            loaded is returned from the cache instead of being recomputed.

    Returns:
        pd.DataFrame: Concatenation of all layer tables with columns
        ``model, layer, neuron_idx, cat1, cat2, r, p`` (one row per ordered
        category pair with ``cat1 != cat2``).
    """
    def rank_sums_for_neuron(model, layer, neuron_idx, neuron_activations):
        """Build the 30-row table of ordered category pairs for a single neuron."""
        # Category with the highest mean activation; only pairs containing it are tested.
        top_category = np.argmax(np.mean(neuron_activations, axis=1))

        pair_results = {}
        cat1_col, cat2_col, r_col, p_col = [], [], [], []
        for cat1 in range(6):
            for cat2 in range(6):
                if cat1 == cat2:
                    continue
                if (cat1, cat2) not in pair_results:
                    if top_category == cat1 or top_category == cat2:
                        r, p = tuple(ranksums(neuron_activations[cat1], neuron_activations[cat2]))
                    else:
                        r, p = (0, 1)  # same average, no certainty
                    # The mirrored pair shares the p-value and flips the statistic's sign.
                    pair_results[(cat1, cat2)] = (r, p)
                    pair_results[(cat2, cat1)] = (-r, p)
                r, p = pair_results[(cat1, cat2)]
                cat1_col.append(cat1)
                cat2_col.append(cat2)
                r_col.append(r)
                p_col.append(p)

        n_rows = len(cat1_col)
        return pd.DataFrame.from_dict({
            "model": [model] * n_rows,
            "layer": [layer] * n_rows,
            "neuron_idx": [neuron_idx] * n_rows,
            "cat1": cat1_col,
            "cat2": cat2_col,
            "r": r_col,
            "p": p_col
        })

    def rank_sums_for_layer(model, layer, layer_activations):
        """Return the cached table for one layer, or compute and save it."""
        # The `{model=}` / `{layer=}` specifiers embed the variable names in the
        # file name, so these two locals must keep exactly these names.
        layer_save_name = f"ranksum_layers_{model=},{layer=}.csv"
        if use_cache:
            cached_df = PM.load_data(exp, layer_save_name)
            if cached_df is not None:
                return cached_df

        n_neurons = len(layer_activations[0])
        per_neuron = [
            rank_sums_for_neuron(
                model,
                layer,
                neuron_idx,
                [category[neuron_idx] for category in layer_activations],
            )
            for neuron_idx in tqdm(range(n_neurons), position=2, leave=False, mininterval=0.4)
        ]
        layer_df = pd.concat(per_neuron)
        PM.save_data(exp, layer_save_name, layer_df)
        return layer_df

    # Walk every model and layer, collecting one table per layer.
    collected = []
    for model, layers in tqdm(Xss_6_train.items(), position=0, leave=True):
        for layer, layer_activations in tqdm(layers.items(), position=1, leave=False):
            collected.append(rank_sums_for_layer(model, layer, layer_activations))
    return pd.concat(collected)

# Reload the pickled master activations table, then compute (or load from the
# per-layer cache) the rank-sum table for the training split and save it.
master_activations_df = pd.read_pickle(PM.directories[exp] / "master_activations_df_quicksave.pkl")
rank_sums_df = calc_all_rank_sums(Xss_6_train, use_cache=True)
PM.save_data(exp, "rank_sums_df_quicksave.csv", rank_sums_df)


  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

In [6]:
def calc_feature_selectivity_pmax(rank_sums_df):
    """
    Summarise the pairwise rank-sum results per neuron and category.

    For every (model, layer, neuron_idx) and every ``cat1`` in 0..5 this keeps the
    largest p-value (``maxp_<cat1>``) and the smallest statistic (``minr_<cat1>``)
    over all rows with that ``cat1``.

    The aggregation is a single vectorised groupby instead of a Python loop over
    every neuron, and an empty input yields an empty, correctly-labelled frame
    (the loop-based version failed because its column list was only defined
    inside the loop).

    Args:
        rank_sums_df (pd.DataFrame): Table with columns ``model, layer,
            neuron_idx, cat1, r, p`` (other columns are ignored).

    Returns:
        pd.DataFrame: One row per (model, layer, neuron_idx), sorted by those
        keys, with columns ``model, layer, neuron_idx, maxp_0..maxp_5,
        minr_0..minr_5``.

    Raises:
        KeyError: If one of the six categories never occurs as ``cat1``.
    """
    idx_labels = ["model", "layer", "neuron_idx"]
    out_columns = (
        idx_labels
        + [f"maxp_{cat}" for cat in range(6)]
        + [f"minr_{cat}" for cat in range(6)]
    )

    if len(rank_sums_df) == 0:
        return pd.DataFrame(columns=out_columns)

    # groupby sorts by key, which reproduces the row order of a per-neuron loop.
    grouped = rank_sums_df.groupby(idx_labels + ["cat1"])
    maxp = grouped["p"].max().unstack("cat1")
    minr = grouped["r"].min().unstack("cat1")
    maxp.columns = [f"maxp_{cat}" for cat in maxp.columns]
    minr.columns = [f"minr_{cat}" for cat in minr.columns]

    res = pd.concat([maxp, minr], axis=1).reset_index()
    return res[out_columns]

# Reload the saved rank-sum table, reduce it to per-category max-p / min-r
# values, and save the result.
rank_sums_df = PM.load_data(exp, "rank_sums_df_quicksave.csv")
FS_pmax_df = calc_feature_selectivity_pmax(rank_sums_df)
PM.save_data(exp, "FS_pmax_df_quicksave.csv", FS_pmax_df)



  0%|          | 0/346496 [00:00<?, ?it/s]

In [7]:
def calc_feature_selectivity_df(FS_pmax_df, thresh=10**-3, only_largest=True):
    """
    Turn per-category max-p / min-r values into 0/1 selectivity flags.

    A neuron is flagged for category ``i`` when ``maxp_i < thresh`` (strictly) and,
    if ``only_largest`` is true, additionally ``minr_i > 0``.

    Args:
        FS_pmax_df (pd.DataFrame): Table with ``neuron_idx`` and, for each
            ``i`` in 0..5, the columns ``maxp_i`` and ``minr_i``. Not modified.
        thresh (float, optional): P-value threshold for selectivity (default is 10^-3).
        only_largest (bool, optional): If True, also require a positive ``minr_i``.

    Returns:
        pd.DataFrame: Copy of the input without the ``maxp_*`` / ``minr_*`` columns,
        with integer columns ``sel_0..sel_5`` and their row-wise sum ``total_sels``
        appended; ``neuron_idx`` is cast to int.
    """
    res_df = FS_pmax_df.copy()
    sel_columns = [f"sel_{i}" for i in range(6)]

    for i in range(6):
        selective = res_df[f"maxp_{i}"] < thresh
        if only_largest:
            selective = selective & (res_df[f"minr_{i}"] > 0)
        # Explicit bool -> int cast; mapping True/False through `replace` relies on
        # dtype downcasting that pandas has deprecated.
        res_df[f"sel_{i}"] = selective.astype(int)

    # Drop all source columns at once now that every flag has been derived.
    source_columns = [f"{prefix}_{i}" for i in range(6) for prefix in ("maxp", "minr")]
    res_df = res_df.drop(columns=source_columns)

    res_df["neuron_idx"] = res_df["neuron_idx"].astype(int)
    res_df["total_sels"] = res_df[sel_columns].sum(axis=1)
    return res_df

# Reload the saved max-p / min-r table, derive the 0/1 selectivity flags at
# p < 10**-3, and save the result.
FS_pmax_df = PM.load_data(exp, "FS_pmax_df_quicksave.csv")
FS_df = calc_feature_selectivity_df(FS_pmax_df, thresh=10**-3)
PM.save_data(exp, "FS_df_quicksave.csv", FS_df)


In [8]:
def calc_selective_neurons_df(FS_df):
    """
    Collect selective neurons and attach a feature selectivity index (FSI) to the
    uniquely selective ones.

    Reads the module-level ``sels`` and ``master_activations_df``.
    NOTE(review): ``sels`` is not defined in this section of the notebook;
    presumably it lists the ``sel_<i>`` column names in category order -- confirm
    where it is defined.

    Args:
        FS_df (pd.DataFrame): Table with ``model, layer, neuron_idx, total_sels``
            and one 0/1 column per entry of ``sels``.

    Returns:
        tuple: ``(unique_selective_neurons, multi_selective_neurons)``. The first
        holds the rows with ``total_sels == 1`` (columns ``model, layer,
        neuron_idx, cat_ind, FSI, FSI_valid``); the second holds the rows with
        ``total_sels != 1``, including ``total_sels``.
    """
    key_columns = ["model", "layer", "neuron_idx", "cat_ind"]

    def calc_FSI(x, y):
        """
        Difference of the means of ``x`` and ``y`` divided by the square root of
        the average of their variances.
        """
        meanx = x.mean()
        meany = y.mean()
        stdx = x.std()
        stdy = y.std()
        stdave = (stdx**2 + stdy**2) / 2
        return (meanx - meany) / (stdave**0.5)

    def add_FSI_to_selective_neurons(unique_selective_neurons, split_name="Xss_6_train"):
        """
        Return one FSI per row of ``unique_selective_neurons``, in row order.

        The values are later assigned positionally as a new column, so they must
        follow the frame's own row order. Iterating ``groupby`` groups instead
        yields them in sorted-key order, which does not match a frame ordered by
        ``cat_ind`` first and attaches FSIs to the wrong neurons.
        """
        assert not unique_selective_neurons.duplicated(subset=key_columns).any(), "should only have 1 matching value"
        FSIs = []
        rows = unique_selective_neurons[key_columns].itertuples(index=False, name=None)
        for model, layer, neuron_idx, cat_ind in tqdm(rows, total=len(unique_selective_neurons)):
            # Activations of the neuron for its selective category ...
            selective_category = master_activations_df.loc[split_name, model, layer, cat_ind, neuron_idx]["activations"]
            # ... versus its activations for the five remaining categories, pooled.
            other_categories = np.array([master_activations_df.loc[split_name, model, layer, i, neuron_idx]["activations"] for i in range(6) if i != cat_ind])
            FSIs.append(calc_FSI(selective_category, other_categories))
        return FSIs

    # One block of rows per selectivity column; cat_ind is that column's position in `sels`.
    # `.assign` returns a new frame, so no column is set on a filtered slice.
    selective_neurons = pd.concat(
        [
            FS_df.loc[FS_df[sel] == 1, ["model", "layer", "neuron_idx", "total_sels"]]
            .assign(cat_ind=i)[["model", "layer", "neuron_idx", "cat_ind", "total_sels"]]
            for i, sel in enumerate(sels)
        ],
        ignore_index=True,
    )

    multi_selective_neurons = selective_neurons[selective_neurons["total_sels"] != 1]
    unique_selective_neurons = selective_neurons[selective_neurons["total_sels"] == 1].drop(columns=["total_sels"])
    unique_selective_neurons["FSI"] = add_FSI_to_selective_neurons(unique_selective_neurons, split_name="Xss_6_train")
    unique_selective_neurons["FSI_valid"] = add_FSI_to_selective_neurons(unique_selective_neurons, split_name="Xss_6_valid")

    return unique_selective_neurons, multi_selective_neurons

# Reload the saved selectivity flags, compute the uniquely selective neurons
# with their FSI values, and save them (the multi-selective table is discarded).
FS_df = PM.load_data(exp, "FS_df_quicksave.csv")
unique_selective_neurons, _ = calc_selective_neurons_df(FS_df)
PM.save_data(exp, "unique_selective_neurons_quicksave.csv", unique_selective_neurons)

  0%|          | 0/93482 [00:00<?, ?it/s]

  0%|          | 0/93482 [00:00<?, ?it/s]