In [None]:
!pip install PopulationLM@git+https://github.com/JesseTNRoberts/PopulationLM
!pip install -U accelerate
!pip install git+https://github.com/JesseTNRoberts/minicons_modded

In [None]:
import argparse
import csv
import itertools
from tqdm import tqdm
import pandas as pd
import os

import torch
from transformers import AutoModelForMaskedLM, AutoModelForCausalLM
from torch.utils.data import DataLoader

from minicons import scorer
import PopulationLM as pop

In [None]:
# Colab only: mount Google Drive at /content/drive.
# The default `drive_loc` of run_all_experiments points inside this mount
# (/content/drive/MyDrive/...), so run this cell before launching experiments
# that rely on that default.
# Original author's note: should only need to be run once per Google account.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Colab only: fetch the Hugging Face API token stored in Colab user data under
# the key 'hf_token'. Some models cannot be downloaded without it.
# `my_token` is evaluated as the default value of the `token` parameter when
# run_all_experiments is *defined*, so this cell (or any other assignment to
# `my_token`) must run before the cell that defines that function, every time
# the notebook is loaded.
# Outside Colab, assign the token to `my_token` yourself; avoid committing a
# literal token string with the notebook.
from google.colab import userdata
my_token = userdata.get('hf_token')

In [3]:
#Adapted from add_dropout_layers in PopulationLM. Necessary for Falcon models until added to PopulationLM.
#TODO: check whether PopLM add_new_dropout layers has been updated to work with Falcon by default
def add_falcon_dropout_layers(
    model: torch.nn.Module,
    layer_name_to_replace='Linear',
    MLP_layer_names=('LlamaMLP', 'MistralMLP', 'MixtralBlockSparseTop2MLP', 'GemmaMLP', 'FalconMLP'),
    verbose=False,
):
    """Recursively add dropout to the 'FalconLinear' layers of matching sub-modules.

    Walks the module tree depth-first. For every child whose class name is in
    ``MLP_layer_names`` it calls ``pop.DropoutUtils.add_new_dropout_layers`` on
    that child (always targeting ``'FalconLinear'``) and then calls
    ``pop.DropoutUtils.convert_dropouts`` on each direct sub-child whose class
    name is ``'FalconMLP'``. Children that do not match are searched recursively.
    The model is modified in place.

    Args:
        model: Root module to traverse.
        layer_name_to_replace: Passed along on recursion only. NOTE: the
            replacement target is hard-coded to ``'FalconLinear'``, so this
            value currently has no effect on which layers are wrapped.
        MLP_layer_names: Class names of the modules that should receive
            dropout. Any container supporting ``in`` works (list, tuple, set).
        verbose: When true, print one line per module that received dropout.

    Returns:
        None.
    """
    for child in model.children():
        if child._get_name() not in MLP_layer_names:
            # Not a target: keep descending with the same settings.
            add_falcon_dropout_layers(
                child,
                layer_name_to_replace=layer_name_to_replace,
                MLP_layer_names=MLP_layer_names,
                verbose=verbose,
            )
            continue

        pop.DropoutUtils.add_new_dropout_layers(child, layer_name_to_replace='FalconLinear')
        for subchild in child.children():
            if subchild._get_name() == 'FalconMLP':
                pop.DropoutUtils.convert_dropouts(subchild)

        # Report once per matched layer (not once per sub-child).
        if verbose:
            print('layer: ', child._get_name(), 'dropout added')

In [None]:
#TODO: Possibly no longer necessary. Test with in-built pop.add_new_dropout_layers

def add_dropout_layers(model):
  """Wrap every Linear inside each LlamaMLP with a (Linear, Dropout(p=0)) Sequential.

  Only modules reached through the exact path
  ``LlamaModel -> ModuleList -> LlamaDecoderLayer -> LlamaMLP -> Linear``
  (matched by class name) are touched. The model is modified in place.

  Args:
    model: Module whose direct children are searched for a ``LlamaModel``.

  Returns:
    The same ``model`` object, always (also when nothing matched).
  """
  for layer in model.children():
    if layer._get_name() != 'LlamaModel':
      continue
    for sublayer in layer.children():
      if sublayer._get_name() != 'ModuleList':
        continue
      for decoder_layer in sublayer.children():
        if decoder_layer._get_name() != 'LlamaDecoderLayer':
          continue
        for mlp in decoder_layer.children():
          if mlp._get_name() != 'LlamaMLP':
            continue
          # Snapshot the children first: setattr below replaces entries of
          # the mapping that named_children() iterates over.
          for name, projection in list(mlp.named_children()):
            if projection._get_name() == 'Linear':
              wrapped = torch.nn.Sequential(projection, torch.nn.Dropout(p=0))
              setattr(mlp, name, wrapped)
  # Return only after every child has been inspected.
  return model

In [2]:
def run_experiment(exp_path,
                  transformer,
                  model_name='bert-base-uncased',
                  results_loc=None,
                  batch_size=10,
                  num_batches=-1,
                  committee_size=50,
                  ):
    """Score the prompts of one experiment with a dropout committee and save a CSV.

    Reads ``<exp_path>/prompts.csv`` (pipe-delimited, with a header row), scores
    the rows batch by batch and appends each scored batch to the results file,
    so partial results survive an interrupted run.

    For every batch a new dropout population is generated with
    ``pop.generate_dropout_population`` and the scoring call is evaluated once
    per population member via ``pop.call_function_with_population``.

    Args:
        exp_path: Directory that contains ``prompts.csv``.
        transformer: Scorer exposing ``.model`` and
            ``conditional_score(first, second, reduction=...)``.
        model_name: Only used to build the default results file name; any
            ``/`` is replaced with ``_``.
        results_loc: Output CSV path. Defaults to
            ``<exp_path>/results_<model_name>.csv``. The file is overwritten.
        batch_size: Number of prompt rows per batch.
        num_batches: Maximum number of batches to process; a negative value
            means "all batches".
        committee_size: Passed to ``pop.generate_dropout_population``.

    Returns:
        None. The results file holds every input column plus ``dv_prob``,
        which for each row is the list (one entry per committee member) of
        ``(sum, mean, values)`` tuples produced by the reduction.

    Raises:
        ValueError: If ``prompts.csv`` has no header row.
    """
    # Column positions in prompts.csv that are fed to conditional_score:
    # column 0 is its first argument, column 5 its second.
    # NOTE(review): presumably prompt and dependent-variable text - confirm
    # against the prompts.csv schema.
    prompt_col, dv_col = 0, 5

    with open(exp_path + '/prompts.csv', "r", newline='') as f:
        reader = csv.DictReader(f, delimiter='|')
        if reader.fieldnames is None:
            raise ValueError(f"{exp_path}/prompts.csv is empty: no header row found")
        # Copy so the reader's own header list is not mutated.
        column_names = list(reader.fieldnames) + ["dv_prob"]
        dataset = [list(row.values()) for row in reader]

    model_name = model_name.replace("/", "_")
    if results_loc is None:
        results_loc = exp_path + f"/results_{model_name}.csv"

    # Start a fresh results file containing only the header.
    with open(results_loc, "w", newline='') as f:
        csv.writer(f, delimiter='|').writerow(column_names)

    def call_me(p1, q1):
        # Keep the summed score, the mean score and the raw values per item.
        return transformer.conditional_score(
            p1, q1,
            reduction=lambda x: (x.sum(0).item(), x.mean(0).item(), x.tolist()))

    stimuli_loader = DataLoader(dataset, batch_size=batch_size, num_workers=0)
    total_batches = len(stimuli_loader)
    if num_batches >= 0:
        total_batches = min(num_batches, total_batches)

    population = None
    limited_loader = itertools.islice(stimuli_loader, total_batches)
    for batch in tqdm(limited_loader, total=total_batches):
        # The loader yields one sequence per CSV column.
        columns = [list(column) for column in batch]
        p_list = columns[prompt_col]
        dv_list = columns[dv_col]

        def score_batch():
            return call_me(p_list, dv_list)

        population = pop.generate_dropout_population(
            transformer.model, score_batch, committee_size=committee_size)
        outs = list(pop.call_function_with_population(
            transformer.model, population, score_batch))

        # outs is indexed [member][item]; regroup to [item][member].
        dv_scores = [list(item_scores) for item_scores in zip(*outs)]
        columns.append(dv_scores)

        # Append after every batch so finished work is never lost.
        with open(results_loc, "a", newline='') as f:
            csv.writer(f, delimiter='|').writerows(zip(*columns))

    # Drop the last population explicitly; safe even when no batch ran.
    del population

    print('\nResults saved to: ', results_loc)

In [3]:
def run_all_experiments(model,
                        lm_type,
                        batch_size=10,
                        add_dropout=False,
                        drive_loc='/content/drive/MyDrive/Research/Alignment_Testing/prompts/',
                        save_name=None,
                        falcon=False,
                        token=my_token,
                        committee_size=30):
    """Load one model, enable MC dropout on it and run all six experiments.

    Args:
        model: Model identifier or local path handed to the minicons scorer.
        lm_type: ``"masked"``/``"mlm"`` selects ``scorer.MaskedLMScorer``;
            ``"incremental"``/``"causal"`` selects ``scorer.IncrementalLMScorer``.
        batch_size: Rows per batch, forwarded to ``run_experiment``.
        add_dropout: When true (and ``falcon`` is false) call
            ``pop.DropoutUtils.add_new_dropout_layers`` on the model first.
        drive_loc: Directory holding one sub-directory per experiment.
        save_name: Name used in the results file names. Defaults to ``model``;
            useful when ``model`` is a local path.
        falcon: When true, use ``add_falcon_dropout_layers`` instead of the
            generic dropout insertion/conversion.
        token: Access token forwarded to the scorer. The default is the value
            ``my_token`` had when this function was defined.
        committee_size: Dropout committee size, forwarded to ``run_experiment``.

    Returns:
        None. Each experiment writes its own results CSV.

    Raises:
        ValueError: If ``lm_type`` is not one of the supported values.
    """
    experiments = [
                   'Exp1-THS',
                   'Exp2-TLSP',
                   'Exp3-TLSN',
                   'Exp4-TRB',
                   'Exp5-PDLS',
                   'Exp6-PDHS',
                  ]

    base_model_name = model
    device='cuda'

    if lm_type in ("masked", "mlm"):
        scorer_cls = scorer.MaskedLMScorer
    elif lm_type in ("incremental", "causal"):
        scorer_cls = scorer.IncrementalLMScorer
    else:
        raise ValueError(
            f"Unsupported lm_type {lm_type!r}; expected 'masked', 'mlm', "
            "'incremental' or 'causal'")

    # Load the model: first try the memory-friendly half-precision settings,
    # then fall back to the scorer defaults if that fails.
    try:
        transformer = scorer_cls(base_model_name,
                                 device=device,
                                 local_files_only=False,
                                 low_cpu_mem_usage=True,
                                 torch_dtype=torch.float16,
                                 device_map="auto",
                                 token=token)
    except Exception as load_error:
        # `Exception` (not a bare except) so Ctrl-C still interrupts the load.
        print(f'Optimized load of {base_model_name} failed ({load_error!r}); '
              'retrying with default settings')
        transformer = scorer_cls(base_model_name, device=device, token=token)

    # Name used in result file names: the explicit save_name if given,
    # otherwise the model identifier itself.
    model_name = save_name if save_name is not None else base_model_name

    try:
        if falcon:
            add_falcon_dropout_layers(transformer.model, MLP_layer_names=['FalconDecoderLayer'])
        elif add_dropout:
            pop.DropoutUtils.add_new_dropout_layers(transformer.model)

        # convert the internal model to use MC Dropout
        # (the Falcon helper already converts the layers it touches)
        if not falcon:
            pop.DropoutUtils.convert_dropouts(transformer.model)
        pop.DropoutUtils.activate_mc_dropout(transformer.model, activate=True, random=0.1)

        for exp in experiments:
            print(f'Running experiment: {exp}')
            run_experiment(os.path.join(drive_loc, exp),
                           transformer,
                           model_name=model_name,
                           batch_size=batch_size,
                           committee_size=committee_size)
    finally:
        # Release the model even when an experiment fails, so the next model
        # in a sweep does not run out of GPU memory.
        del transformer
        torch.cuda.empty_cache()

In [None]:
# format: (model_name, model_type, save_name, falcon)
# model_name may be a hub identifier or a local directory; save_name is the
# label passed as `save_name` to run_all_experiments by the loop below.
bert_race_loc = '/content/drive/MyDrive/Research/Alignment_Testing/local_models/debiased_model_bert-base-uncased_race/'
bert_gend_loc = '/content/drive/MyDrive/Research/Alignment_Testing/local_models/debiased_model_bert-base-uncased_gender/'
# Every entry needs a trailing comma: adjacent tuples without one are parsed
# as a call ("tuple(...)") and fail with "'tuple' object is not callable".
models = [
    ('bert-base-uncased',                   'masked',      'BERT',                 False),
    ('meta-llama/Llama-2-7b-hf',            'incremental', 'LLaMa2-7B',            False),
    ('meta-llama/Meta-Llama-3-8B',          'incremental', 'LLaMa3-8B',            False),
    ('meta-llama/Meta-Llama-3-8B-Instruct', 'incremental', 'LLaMa3-8B-Instruct',   False),
    ('openai-community/gpt2',               'incremental', 'GPT2',                 False),
    ('mistralai/Mistral-7B-v0.1',           'incremental', 'Mistral-7B',           False),
    ('openlm-research/open_llama_7b',       'incremental', 'OpenLLaMa',            False),
    ('PKU-Alignment/beaver-7b-v1.0',        'incremental', 'Beaver-7B',            False),
    ('PKU-Alignment/alpaca-7b-reproduced',  'incremental', 'Alpaca-7B',            False),
    ('tiiuae/falcon-7b',                    'incremental', 'Falcon-7B',            True ),
    ('tiiuae/falcon-7b-instruct',           'incremental', 'Falcon-7B-instruct',   True ),
    ('FacebookAI/roberta-base',             'masked',      'RoBERTa',              False),
    ('FacebookAI/roberta-large',            'masked',      'RoBERTa-large',        False),
    ('upstage/SOLAR-10.7B-v1.0',            'incremental', 'SOLAR-10.7B',          False),
    (bert_race_loc,                         'masked',      'debiased_bert-race',   False),
    (bert_gend_loc,                         'masked',      'debiased_bert-gender', False),
]

# Evaluate the registered models one after another; every entry is unpacked
# as (model identifier, LM type, results label, Falcon flag).
for model_id, lm_kind, label, is_falcon in models:
    run_all_experiments(
        model_id,
        lm_kind,
        batch_size=24,
        save_name=label,
        falcon=is_falcon,
    )