In [None]:
!pip install PopulationLM@git+https://github.com/JesseTNRoberts/PopulationLM
!pip install minicons

Collecting PopulationLM@ git+https://github.com/JesseTNRoberts/PopulationLM
  Cloning https://github.com/JesseTNRoberts/PopulationLM to /tmp/pip-install-1jscpfco/populationlm_cce4f9c569d74a6a97e2287a37452e0e
  Running command git clone --filter=blob:none --quiet https://github.com/JesseTNRoberts/PopulationLM /tmp/pip-install-1jscpfco/populationlm_cce4f9c569d74a6a97e2287a37452e0e
  Resolved https://github.com/JesseTNRoberts/PopulationLM to commit f5f3261716599d0678683ac28723d2544bad4daa
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: PopulationLM
  Building wheel for PopulationLM (setup.py) ... [?25l[?25hdone
  Created wheel for PopulationLM: filename=PopulationLM-0.1-py3-none-any.whl size=3775 sha256=087a4daf478b6c66f50f244e2c700c551766afa413f38a00f11450925b883410
  Stored in directory: /tmp/pip-ephem-wheel-cache-5r4jn9jd/wheels/76/d3/b7/994b47d195d749b9752de5e275517c30ab4ef097e508ec8f2f
Successfully built PopulationLM
Installing collected

In [None]:
import argparse
import csv
import itertools
from tqdm import tqdm
import pandas as pd
import os

import torch
from transformers import AutoModelForMaskedLM, AutoModelForCausalLM
from torch.utils.data import DataLoader

from minicons import scorer
import PopulationLM as pop

In [None]:
# Llama-style MLP blocks are located with a recursive walk over the module tree.
# Replacement happens in place (setattr on the parent module), so the model object
# that is passed in is the one that gets modified -- no copy is made.

def add_dropout_layers(model):
  """Wrap every ``Linear`` child of every ``LlamaMLP`` module with a Dropout layer.

  Each matching ``Linear`` is replaced on its parent by
  ``Sequential(<original Linear>, Dropout(p=0))``. Modules are matched by class
  name (``_get_name()``), exactly as before, so no transformers import is needed.

  Unlike the previous nested-loop version, the search covers the whole module
  tree, and the function always returns ``model`` -- including when the model
  has no children or its first child is not the ``LlamaModel``.

  Args:
    model: a ``torch.nn.Module``; modified in place.

  Returns:
    The same ``model`` object that was passed in.
  """
  # Snapshot the tree first: we are about to replace children while walking it.
  for module in list(model.modules()):
    if module._get_name() != 'LlamaMLP':
      continue
    for name, child in list(module.named_children()):
      if child._get_name() == 'Linear':
        # p=0 keeps the wrapped layer a no-op on insertion.
        # NOTE(review): presumably the effective rate is set later by
        # PopulationLM's convert_dropouts / activate_mc_dropout -- confirm.
        wrapped = torch.nn.Sequential(child, torch.nn.Dropout(p=0))
        setattr(module, name, wrapped)
  return model

In [None]:
def run_experiment(exp_path,
                   base_model_name='bert-base-uncased',
                   ow_model_loc=None,
                   results_loc=None,
                   batch_size=10,
                   num_batches=-1,
                   committee_size=50,
                   device='cuda',
                   lm_type='masked',
                   add_dropout=False,
                   prompt_col=0,
                   dv_col=5):
    """Score one experiment's prompts with a dropout population and save a CSV.

    Reads ``<exp_path>/prompts.csv`` ('|'-delimited, with a header row), scores
    column ``dv_col`` conditioned on column ``prompt_col`` with every member of
    a dropout population, and appends one output row per input row to the
    results file. Each output row is the original row plus a ``dv_prob`` cell
    holding the list of per-member scores.

    Args:
        exp_path: directory containing ``prompts.csv``.
        base_model_name: model name handed to the minicons scorer.
        ow_model_loc: optional local checkpoint directory. When given, the
            model is loaded with ``local_files_only=True`` and replaces the
            scorer's model (works around minicons' local-loading limitation).
        results_loc: output CSV path; defaults to
            ``<exp_path>/results_<model_name>.csv``.
        batch_size: number of rows scored per batch.
        num_batches: maximum number of batches to process; a negative value
            means "all batches".
        committee_size: population size passed to
            ``pop.generate_dropout_population``.
        device: device passed to the scorer and used for the overwrite model.
        lm_type: 'masked'/'mlm' or 'incremental'/'causal'.
        add_dropout: when True, run ``add_dropout_layers`` on the model before
            the dropout conversion.
        prompt_col: index of the CSV column used as the conditioning text
            (first argument of ``conditional_score``).
        dv_col: index of the CSV column that is scored (second argument of
            ``conditional_score``). NOTE(review): "dv" presumably stands for
            "dependent variable" -- confirm against the prompt files.

    Raises:
        ValueError: if ``lm_type`` is not one of the supported values.
    """
    masked_types = ('masked', 'mlm')
    causal_types = ('incremental', 'causal')
    # Fail fast: previously an unknown lm_type left `transformer` undefined and
    # surfaced later as a NameError.
    if lm_type not in masked_types + causal_types:
        raise ValueError(
            f"Unknown lm_type {lm_type!r}; expected one of {masked_types + causal_types}")
    is_masked = lm_type in masked_types

    # Load the stimuli; each dataset entry is one CSV row as a list of values.
    with open(os.path.join(exp_path, 'prompts.csv'), "r", newline='') as f:
        reader = csv.DictReader(f, delimiter='|')
        column_names = list(reader.fieldnames or [])
        dataset = [list(row.values()) for row in reader]

    # Load the model
    if is_masked:
        transformer = scorer.MaskedLMScorer(base_model_name, device)
    else:
        transformer = scorer.IncrementalLMScorer(base_model_name, device)

    # Overwrite local model with base model (handles local loading limitation in minicons)
    if ow_model_loc is not None:
        model_name = os.path.basename(os.path.normpath(ow_model_loc))
        if is_masked:
            overwrite_model = AutoModelForMaskedLM.from_pretrained(ow_model_loc, local_files_only=True)
        else:
            overwrite_model = AutoModelForCausalLM.from_pretrained(ow_model_loc, local_files_only=True)
        overwrite_model.to(device)
        transformer.model = overwrite_model
    else:
        model_name = base_model_name

    # Hub names contain '/', which must not end up in the results file name.
    model_name = model_name.replace("/", "_")

    if results_loc is None:
        results_loc = os.path.join(exp_path, f"results_{model_name}.csv")

    if add_dropout:
        transformer.model = add_dropout_layers(transformer.model)

    # convert the internal model to use MC Dropout
    # NOTE(review): the meaning of random=0.1 is defined by PopulationLM -- confirm.
    pop.DropoutUtils.convert_dropouts(transformer.model)
    pop.DropoutUtils.activate_mc_dropout(transformer.model, activate=True, random=0.1)

    # Start a fresh results file containing only the header row.
    with open(results_loc, "w", newline='') as f:
        csv.writer(f, delimiter='|').writerow(column_names + ["dv_prob"])

    # Scoring callable: each stimulus is reduced to (sum, mean, raw list) of the
    # values the scorer hands to `reduction`.
    call_me = lambda p1, q1: transformer.conditional_score(
        p1, q1, reduction=lambda x: (x.sum(0).item(), x.mean(0).item(), x.tolist()))

    stimuli_loader = DataLoader(dataset, batch_size=batch_size, num_workers=0)
    total_batches = len(stimuli_loader)
    if num_batches < 0 or num_batches > total_batches:
        num_batches = total_batches

    # islice enforces the num_batches cap (it used to be computed but ignored).
    for batch in tqdm(itertools.islice(stimuli_loader, num_batches), total=num_batches):
        # The loader yields the batch column-wise: batch[i] holds column i for
        # every row in the batch.
        out_dataset = [list(column) for column in batch]

        p_list = list(batch[prompt_col])
        dv_list = list(batch[dv_col])

        population = pop.generate_dropout_population(
            transformer.model, lambda: call_me(p_list, dv_list), committee_size=committee_size)
        outs = list(pop.call_function_with_population(
            transformer.model, population, lambda: call_me(p_list, dv_list)))

        # outs is indexed [member][stimulus]; flip it to [stimulus][member] so
        # each CSV row carries every member's score for that row.
        dv_scores = [list(per_stimulus) for per_stimulus in zip(*outs)]

        out_dataset.append(dv_scores)
        with open(results_loc, "a", newline='') as f:
            writer = csv.writer(f, delimiter='|')
            writer.writerows(list(zip(*out_dataset)))

    print('Results saved to: ', results_loc)

In [None]:
def run_all_experiments(model, lm_type, batch_size=10, add_dropout=False, drive_loc='/content/drive/MyDrive/Research/Alignment_Testing/prompts/', ow_model_loc=None, experiments=None):
    """Run ``run_experiment`` once for each experiment directory under ``drive_loc``.

    Args:
        model: base model name forwarded as ``base_model_name``.
        lm_type: language-model type forwarded to ``run_experiment``.
        batch_size: rows per batch, forwarded to ``run_experiment``. This used
            to be ignored in favour of a hard-coded 24; callers that relied on
            that must now pass ``batch_size=24`` explicitly.
        add_dropout: forwarded to ``run_experiment``.
        drive_loc: directory that contains one sub-directory per experiment.
        ow_model_loc: optional local checkpoint forwarded to ``run_experiment``.
        experiments: optional iterable of experiment directory names; defaults
            to the six standard experiments.
    """
    if experiments is None:
        experiments = [
                       'Exp1-basic',
                       'Exp2-DG',
                       'Exp3-PGG',
                       'Exp4-CYD',
                       'Exp5-FAA',
                       'Exp6-WM'
                      ]
    for exp in experiments:
        print(f'Running experiment: {exp}')
        # os.path.join gives the same path whether or not drive_loc ends in '/'.
        run_experiment(os.path.join(drive_loc, exp),
                       base_model_name=model,
                       ow_model_loc=ow_model_loc,
                       batch_size=batch_size,
                       committee_size=50,
                       device='cuda',
                       lm_type=lm_type,
                       add_dropout=add_dropout)

In [None]:
# Driver cell: exactly one configuration is active at a time; the commented-out
# calls are the other configurations (uncomment one to run it instead).
# The two 7B models pass add_dropout=True, which makes run_experiment call
# add_dropout_layers on the loaded model before the MC-dropout conversion.
# The debiased BERT variants are loaded from a local checkpoint via ow_model_loc.
# run_all_experiments('openai-community/gpt2', 'incremental', batch_size=24)
# run_all_experiments('PKU-Alignment/alpaca-7b-reproduced', 'incremental', batch_size=24, add_dropout=True)
run_all_experiments('PKU-Alignment/beaver-7b-v1.0', 'incremental', batch_size=24, add_dropout=True)
# run_all_experiments('bert-base-uncased', 'masked', batch_size=24,
#                     ow_model_loc='/content/drive/MyDrive/Research/Alignment_Testing/local_models/debiased_model_bert-base-uncased_race/')
# run_all_experiments('bert-base-uncased', 'masked', batch_size=24,
#                     ow_model_loc='/content/drive/MyDrive/Research/Alignment_Testing/local_models/debiased_model_bert-base-uncased_gender/')
# run_all_experiments('bert-base-uncased', 'masked', batch_size=24)


Running experiment: Exp1-basic


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/507 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  return self.fget.__get__(instance, owner)()


generation_config.json:   0%|          | 0.00/136 [00:00<?, ?B/s]

100%|██████████| 25/25 [12:30<00:00, 30.03s/it]


Results saved to:  /content/drive/MyDrive/Research/Alignment_Testing/prompts/Exp1-basic/results_PKU-Alignment_beaver-7b-v1.0.csv
Running experiment: Exp2-DG


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 50/50 [51:55<00:00, 62.31s/it]


Results saved to:  /content/drive/MyDrive/Research/Alignment_Testing/prompts/Exp2-DG/results_PKU-Alignment_beaver-7b-v1.0.csv
Running experiment: Exp3-PGG


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 33/33 [55:04<00:00, 100.15s/it]


Results saved to:  /content/drive/MyDrive/Research/Alignment_Testing/prompts/Exp3-PGG/results_PKU-Alignment_beaver-7b-v1.0.csv
Running experiment: Exp4-CYD


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 25/25 [26:05<00:00, 62.63s/it]


Results saved to:  /content/drive/MyDrive/Research/Alignment_Testing/prompts/Exp4-CYD/results_PKU-Alignment_beaver-7b-v1.0.csv
Running experiment: Exp5-FAA


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 25/25 [15:53<00:00, 38.16s/it]


Results saved to:  /content/drive/MyDrive/Research/Alignment_Testing/prompts/Exp5-FAA/results_PKU-Alignment_beaver-7b-v1.0.csv
Running experiment: Exp6-WM


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 25/25 [17:01<00:00, 40.87s/it]

Results saved to:  /content/drive/MyDrive/Research/Alignment_Testing/prompts/Exp6-WM/results_PKU-Alignment_beaver-7b-v1.0.csv





In [None]:
# Batch driver: (lm_type, model) pairs to run through every experiment.
# Every entry is currently commented out, so the loop below does nothing.
# NOTE(review): this loop calls run_all_experiments with its defaults, i.e.
# without add_dropout=True and without ow_model_loc. The driver cell above
# passes add_dropout=True for the two 7B models and loads the debiased BERT
# checkpoints through ow_model_loc instead of as the model name -- confirm
# which is intended before re-enabling any entry.
models = [
          # ('incremental', 'PKU-Alignment/beaver-7b-v1.0'),
          # ('incremental', 'PKU-Alignment/alpaca-7b-reproduced'),
          # ('masked', 'bert-base-uncased'),
          # ('masked', '../local_models/debiased_model_bert-base-uncased_gender'),
          # ('masked', '../local_models/debiased_model_bert-base-uncased_race')
         ]

for lm_type, model in models:
    run_all_experiments(model, lm_type)