In [1]:
from datasets import load_dataset
import torch 
from random import randint, sample, seed
from multiprocessing import Pool,cpu_count,current_process
import numpy as np
import multi_task as mt
from transformers import AutoModelForCausalLM,AutoTokenizer, pipeline
import evaluate
import matplotlib.pyplot as plt
import gc 

In [1]:
def sample_prompts(dataset, num_examples=100):
    """Draw up to ``num_examples`` distinct items from ``dataset`` at random.

    Args:
        dataset: A sized collection that supports ``len()`` and integer
            indexing (``dataset[i]``).
        num_examples: Requested sample size. Requests larger than the
            dataset are clamped to ``len(dataset)``; negative requests are
            treated as zero.

    Returns:
        list: The selected items in draw order. No position in ``dataset``
        is selected more than once.

    The draw uses the module-level ``random`` generator, so calling
    ``seed(...)`` beforehand makes the selection repeatable.
    """
    total = len(dataset)
    # Clamp the request to what the dataset can actually supply; this also
    # turns an empty dataset into an empty result rather than an error.
    count = max(0, min(num_examples, total))
    # Sample index positions (not items) without replacement, so uniqueness
    # holds even when the items themselves are unhashable or repeated.
    chosen_indices = sample(range(total), count)
    return [dataset[index] for index in chosen_indices]

In [None]:
# Prompt sources, one per benchmark used in this notebook.
toxicity_dataset = load_dataset(
    "allenai/real-toxicity-prompts",
    split="train",
)
regard_dataset = load_dataset(
    "AlexaAI/bold",
    split="train",
)
honest_dataset = load_dataset(
    "MilaNLProc/honest",
    "en_queer_nonqueer",
    split="honest",
)

# Scorers loaded through `evaluate`; the second argument selects the
# configuration of the measurement.
regard = evaluate.load("regard", "compare")
toxicity = evaluate.load("toxicity")
honest = evaluate.load("honest", "en")

# Mixtral 8x7B

In [26]:
# Load the Mixtral instruct checkpoint in half precision; `device_map="auto"`
# leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
)
# Tokenizer comes from the same checkpoint as the weights.
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

## Mixtral 8x7B Toxicity

In [None]:
# Fix the RNG so the prompt draw is repeatable.
seed(1000)

test_sample = sample_prompts(toxicity_dataset, 1000)
# Keep only the prompt text of each sampled record.
test_prompts = [record['prompt']['text'] for record in test_sample]
# Partition the prompts into one bin per CPU core.
test_sections = np.array_split(test_prompts, cpu_count())

In [None]:
# One positional-argument list per prompt bin. The trailing False is passed
# straight to mt.mixtral_generate; its meaning is defined there (not visible here).
# NOTE(review): every argument list carries `model` and `tokenizer`, which the
# pool has to hand to each worker process - confirm this is workable for this model.
argumnets = [
    [model, tokenizer, pbin, False]
    for pbin in test_sections
]

model_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # starmap yields per-bin results in the same order as `argumnets`.
    for results in p.starmap(mt.mixtral_generate, argumnets):
        model_continuations = model_continuations + results

display(len(model_continuations))

In [None]:
# Aggregate view: the "toxicity_ratio" entry of the ratio aggregation.
mixtral_toxicty_rate = toxicity.compute(
    predictions=model_continuations,
    aggregation="ratio",
)["toxicity_ratio"]
# Un-aggregated view: the "toxicity" entry of the default computation.
mixtral_toxicty = toxicity.compute(
    predictions=model_continuations,
)["toxicity"]

## Mixtral 8x7B Regard

In [None]:
# Fix the RNG so both prompt draws are repeatable.
seed(2000)

# First prompt of every BOLD entry in each of the two actor categories.
female_bold = [
    entry["prompts"][0]
    for entry in regard_dataset
    if entry["category"] == "American_actresses"
]
male_bold = [
    entry["prompts"][0]
    for entry in regard_dataset
    if entry["category"] == "American_actors"
]

# Draw order (female, then male) is kept because both draws share one RNG.
female_sample = sample_prompts(female_bold, 500)
male_sample = sample_prompts(male_bold, 500)

# One bin per CPU core for each group.
female_sections = np.array_split(female_sample, cpu_count())
male_section = np.array_split(male_sample, cpu_count())

In [None]:
# One positional-argument list per female-prompt bin; the trailing False is
# forwarded unchanged to mt.mixtral_generate.
argumnets = [
    [model, tokenizer, pbin, False]
    for pbin in female_sections
]

female_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.mixtral_generate, argumnets):
        female_continuations = female_continuations + results

display(len(female_continuations))

In [None]:
# One positional-argument list per male-prompt bin; the trailing False is
# forwarded unchanged to mt.mixtral_generate.
argumnets = [
    [model, tokenizer, pbin, False]
    for pbin in male_section
]

male_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.mixtral_generate, argumnets):
        male_continuations = male_continuations + results

display(len(male_continuations))

In [None]:
# Compare the male-prompt continuations (data) against the female-prompt
# continuations (references) and keep the "regard_difference" entry.
Mixtral_regard_diff = regard.compute(
    data=male_continuations,
    references=female_continuations,
)["regard_difference"]

## Mixtral Honest

In [None]:
# Fix the RNG so both prompt draws are repeatable.
seed(3000)

# Strip the trailing " [M]." mask from each template so it reads as an open
# prompt, then bucket the templates by the prefix of their category.
honest_queer = [
    row["template_masked"].replace(' [M].', "")
    for row in honest_dataset
    if row['category'].startswith('queer')
]
honest_nonqueer = [
    row["template_masked"].replace(' [M].', "")
    for row in honest_dataset
    if row['category'].startswith('nonqueer')
]

# Draw order (queer, then nonqueer) is kept because both draws share one RNG.
queer_sample = sample_prompts(honest_queer, 75)
nonqueer_sample = sample_prompts(honest_nonqueer, 75)

# One bin per CPU core for each group.
queer_sections = np.array_split(queer_sample, cpu_count())
nonqueer_section = np.array_split(nonqueer_sample, cpu_count())

In [None]:
# One positional-argument list per queer-prompt bin; the trailing True is
# forwarded unchanged to mt.mixtral_generate.
argumnets = [
    [model, tokenizer, p, True]
    for p in queer_sections
]

queer_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.mixtral_generate, argumnets):
        queer_continuations = queer_continuations + results

display(len(queer_continuations))

In [None]:
# One positional-argument list per nonqueer-prompt bin; the trailing True is
# forwarded unchanged to mt.mixtral_generate.
argumnets = [
    [model, tokenizer, p, True]
    for p in nonqueer_section
]

nonquer_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.mixtral_generate, argumnets):
        nonquer_continuations = nonquer_continuations + results

display(len(nonquer_continuations))

In [None]:
# Each prediction is handed to HONEST as a list of whitespace-separated words,
# queer-prompt continuations first, then nonqueer-prompt continuations.
continuations = [q.split() for q in queer_continuations] + [nq.split() for nq in nonquer_continuations]

# One group label per prediction, in the same order as `continuations`.
# The label counts are taken from the continuation lists themselves instead
# of a hard-coded 50 per group, so labels stay aligned with predictions
# whatever sample size was used upstream.
groups = ["queer"] * len(queer_continuations) + ["nonqueer"] * len(nonquer_continuations)

Mixtral_honest = honest.compute(predictions=continuations, groups=groups)["honest_score_per_group"]

In [28]:
# Drop the notebook's references to the model and tokenizer, then run the
# garbage collector and ask torch to empty its CUDA cache.
del model, tokenizer

gc.collect()
torch.cuda.empty_cache()

# Phi-2

In [None]:
# Load Phi-2; `torch_dtype="auto"` and `device_map="auto"` leave precision and
# device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    torch_dtype="auto",
    trust_remote_code=True,
    device_map="auto",
)
# Tokenizer comes from the same checkpoint as the weights.
tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/phi-2",
    trust_remote_code=True,
)

## Phi-2 Toxicity

In [None]:
# Same seed as the other model sections, so the RNG starts from the same state.
seed(1000)

test_sample = sample_prompts(toxicity_dataset, 1000)
# Keep only the prompt text of each sampled record.
test_prompts = [record['prompt']['text'] for record in test_sample]
# Partition the prompts into one bin per CPU core.
test_sections = np.array_split(test_prompts, cpu_count())

In [None]:
# One positional-argument list per prompt bin. The trailing False is passed
# straight to mt.phi_generate; its meaning is defined there (not visible here).
# NOTE(review): every argument list carries `model` and `tokenizer`, which the
# pool has to hand to each worker process - confirm this is workable for this model.
argumnets = [
    [model, tokenizer, pbin, False]
    for pbin in test_sections
]

model_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # starmap yields per-bin results in the same order as `argumnets`.
    for results in p.starmap(mt.phi_generate, argumnets):
        model_continuations = model_continuations + results

display(len(model_continuations))

In [None]:
# Aggregate view: the "toxicity_ratio" entry of the ratio aggregation.
phi_toxicty_rate = toxicity.compute(
    predictions=model_continuations,
    aggregation="ratio",
)["toxicity_ratio"]
# Un-aggregated view: the "toxicity" entry of the default computation.
phi_toxicty = toxicity.compute(
    predictions=model_continuations,
)["toxicity"]

## Phi-2 Regard

In [None]:
# Same seed as the other model sections, so the RNG starts from the same state.
seed(2000)

# First prompt of every BOLD entry in each of the two actor categories.
female_bold = [
    entry["prompts"][0]
    for entry in regard_dataset
    if entry["category"] == "American_actresses"
]
male_bold = [
    entry["prompts"][0]
    for entry in regard_dataset
    if entry["category"] == "American_actors"
]

# Draw order (female, then male) is kept because both draws share one RNG.
female_sample = sample_prompts(female_bold, 500)
male_sample = sample_prompts(male_bold, 500)

# One bin per CPU core for each group.
female_sections = np.array_split(female_sample, cpu_count())
male_section = np.array_split(male_sample, cpu_count())

In [None]:
# One positional-argument list per female-prompt bin; the trailing False is
# forwarded unchanged to mt.phi_generate.
argumnets = [
    [model, tokenizer, pbin, False]
    for pbin in female_sections
]

female_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.phi_generate, argumnets):
        female_continuations = female_continuations + results

display(len(female_continuations))

In [None]:
# One positional-argument list per male-prompt bin; the trailing False is
# forwarded unchanged to mt.phi_generate.
argumnets = [
    [model, tokenizer, pbin, False]
    for pbin in male_section
]

male_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.phi_generate, argumnets):
        male_continuations = male_continuations + results

display(len(male_continuations))

In [None]:
# Compare the male-prompt continuations (data) against the female-prompt
# continuations (references) and keep the "regard_difference" entry.
Phi_regard_diff = regard.compute(
    data=male_continuations,
    references=female_continuations,
)["regard_difference"]

## Phi Honest

In [None]:
# Same seed as the other model sections, so the RNG starts from the same state.
seed(3000)

# Strip the trailing " [M]." mask from each template so it reads as an open
# prompt, then bucket the templates by the prefix of their category.
honest_queer = [
    row["template_masked"].replace(' [M].', "")
    for row in honest_dataset
    if row['category'].startswith('queer')
]
honest_nonqueer = [
    row["template_masked"].replace(' [M].', "")
    for row in honest_dataset
    if row['category'].startswith('nonqueer')
]

# Draw order (queer, then nonqueer) is kept because both draws share one RNG.
queer_sample = sample_prompts(honest_queer, 75)
nonqueer_sample = sample_prompts(honest_nonqueer, 75)

# One bin per CPU core for each group.
queer_sections = np.array_split(queer_sample, cpu_count())
nonqueer_section = np.array_split(nonqueer_sample, cpu_count())

In [None]:
# One positional-argument list per queer-prompt bin; the trailing True is
# forwarded unchanged to mt.phi_generate.
argumnets = [
    [model, tokenizer, p, True]
    for p in queer_sections
]

queer_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.phi_generate, argumnets):
        queer_continuations = queer_continuations + results

display(len(queer_continuations))

In [None]:
# One positional-argument list per nonqueer-prompt bin; the trailing True is
# forwarded unchanged to mt.phi_generate.
argumnets = [
    [model, tokenizer, p, True]
    for p in nonqueer_section
]

nonquer_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.phi_generate, argumnets):
        nonquer_continuations = nonquer_continuations + results

display(len(nonquer_continuations))

In [None]:
# Each prediction is handed to HONEST as a list of whitespace-separated words,
# queer-prompt continuations first, then nonqueer-prompt continuations.
continuations = [q.split() for q in queer_continuations] + [nq.split() for nq in nonquer_continuations]

# One group label per prediction, in the same order as `continuations`.
# The label counts are taken from the continuation lists themselves instead
# of a hard-coded 50 per group, so labels stay aligned with predictions
# whatever sample size was used upstream.
groups = ["queer"] * len(queer_continuations) + ["nonqueer"] * len(nonquer_continuations)

Phi_honest = honest.compute(predictions=continuations, groups=groups)["honest_score_per_group"]

In [28]:
# Drop the notebook's references to the model and tokenizer, then run the
# garbage collector and ask torch to empty its CUDA cache.
del model, tokenizer

gc.collect()
torch.cuda.empty_cache()

# LLaMa Pro 8B

In [None]:
# Load LLaMA-Pro-8B-Instruct; `device_map="auto"` leaves device placement to
# the library. No dtype is requested here, unlike the other two model loads.
model = AutoModelForCausalLM.from_pretrained(
    "TencentARC/LLaMA-Pro-8B-Instruct",
    device_map="auto",
)
# Tokenizer comes from the same checkpoint as the weights.
tokenizer = AutoTokenizer.from_pretrained(
    "TencentARC/LLaMA-Pro-8B-Instruct",
)

## LLaMa Pro Toxicity

In [None]:
# Same seed as the other model sections, so the RNG starts from the same state.
seed(1000)

test_sample = sample_prompts(toxicity_dataset, 1000)
# Keep only the prompt text of each sampled record.
test_prompts = [record['prompt']['text'] for record in test_sample]
# Partition the prompts into one bin per CPU core.
test_sections = np.array_split(test_prompts, cpu_count())

In [None]:
# One positional-argument list per prompt bin; the trailing False is forwarded
# unchanged to the generation helper.
# NOTE(review): the model loaded for this section is LLaMA-Pro, yet the helper
# called is mt.mixtral_generate - confirm that is the intended function.
argumnets = [
    [model, tokenizer, pbin, False]
    for pbin in test_sections
]

model_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # starmap yields per-bin results in the same order as `argumnets`.
    for results in p.starmap(mt.mixtral_generate, argumnets):
        model_continuations = model_continuations + results

display(len(model_continuations))

In [None]:
# Aggregate view: the "toxicity_ratio" entry of the ratio aggregation.
LLaMa_toxicty_rate = toxicity.compute(
    predictions=model_continuations,
    aggregation="ratio",
)["toxicity_ratio"]
# Un-aggregated view: the "toxicity" entry of the default computation.
LLaMa_toxicty = toxicity.compute(
    predictions=model_continuations,
)["toxicity"]

## LLaMa Regard

In [None]:
# Same seed as the other model sections, so the RNG starts from the same state.
seed(2000)

# First prompt of every BOLD entry in each of the two actor categories.
female_bold = [
    entry["prompts"][0]
    for entry in regard_dataset
    if entry["category"] == "American_actresses"
]
male_bold = [
    entry["prompts"][0]
    for entry in regard_dataset
    if entry["category"] == "American_actors"
]

# Draw order (female, then male) is kept because both draws share one RNG.
female_sample = sample_prompts(female_bold, 500)
male_sample = sample_prompts(male_bold, 500)

# One bin per CPU core for each group.
female_sections = np.array_split(female_sample, cpu_count())
male_section = np.array_split(male_sample, cpu_count())

In [None]:
# One positional-argument list per female-prompt bin; the trailing False is
# forwarded unchanged to the generation helper.
# NOTE(review): the model loaded for this section is LLaMA-Pro, yet the helper
# called is mt.mixtral_generate - confirm that is the intended function.
argumnets = [
    [model, tokenizer, pbin, False]
    for pbin in female_sections
]

female_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.mixtral_generate, argumnets):
        female_continuations = female_continuations + results

display(len(female_continuations))

In [None]:
# One positional-argument list per male-prompt bin; the trailing False is
# forwarded unchanged to the generation helper.
# NOTE(review): the model loaded for this section is LLaMA-Pro, yet the helper
# called is mt.mixtral_generate - confirm that is the intended function.
argumnets = [
    [model, tokenizer, pbin, False]
    for pbin in male_section
]

male_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.mixtral_generate, argumnets):
        male_continuations = male_continuations + results

display(len(male_continuations))

In [None]:
# Compare the male-prompt continuations (data) against the female-prompt
# continuations (references) and keep the "regard_difference" entry.
LLaMa_regard_diff = regard.compute(
    data=male_continuations,
    references=female_continuations,
)["regard_difference"]

## LLaMa Honest

In [None]:
# Same seed as the other model sections, so the RNG starts from the same state.
seed(3000)

# Strip the trailing " [M]." mask from each template so it reads as an open
# prompt, then bucket the templates by the prefix of their category.
honest_queer = [
    row["template_masked"].replace(' [M].', "")
    for row in honest_dataset
    if row['category'].startswith('queer')
]
honest_nonqueer = [
    row["template_masked"].replace(' [M].', "")
    for row in honest_dataset
    if row['category'].startswith('nonqueer')
]

# Draw order (queer, then nonqueer) is kept because both draws share one RNG.
queer_sample = sample_prompts(honest_queer, 75)
nonqueer_sample = sample_prompts(honest_nonqueer, 75)

# One bin per CPU core for each group.
queer_sections = np.array_split(queer_sample, cpu_count())
nonqueer_section = np.array_split(nonqueer_sample, cpu_count())

In [None]:
# One positional-argument list per queer-prompt bin; the trailing True is
# forwarded unchanged to the generation helper.
# NOTE(review): the model loaded for this section is LLaMA-Pro, yet the helper
# called is mt.mixtral_generate - confirm that is the intended function.
argumnets = [
    [model, tokenizer, p, True]
    for p in queer_sections
]

queer_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.mixtral_generate, argumnets):
        queer_continuations = queer_continuations + results

display(len(queer_continuations))

In [None]:
# One positional-argument list per nonqueer-prompt bin; the trailing True is
# forwarded unchanged to the generation helper.
# NOTE(review): the model loaded for this section is LLaMA-Pro, yet the helper
# called is mt.mixtral_generate - confirm that is the intended function.
argumnets = [
    [model, tokenizer, p, True]
    for p in nonqueer_section
]

nonquer_continuations = []
with Pool(cpu_count(), maxtasksperchild=1) as p:
    # Per-bin results arrive in the same order as `argumnets`.
    for results in p.starmap(mt.mixtral_generate, argumnets):
        nonquer_continuations = nonquer_continuations + results

display(len(nonquer_continuations))

In [None]:
# Each prediction is handed to HONEST as a list of whitespace-separated words,
# queer-prompt continuations first, then nonqueer-prompt continuations.
continuations = [q.split() for q in queer_continuations] + [nq.split() for nq in nonquer_continuations]

# One group label per prediction, in the same order as `continuations`.
# The label counts are taken from the continuation lists themselves instead
# of a hard-coded 50 per group, so labels stay aligned with predictions
# whatever sample size was used upstream.
groups = ["queer"] * len(queer_continuations) + ["nonqueer"] * len(nonquer_continuations)

# Named to match Mixtral_honest / Phi_honest: this is the HONEST score,
# not a toxicity rate.
LLaMa_honest = honest.compute(predictions=continuations, groups=groups)["honest_score_per_group"]
# Original (misleading) name kept as an alias for any code that still reads it.
LLaMa_toxicty_rate_honest = LLaMa_honest

In [28]:
# Drop the notebook's references to the model and tokenizer, then run the
# garbage collector and ask torch to empty its CUDA cache.
del model, tokenizer

gc.collect()
torch.cuda.empty_cache()