In [None]:
import torch
import warnings
import json
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
from tqdm import tqdm

# Silence every warning raised through Python's `warnings` module for the
# rest of the notebook.
# NOTE(review): this blanket filter also hides warnings that may matter
# (e.g. deprecations) -- consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

# The task cells sample with do_sample=True, which draws from torch's global
# RNG. Seeding it once here makes a Restart Kernel -> Run All pass repeatable
# (on the same hardware and library versions).
SEED = 42
torch.manual_seed(SEED)

# Set model
# `model_name` is also used by the task cells as the name of the per-model
# results sub-folder.
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Task 1: Sentence completion to identify bias when task is not specified

In [None]:
# Sampling settings for open-ended sentence completion.
# NOTE(review): `max_length` counts the prompt tokens as well as the newly
# generated ones, so a long prompt leaves little or no room for a completion.
generation_parameters = {
    "do_sample": True,
    "temperature": 0.9,
    "max_length": 100,
}

# Some tokenizers (GPT-2 among them) define no pad token. Falling back to the
# EOS id and passing it explicitly keeps `generate` from having to guess it
# (and from logging a notice about it) on every single prompt.
pad_token_id = (
    tokenizer.pad_token_id
    if tokenizer.pad_token_id is not None
    else tokenizer.eos_token_id
)

task1_folder = "task1"

# Results are written to <task1_folder>/<model_name>/.
results_folder = os.path.join(task1_folder, model_name)
os.makedirs(results_folder, exist_ok=True)

# os.listdir() returns entries in arbitrary order; sorting makes the order in
# which prompt files are processed (and thus the RNG consumption) stable.
for filename in sorted(os.listdir(task1_folder)):
    if filename.endswith("_task1_prompts.txt"):
        file_path = os.path.join(task1_folder, filename)

        # One prompt per non-blank line; explicit encoding so the result does
        # not depend on the platform's default locale.
        with open(file_path, "r", encoding="utf-8") as file:
            prompts = [line.strip() for line in file if line.strip()]

        # Maps prompt -> generated text (prompt included, as decoded from the
        # full output sequence). A repeated prompt overwrites its earlier entry.
        results = {}

        for prompt in tqdm(prompts, desc=f"Processing {filename}"):
            encoded = tokenizer(prompt, return_tensors="pt")
            input_ids = encoded.input_ids
            with torch.no_grad():
                gen_tokens = model.generate(
                    input_ids,
                    attention_mask=encoded.attention_mask,
                    pad_token_id=pad_token_id,
                    **generation_parameters,
                )
            generated_text = tokenizer.decode(gen_tokens[0], skip_special_tokens=True)
            results[prompt] = generated_text

        json_filename = filename.replace("_prompts.txt", "_results.json")
        json_file_path = os.path.join(results_folder, json_filename)

        with open(json_file_path, "w", encoding="utf-8") as json_file:
            json.dump(results, json_file, indent=4)

        print(f"Results saved to {json_file_path}")

# Task 2: Loan Approval

In [None]:
# Sampling settings for the loan-approval prompts: nucleus sampling with a
# ban on repeated bigrams and a single returned sequence per prompt.
# NOTE(review): `max_length` counts the prompt tokens as well as the newly
# generated ones, so a long prompt leaves little or no room for a completion.
generation_parameters = {
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.9,
    "max_length": 75,
    "no_repeat_ngram_size": 2,
    "num_return_sequences": 1,
}

# Some tokenizers (GPT-2 among them) define no pad token. Falling back to the
# EOS id and passing it explicitly keeps `generate` from having to guess it
# (and from logging a notice about it) on every single prompt.
pad_token_id = (
    tokenizer.pad_token_id
    if tokenizer.pad_token_id is not None
    else tokenizer.eos_token_id
)

# Each directory holds its own *_prompts.txt files and gets its own
# <dir_name>/<model_name>/ results folder.
directories = ["loan_approval_e", "loan_approval_g", "loan_approval_s"]

for dir_name in directories:
    results_folder = os.path.join(dir_name, model_name)
    os.makedirs(results_folder, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sorting makes the order
    # in which prompt files are processed stable across runs and machines.
    for filename in sorted(os.listdir(dir_name)):
        if filename.endswith("_prompts.txt"):
            file_path = os.path.join(dir_name, filename)

            # One prompt per non-blank line; explicit encoding so the result
            # does not depend on the platform's default locale.
            with open(file_path, "r", encoding="utf-8") as file:
                prompts = [line.strip() for line in file if line.strip()]

            # Maps prompt -> generated text (prompt included). A repeated
            # prompt overwrites its earlier entry.
            results = {}

            for prompt in tqdm(prompts, desc=f"Processing {filename} in {dir_name}"):
                encoded = tokenizer(prompt, return_tensors="pt")
                input_ids = encoded.input_ids

                with torch.no_grad():
                    gen_tokens = model.generate(
                        input_ids,
                        attention_mask=encoded.attention_mask,
                        pad_token_id=pad_token_id,
                        **generation_parameters,
                    )

                generated_text = tokenizer.decode(gen_tokens[0], skip_special_tokens=True)

                results[prompt] = generated_text

            json_filename = filename.replace("_prompts.txt", "_results.json")
            json_file_path = os.path.join(results_folder, json_filename)

            with open(json_file_path, "w", encoding="utf-8") as json_file:
                json.dump(results, json_file, indent=4)

            print(f"Results saved to {json_file_path}")

# Task 3: Hiring

In [None]:
# Sampling settings for the hiring prompts: nucleus sampling with a ban on
# repeated bigrams and a single returned sequence per prompt.
# NOTE(review): `max_length` counts the prompt tokens as well as the newly
# generated ones; with a budget of 50, a long hiring prompt may leave little
# or no room for a completion -- confirm against the actual prompt lengths.
generation_parameters = {
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.9,
    "max_length": 50,
    "no_repeat_ngram_size": 2,
    "num_return_sequences": 1,
}

# Some tokenizers (GPT-2 among them) define no pad token. Falling back to the
# EOS id and passing it explicitly keeps `generate` from having to guess it
# (and from logging a notice about it) on every single prompt.
pad_token_id = (
    tokenizer.pad_token_id
    if tokenizer.pad_token_id is not None
    else tokenizer.eos_token_id
)

hiring_directory = "hiring"
prompts_file = os.path.join(hiring_directory, "hiring_prompts.txt")

# Results are written to <hiring_directory>/<model_name>/.
results_directory = os.path.join(hiring_directory, model_name)
os.makedirs(results_directory, exist_ok=True)

# One prompt per non-blank line; explicit encoding so the result does not
# depend on the platform's default locale.
with open(prompts_file, "r", encoding="utf-8") as file:
    prompts = [line.strip() for line in file if line.strip()]

# Maps prompt -> generated text (prompt included). A repeated prompt
# overwrites its earlier entry.
results = {}
for prompt in tqdm(prompts, desc="Processing hiring prompts"):
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded.input_ids

    with torch.no_grad():
        gen_tokens = model.generate(
            input_ids,
            attention_mask=encoded.attention_mask,
            pad_token_id=pad_token_id,
            **generation_parameters,
        )

    generated_text = tokenizer.decode(gen_tokens[0], skip_special_tokens=True)

    results[prompt] = generated_text

json_filename = "hiring_results.json"
json_file_path = os.path.join(results_directory, json_filename)

with open(json_file_path, "w", encoding="utf-8") as json_file:
    json.dump(results, json_file, indent=4)

print(f"Results saved to {json_file_path}")