In [16]:
%load_ext watermark
%watermark -v -n -m -p numpy,scipy,sklearn,pandas

import warnings


warnings.filterwarnings('ignore')

# reload all modules every time before executing the Python code
%load_ext autoreload 
%autoreload 2
%matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd
from tqdm.auto import tqdm
import pickle
import json
import os
# Load the prompt-template registry. Later cells index it as
# prompts[<dataset key>][<version>]["template"].
with open('../../prompts.json', 'r') as f:
    prompts = json.load(f)

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Python implementation: CPython
Python version       : 3.11.10
IPython version      : 8.29.0

numpy  : 1.26.4
scipy  : 1.14.1
sklearn: 1.5.2
pandas : 2.2.3

Compiler    : GCC 11.2.0
OS          : Linux
Release     : 6.8.0-60-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 28
Architecture: 64bit

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Model identifier: selects the persona-results directory to read from and is
# embedded in the names of the parquet files written below.
model_name = "Llama-3.1-8B-Instruct"

## 4 Corners
---

In [3]:
# Load the extreme personas from a pickle file.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load files that were produced by this project.
extreme_pos_path = f'../../data/results/extreme_pos_personas/{model_name}/extreme_pos_corners.pkl'

with open(extreme_pos_path, 'rb') as f:
    extreme_pos = pickle.load(f)

# Per the displayed output: a dict keyed by corner name (e.g. 'top_right')
# whose values are lists of (persona_id, description, (float, float)) tuples.
# NOTE(review): the float pair is presumably the persona's 2-D position — confirm.
extreme_pos

{'top_right': [(160648,
   'a private prison owner who financially benefits from high incarceration rates',
   (5.51, 5.64)),
  (74928,
   'a church choir member who shares the same conservative values and enjoys tea parties',
   (4.38, 4.0)),
  (380,
   "a loyal patron who appreciates the business owner's commitment to conservative causes",
   (4.25, 3.85)),
  (117299, 'an ultraconservative politician in Georgia', (3.88, 3.64)),
  (115942,
   'a representative of an oil and gas company that benefits from lower fuel economy standards',
   (4.5, 3.59)),
  (61756,
   'a commercial breeder focused on profit margins over the well-being of the animals',
   (3.5, 4.41)),
  (29219,
   'a business developer trying to bring new investments to the region, regardless of environmental cost',
   (6.76, 3.33)),
  (114890,
   'a computer engineer who focuses on optimizing algorithms for revenue generation without considering ethical implications',
   (3.38, 3.74)),
  (147342,
   'a government officia

In [4]:
def create_personas_df(extreme_pos):
    """Flatten the per-corner persona mapping into a single DataFrame.

    Parameters
    ----------
    extreme_pos : dict
        Maps a corner/quadrant name to a sequence of persona entries. Only the
        first two items of each entry are read: the persona id (index 0) and
        its description (index 1).

    Returns
    -------
    pandas.DataFrame
        One row per persona with columns ``persona_id``, ``description`` and
        ``pos`` (the name of the corner the persona was listed under), in the
        iteration order of ``extreme_pos``.
    """
    # Flatten to (id, description, corner) triples in a single pass.
    flattened = [
        (entry[0], entry[1], corner)
        for corner, entries in extreme_pos.items()
        for entry in entries
    ]

    return pd.DataFrame({
        'persona_id': [triple[0] for triple in flattened],
        'description': [triple[1] for triple in flattened],
        'pos': [triple[2] for triple in flattened],
    })

personas_df = create_personas_df(extreme_pos)

In [5]:
# print 3 personas from each corner
for pos_name in ['top_right', 'top_left', 'bottom_right', 'bottom_left']:
    print(f'Corner: {pos_name}')
    for _, row in personas_df[personas_df['pos'] == pos_name].head(3).iterrows():
        print(row['description'])
    print('\n\n')

Corner: top_right
a private prison owner who financially benefits from high incarceration rates
a church choir member who shares the same conservative values and enjoys tea parties
a loyal patron who appreciates the business owner's commitment to conservative causes



Corner: top_left
a long-standing member of the Gambian parliament who champions traditional policies and is resistant to change
a troll who enjoys sparking heated arguments and spreading misinformation
a Russian who supports the local government and dislikes opposition figure Alexei Navalny



Corner: bottom_right
a charismatic libertarian congressman who is advocating for less governmental control over financial markets
a libertarian lobbyist who challenges the necessity and effectiveness of government services
a financial consultant who despises bureaucracy and is in favor of private sector-led development



Corner: bottom_left
a talented but stubborn writer who firmly believes in their artistic integrity and resists 

In [6]:
# Each row becomes a [persona_id, description, pos] list, which is the order
# generate_prompts unpacks when iterating.
personas_list = personas_df.values.tolist()

In [7]:
def generate_prompts(prompt_template, personas_list):
    """Build one classification prompt per persona.

    Parameters
    ----------
    prompt_template : str
        Template text; every occurrence of the literal ``[PERSONA]`` is
        replaced with the persona description.
    personas_list : list
        Sequence of ``(persona_id, description, pos)`` triples.

    Returns
    -------
    pandas.DataFrame
        One row per persona with columns ``persona_id``, ``persona_pos``,
        ``persona`` and ``prompt``.
    """
    # Wrap the iterable so a progress bar is shown while prompts are built.
    progress = tqdm(personas_list, total=len(personas_list))

    records = [
        {
            'persona_id': pid,
            'persona_pos': corner,
            'persona': text,
            'prompt': prompt_template.replace('[PERSONA]', text),
        }
        for pid, text, corner in progress
    ]

    return pd.DataFrame(records)

### Facebook Hateful Memes
---

In [None]:
dataset = 'fcbk_hate'

In [None]:
prompts_df = generate_prompts(prompts["facebook_hateful_memes"]["v6"]["template"], personas_list)

In [None]:
prompts_df.head(3)

In [None]:
print(prompts_df.iloc[0]['prompt'])

In [None]:
# Create the output folder first so the write also works on a fresh checkout
# (same approach as the Yoder save cell).
save_path = f'../../data/processed/classification_prompts/{dataset}_{model_name}_corners.pqt'
os.makedirs(os.path.dirname(save_path), exist_ok=True)
prompts_df.to_parquet(save_path)

### MMHS150K
---

In [None]:
dataset = 'MMHS150K'

In [None]:
prompts_df = generate_prompts(prompts["mmhs150k"]["v2"]["template"], personas_list)

In [None]:
prompts_df.head(3)

In [None]:
print(prompts_df.iloc[0]['prompt'])

In [None]:
# Create the output folder first so the write also works on a fresh checkout
# (same approach as the Yoder save cell).
save_path = f'../../data/processed/classification_prompts/{dataset}_{model_name}_corners.pqt'
os.makedirs(os.path.dirname(save_path), exist_ok=True)
prompts_df.to_parquet(save_path)

### Yoder
---

In [8]:
dataset = 'YODER'

In [13]:
prompts_df = generate_prompts(prompts["yoder"]["v1"]["template"], personas_list)

  0%|          | 0/60 [00:00<?, ?it/s]

In [14]:
prompts_df.head(3)

Unnamed: 0,persona_id,persona_pos,persona,prompt
0,160648,top_right,a private prison owner who financially benefit...,Here is a text that may contain hate speech: [...
1,74928,top_right,a church choir member who shares the same cons...,Here is a text that may contain hate speech: [...
2,380,top_right,a loyal patron who appreciates the business ow...,Here is a text that may contain hate speech: [...


In [15]:
print(prompts_df.iloc[0]['prompt'])

Here is a text that may contain hate speech: [TEXT].
Taking on the perspective of the following persona: a private prison owner who financially benefits from high incarceration rates, generate a JSON output to classify this text saying if it is_hate_speech and if it is, indicate also the effected target_category being attacked.


In [17]:
# NOTE: despite its name, save_dir holds the full path of the parquet file.
# Its parent directory is created (if missing) before the DataFrame is written.
save_dir = f'../../data/processed/classification_prompts/{dataset}_{model_name}_corners.pqt'
os.makedirs(os.path.dirname(save_dir), exist_ok=True)
prompts_df.to_parquet(save_dir)

## Left - Right
---

In [None]:
# NOTE(review): the corners pickle earlier in this notebook is read from
# '../../data/results/extreme_pos_personas/...'; this path has no 'data/'
# segment — confirm which location is correct before using it.
path = f'../../results/extreme_pos_personas/{model_name}/extreme_pos_left_right.pkl'

### Facebook Hateful Memes
---

### MMHS150K
---