In [1]:
import hashlib
import itertools
import json
import os
import random
from time import sleep
from typing import List, Dict, Tuple

import requests
from tqdm import tqdm


In [2]:
# Directory that receives one JSON file per generated reply.
RAW_DATA_DIR: str = "./raw"

In [3]:
# baseline 1: persona only
# TODO: create directories for baselines
# directory: baseline_persona_only_GPT35
# NOTE: this cell rebinds `integrations`, `personas` and `threads`; whichever
# configuration cell was executed last determines what the sweep below uses.
integrations: List[dict] = [
    {
        "provider": "huggingFace",
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
    },
    #{
    #    "provider": "OpenAI",
    #    "model": "gpt-4-turbo-preview"
    #}
]

# Each entry is the list of persona ids sent together in one request.
personas: List[List[str]] = [
    ["neutral"],
    ["troll"],
    ["dystopian"],
    ["devilsadvocate"],
    ["know-it-all"],
    ["mediator"],
    ["educator"],
    ["karen"],
    ["misogynist"],
    ["esoteric"],
    ["hippie-dense"],
    ["supportive-saviour-dense"],
    ["influencer"],
    ["social-media-activist-dense"],
    ["anti"],
    ["hobby-expert-dense"],
    ["narcissist"],
    ["sceptic"],
    ["shy"],
    ["toxic-positive"],
    ["traditionalist"]
]
RAW_POSTS_DIR: str = "./sample_og_posts.json"
# Read the original posts with an explicit encoding (they contain emoji) and
# close the file handle as soon as parsing is done.
with open(RAW_POSTS_DIR, 'r', encoding='utf-8') as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)

In [120]:
# baseline 2: political views only
# directory: baseline_political_views_only_GPT35
# NOTE: this cell rebinds `integrations`, `personas` and `threads`; whichever
# configuration cell was executed last determines what the sweep below uses.
integrations: List[dict] = [
    {
        "provider": "huggingFace",
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
    },
    #{
    #    "provider": "OpenAI",
    #    "model": "gpt-4-turbo-preview"
    #}
]

# Each entry is the list of persona ids sent together in one request.
personas: List[List[str]] = [
    ["radical-left"],
    ["progressive"],
    ["conservative"],
    ["far-right"],
    ["disengaged"]
]
RAW_POSTS_DIR: str = "./sample_og_posts.json"
# Read the original posts with an explicit encoding (they contain emoji) and
# close the file handle as soon as parsing is done.
with open(RAW_POSTS_DIR, 'r', encoding='utf-8') as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)

In [158]:
# baseline 3: neutral + political views
# directory: baseline_neutral_political_views_GPT35
# NOTE: this cell rebinds `integrations`, `personas` and `threads`; whichever
# configuration cell was executed last determines what the sweep below uses.
integrations: List[dict] = [
    {
        "provider": "huggingFace",
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
    },
    #{
    #    "provider": "OpenAI",
    #    "model": "gpt-4-turbo-preview"
    #}
]

# Each entry is the list of persona ids sent together in one request.
personas: List[List[str]] = [
    ["neutral"],
    ["neutral", "radical-left"],
    ["neutral", "progressive"],
    ["neutral", "conservative"],
    ["neutral", "far-right"],
    ["neutral", "disengaged"]
]
RAW_POSTS_DIR: str = "./sample_og_posts.json"
# Read the original posts with an explicit encoding (they contain emoji) and
# close the file handle as soon as parsing is done.
with open(RAW_POSTS_DIR, 'r', encoding='utf-8') as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)

In [164]:
# baseline 4: troll and misogynist combinations
# (previously mislabelled as a second "baseline 3")
# directory: baseline_troll_GPT35
# NOTE: this cell rebinds `integrations`, `personas` and `threads`; whichever
# configuration cell was executed last determines what the sweep below uses.
integrations: List[dict] = [
    {
        "provider": "huggingFace",
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
    },
    #{
    #    "provider": "OpenAI",
    #    "model": "gpt-4-turbo-preview"
    #}
]

# Each entry is the list of persona ids sent together in one request.
personas: List[List[str]] = [
    ["troll"],
    ["troll", "language-default"],
    ["teenager", "troll", "language-youth", "radical-left"],
    ["teenager", "troll", "language-youth", "far-right"],
    ["teenager", "troll", "language-youth", "disengaged"],
    ["unemployed", "troll", "language-youth", "radical-left"],
    ["unemployed", "troll", "language-youth", "far-right"],
    ["unemployed", "troll", "language-youth", "disengaged"],
    ["misogynist", "language-default"],
    ["teenager", "misogynist-dense", "language-youth"],
    ["carpenter", "misogynist", "language-old", "far-right"],
    ["hustler", "misogynist-dense", "language-default", "conservative"]
]
RAW_POSTS_DIR: str = "./sample_og_posts.json"
# Read the original posts with an explicit encoding (they contain emoji) and
# close the file handle as soon as parsing is done.
with open(RAW_POSTS_DIR, 'r', encoding='utf-8') as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)

In [152]:
# Full grid of composed personas.
# NOTE(review): each entry appears to combine a background/occupation, a
# persona type, a language style and a political leaning -- confirm against
# the persona catalogue of the reply API.
# NOTE: this cell rebinds `integrations`, `personas` and `threads`; whichever
# configuration cell was executed last determines what the sweep below uses.
integrations: List[dict] = [
    {
        "provider": "huggingFace",
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
    },
    #{
    #    "provider": "OpenAI",
    #    "model": "gpt-4-turbo-preview"
    #}
]

# Each entry is the list of persona ids sent together in one request.
personas: List[List[str]] = [
    ["dystopian", "language-default"],
    ["carpenter", "dystopian", "language-default", "conservative"],
    ["male-college-student", "dystopian", "language-youth", "progressive"],
    ["devilsadvocate", "language-default", "far-right"],
    ["male-college-student", "devilsadvocate", "language-youth", "radical-left"],
    ["pensioner", "devilsadvocate", "language-old", "far-right"],
    ["hustler", "know-it-all", "language-default", "far-right"],
    ["male-college-student", "know-it-all", "language-youth", "radical-left"],
    ["know-it-all", "language-default", "progressive"],
    ["retired-professor", "educator", "language-old", "progressive"],
    ["phd-student", "educator", "language-default", "radical-left"],
    ["privileged-woman", "karen", "language-default", "far-right"],
    ["affluent-young-woman", "karen", "language-default", "conservative"],
    ["misogynist-dense", "language-default"],
    ["teenager", "misogynist-dense", "language-youth"],
    ["carpenter", "misogynist-dense", "language-old", "far-right"],
    ["hustler", "misogynist-dense", "language-default", "conservative"],
    ["esoteric", "language-default", "radical-left"],
    ["privileged-woman", "esoteric", "language-default", "far-right"],
    ["hippie", "language-default"],
    ["privileged-woman", "hippie", "language-default", "radical-left"],
    ["phd-student", "supportive-saviour-dense", "language-default", "progressive"],
    ["phd-student", "supportive-saviour", "language-default", "progressive"],
    ["hustler", "influencer-dense", "language-default", "progressive"],
    ["wealthy-entrepreneur", "influencer", "language-default", "conservative"],
    ["affluent-young-woman", "social-media-activist-dense", "language-default", "radical-left"],
    ["male-college-student", "social-media-activist", "language-youth", "far-right"],
    ["retired-professor", "anti", "language-old", "far-right"],
    ["retired-professor", "anti", "language-old", "radical-left"],
    ["office-worker", "anti", "language-default", "conservative"],
    ["teenager", "anti", "language-youth", "radical-left"],
    ["teenager", "anti", "language-youth", "far-right"],
    ["pensioner", "toxic-positive", "language-old", "disengaged"],
    ["phd-student", "toxic-positive", "language-default"],
    ["wealthy-entrepreneur", "traditionalist", "language-default", "conservative"],
    ["retired-professor", "traditionalist", "language-old", "conservative"],
    ["retired-professor", "traditionalist", "language-old", "far-right"],
    ["unemployed", "traditionalist", "language-default", "conservative"],
    ["single-parent", "sceptic", "language-default", "conservative"],
    ["single-parent", "sceptic", "language-old", "far-right"],
    ["environmental-activist", "sceptic", "language-default", "progressive"],
    ["single-parent", "sceptic", "language-youth", "progressive"],
    ["struggling-artist", "shy", "language-default", "disengaged"],
    ["struggling-artist", "shy", "language-old", "conservative"],
    ["teenager", "shy", "language-youth", "radical-left"],
    ["phd-student", "shy", "language-default", "disengaged"],
    ["privileged-woman", "narcissist", "language-default", "far-right"],
    ["teenager", "narcissist", "language-youth", "radical-left"]
]
RAW_POSTS_DIR: str = "./sample_og_posts.json"
# Read the original posts with an explicit encoding (they contain emoji) and
# close the file handle as soon as parsing is done.
with open(RAW_POSTS_DIR, 'r', encoding='utf-8') as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)

In [None]:
# MISTRAL 7B experiment, also for GPT3.5
# Commented-out persona rows are entries of the full grid that are
# deliberately excluded from this run.
# NOTE: this cell rebinds `integrations`, `personas` and `threads`; whichever
# configuration cell was executed last determines what the sweep below uses.
integrations: List[dict] = [
    {
        "provider": "huggingFace",
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
    },
    #{
    #    "provider": "OpenAI",
    #    "model": "gpt-4-turbo-preview"
    #}
]

# Each active entry is the list of persona ids sent together in one request.
personas: List[List[str]] = [
    #["dystopian", "language-default"],
    ["carpenter", "dystopian", "language-default", "conservative"],
    #["male-college-student", "dystopian", "language-youth", "progressive"],
    #["devilsadvocate", "language-default", "far-right"],
    ["male-college-student", "devilsadvocate", "language-youth", "radical-left"],
    #["pensioner", "devilsadvocate", "language-old", "far-right"],
    ["hustler", "know-it-all", "language-default", "far-right"],
    #["male-college-student", "know-it-all", "language-youth", "radical-left"],
    #["know-it-all", "language-default", "progressive"],
    ["retired-professor", "educator", "language-old", "progressive"],
    #["phd-student", "educator", "language-default", "radical-left"],
    ["privileged-woman", "karen", "language-default", "far-right"],
    #["affluent-young-woman", "karen", "language-default", "conservative"],
    #["misogynist-dense", "language-default"],
    #["teenager", "misogynist-dense", "language-youth"],
    #["carpenter", "misogynist-dense", "language-old", "far-right"],
    ["hustler", "misogynist-dense", "language-default", "conservative"],
    #["esoteric", "language-default", "radical-left"],
    ["privileged-woman", "esoteric", "language-default", "far-right"],
    #["hippie", "language-default"],
    ["privileged-woman", "hippie", "language-default", "radical-left"],
    ["phd-student", "supportive-saviour-dense", "language-default", "progressive"],
    #["phd-student", "supportive-saviour", "language-default", "progressive"],
    ["hustler", "influencer-dense", "language-default", "progressive"],
    #["wealthy-entrepreneur", "influencer", "language-default", "conservative"],
    ["affluent-young-woman", "social-media-activist-dense", "language-default", "radical-left"],
    ["male-college-student", "social-media-activist", "language-youth", "far-right"],
    #["retired-professor", "anti", "language-old", "far-right"],
    #["retired-professor", "anti", "language-old", "radical-left"],
    ["office-worker", "anti", "language-default", "conservative"],
    #["teenager", "anti", "language-youth", "radical-left"],
    #["teenager", "anti", "language-youth", "far-right"],
    #["pensioner", "toxic-positive", "language-old", "disengaged"],
    ["phd-student", "toxic-positive", "language-default"],
    #["wealthy-entrepreneur", "traditionalist", "language-default", "conservative"],
    #["retired-professor", "traditionalist", "language-old", "conservative"],
    #["retired-professor", "traditionalist", "language-old", "far-right"],
    ["unemployed", "traditionalist", "language-default", "conservative"],
    ["single-parent", "sceptic", "language-default", "conservative"],
    #["single-parent", "sceptic", "language-old", "far-right"],
    ["environmental-activist", "sceptic", "language-default", "progressive"],
    #["single-parent", "sceptic", "language-youth", "progressive"],
    ["struggling-artist", "shy", "language-default", "disengaged"],
    #["struggling-artist", "shy", "language-old", "conservative"],
    #["teenager", "shy", "language-youth", "radical-left"],
    #["phd-student", "shy", "language-default", "disengaged"],
    ["privileged-woman", "narcissist", "language-default", "far-right"],
    #["teenager", "narcissist", "language-youth", "radical-left"]
]
RAW_POSTS_DIR: str = "./sample_og_posts.json"
# Read the original posts with an explicit encoding (they contain emoji) and
# close the file handle as soon as parsing is done.
with open(RAW_POSTS_DIR, 'r', encoding='utf-8') as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)

In [28]:
# test 1: minus react accordingly
# test 2: plus step by step
# NOTE: this cell rebinds `integrations`, `personas` and `threads`; whichever
# configuration cell was executed last determines what the sweep below uses.
integrations: List[dict] = [
    {
        "provider": "huggingFace",
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
    },
    #{
    #    "provider": "OpenAI",
    #    "model": "gpt-4-turbo-preview"
    #}
]
# Only two persona combinations are active for these quick tests.
personas: List[List[str]] = [
    #["dystopian", "language-default"],
    ["carpenter", "dystopian", "language-default", "conservative"],
    #["male-college-student", "dystopian", "language-youth", "progressive"],
    #["devilsadvocate", "language-default", "far-right"],
    ["male-college-student", "devilsadvocate", "language-youth", "radical-left"],
]

RAW_POSTS_DIR: str = "./sample_og_posts.json"
# Read the original posts with an explicit encoding (they contain emoji) and
# close the file handle as soon as parsing is done.
with open(RAW_POSTS_DIR, 'r', encoding='utf-8') as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)

In [None]:
# missing personas
# Unlike the other configuration cells, this one targets the OpenAI
# integration; the huggingFace entry is the one commented out.
# NOTE: this cell rebinds `integrations`, `personas` and `threads`; whichever
# configuration cell was executed last determines what the sweep below uses.
integrations: List[dict] = [
    #{
    #    "provider": "huggingFace",
    #    "model": "mistralai/Mistral-7B-Instruct-v0.2",
    #},
    {
        "provider": "OpenAI",
        "model": "gpt-4-turbo-preview"
    }
]

# Each entry is the list of persona ids sent together in one request.
personas: List[List[str]] = [
    ["phd-student", "mediator", "language-default", "progressive"],
    ["office-worker", "mediator", "language-default", "conservative"],
    ["environmental-activist", "hobby-expert-dense", "language-default", "progressive"],
    ["privileged-woman", "hobby-expert", "language-default", "far-right"],
]
RAW_POSTS_DIR: str = "./sample_og_posts.json"
# Read the original posts with an explicit encoding (they contain emoji) and
# close the file handle as soon as parsing is done.
with open(RAW_POSTS_DIR, 'r', encoding='utf-8') as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)


In [4]:
# number of iterations
x: int = 1
# hidden parameters (randomly chosen)
length: List[str] = ["50", "100", "200", "280"] 

In [5]:
# Sanity check: number of original posts loaded from RAW_POSTS_DIR.
len(threads)

9

In [6]:
# Cross every integration with every persona combination and every thread
# (threads vary fastest), then randomise the order in which they are processed.
configurations: List[Tuple] = [
    (integration_cfg, persona_combo, original_post)
    for integration_cfg in integrations
    for persona_combo in personas
    for original_post in threads
]
random.shuffle(configurations)
# Displayed as the cell result: total number of requests per sweep.
len(configurations)

189

In [7]:
# Peek at the first shuffled configuration: an (integration, persona, thread) tuple.
configurations[0]

({'provider': 'huggingFace', 'model': 'mistralai/Mistral-7B-Instruct-v0.2'},
 ['neutral'],
 {'persona': 'liberal',
  'model': 'gpt-3.5-turbo',
  'topic': 'Economy',
  'platform': 'Twitter',
  'language': 'English',
  'text': "🌍💙 Let's talk about unemployment in our economy. It's crucial that we address this issue head-on! The pandemic has affected so many lives, and we need bold action to support job creation and provide a safety net for those who are struggling. We must invest in education and skills training programs to ensure that everyone has the opportunity to thrive in a changing job market. Together, we can build a future that is fair and equitable for all.",
  'annotation': {'topic': None, 'persona': None, 'authenticity': None},
  'id': '9cb5ed53fe233fd38ecb2c6b574b54c187c991da7d7f5e05'})

In [8]:
# Request one reply per (integration, persona, thread) configuration from the
# local reply API and store every result as its own JSON file in RAW_DATA_DIR.
# The whole sweep is repeated `x` times.

# The output directory may not exist yet; without it the first write fails.
os.makedirs(RAW_DATA_DIR, exist_ok=True)

# Set once the API stops returning usable replies so that BOTH loops end.
# A bare `break` only leaves the inner loop and would start the next sweep.
aborted: bool = False

for _ in range(x):
    for integration, persona, thread in tqdm(configurations):

        payload: dict = {
            "personas": persona,
            "integration": integration,
            "thread": {'posts': [{'author': thread['persona'], 'message': thread['text']}]},
            # a `length` value is drawn at random for every request
            "length": random.choice(length)
        }

        response = requests.post('http://127.0.0.1:8000/reply/', json=payload)

        if response.status_code == 500 or response.status_code == 502:
            print("500/502: Connection Error, too many request, try again later.")
            aborted = True
            break

        if not response.ok:
            # Any other error status carries no reply either; stop here rather
            # than failing further down with a KeyError on the error body.
            print(f"{response.status_code}: unexpected response, aborting:", response.text[:200])
            aborted = True
            break

        try:
            data: dict = response.json()

        except ValueError as e:
            # JSON decoding errors are ValueError subclasses.
            print(e, ':', response)
            aborted = True
            break

        sample: dict = {
            "persona": data['persona']['id'],
            "model": integration["model"],
            "original_author": thread['persona'],
            "threads": thread['text'],
            "topic": thread["topic"],
            "text": data['response'],
            "payload": data,
            # left empty here; to be filled in by annotators
            "annotation": {
                "politics": None,
                "persona": None,
                "tone": None,
                "authenticity": None
            }
        }
        print(thread['persona'] + '\n')
        print(thread['text'] + '\n')
        print(str(data['persona']['id']) + '\n')
        print(str(data['response']) + '\n')

        # Content-derived id: hash of the sample before the id itself is added.
        sample['id'] = hashlib.shake_256(str.encode(json.dumps(sample))).hexdigest(24)

        # '/' in the model name would be read as a path separator, so drop it.
        model_slug: str = integration["model"].replace("/", "")
        out_path: str = f'{RAW_DATA_DIR}/{data["persona"]["id"]}-{model_slug}-{sample["id"]}.json'
        # Context manager guarantees the file is flushed and closed per sample.
        with open(out_path, 'w', encoding='utf-8') as out_file:
            out_file.write(json.dumps(sample, indent=4))

        # pause between requests
        sleep(1)

    if aborted:
        break


  0%|          | 0/189 [00:00<?, ?it/s]

500/502: Connection Error, too many request, try again later.



