In [75]:
import hashlib
import itertools
import json
import os
import random
from time import sleep
from typing import List, Dict, Tuple

import requests
from tqdm import tqdm


In [76]:
# Directory that receives one JSON file per generated sample.
RAW_DATA_DIR: str = "./raw"

In [None]:
# baseline 1: persona only
# TODO: create directories for baselines
# Every configuration is wrapped in its own single-element list so that it has
# the same List[str] shape as the multi-trait configurations used elsewhere.
personas: List[List[str]] = [
    [persona_id]
    for persona_id in (
        "neutral",
        "troll",
        "conservative",
        "dystopian",
        "devilsadvocate",
        "know-it-all",
        "mediator",
        "educator",
        "karen",
        "misogynist",
        "esoteric",
        "hippie",
        "supportive-saviour",
        "influencer",
        "social-media-activist",
    )
]

In [None]:
# baseline 2: political views only
# One single-element configuration per political view.
personas: List[List[str]] = [
    [political_view]
    for political_view in ("radical-left", "progressive", "conservative", "far-right")
]

In [None]:
# baseline 3: neutral + political views
# The plain "neutral" configuration comes first, followed by "neutral" paired
# with each political view.
personas: List[List[str]] = [["neutral"]] + [
    ["neutral", political_view]
    for political_view in ("radical-left", "progressive", "conservative", "far-right")
]

In [77]:
# experiment setup

# Provider/model pairs that get crossed with the personas and threads.
integrations: List[dict] = [
    # Disabled alternative:
    # dict(provider="huggingFace", model="mistralai/Mistral-7B-Instruct-v0.2"),
    dict(provider="OpenAI", model="gpt-4-turbo-preview"),
]

# Trait combinations used in the experiment; each inner list is one persona
# configuration. Commented-out rows are combinations that are currently disabled.
# NOTE(review): the "#-" prefix presumably marks rows that were dropped on
# purpose, as opposed to plain "#" rows that are only paused - confirm with the author.
personas: List[List[str]] = [
    #-["teenager", "language-youth", "troll", "radical-left"],
    #-["teenager", "language-youth", "troll", "far-right"],
    #-["conservative", "polit-conservative"],
    #-["pensioner", "conservative"],
    #["male-college-student", "language-youth", "dystopian", "progressive"],
    ["carpenter", "dystopian", "polit-conservative"],
    #["devilsadvocate", "far-right"],
    ["male-college-student", "language-youth", "devilsadvocate", "radical-left"],
    #["pensioner", "devilsadvocate", "far-right"],
    ["know-it-all", "progressive"],
    #["hustler", "know-it-all", "far-right"],
    ["male-college-student", "language-youth", "know-it-all", "radical-left"],
    #["phd-student", "mediator", "progressive"],
    ["affluent-young-woman", "mediator", "polit-conservative"],
    ["privileged-woman", "karen", "far-right"],
    #["affluent-young-woman", "karen", "polit-conservative"],
    #["teenager", "language-youth", "misogynist"],
    ["carpenter", "misogynist", "far-right"],
    ["privileged-woman", "esoteric", "far-right"],
    ["privileged-woman", "hippie", "radical-left"],
    #["phd-student", "supportive-saviour", "progressive"],
    ["hustler", "influencer", "progressive"],
    #["wealthy-entrepreneur", "influencer", "polit-conservative"],
    ["affluent-young-woman", "social-media-activist", "radical-left"],
    #["male-college-student", "language-youth", "social-media-activist", "far-right"]
]
# Currently unused experiment dimensions, kept for reference.
#languages: List[str] = ["Dutch", "English", "German"]
#platforms: List[str] = ["Twitter", "Reddit"]
# Path of the JSON file holding the posts that replies are generated for
# (despite the "_DIR" suffix this is a file, not a directory).
RAW_POSTS_DIR: str = "./sample_og_posts.json"
# Read through a context manager so the file handle is closed deterministically
# instead of being left to the garbage collector, and decode explicitly as
# UTF-8 (the standard JSON encoding) rather than the platform default.
with open(RAW_POSTS_DIR, 'r', encoding='utf-8') as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)

In [78]:
# how many times the full set of configurations is processed
x: int = 1
# hidden parameter: one of these values is picked at random for every request
length: List[str] = [str(limit) for limit in (50, 100, 200, 280)]

In [79]:
# Sanity check: number of entries loaded into `threads`.
len(threads)

15

In [80]:
# Full cross product of integrations x personas x threads (the last dimension
# varies fastest), processed in random order.
configurations: List[Tuple] = [
    (integration, persona, thread)
    for integration in integrations
    for persona in personas
    for thread in threads
]
random.shuffle(configurations)
# displayed as the cell output: total number of configurations
len(configurations)

225

In [81]:
# Peek at one (integration, persona, thread) tuple to verify its structure.
configurations[0]

({'provider': 'OpenAI', 'model': 'gpt-4-turbo-preview'},
 ['privileged-woman', 'hippie', 'radical-left'],
 {'persona': 'alt_right',
  'model': 'gpt-3.5-turbo',
  'topic': 'Environment',
  'platform': 'Twitter',
  'language': 'English',
  'text': "Recycling? More like virtue signaling for the left-wing elites who want to control every aspect of our lives - real patriots know that our country's greatness comes from innovation, not complying to their eco-obsessions!",
  'annotation': {'topic': None, 'persona': None, 'authenticity': None},
  'id': 'fd35e7c77317452180d441f04e6488ef461bfd348e5c51cb'})

In [82]:
# Request one reply per configuration (the whole set is repeated `x` times) and
# store every reply as its own JSON file inside RAW_DATA_DIR.

# Create the output directory up front so that a reply which was already
# generated is never lost to a FileNotFoundError when it is written.
os.makedirs(RAW_DATA_DIR, exist_ok=True)

# Set when the service fails. The inner `break` alone only ends the current
# pass; without this flag the next pass would keep hitting the failing service.
aborted: bool = False

for _ in range(x):
    for integration, persona, thread in tqdm(configurations):

        payload: dict = {
            "personas": persona,
            "integration": integration,
            # the source post is sent as a thread consisting of a single post
            "thread": {'posts': [{'author': thread['persona'], 'message': thread['text']}]},
            # one of the configured `length` options, picked at random
            "length": random.choice(length)
        }

        response = requests.post('http://127.0.0.1:8000/reply/', json=payload)

        if response.status_code in (500, 502):
            print("500/502: Connection Error, too many request, try again later.")
            aborted = True
            break

        try:
            data: dict = response.json()
        # a body that is not valid JSON surfaces as a ValueError subclass
        except ValueError as e:
            print(e, ':', response)
            aborted = True
            break

        sample: dict = {
            "persona": data['persona']['id'],
            "model": integration["model"],
            "threads": thread['text'],
            "topic": thread["topic"],
            "text": data['response'],
            "payload": data,
            # annotation slots start empty (None)
            "annotation": {
                "politics": None,
                "persona": None,
                "tone": None,
                "authenticity": None
            }
        }
        print(data['response'])
        # the id is a digest of the serialised sample, computed before the id
        # itself is added to the dict
        sample['id'] = hashlib.shake_256(str.encode(json.dumps(sample))).hexdigest(24)
        # the context manager closes the handle, so the data is flushed to disk
        # before the next request starts
        with open(f'{RAW_DATA_DIR}/{sample["id"]}.json', 'w') as sample_file:
            sample_file.write(json.dumps(sample, indent=4))
        # pause one second between requests
        sleep(1)

    if aborted:
        break









  0%|          | 0/225 [00:00<?, ?it/s][A[A[A[A[A[A[A

500/502: Connection Error, too many request, try again later.



