In [None]:
import hashlib
import itertools
import json
import os
import random
from time import sleep
from typing import List, Dict, Tuple

import requests
from tqdm import tqdm


In [None]:
RAW_DATA_DIR: str = './raw'

In [None]:
# experiment setup

# Model backends to query; each entry is sent verbatim as the "integration"
# field of a reply request.
integrations: List[Dict[str, str]] = [
    {"provider": "huggingFace", "model": "mistralai/Mistral-7B-Instruct-v0.2"},
    # Disabled:
    # {"provider": "OpenAI", "model": "gpt-3.5-turbo"},
]

personas: List[List[str]] = [
    ["neutral"],
    ["neutral", "progressive"],
    ["neutral", "polit-conservative"],
    ["neutral", "far-right"],
    ["neutral", "radical-left"],
    #["troll"],
    #["teenager, "internet-language", "troll", "radical-left"],
    #["teenager", "internet-language", "troll", "far-right"],
    # ["conservative", "polit-conservative"],
    # ["pensioner", "conservative"],
    ["dystopian"],
    ["male-college-student", "internet-language", "dystopian", "progressive"],
    ["carpenter", "dystopian", "polit-conservative"],
    ["devilsadvocate", "far-right"],
    ["male-college-student", "internet-language", "devilsadvocate", "radical-left"],
    ["pensioner", "devilsadvocate", "far-right"],
    ["know-it-all", "progressive"],
    ["hustler", "know-it-all", "far-right"],
    ["male-college-student", "internet-language", "know-it-all", "radical-left"],
    ["phd-student", "mediator", "progressive"],
    ["affluent-young-woman", "mediator", "polit-conservative"],
    ["educator"],
    ["privileged-woman", "karen", "far-right"],
    ["affluent-young-woman", "karen", "polit-conservative"],
    ["misogynist"],
    ["teenager", "internet-language", "misogynist"],
    ["carpenter", "misogynist", "far-right"],
    ["esoteric"],
    ["priviledged-woman", "esoteric", "far-right"],
    ["hippie"],
    ["priviledged-woman", "hippie", "radical-left"],
    ["phd-student", "supportive-saviour", "progressive"],
    ["hustler", "influencer", "progressive"],
    ["wealthy-entrepreneur", "influencer", "polit-conservative"],
    ["affluent-young-woman", "social-media-activist", "radical-left"],
    ["male-college-student", "internet-language", "social-media-activist", "far-right"]
]
#languages: List[str] = ["Dutch", "English", "German"]
#platforms: List[str] = ["Twitter", "Reddit"] 
# Path of the JSON file holding the posts to reply to. Despite the "_DIR"
# suffix this is a file path, not a directory.
RAW_POSTS_DIR: str = "./sample_og_posts.json"

# Load the posts inside a context manager so the file handle is closed again,
# and decode explicitly as UTF-8 rather than with the platform default encoding.
with open(RAW_POSTS_DIR, 'r', encoding='utf-8') as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)

In [None]:
# number of iterations
# How many full passes the generation loop makes over the configuration list.
x: int = 1

# Candidate values for the "length" field of a request; one of them is drawn
# at random for every request.
length: List[str] = [
    "50",
    "100",
    "200",
    "280",
]

In [None]:
# Sanity check: number of thread entries loaded from RAW_POSTS_DIR.
len(threads)

In [None]:
# Full factorial design: one (integration, persona, thread) tuple for every
# possible combination, processed in random order.
configurations: List[Tuple] = list(itertools.product(integrations, personas, threads))
random.shuffle(configurations)

# Display the total number of configurations as the cell output.
len(configurations)

In [None]:
# Peek at the first shuffled (integration, persona, thread) configuration.
configurations[0]

In [None]:
# Generation loop: for each configuration, ask the local reply service for a
# response and store it as one JSON file per sample in RAW_DATA_DIR.
#
# Reads:  x, configurations, length, RAW_DATA_DIR (defined in earlier cells).
# Writes: f'{RAW_DATA_DIR}/<id>.json' for every successful request.

# Create the output directory up front so the first write cannot fail on a
# fresh checkout after a request has already been made.
os.makedirs(RAW_DATA_DIR, exist_ok=True)

# Set when the service reports an error; stops *all* remaining passes, not
# just the current one, so a failing server is not hit again immediately.
aborted: bool = False

for _ in range(x):
    for integration, persona, thread in tqdm(configurations):

        # Request body: the thread is reduced to a single post built from the
        # entry's 'persona' (as author) and 'text' (as message) fields.
        payload: dict = {
            "personas": persona,
            "integration": integration,
            "thread": {'posts': [{'author': thread['persona'], 'message': thread['text']}]},
            "length": random.choice(length)
        }

        response = requests.post('http://127.0.0.1:8000/reply/', json=payload)

        if response.status_code in (500, 502):
            print("500/502: Connection Error, too many request, try again later.")
            aborted = True
            break

        try:
            data: dict = response.json()

        except Exception as e:
            # The body was not valid JSON; report it together with the response.
            print(e, ':', response)
            aborted = True
            break

        # Record to persist; the "annotation" fields are left empty (None).
        sample: dict = {
            "persona": data['persona']['id'],
            "model": integration["model"],
            "threads": thread['text'],
            "topic": thread["topic"],
            "text": data['response'],
            "payload": data,
            "annotation": {
                "politics": None,
                "persona": None,
                "tone": None,
                "authenticity": None
            }
        }
        print(data['response'])

        # Content-derived id: SHAKE-256 over the serialised sample (computed
        # before the 'id' key is added), 24 bytes rendered as hex.
        sample['id'] = hashlib.shake_256(str.encode(json.dumps(sample))).hexdigest(24)

        # Context manager guarantees the file is flushed and closed per sample.
        with open(f'{RAW_DATA_DIR}/{sample["id"]}.json', 'w') as sample_file:
            sample_file.write(json.dumps(sample, indent=4))

        # Pause between requests.
        sleep(1)

    if aborted:
        break
