# In [None]:
import hashlib
import itertools
import json
import os
import random
from time import sleep
from typing import List, Dict, Tuple

import requests
from tqdm import tqdm
# Directory into which the generated samples are written as JSON files.
RAW_DATA_DIR: str = "./raw"

# Demo setup: model backends to query. Each entry is forwarded as the
# "integration" field of the request payload.
integrations: List[Dict[str, str]] = [
    {"provider": "huggingFace", "model": "mistralai/Mistral-7B-Instruct-v0.2"},
    # Uncomment to query OpenAI as well:
    # {"provider": "OpenAI", "model": "gpt-4-turbo-preview"},
]

# Persona descriptors; each row is forwarded as the "personas" field of the
# request payload. Rows presumably follow the order
# (background, character, language style, political leaning) -- confirm
# against the service that consumes them.
personas: List[List[str]] = [
    ["carpenter", "dystopian", "language-default", "conservative"],
    ["male-college-student", "devilsadvocate", "language-youth", "radical-left"],
    ["hustler", "know-it-all", "language-default", "far-right"],
    ["retired-professor", "educator", "language-old", "progressive"],
    ["privileged-woman", "karen", "language-default", "far-right"],
    ["hustler", "misogynist", "language-default", "conservative"],
    ["privileged-woman", "esoteric", "language-default", "far-right"],
    ["privileged-woman", "hippie", "language-default", "radical-left"],
    ["phd-student", "supportive-saviour", "language-default", "progressive"],
    ["hustler", "influencer", "language-default", "progressive"],
    ["male-college-student", "social-media-activist", "language-youth", "far-right"],
    ["office-worker", "anti", "language-default", "conservative"],
    # NOTE(review): this row has only three tags, unlike every other row --
    # confirm that the missing fourth tag is intentional.
    ["phd-student", "toxic-positive", "language-default"],
    ["unemployed", "traditionalist", "language-default", "conservative"],
    ["single-parent", "sceptic", "language-default", "conservative"],
    ["environmental-activist", "sceptic", "language-default", "progressive"],
    ["struggling-artist", "shy", "language-default", "disengaged"],
    ["teenager", "narcissist", "language-youth", "radical-left"],
]
# Path of the JSON file holding the original posts to reply to (despite the
# "_DIR" suffix this points at a file, not a directory).
RAW_POSTS_DIR: str = "./sample_og_posts.json"

# Load the posts through a context manager so the file handle is closed
# deterministically, and decode explicitly as UTF-8 so non-ASCII post text
# loads the same way regardless of the platform's default encoding.
with open(RAW_POSTS_DIR, "r", encoding="utf-8") as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)

# number of passes over the full list of configurations
x: int = 1
# hidden parameter: candidate values for the payload's "length" field,
# one of which is picked at random for every request
length: List[str] = ["100", "200", "280"]

# Every (integration, persona, thread) combination, visited in random order.
configurations: List[Tuple] = list(
    itertools.product(integrations, personas, threads)
)
random.shuffle(configurations)

# Create the output directory up front so that a finished generation is never
# lost to a FileNotFoundError when the sample is written.
os.makedirs(RAW_DATA_DIR, exist_ok=True)

# Set when the service reports a fatal error; checked after the inner loop so
# that *all* remaining passes are abandoned, not just the current one.
stop_requested: bool = False

# NOTE(review): the request below sets no timeout, so an unresponsive server
# blocks the run indefinitely -- consider passing `timeout=` once a sensible
# upper bound for generation time is known.
for _ in range(x):
    for integration, persona, thread in tqdm(configurations):

        # Request body: the persona tags, the backend to use, a one-post
        # thread built from the original post, and a randomly chosen length.
        payload: dict = {
            "personas": persona,
            "integration": integration,
            "thread": {'posts': [{'author': thread['persona'], 'message': thread['text']}]},
            "length": random.choice(length)
        }

        response = requests.post('http://127.0.0.1:8000/reply/', json=payload)

        # Server-side failure: stop the whole run instead of retrying blindly.
        if response.status_code in (500, 502):
            print("500/502: Connection Error, too many request, try again later.")
            stop_requested = True
            break

        try:
            data: dict = response.json()
        except ValueError as e:
            # The JSON decoding errors raised by `response.json()` derive
            # from ValueError; a non-JSON body is treated as fatal.
            print(e, ':', response)
            stop_requested = True
            break

        # Any other error status (e.g. a rejected payload) carries an error
        # body rather than a reply; skip this configuration instead of
        # crashing with a KeyError further down.
        if not response.ok:
            print(f"{response.status_code}: request rejected, skipping configuration:", data)
            sleep(5)
            continue

        # Record written to disk; the "annotation" fields are left empty.
        sample: dict = {
            "persona": data['persona']['id'],
            "model": integration["model"],
            "original_author": thread['persona'],
            "threads": thread['text'],
            "topic": thread["topic"],
            "text": data['response'],
            "payload": data,
            "annotation": {
                "politics": None,
                "persona": None,
                "tone": None,
                "authenticity": None
            }
        }

        print("Original Tweet: \n"+thread['text'] + "\n")
        print("Agent:\n" + str(data['persona']['id'])+"\n\nResponse:\n"+str(data['response']))

        # The id is a hash of the sample's JSON form *before* the id itself
        # is added, so identical content always maps to the same id.
        sample['id'] = hashlib.shake_256(json.dumps(sample).encode()).hexdigest(24)

        # Slashes in the model name are dropped so they cannot be mistaken
        # for path separators in the file name.
        model_slug = integration["model"].replace("/", "")
        out_path = f'{RAW_DATA_DIR}/{data["persona"]["id"]}-{model_slug}-{sample["id"]}.json'
        with open(out_path, 'w', encoding='utf-8') as out_file:
            out_file.write(json.dumps(sample, indent=4))

        # Pause between requests to avoid overloading the service.
        sleep(5)

    if stop_requested:
        break
