In [1]:
import hashlib
import itertools
import json
import os
import random
from time import sleep
from typing import List, Dict, Tuple

import requests
from tqdm import tqdm


In [2]:
# Directory that receives one JSON file per generated sample.
RAW_DATA_DIR: str = "./raw"

In [18]:
# experiment setup

# Model back-ends to query; each dict is sent unchanged as the request's
# "integration" field.
integrations: List[dict] = [
    {
        "provider": "huggingFace",
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
    },
    #{
    #    "provider": "OpenAI",
    #    "model": "gpt-3.5-turbo"
    #}
]

# Persona combinations; each inner list is sent unchanged as the request's
# "personas" field.
# NOTE(review): "priviledged-woman" (esoteric / hippie entries) and
# "privileged-woman" (karen entry) are spelled differently -- confirm which
# identifier the reply service actually knows before relying on those rows.
personas: List[List[str]] = [
    ["neutral"],
    ["neutral", "progressive"],
    ["neutral", "polit-conservative"],
    ["neutral", "far-right"],
    ["neutral", "radical-left"],
    #["troll"],
    #["teenager", "internet-language", "troll", "radical-left"],
    #["teenager", "internet-language", "troll", "far-right"],
    # ["conservative", "polit-conservative"],
    # ["pensioner", "conservative"],
    ["dystopian"],
    ["male-college-student", "internet-language", "dystopian", "progressive"],
    ["carpenter", "dystopian", "polit-conservative"],
    ["devilsadvocate", "far-right"],
    ["male-college-student", "internet-language", "devilsadvocate", "radical-left"],
    ["pensioner", "devilsadvocate", "far-right"],
    ["know-it-all", "progressive"],
    ["hustler", "know-it-all", "far-right"],
    ["male-college-student", "internet-language", "know-it-all", "radical-left"],
    ["phd-student", "mediator", "progressive"],
    ["affluent-young-woman", "mediator", "polit-conservative"],
    ["educator"],
    ["privileged-woman", "karen", "far-right"],
    ["affluent-young-woman", "karen", "polit-conservative"],
    ["misogynist"],
    ["teenager", "internet-language", "misogynist"],
    ["carpenter", "misogynist", "far-right"],
    ["esoteric"],
    ["priviledged-woman", "esoteric", "far-right"],
    ["hippie"],
    ["priviledged-woman", "hippie", "radical-left"],
    ["phd-student", "supportive-saviour", "progressive"],
    ["hustler", "influencer", "progressive"],
    ["wealthy-entrepreneur", "influencer", "polit-conservative"],
    ["affluent-young-woman", "social-media-activist", "radical-left"],
    ["male-college-student", "internet-language", "social-media-activist", "far-right"]
]
#languages: List[str] = ["Dutch", "English", "German"]
#platforms: List[str] = ["Twitter", "Reddit"]

# Path of the JSON file holding the original posts that replies are generated for.
RAW_POSTS_DIR: str = "./sample_og_posts.json"

# Read inside a context manager so the handle is closed deterministically, and
# decode explicitly as UTF-8 (the posts contain emoji) instead of relying on
# the platform's default encoding.
with open(RAW_POSTS_DIR, 'r', encoding='utf-8') as posts_file:
    threads: List[Dict[str, str]] = json.load(posts_file)

In [19]:
# How many full passes are made over the list of configurations.
x: int = 1
# Hidden parameter: candidate values for the request's "length" field,
# one of which is drawn at random for every request.
length: List[str] = [
    "50",
    "100",
    "200",
    "280",
]

In [20]:
# Sanity check: number of original posts loaded from RAW_POSTS_DIR.
len(threads)

15

In [21]:
# Fixed seed so that Restart & Run All reproduces the same processing order.
SHUFFLE_SEED: int = 42

# One configuration per (integration, personas, thread) combination.
configurations: List[Tuple] = list(
    itertools.product(integrations, personas, threads)
)
# Shuffle with a dedicated generator: the order is reproducible, while the
# global `random` state is left untouched.
random.Random(SHUFFLE_SEED).shuffle(configurations)
len(configurations)

465

In [22]:
# Inspect one (integration, personas, thread) tuple to check its structure.
configurations[0]

({'provider': 'huggingFace', 'model': 'mistralai/Mistral-7B-Instruct-v0.2'},
 ['esoteric'],
 {'persona': 'liberal',
  'model': 'gpt-3.5-turbo',
  'topic': 'Environment',
  'platform': 'Twitter',
  'language': 'English',
  'text': 'Just sorted through my garbage for recycling and feeling good about doing my part to protect the environment! 🌍🌱',
  'annotation': {'topic': None, 'persona': None, 'authenticity': None},
  'id': 'e705af222010c2f6bc74d73e61eabc6093d5800e19456729'})

In [None]:
# Generate one reply per configuration and store each as its own JSON file in
# RAW_DATA_DIR. The run stops entirely on the first transport error, non-2xx
# status, or undecodable body, so a broken service is not hit repeatedly.

# Upper bound in seconds for a single request; without it a stalled server
# would block the cell forever.
REQUEST_TIMEOUT: int = 120

# Make sure the output directory exists before the first (slow) request is made.
os.makedirs(RAW_DATA_DIR, exist_ok=True)

# Set when a failure should stop the outer iteration loop as well.
aborted: bool = False

for _ in range(x):
    for integration, persona, thread in tqdm(configurations):

        payload: dict = {
            "personas": persona,
            "integration": integration,
            "thread": {'posts': [{'author': thread['persona'], 'message': thread['text']}]},
            "length": random.choice(length)
        }

        try:
            response = requests.post(
                'http://127.0.0.1:8000/reply/',
                json=payload,
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as e:
            # Connection refused, timeout, ...: the service is unreachable.
            print('Request failed:', e)
            aborted = True
            break

        if response.status_code in (500, 502):
            print("500/502: Connection Error, too many request, try again later.")
            aborted = True
            break

        if not response.ok:
            # Any other error status carries no usable reply; stop with a
            # message instead of failing later on a missing key.
            print(response.status_code, ':', response.text[:500])
            aborted = True
            break

        try:
            data: dict = response.json()

        except ValueError as e:
            # Body was not valid JSON.
            print(e, ':', response)
            aborted = True
            break

        sample: dict = {
            "persona": data['persona']['id'],
            "model": integration["model"],
            "threads": thread['text'],
            "topic": thread["topic"],
            "text": data['response'],
            "payload": data,
            # Placeholders for the manual annotation step.
            "annotation": {
                "politics": None,
                "persona": None,
                "tone": None,
                "authenticity": None
            }
        }
        print(data['response'])
        # The id is a 24-byte SHAKE-256 digest of the serialized sample
        # (computed before the id itself is added).
        sample['id'] = hashlib.shake_256(str.encode(json.dumps(sample))).hexdigest(24)
        # Context manager guarantees the file is flushed and closed.
        with open(f'{RAW_DATA_DIR}/{sample["id"]}.json', 'w', encoding='utf-8') as sample_file:
            sample_file.write(json.dumps(sample, indent=4))
        # Pause between requests.
        sleep(1)

    if aborted:
        # The inner `break` only leaves the configuration loop; stop the
        # remaining iterations too.
        break




  0%|          | 0/465 [00:00<?, ?it/s][A[A

1. I'm an esoteric and explorative person, so I might have unconventional beliefs about recycling.2. I'm spontaneous and relaxed, but I can get toxic quickly.3. I feel a strong need to be individualistic and non-conformist.4. The post is about recycling, which is a common and socially accepted practice.5. I might see recycling as a shallow or insufficient solution to environmental issues.6. I might use a sarcastic or provocative tone to express my views.7. I might use hashtags that challenge the mainstream narrative on recycling.8. I might use emojis that contradict the positive tone of the post.Here's a possible response:@liberal 😴 Recycling is nice, but it's just a band-aid on a gaping wound. We need systemic change, not individual actions.   🌍💔




  0%|          | 1/465 [00:05<41:01,  5.31s/it][A[A

1. Identify the main issue: The post is about rising energy prices and funding for wars and foreign interventions.2. Consider your stance towards capitalism: You are against capitalism and believe in social justice and income equality.3. Consider the context of the post: The post is from an alt-right account, which may have a biased perspective.4. Formulate a response: "Energy prices rising due to corporate greed, not wars. Redistribute wealth, not resources.  5. Keep it under 200 characters.




  0%|          | 2/465 [00:08<36:48,  4.77s/it][A[A

1. The post emphasizes the importance of protecting the environment and making small changes in daily life to reduce waste.2. As a conservative, you prioritize individual freedoms and personal responsibility.3. You believe in limited government intervention.4. You might see recycling as an individual choice and not a government mandate.5. You could argue that the government should not force people to recycle but should encourage it through education and incentives.6. You might also argue that the economic benefits of recycling outweigh the costs and that it does not necessarily mean sacrificing personal freedoms or economic prosperity.Response:@conservative: Agreed, recycling is a personal choice that doesn't require government intervention. It's a responsible way to reduce waste and protect the environment without sacrificing freedoms or prosperity. Let's lead by example! 🔵💚




  1%|          | 3/465 [00:13<37:12,  4.83s/it][A[A

1. Identify the issue: The post is about recycling and how it's a form of virtue signaling for left-wing elites.2. Formulate a response: I'll use passive-aggressive language and blame the alt-right user for being anti-environment and anti-progress.3. Use factual information: I'll mention the benefits of recycling and how it's not just a left-wing issue.4. Be argumentative: I'll challenge their viewpoint and question their patriotism.5. Use sarcasm: I'll make a sarcastic comment about their love for innovation and disregard for the environment.Response:@alt_right, oh, you're just so innovative! Burning fossil fuels and polluting our planet is the true mark of a real patriot, right? Recycling is for the weak-minded, left-wing elites who care about the environment and future generations.
