# Package imports

In [None]:
!pip install openai --upgrade

Collecting openai
  Downloading openai-1.66.5-py3-none-any.whl.metadata (24 kB)
Downloading openai-1.66.5-py3-none-any.whl (571 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m571.1/571.1 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.61.1
    Uninstalling openai-1.61.1:
      Successfully uninstalled openai-1.61.1
Successfully installed openai-1.66.5


In [None]:
from google.colab import userdata
from openai import AsyncOpenAI, OpenAI

# The API key is read from Colab's secret store (secret name 'openkey'),
# so it never appears in the notebook itself.
openkey = userdata.get('openkey')

# One synchronous and one asynchronous client, both built from the same key.
client = OpenAI(api_key=openkey)
async_client = AsyncOpenAI(api_key=openkey)

In [None]:
!pip install pydot

import networkx as nx
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

import asyncio
from tqdm.asyncio import tqdm_asyncio



In [None]:
import re
import random

import warnings
# Suppress DeprecationWarning messages so they do not clutter cell output.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Import datasets

In [None]:
from google.colab import drive

# Mount Google Drive; force_remount=True asks Colab to remount on every run.
drive.mount('/content/drive', force_remount=True)

# Project folders on the mounted drive.
_project_root = '/content/drive/MyDrive/Hate in ABM Society'
_model_data_dir = _project_root + '/Model Development/Data'
_report_data_dir = _project_root + '/Report and Data/Data'

# Network CSVs. Note that the 200-node file lives under "Report and Data",
# unlike the other three.
network50_file = _model_data_dir + '/50node_network.csv'
network100_file = _model_data_dir + '/100node_network.csv'
network150_file = _model_data_dir + '/150node_network.csv'
network200_file = _report_data_dir + '/200node_network.csv'

# Remaining input tables.
posts_file = _model_data_dir + '/truth_posts_v2.csv'
census_file = _model_data_dir + '/census_data_process.csv'
postfreq = _model_data_dir + '/postfreq_interpolate.csv'
opinions = _model_data_dir + '/llm_opinions_df.csv'

Mounted at /content/drive


In [None]:
# Input tables: census rows to sample agents from, per-demographic opinion
# ranges, and the corpus of posts.
census_df = pd.read_csv(census_file)
opinions_df = pd.read_csv(opinions)
truth_posts = pd.read_csv(posts_file)

# Networks: keep both the DataFrame and its NumPy form (the simulation
# functions index the NumPy arrays directly).
network50_df = pd.read_csv(network50_file)
network50_np = network50_df.to_numpy()
network100_df = pd.read_csv(network100_file)
network100_np = network100_df.to_numpy()
network150_df = pd.read_csv(network150_file)
network150_np = network150_df.to_numpy()
network200_df = pd.read_csv(network200_file)
network200_np = network200_df.to_numpy()

# Posting-frequency table: only the two columns the simulation reads.
postfreq_df = pd.read_csv(postfreq, usecols=["Followers", "Tweets per Day"])
postfreq_np = postfreq_df.to_numpy()

# functions

In [None]:
class LLMOpenAIClient:
    """Async helper that sends chat-completion requests with a concurrency cap and retries.

    Args:
        client: Object exposing ``chat.completions.create(...)`` as an awaitable
            (an ``AsyncOpenAI`` instance in this notebook).
        concurrency: Maximum number of requests allowed in flight at once.
    """

    def __init__(self, client, concurrency=100):
        self.client = client
        # Shared by every call made through this instance.
        self.semaphore = asyncio.Semaphore(concurrency)

    async def calling_llm(self, messages: list, model: str, temperature: float, top_p: float, max_tries: int = 3, backoff: float = 2.0):
        """Send one chat-completion request, retrying on any exception.

        Args:
            messages: Chat messages passed through to the API unchanged.
            model: Model name.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling parameter.
            max_tries: Total number of attempts before the last error is re-raised.
            backoff: Base delay in seconds; the wait before retry ``k`` is
                ``backoff * k`` (linear back-off).

        Returns:
            The text content of the first choice in the response.

        Raises:
            Exception: Whatever the final failed attempt raised.
        """
        attempt = 1
        while True:
            try:
                # The semaphore is held only while the request is in flight,
                # not while sleeping between retries.
                async with self.semaphore:
                    completion = await self.client.chat.completions.create(
                        model=model,
                        messages=messages,
                        temperature=temperature,
                        top_p=top_p
                    )
                return completion.choices[0].message.content

            except Exception as e:
                print(f"Attempt {attempt} failed with error {e}")
                if attempt >= max_tries:
                    raise
                sleep_time = backoff * attempt
                print(f"Retrying in {sleep_time} seconds...")
                # Sleep for the announced delay (previously slept only `backoff`,
                # so the wait never grew between attempts).
                await asyncio.sleep(sleep_time)
                attempt += 1

    async def prompting_process(self,
                                messages: list,
                                model: str = 'gpt-4o-mini',
                                temp: float = 1.0,
                                top_p: float = 1.0,
                                max_tries: int = 3,
                                backoff: float = 2.0):
        """Run a single prompt through ``calling_llm`` with notebook defaults.

        Returns:
            The response text returned by ``calling_llm``.
        """
        response = await self.calling_llm(
            messages=messages,
            model=model,
            temperature=temp,
            top_p=top_p,
            max_tries=max_tries,
            backoff=backoff
        )

        return response

    async def parallel_prompting_process(self,
                                         messages_list: list,
                                         model: str = "gpt-4o-mini",
                                         temperature: float = 1.0,
                                         top_p: float = 1.0,
                                         max_tries: int = 3,
                                         backoff: float = 2.0):
        """Run many prompts concurrently and collect their responses.

        Args:
            messages_list: One chat-message list per request.

        Returns:
            The gathered responses from ``tqdm_asyncio.gather``.
        """
        tasks = [
            asyncio.create_task(
                self.prompting_process(
                    messages=m,
                    model=model,
                    temp=temperature,
                    top_p=top_p,
                    max_tries=max_tries,
                    backoff=backoff,
                )
            )
            for m in messages_list
        ]
        results = await tqdm_asyncio.gather(*tasks)
        return results

# Shared wrapper around the async OpenAI client, using the class's default concurrency limit.
LLM_client = LLMOpenAIClient(client=async_client)

In [None]:
def create_users(num, census, opinions=None):
  """Sample `num` synthetic users from a census table and give each a starting opinion.

  Args:
    num: Number of users to create.
    census: DataFrame indexed 0..n-1 with columns 'Cumulative', 'Age', 'Sex',
      'Educational Attainment' and 'Race'. 'Cumulative' is walked from the
      top as a cumulative distribution.
    opinions: Optional DataFrame with columns 'age', 'gender', 'education',
      'race', 'opinion_min' and 'opinion_max'. Defaults to the notebook-level
      `opinions_df`.

  Returns:
    DataFrame with columns 'Age', 'Race', 'Education', 'Gender', 'opinions'.

  Raises:
    ValueError: If a sampled demographic has no row in the opinions table.
  """
  if opinions is None:
    # Fall back to the table loaded at notebook level.
    opinions = opinions_df

  age, race, edu, sex = [], [], [], []
  last_index = len(census) - 1

  # Draw one census row per user by walking the cumulative distribution.
  for _ in range(num):
    rand, index = random.random(), 0
    # Stop at the final row so a 'Cumulative' column that ends slightly
    # below 1 (rounding) cannot push the lookup past the table.
    while index < last_index and rand >= census.loc[index, 'Cumulative']:
      index += 1

    age.append(census.loc[index, 'Age'])
    sex.append(census.loc[index, 'Sex'].lower())

    education = census.loc[index, 'Educational Attainment'].lower()
    edu.append('post-grad school' if education == 'post-grad' else education)

    census_race = census.loc[index, 'Race']
    if census_race == 'Asian':
      # 'Asian' keeps its capitalisation.
      race.append(census_race)
    elif census_race == 'American Indian/Alaskan Native':
      # American Indian and Alaskan Native are grouped under one label.
      race.append('american-indian')
    else:
      race.append(census_race.lower())

  users = pd.DataFrame({'Age': age, 'Race': race, 'Education': edu, "Gender": sex})
  user_opinion = []

  # Assign each user an integer opinion drawn uniformly from the range stored
  # for their demographic combination.
  for x in range(num):
    match = opinions.loc[
      (opinions['age'] == users.loc[x, 'Age'])
      & (opinions['gender'] == users.loc[x, 'Gender'])
      & (opinions['education'] == users.loc[x, 'Education'])
      & (opinions['race'] == users.loc[x, 'Race'])
    ]
    if match.empty:
      raise ValueError(
        "No opinion range found for user " + str(x) + ": "
        + str(users.loc[x].to_dict())
      )
    # Take the scalar explicitly: calling int() on a one-element Series is
    # deprecated in pandas.
    low = int(match['opinion_min'].iloc[0])
    high = int(match['opinion_max'].iloc[0])
    user_opinion.append(random.randint(low, high))

  users['opinions'] = user_opinion
  return users

In [None]:
#function to ask LLM its opinion change
def AIquery(model, n, role, prompt):
  """Send one persona + prompt pair to the chat model and return its reply text.

  Args:
    model: Model name passed to the API.
    n: Number of completions requested; only the first one is returned.
    role: Persona text, sent as the system message.
    prompt: Question text, sent as the user message.

  Returns:
    The content of the first choice in the response.
  """
  msg = client.chat.completions.create(
    model=model,
    n=n,
    # The system (persona) message goes first so it frames the user
    # message that follows; previously it was appended after the question.
    messages=[{"role": "system", "content": role},
              {"role": "user", "content": prompt}]
  )
  return msg.choices[0].message.content

In [None]:
#create prompt and role from user demographic + post
def create_prompt_df(agent, users, input):
  """Build the persona and question text for one agent reacting to a post.

  Args:
    agent: Value matched against the 'ID' column of `users`.
    users: DataFrame with 'ID', 'Age', 'Race', 'Gender', 'Education' and
      'opinions' columns.
    input: Text of the post shown to the agent (must be a str).

  Returns:
    A two-element list: [role, prompt].
  """
  agent_row = users.loc[users['ID'] == agent]

  def field(column):
    # Value of `column` for the first matching row, rendered as text.
    return str(agent_row[column].values[0])

  role = (
    f"Pretend you are a {field('Age')}-year old {field('Race')} "
    f"{field('Gender')} who has completed {field('Education')}"
  )
  prompt = (
    f"Pretend you have a political opinion score {field('opinions')} "
    "where -10 is far-right Republican and 10 is far-left Democrat. "
    "What is your new opinion score after you see \"" + input + "\" sent to you on social media? "
    "Do not explain your reasoning."
  )

  return [role, prompt]

In [None]:
def create_post_df(agent, post, network, data):
  """Deliver `post` from `agent` to every one of the agent's followers.

  Args:
    agent: ID of the posting agent.
    post: Text of the post.
    network: Network rows passed to `get_followers_df`.
    data: User DataFrame passed on to `receive_post_df`.
  """
  # Each follower re-evaluates their opinion in response to the post.
  for follower in get_followers_df(agent, network):
    receive_post_df(follower, data, post)

In [None]:
#get list of followers
def get_followers_df(agent, network):
  """Collect the followers of `agent` from the network rows.

  Args:
    agent: Agent ID; converted with int() before comparison.
    network: Iterable of rows where row[1] is compared to the agent ID and
      row[0] is collected as a follower.

  Returns:
    NumPy array of the collected row[0] values, in row order.
  """
  target = int(agent)
  return np.array([edge[0] for edge in network if edge[1] == target])

In [None]:
def receive_post_df(agent, data, post, max_attempts=10):
  """Ask the LLM for an agent's new opinion after seeing `post` and store it.

  The model is re-queried until it returns a usable score: a number within
  [-10, 10] that differs from the agent's current opinion by at most 2.

  Args:
    agent: Value matched against the 'ID' column of `data`.
    data: User DataFrame; its 'opinions' entry for the agent is updated in place.
    post: Text of the post shown to the agent.
    max_attempts: Upper bound on LLM queries. The original loop was unbounded,
      so a model that keeps refusing would loop (and bill) forever.

  Returns:
    The accepted score as the text matched in the response (e.g. '-2'). If no
    usable score is produced within `max_attempts`, the opinion is left
    unchanged and its current value is returned as text.
  """
  import warnings

  role, prompt = create_prompt_df(agent, data, post)
  row_label = data.index[data['ID'] == agent].tolist()[0]
  opinion = data.loc[row_label, 'opinions']

  for _ in range(max_attempts):
    response = AIquery("gpt-4o", 1, role, prompt)
    print(response)

    # Keep the last one- or two-digit (optionally negative) number in the reply.
    matches = re.findall(r'-?\d{1,2}', response or '')
    if not matches:
      continue
    candidate = matches[-1]

    # Reject scores outside the scale or more than 2 points from the current opinion.
    if abs(float(candidate)) > 10 or abs(float(candidate) - float(opinion)) > 2:
      continue

    # Store a number, not the matched text, so the column keeps a numeric dtype.
    data.loc[row_label, 'opinions'] = int(candidate)
    return candidate

  warnings.warn(
    "No usable opinion score for agent " + str(agent) + " after "
    + str(max_attempts) + " attempts; opinion left unchanged.",
    RuntimeWarning,
  )
  return str(opinion)

In [None]:
def run_network_df(steps, users, network, post_corpus, census):
  """Run the opinion simulation on a follower network for `steps` steps.

  Args:
    steps: Number of simulation steps.
    users: DataFrame with an 'opinions' column and one row per agent in the
      network. 'ID' and 'Postfreq' columns are added to it in place.
    network: 2-D NumPy array of rows; column 1 is the followed agent and
      column 0 the follower (as read by `get_followers_df`).
    post_corpus: DataFrame whose first column holds the post texts.
    census: Unused; kept so existing calls keep working.

  Returns:
    (opinions, users): `opinions` is a (steps, num_agents) array holding each
    agent's opinion at the start of every step; `users` is the updated frame.

  Raises:
    ValueError: If `users` does not have exactly one row per network agent.
  """
  network_agents = np.unique(network)
  num_agents = len(network_agents)
  if len(users) != num_agents:
    raise ValueError(
      "users has " + str(len(users)) + " rows but the network has "
      + str(num_agents) + " agents"
    )

  # Follower count per followed agent (column 1 of the network).
  followed, follower_counts = np.unique(network[:, 1], return_counts=True)

  # Opinion matrix - each row is every agent's opinion at one step.
  opinions = np.zeros((steps, num_agents))

  # Assign each user in the given dataframe a place in the network.
  users['ID'] = network_agents

  # Posting frequency comes from row `follower count` of the posting table;
  # agents nobody follows keep a frequency of 0.
  postfreq_by_id = {
    agent_id: postfreq_np[count][1]
    for agent_id, count in zip(followed, follower_counts)
  }
  users['Postfreq'] = [postfreq_by_id.get(agent_id, 0.0) for agent_id in network_agents]

  for i in range(steps):
    opinions[i] = users['opinions']
    print("Step " + str(i) + " is beginning.")

    # Give every agent a chance to post.
    for j in range(num_agents):
      print(str(j) + "/" + str(num_agents))
      agent = users.iloc[j]

      # Only agents with opinion 0 or lower (right wing) post, and they post
      # with probability Postfreq: the draw must fall BELOW it. The previous
      # `>` comparison made agents post with probability 1 - Postfreq.
      # The random draw stays first so one draw is consumed per agent.
      if random.random() < agent['Postfreq'] and int(agent['opinions']) <= 0:
        post = post_corpus.iloc[random.randint(0, len(post_corpus) - 1), 0]
        create_post_df(agent['ID'], post, network, users)

  return opinions, users

# run parameters

In [None]:
# Build a 200-agent population from the census table and simulate 25 steps on the 200-node network.
users = create_users(200, census_df)
u200_run2_opinions, u200_run2_users = run_network_df(25, users, network200_np, truth_posts, census_df)
# Save the final user table as CSV and the per-step opinion matrix as .npy
# (np.save appends the extension, which is why the copy below uses u200_run2_results.npy).
u200_run2_users.to_csv("u200_run2_users.csv", index = False)
np.save('u200_run2_results', u200_run2_opinions)
# Copy both result files into the project's results folder on Drive.
!cp -r u200_run2_users.csv '/content/drive/MyDrive/Hate in ABM Society/Report and Data/Results/Raw Data'
!cp -r u200_run2_results.npy '/content/drive/MyDrive/Hate in ABM Society/Report and Data/Results/Raw Data'

  user_opinion.append(random.randint(int(row['opinion_min']),int(row['opinion_max'])))


Step 0 is beginning.
0/200
1/200
2/200
3/200
My new opinion score is 0.


  data.loc[[data.index[data['ID'] == agent].tolist()[0]], 'opinions'] = new_opinion


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
New opinion score: 3
My new opinion score is -2.
My new opinion score is 3.
Your new opinion score is -8.
45/200
46/200
47/200
48/200
49/200
50/200
51/200
Your new opinion score is -3.
I'm sorry, but I can't assign or change opinion scores based on social media messages.
Based on the information provided, an appropriate opinion score could be around -1 to 1, indicating a centrist or slightly leaning conservative or liberal stance.
Considering your scenario, my new opinion score would be -2.
-2
-4
-6
Your political opinion score remains -4.
Your new opinion score remains -6.
As a 50-64-year-old white female who has completed some college, my new political opinion score would remain at -7.
-9
-8
My new opinion score is 2.
My new opinion score is 3.
My new opinion score is 6.
I'm sorry, but I can't assign an opinion score based on that statement.
I'm sorry, but I can't determine a political opinion score based on a single so

# result analysis

In [None]:
# NOTE(review): `u150_run1_opinions` is not assigned anywhere in the visible
# notebook - confirm which run this is meant to plot before a fresh Run All.
steps = np.linspace(0, 24, 25)
# Mean over axis 1, i.e. one average per row of the opinion matrix.
avg_opinion = np.mean(u150_run1_opinions, axis=1)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(steps, avg_opinion, marker='o', linestyle='-')
ax.set_title('Average Political Opinion Over Time')
ax.set_xlabel('Steps')
ax.set_ylabel('Opinion')
ax.grid(True)

plt.show()

In [None]:
# Transpose of the opinion matrix as a fresh float array, so later in-place
# edits do not touch `run1_opinions`. Using .T instead of a hard-coded
# (100, 25) buffer keeps this working for any number of agents or steps.
# NOTE(review): `run1_opinions` is not assigned in the visible notebook - confirm its source.
run1_opinions_box = np.array(run1_opinions, dtype=float).T

In [None]:
# Floor both opinion arrays at -10: the boxplot array is modified in place,
# while `run1_opinions1` is a floored copy that leaves `run1_opinions` untouched.
np.maximum(run1_opinions_box, -10, out=run1_opinions_box)
run1_opinions1 = np.maximum(run1_opinions, -10)

In [None]:
# plt.subplots leaves room for tick labels and titles; axes that fill the
# whole canvas ([0, 0, 1, 1]) can clip them.
fig, ax = plt.subplots(figsize=(10, 7))

# One box per column of `run1_opinions_box`; positions start at 0 so the
# tick labels match the column index.
bp = ax.boxplot(run1_opinions_box, positions=np.arange(run1_opinions_box.shape[1]))

# Label the figure so it can be read on its own.
# presumably each column is one simulation step - verify against how
# `run1_opinions_box` was built.
ax.set_title('Distribution of Opinions per Step')
ax.set_xlabel('Steps')
ax.set_ylabel('Opinion')

# show plot
plt.show()

In [None]:
# Quick sanity check: shape of the first column of the floored opinion matrix.
run1_opinions1[:,0].shape

In [None]:
# Column of the opinion matrix to plot. The title is derived from it so the
# two cannot drift apart (it previously said "User 5" while plotting column 13).
# NOTE(review): this is a column position, which may differ from the agent's 'ID' - confirm.
user_index = 13
user_opinion = run1_opinions1[:, user_index]

# One x value per recorded step, taken from the data instead of a fixed 25.
steps = np.arange(len(user_opinion))

plt.figure(figsize=(10, 6))
plt.plot(steps, user_opinion, marker='o', linestyle='-')
plt.title(f'Opinion of User {user_index}')
plt.xlabel('Steps')
plt.ylabel('Opinion')

plt.grid(True)
plt.show()

In [None]:
# Save the run-3 user table as CSV in the working directory.
# NOTE(review): `run3_users` is not assigned anywhere in the visible notebook - confirm its source.
run3_users.to_csv("run3_users.csv", index = False)

In [None]:
# Save the run-3 opinion matrix with np.save under the name 'run3_results'.
# NOTE(review): `run3_opinions` is not assigned anywhere in the visible notebook - confirm its source.
np.save('run3_results', run3_opinions)