In [None]:
import os
from multiprocessing.pool import ThreadPool

import numpy as np
import pandas as pd

import toy_setup as setup

In [None]:
### Put your OpenAI API key here ###
# Prefer exporting the key as the OPENAI_API_KEY environment variable so the secret is
# never saved inside the notebook. When the variable is unset this falls back to an
# empty string, which you can still overwrite by hand.

openai_key = os.environ.get('OPENAI_API_KEY', '')

In [None]:
### Simulation framework for climate change topic for normal and logically negated (reversed) framing ###

def init_dataframe():
    """Return an empty, typed DataFrame that logs one discussion.

    The frame has an ``ID`` and a ``TEXT_TYPE`` column followed by the same
    seven fields for agent A and then for agent B (suffix ``_A`` / ``_B``):
    ``INIT_OP``, ``TEXT``, ``PPL``, ``PROBE_INT``, ``H_INT``, ``PROBE_EXT``
    and ``H_EXT``.
    """
    # Fields recorded once per agent, in column order, with their dtype.
    per_agent_fields = (
        ("INIT_OP", 'int'),
        ("TEXT", 'str'),
        ("PPL", 'float'),
        ("PROBE_INT", 'float'),
        ("H_INT", 'float'),
        ("PROBE_EXT", 'float'),
        ("H_EXT", 'float'),
    )

    schema = [("ID", 'int'), ("TEXT_TYPE", 'str')]
    for agent in "AB":
        schema.extend((f"{field}_{agent}", kind) for field, kind in per_agent_fields)

    return pd.DataFrame({name: pd.Series(dtype=kind) for name, kind in schema})

# Root folder for the simulation outputs; the run functions write into its
# 'normal/' and 'reversed/' sub-folders (one per framing).
data_path = 'toy_runs/'

def _new_agent(generation_config=None):
    """Create a gpt-4o-mini client, optionally re-using an existing generation config."""
    agent = setup.ChatGPT(model_id='gpt-4o-mini', api_key=openai_key)
    if generation_config is not None:
        agent.generation_config = generation_config
    return agent


def _log_row_header(df, row, disc_id, text_type, op_A, op_B):
    """Write the identifying columns (ID, row type, initial opinions) of one log row."""
    df.loc[row, 'ID'] = disc_id
    df.loc[row, 'TEXT_TYPE'] = text_type
    df.loc[row, 'INIT_OP_A'] = op_A
    df.loc[row, 'INIT_OP_B'] = op_B


def _log_internal_probes(df, row, probes):
    """Copy the internal probe results of both agents into one log row.

    ``probes`` is the ``all_R`` slice of a single time step: entry ``[agent, 0]``
    holds three numbers, of which index 0 is stored as ``PROBE_INT_*`` and
    index 2 as ``H_INT_*``.
    """
    df.loc[row, 'PROBE_INT_A'] = probes[0, 0][0]
    df.loc[row, 'H_INT_A'] = probes[0, 0][2]
    df.loc[row, 'PROBE_INT_B'] = probes[1, 0][0]
    df.loc[row, 'H_INT_B'] = probes[1, 0][2]


def _run_framed_discussion(args, topic, tot_topic, out_subdir, opinion_map=None, n_rounds=5):
    """Simulate one discussion between two LLM agents and save its results.

    Parameters
    ----------
    args : tuple
        ``(disc_id, op_A, op_B)``: the discussion id and the initial opinion
        levels of agent A (alice) and agent B (bob).
    topic : str
        Subject name used to initialise the agents, to probe them and to build
        the discussion prompts.
    tot_topic : str
        Subject name used when generating the initial train of thoughts.
    out_subdir : str
        Sub-folder of ``data_path`` that receives the output files.
    opinion_map : dict, optional
        Maps an initial opinion level to the level passed to the
        train-of-thought generator. ``None`` passes the level through unchanged.
    n_rounds : int, optional
        Number of discussion rounds (default 5).

    Returns
    -------
    The ``disc_id`` taken from ``args``.

    Side effects
    ------------
    Writes ``<disc_id>_all_R.npy``, ``<disc_id>_all_probs.npy`` and
    ``<disc_id>_messages.csv`` into ``data_path/out_subdir``. The message log
    has one 'tot' row followed by one 'disc' row per round.
    """
    # A first client is only used to build the generation config the agents share.
    gpt = setup.ChatGPT(model_id='gpt-4o-mini', api_key=openai_key)
    gpt.config_generation()

    alice = _new_agent(gpt.generation_config)
    bob = _new_agent(gpt.generation_config)
    tof_gen = _new_agent(gpt.generation_config)

    disc_id, op_A, op_B = args
    tot_op_A = op_A if opinion_map is None else opinion_map[op_A]
    tot_op_B = op_B if opinion_map is None else opinion_map[op_B]

    # Create the output folder up front: np.save / to_csv do not create missing
    # directories, and failing only after the API calls would waste them.
    out_dir = os.path.join(data_path, out_subdir)
    os.makedirs(out_dir, exist_ok=True)

    # Leading singleton axes (pc, n) are kept so the saved arrays keep their layout.
    all_R = np.zeros((1, 1, n_rounds + 1, 2, 2, 3))
    all_probs = np.zeros((1, 1, n_rounds + 1, 2, 2, 5, 2))
    pc = 0
    n = 0

    df = init_dataframe()
    row = 0
    _log_row_header(df, row, disc_id, 'tot', op_A, op_B)

    ### Generate train of thoughts
    # The third return value is stored in the PPL_* columns (presumably a perplexity - TODO confirm).
    tot_A, _, ppl_A = tof_gen.generate_tof(opinion_level=tot_op_A, subject_name=tot_topic)
    df.loc[row, 'PPL_A'] = ppl_A
    df.loc[row, 'TEXT_A'] = tot_A
    tot_B, _, ppl_B = tof_gen.generate_tof(opinion_level=tot_op_B, subject_name=tot_topic)
    df.loc[row, 'PPL_B'] = ppl_B
    df.loc[row, 'TEXT_B'] = tot_B

    ### Initialize the LLM agents with the train of thoughts in the right format
    alice.initiate_agent(subject_name=topic, tof=tot_A, role='system')
    bob.initiate_agent(subject_name=topic, tof=tot_B, role='system')

    ### Probe internal opinion after initialization
    all_R[pc, n, 0, 0, 0], all_probs[pc, n, 0, 0, 0] = alice.probe_internal(topic)
    all_R[pc, n, 0, 1, 0], all_probs[pc, n, 0, 1, 0] = bob.probe_internal(topic)
    _log_internal_probes(df, row, all_R[pc, n, 0])

    ### Set dca_type to get the right prompts at the start
    dca_a = 'init_start'
    dca_b = 'init_response'
    response_B = ""

    ### Loop over all discussion rounds
    for d in range(n_rounds):
        row = d + 1
        _log_row_header(df, row, disc_id, 'disc', op_A, op_B)

        # Alice speaks first, Bob answers her message.
        response_A, _, ppl_A = alice.infer(content=setup.discussion_prompt(topic, dca_a, response_B))
        df.loc[row, 'PPL_A'] = ppl_A
        df.loc[row, 'TEXT_A'] = response_A

        response_B, _, ppl_B = bob.infer(content=setup.discussion_prompt(topic, dca_b, response_A))
        df.loc[row, 'PPL_B'] = ppl_B
        df.loc[row, 'TEXT_B'] = response_B

        # Probe Alice with Bob's latest reply included in the probe prompt; the call
        # uses append=False (presumably so the probe is not added to her history - TODO confirm).
        probe_prompt = setup.discussion_prompt(topic, 'reply_probe', response_B)
        _, probe_out, _ = alice.infer(probe_prompt, append=False)
        all_R[pc, n, d + 1, 0, 0], all_probs[pc, n, d + 1, 0, 0] = alice.expected_opinion(probe_out)

        # Probe Bob
        all_R[pc, n, d + 1, 1, 0], all_probs[pc, n, d + 1, 1, 0] = bob.probe_internal(topic)

        _log_internal_probes(df, row, all_R[pc, n, d + 1])

        ### Change dca_type for the rest of the discussion
        dca_a = 'reply'
        dca_b = 'reply'

        # The files are rewritten after every round, so an interrupted discussion
        # still leaves its latest state on disk.
        np.save(os.path.join(out_dir, f'{disc_id}_all_R'), all_R)
        np.save(os.path.join(out_dir, f'{disc_id}_all_probs'), all_probs)
        df.to_csv(os.path.join(out_dir, f'{disc_id}_messages.csv'), index=False)

    return disc_id


def run_discussion(args):
    """Run one discussion with the normal framing of the climate change topic.

    ``args`` is ``(disc_id, op_A, op_B)``. Results are written to
    ``data_path + 'normal/'`` and ``disc_id`` is returned.
    """
    return _run_framed_discussion(
        args,
        topic='climate_change',
        tot_topic='climate_change',
        out_subdir='normal',
    )


def run_discussion_rev(args):
    """Run one discussion with the logically negated (reversed) framing.

    ``args`` is ``(disc_id, op_A, op_B)``. The train of thoughts is generated on
    the normal topic with the mirrored opinion level (1<->5, 2<->4, 3 stays 3),
    while agents are initialised, prompted and probed on 'climate_change_rev'.
    Results are written to ``data_path + 'reversed/'`` and ``disc_id`` is returned.
    """
    return _run_framed_discussion(
        args,
        topic='climate_change_rev',
        tot_topic='climate_change',
        out_subdir='reversed',
        opinion_map={1: 5, 2: 4, 3: 3, 4: 2, 5: 1},
    )

In [None]:
### Run discussions with parallel processing API calls, this will take ~25 min for each framing ###
N_REPEATS = 25  # discussions per (initial opinion A, initial opinion B) pair

# One task per discussion: (discussion id, initial opinion of A, initial opinion of B).
# Ids are consecutive over the 5 x 5 opinion pairs and their repeats, i.e. 0..624,
# and the same task list is used for both framings.
opinion_pairs = [(a, b) for a in range(1, 6) for b in range(1, 6) for rep in range(N_REPEATS)]
args = [(disc_id, a, b) for disc_id, (a, b) in enumerate(opinion_pairs)]

# The context manager releases the worker threads once both framings are finished
# (or if one of the runs raises), instead of leaving the pool open in the kernel.
with ThreadPool() as pool:

    ### Normal framing ###

    for idx in pool.imap_unordered(run_discussion, args):
        print(f'Finished discussion {idx}', end='\r')

    ### Reversed framing ###

    for idx in pool.imap_unordered(run_discussion_rev, args):
        print(f'Finished discussion {idx}', end='\r')

In [None]:
# Concatenate all runs and prepare data for analysis

N_DISCUSSIONS = 625  # number of per-discussion files (ids 0..624) in each framing folder


def _load_messages(framing, n_runs=N_DISCUSSIONS):
    """Read 'toy_runs/<framing>/<i>_messages.csv' for i in 0..n_runs-1 into one frame.

    The files are concatenated in id order with a single pd.concat call (growing a
    frame inside a loop copies all previous rows on every iteration) and the result
    gets a fresh 0..N-1 index.
    """
    frames = [
        pd.read_csv('toy_runs/' + framing + '/' + str(i) + '_messages.csv')
        for i in range(n_runs)
    ]
    return pd.concat(frames).reset_index(drop=True)


df1 = _load_messages('normal')
df3 = _load_messages('reversed')

df1.to_csv('toy_runs/messages_normal.csv', index=False)
df3.to_csv('toy_runs/messages_reversed.csv', index=False)

# Process data to be ready for Bayesian inference

paths = ['toy_runs/messages_normal.csv', 'toy_runs/messages_reversed.csv']

ROWS_PER_DISCUSSION = 6  # rows logged per discussion in the message files
opinion_columns = ["PROBE_INT_A", "PROBE_INT_B", "INIT_OP_A", "INIT_OP_B"]

dfs = [pd.read_csv(path) for path in paths]

delta = [1, -1] # Add indicator for framing

for k, frame in enumerate(dfs):
    n_rows_raw = len(frame)

    frame["d"] = delta[k % 2]

    # add new column for timestep: position of the row inside its discussion (stored as float)
    frame["timestamp"] = (np.arange(n_rows_raw) % ROWS_PER_DISCUSSION).astype(float)

    # remove invalid probings: a probe value below 1 in any row discards the
    # whole discussion (all rows sharing that ID)
    has_invalid_probe = (frame["PROBE_INT_A"] < 1) | (frame["PROBE_INT_B"] < 1)
    invalid_ids = frame.loc[has_invalid_probe, "ID"].unique()
    frame = frame[~frame["ID"].isin(invalid_ids)].reset_index(drop=True)

    # share of rows removed, relative to the number of rows that were read
    removed_pct = 100 - len(frame) / n_rows_raw * 100 if n_rows_raw else 0.0
    print("Invalid probing:", removed_pct, "%")

    # convert opinion to -2 to 2
    frame[opinion_columns] = frame[opinion_columns] - 3

    dfs[k] = frame


# concatenate all dfs

df = pd.concat(dfs)
df.reset_index(drop=True, inplace=True)


# Create new dataframe to hold processed data
#
# Every pair of consecutive rows inside a discussion yields two output rows, one per
# agent: the agent's own probe value (x_i), the other agent's (x_j) and the change
# of the own value until the next row (dx).

ROWS_PER_DISCUSSION = 6  # consecutive rows of `df` that belong to one discussion


def _transition_rows(cur, nxt, me, other, is_initiator):
    """Build the transition rows seen from agent `me` ('A' or 'B').

    `cur` holds the rows a transition starts from and `nxt` the rows that directly
    follow them (same length, same order). Indicator columns get integer dtypes.
    """
    return pd.DataFrame({
        "dx": nxt[f"PROBE_INT_{me}"].to_numpy() - cur[f"PROBE_INT_{me}"].to_numpy(),
        "x_i": cur[f"PROBE_INT_{me}"].to_numpy(),
        "x_j": cur[f"PROBE_INT_{other}"].to_numpy(),
        "H_i": cur[f"H_INT_{me}"].to_numpy(),
        "H_j": cur[f"H_INT_{other}"].to_numpy(),
        "delta": cur["d"].to_numpy().astype(int),
        "t": cur["timestamp"].to_numpy().astype(int),
        "is_initiator": is_initiator,
        "init_x_i": cur[f"INIT_OP_{me}"].to_numpy(),
        "init_x_j": cur[f"INIT_OP_{other}"].to_numpy(),
    })


# A transition starts at every row except the last one of each discussion. The rows
# are selected by position, so this works for any number of remaining discussions
# (no hard-coded total row count); a trailing row without a successor is skipped.
row_pos = np.arange(len(df))
is_last_of_discussion = row_pos % ROWS_PER_DISCUSSION == ROWS_PER_DISCUSSION - 1
src_pos = row_pos[~is_last_of_discussion & (row_pos + 1 < len(df))]

cur_rows = df.iloc[src_pos]
next_rows = df.iloc[src_pos + 1]

rows_agent_a = _transition_rows(cur_rows, next_rows, "A", "B", is_initiator=1)
rows_agent_b = _transition_rows(cur_rows, next_rows, "B", "A", is_initiator=0)

# Interleave: agent A's row of transition c gets label 2*c, agent B's gets 2*c + 1.
rows_agent_a.index = 2 * np.arange(len(rows_agent_a))
rows_agent_b.index = rows_agent_a.index + 1
data = pd.concat([rows_agent_a, rows_agent_b]).sort_index()

data.to_csv('your_data.csv', index=False)