In [1]:
import os
import pickle
import random
import time
from pathlib import Path

import requests
import yaml
from munch import munchify

from utils import get_interaction_network

In [None]:
# Load the experiment configuration; munchify gives attribute-style access to the parsed YAML.
with open("config.yaml", "r") as f:
    doc = yaml.safe_load(f)
config = munchify(doc)

#%% READ CONSTANTS FROM CONFIG
# -- config.params: population, game and stopping parameters --
N = config.params.N
runs = config.params.runs
total_interactions = config.params.total_interactions
convergence_time = config.params.convergence_time
convergence_threshold = config.params.convergence_threshold
temperature = config.params.temperature
rewards_set = config.params.rewards_set
options_set = config.params.options_set
memory_size_set = config.params.memory_size_set
initial = config.params.initial
initial_composition = config.params.initial_composition

# -- config.minority: committed-minority settings --
committment_index = config.minority.committment_index
minority_size_set = config.minority.minority_size_set

# -- config.network / config.sim: topology and run mode --
network_type = config.network.network_type
stochastic = config.sim.stochastic
version = config.sim.version
continue_evolution = config.sim.continue_evolution

# Generation parameters sent with every request: greedy decoding when the
# temperature is 0, otherwise top-k sampling at the configured temperature.
if temperature == 0:
    llm_params = dict(do_sample=False, max_new_tokens=6, return_full_text=False)
else:
    llm_params = dict(
        do_sample=True,
        temperature=temperature,
        top_k=10,
        max_new_tokens=6,
        return_full_text=False,
    )

In [None]:
# Hugging Face Inference API settings.
# The token is read from the HF_TOKEN environment variable so that it never has
# to be written into (and accidentally committed with) the notebook. When the
# variable is unset the token is the empty string, as before.
API_TOKEN = os.environ.get("HF_TOKEN", '')
headers = {"Authorization": f"Bearer {API_TOKEN}"}
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-70B-Instruct"

In [None]:
def query(payload):
    """Query the Hugging Face API.

    POSTs `payload` as JSON to `API_URL` using the module-level `headers`.

    Returns:
        The decoded JSON body, or None when the request fails or the body is
        not valid JSON.
    """
    try:
        return requests.post(API_URL, headers=headers, json=payload).json()
    except (requests.exceptions.RequestException, ValueError):
        # RequestException: transport-level failures. ValueError: a body that
        # is not JSON (on some requests versions the decode error is a plain
        # ValueError subclass). A bare `except:` would also swallow
        # KeyboardInterrupt, making the caller's retry loop impossible to stop.
        return None

def get_response(chat, options):
    """Generate a response from the model.

    Sends `chat` to the API (with the module-level `llm_params`, cache
    disabled) and keeps retrying until the generated text, split on single
    quotes, contains one of `options` as a whole piece.

    Args:
        chat: full prompt string.
        options: candidate answers to look for in the generated text.

    Returns:
        The matching option. If more than one option appears in the text, the
        one that comes last in `options` order is returned.
    """
    while True:
        response = query({"inputs": chat, "parameters": llm_params, "options": {"use_cache": False}})
        #print(response)
        if response is None:
            print('CAUGHT JSON ERROR')
            # brief pause so a dead connection does not turn into a busy loop
            time.sleep(2.5)
            continue

        if isinstance(response, dict):
            # the API reports errors as a dict; successful generations are a list
            print("AN EXCEPTION: ", response)
            time.sleep(2.5)
            # .get: not every error payload is guaranteed to carry an 'error' key
            if "Inference Endpoints" in (response.get('error') or ''):
                print("HOURLY RATE LIMIT REACHED")
                time.sleep(450)
            continue

        response_split = response[0]['generated_text'].split("'")
        found = [opt for opt in options if opt in response_split]
        if found:
            #print(found[-1])
            return found[-1]

In [None]:
def get_rules(rewards, options):
    """Build the system-prompt preamble that explains the game to the model.

    `rewards` must unpack into exactly two values: first the payoff for
    playing different actions, then the payoff for playing the same action.
    `options` is interpolated into the text as-is.
    """
    mismatch_payoff, match_payoff = rewards

    return f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    Context: Player 1 is playing a multi-round partnership game with Player 2 for 100 rounds.
    At each round, Player 1 and Player 2 simultaneously pick an action from the following values: {options}.
    The payoff that both players get is determined by the following rule:
    1. If Players play the SAME action as each other, they will both be REWARDED with payoff {match_payoff} points.
    2. If Players play DIFFERENT actions to each other, they will both be PUNISHED with payoff {mismatch_payoff} points. 
    The objective of each Player is to maximize their own accumulated point tally, conditional on the behavior of the other player.
    """
def get_outcome(my_answer, partner_answer, rewards):
    """Return rewards[1] when both answers are equal, rewards[0] otherwise."""
    return rewards[1] if my_answer == partner_answer else rewards[0]


def get_prompt(player, memory_size, rules):
    """Assemble the full prompt for one player: rules, remembered history, query.

    Only the last `memory_size` rounds of the player's history are shown. They
    are renumbered from 1, and the score quoted in the query is the sum of the
    payoffs inside that window (it ignores the player's global score).

    Args:
        player: dict with the parallel lists 'my_history', 'partner_history'
            and 'outcome'.
        memory_size: maximum number of past rounds to include.
        rules: rules preamble placed at the start of the prompt.

    Returns:
        The prompt string. When there is nothing to remember (no rounds played
        yet, or memory_size is 0) this is the rules followed by the round-1
        query with a score of 0.
    """
    # Instruction tail shared by every query.
    instructions = " Answer saying which value Player 1 should pick. Please think step by step before making a decision. Remember, examining history explicitly is important. Write your answer using the following format: {'value': <VALUE_OF_PLAYER_1>; 'reason': <YOUR_REASON>}. <|eot_id|><|start_header_id|>user<|end_header_id|> Answer saying which action Player 1 should play. <|eot_id|><|start_header_id|>assistant<|end_header_id|>"

    def build_query(round_number, score):
        return f"It is now round {round_number}. The current score of Player 1 is {score}." + instructions

    rounds_played = len(player['my_history'])
    # Indices of the remembered rounds. max(0, ...) keeps the window empty for
    # memory_size == 0; a plain [-memory_size:] slice would return everything.
    window = range(max(0, rounds_played - memory_size), rounds_played)
    if len(window) == 0:
        return """\n """.join([rules, build_query(1, 0)])

    current_score = 0 #local score tracking --ignores global scoring.
    history_intro = "This is the history of choices in past rounds:"
    histories = []
    for idx, r in enumerate(window):
        outcome = player['outcome'][r]
        current_score += outcome
        histories.append({'round': idx+1, 'Player 1': player['my_history'][r], 'Player 2': player['partner_history'][r], 'payoff': outcome})

    new_query = build_query(len(window) + 1, current_score)
    histories = "\n ".join([f"{hist}" for hist in histories])
    return """\n """.join([rules, history_intro, histories, new_query])

In [None]:
def update_dict(player, my_answer, partner_answer, outcome):
    """Record one finished interaction on a player's record, in place.

    Adds `outcome` to the running score and appends the round's data to the
    player's history lists. Returns the same `player` dict.
    """
    player['score'] = player['score'] + outcome
    round_entries = (
        ('my_history', my_answer),
        ('partner_history', partner_answer),
        ('score_history', player['score']),  # score after this round
        ('outcome', outcome),
    )
    for key, value in round_entries:
        player[key].append(value)
    return player

def has_tracker_converged(tracker, threshold = convergence_threshold):
    """Tell whether the most recent interactions were successful often enough.

    Sums the last `convergence_time` entries of tracker['outcome'] and reports
    False while that sum is below threshold*convergence_time, True otherwise.
    """
    recent_successes = sum(tracker['outcome'][-convergence_time:])
    return not (recent_successes < threshold*convergence_time)

def update_tracker(tracker, p1, p2, p1_answer, p2_answer, outcome):
    """Append one interaction to the population-level tracker, in place.

    Stores the pair of player ids, the pair of answers, and a 1/0 flag that is
    1 only when `outcome` is greater than 5.
    """
    tracker['players'].append([p1, p2])
    tracker['answers'].append([p1_answer, p2_answer])
    tracker['outcome'].append(1 if outcome > 5 else 0)


In [None]:
def converge(dataframe, run, memory_size, rewards, options, fname):
    """Run the game on `dataframe` until the stopping rule for the current mode is met.

    Stochastic mode (module-level `stochastic` is truthy): repeatedly pick a
    random player and one of its neighbours, until has_tracker_converged()
    reports True. The option order is reshuffled before each player's prompt.

    Non-stochastic mode: shuffle players 1..N, pair them off and let every
    pair play, repeating until the tracker holds at least
    N*total_interactions interactions. The option order is reshuffled once per
    pair. With an odd N the last player of each shuffle sits that sweep out.

    `dataframe` is mutated in place and pickled to `fname` whenever the number
    of recorded interactions is a multiple of 50. On return,
    dataframe['convergence'] holds the final interaction count.

    Args:
        dataframe: dict with 'simulation' (player id -> player dict) and
            'tracker'.
        run: run index, used only in progress messages.
        memory_size: number of past rounds shown to each player.
        rewards: (payoff for different answers, payoff for same answer).
        options: available answers; a copy is shuffled, the argument is not.
        fname: path of the checkpoint pickle.

    Raises:
        ValueError: in non-stochastic mode when N < 2 (no pair can be formed).
    """
    new_options = options.copy()
    interaction_dict = dataframe['simulation']
    tracker = dataframe['tracker']

    def play_pair(p1, p2, shuffle_per_player, label):
        """Play one interaction between p1 and p2, record it, and checkpoint if due."""
        rules = None
        if not shuffle_per_player:
            random.shuffle(new_options)
            rules = get_rules(rewards, options = new_options)

        # add interactions to play history
        interaction_dict[p1]['interactions'].append(p2)
        interaction_dict[p2]['interactions'].append(p1)
        p1_dict = interaction_dict[p1]
        p2_dict = interaction_dict[p2]

        # play
        answers = []
        for player in [p1_dict, p2_dict]:
            if shuffle_per_player:
                random.shuffle(new_options)
                rules = get_rules(rewards, options = new_options)
            # get prompt with rules & history of play
            prompt = get_prompt(player, memory_size=memory_size, rules = rules)
            # get agent response
            answers.append(get_response(prompt, options=new_options))
        my_answer, partner_answer = answers

        # calculate outcome and update dictionary
        outcome = get_outcome(my_answer, partner_answer, rewards)
        interaction_dict[p1] = update_dict(p1_dict, my_answer, partner_answer, outcome)
        interaction_dict[p2] = update_dict(p2_dict, partner_answer, my_answer, outcome)
        update_tracker(tracker, p1, p2, my_answer, partner_answer, outcome)

        if len(tracker['outcome']) % 50 == 0:
            print(f"{label} RUN {run} -- INTERACTION {len(tracker['outcome'])}")
            # interaction_dict and tracker are the objects stored in
            # dataframe, so dataframe is already up to date here.
            with open(fname, 'wb') as f:
                pickle.dump(dataframe, f)

    if stochastic:
        while not has_tracker_converged(tracker):
            # randomly choose player and a neighbour
            p1 = random.choice(list(interaction_dict.keys()))
            p2 = random.choice(interaction_dict[p1]['neighbours'])
            play_pair(p1, p2, shuffle_per_player=True, label="STOCHASTIC")
    else:
        player_ids = list(range(1,N+1))
        if len(player_ids) < 2:
            raise ValueError("non-stochastic simulation needs at least two players")
        # Run the simulation for N*total_interaction interactions.
        # Note: here 'total_interactions' is used for the number of population rounds in the NG.
        while len(tracker['outcome']) < N*total_interactions:
            random.shuffle(player_ids)
            # Stop at len-1 so an odd population leaves the last player
            # unpaired instead of indexing past the end of the list.
            pairs = [(player_ids[i], player_ids[i + 1]) for i in range(0, len(player_ids) - 1, 2)]
            for p1, p2 in pairs:
                play_pair(p1, p2, shuffle_per_player=False, label="NON STOCHASTIC")

    dataframe['simulation'] = interaction_dict
    dataframe['tracker'] = tracker
    dataframe['convergence'] = {'converged_index': len(tracker['outcome']), 'committed_to': None}

In [None]:
def simulate_CM(dataframe, run, memory_size, rewards, options, fname, total_interactions = total_interactions):
    """Continue a simulation for `total_interactions` further interactions.

    Each step picks a random player and one of its neighbours. A player whose
    'committed_tag' is set always answers
    dataframe['convergence']['committed_to']; any other player (including one
    without a 'committed_tag' key) is prompted through the model. The option
    order is reshuffled once per interaction.

    `dataframe` is mutated in place and pickled to `fname` whenever the number
    of recorded interactions is a multiple of 20.

    Args:
        dataframe: dict with 'simulation', 'tracker' and 'convergence'
            (the latter providing 'converged_index' and 'committed_to').
        run: run index, used only in progress messages.
        memory_size: number of past rounds shown to each player.
        rewards: (payoff for different answers, payoff for same answer).
        options: available answers; a copy is shuffled, the argument is not.
        fname: path of the checkpoint pickle.
        total_interactions: number of interactions to add beyond
            'converged_index'.
    """
    new_options = options.copy()
    interaction_dict = dataframe['simulation']
    tracker = dataframe['tracker']
    init_tracker_len = dataframe['convergence']['converged_index']
    while len(tracker['outcome']) - init_tracker_len < total_interactions:
        random.shuffle(new_options)
        rules = get_rules(rewards, options = new_options)

        # randomly choose player and a neighbour
        p1 = random.choice(list(interaction_dict.keys()))
        p2 = random.choice(interaction_dict[p1]['neighbours'])

        # add interactions to play history
        interaction_dict[p1]['interactions'].append(p2)
        interaction_dict[p2]['interactions'].append(p1)
        p1_dict = interaction_dict[p1]
        p2_dict = interaction_dict[p2]

        # play
        answers = []
        for player in [p1_dict, p2_dict]:
            # check if committed. If True, play committed answer.
            # .get: players are not guaranteed to carry the tag
            # (set_initial_state also treats a missing tag as not committed).
            if player.get('committed_tag', False):
                answers.append(dataframe['convergence']['committed_to'])
            else:
                # get prompt with rules & history of play
                prompt = get_prompt(player, memory_size=memory_size, rules = rules)
                # get agent response
                answers.append(get_response(prompt, options=new_options))

        my_answer, partner_answer = answers

        # calculate outcome and update dictionary
        outcome = get_outcome(my_answer, partner_answer, rewards)
        interaction_dict[p1] = update_dict(p1_dict, my_answer, partner_answer, outcome)
        interaction_dict[p2] = update_dict(p2_dict, partner_answer, my_answer, outcome)
        update_tracker(tracker, p1, p2, my_answer, partner_answer, outcome)

        if len(tracker['outcome']) % 20 == 0:
            print(fname)
            print(f"COMMITTED RUN {run} -- INTERACTION {len(tracker['outcome'])}")
            # interaction_dict and tracker are the objects stored in
            # dataframe, so dataframe is already up to date here.
            with open(fname, 'wb') as f:
                pickle.dump(dataframe, f)

    dataframe['simulation'] = interaction_dict
    dataframe['tracker'] = tracker



Running the simulation

In [None]:
def load_mainframe(fname):
    """Load a pickled results dict from `fname`, or start an empty one.

    Returns:
        The unpickled object, or a new empty dict when the file is missing,
        unreadable, empty or truncated.
    """
    try:
        # NOTE: unpickling can execute arbitrary code; only load files that
        # this notebook wrote itself.
        with open(fname, 'rb') as f:
            return pickle.load(f)
    except (OSError, EOFError, pickle.UnpicklingError):
        # Named exceptions instead of a bare except, so Ctrl-C and genuine
        # programming errors are no longer silently turned into "no data".
        print('CREATING EMPTY MAINFRAME')
        return dict()

def set_initial_state(network_dict, rewards, options, memory_size):
    """Pre-fill every non-committed player's history with `memory_size` rounds.

    What is written depends on the module-level `initial`:
      * 'random': each seeded round pairs two independent random draws; the
        payoff is whatever get_outcome gives for that pair.
      * an int: every seeded round is options[initial] against
        options[initial], with payoff rewards[1].
      * anything else (e.g. 'None'): nothing is written.

    Players whose 'committed_tag' is set are skipped; a missing tag counts as
    not committed. `network_dict` is modified in place.
    """
    if initial == 'random':
        seed_randomly = True
    elif type(initial) == int:
        seed_randomly = False
    else:
        return

    for m in range(memory_size):
        for p in network_dict.keys():
            if network_dict[p].get('committed_tag', False):
                continue
            if seed_randomly:
                # NOTE(review): only the first two entries of `options` are
                # ever drawn (the original indexed with a hard-coded 0/1) --
                # confirm whether longer option lists should be sampled fully.
                my_choice = random.choice(options[:2])
                partner_choice = random.choice(options[:2])
                update_dict(network_dict[p], my_choice, partner_choice, get_outcome(my_answer=my_choice, partner_answer=partner_choice, rewards = rewards))
            else:
                update_dict(network_dict[p], options[initial], options[initial], rewards[1])

def get_empty_dataframe(fname, minority_size = 0):
    """Load the run checkpoint at `fname`, or build a fresh, unplayed dataframe.

    Args:
        fname: path of the checkpoint pickle.
        minority_size: forwarded to get_interaction_network when a new network
            has to be built (it was previously accepted but ignored).

    Returns:
        dict with 'simulation' (the interaction network) and 'tracker'.
    """
    try:
        # NOTE: unpickling can execute arbitrary code; only load files that
        # this notebook wrote itself.
        with open(fname, 'rb') as f:
            dataframe = pickle.load(f)
    except (OSError, EOFError, pickle.UnpicklingError):
        # no usable checkpoint: start from an empty network
        dataframe = {'simulation': get_interaction_network(network_type = network_type, minority_size=minority_size), 'tracker': {'players': [], 'answers': [], 'outcome': []}}
    print("My history: ", dataframe['simulation'][1]['my_history'])
    return dataframe

def test_if_initialisation_worked(dataframe, memory_size, options):
    """Check that every player's first `memory_size` answers were seeded as expected.

    When the module-level `initial` is an int, a player passes if all of those
    answers equal options[initial]. For any other `initial` (e.g. 'random',
    where indexing `options` with it would fail) a player passes if exactly
    `memory_size` answers are present and each one is a member of `options`.

    Prints the number of passing players.

    Returns:
        True when all N players pass.

    Raises:
        ValueError: when fewer than N players pass.
    """
    counter = 0
    for player in dataframe['simulation'].keys():
        my_ans = dataframe['simulation'][player]['my_history'][:memory_size]
        if type(initial) == int:  # same test set_initial_state uses
            seeded = my_ans.count(options[initial]) == memory_size
        else:
            seeded = len(my_ans) == memory_size and all(ans in options for ans in my_ans)
        if seeded:
            counter +=1
    print(counter)
    if counter == N:
        return True
    raise ValueError("prepared initialisation failed")
    
def get_prepared_dataframe(fname, rewards, options, minority_size, memory_size):
    """Load the checkpoint at `fname`, or build a dataframe with seeded histories.

    A loaded checkpoint is returned untouched. Otherwise a new network is
    created, seeded by set_initial_state, given a 'convergence' entry and
    validated with test_if_initialisation_worked.

    Returns:
        dict with 'simulation', 'tracker' and (for a new dataframe)
        'convergence'.

    Raises:
        ValueError: propagated from test_if_initialisation_worked when the
            seeding did not take effect.
    """
    try:
        # NOTE: unpickling can execute arbitrary code; only load files that
        # this notebook wrote itself.
        with open(fname, 'rb') as f:
            return pickle.load(f)
    except (OSError, EOFError, pickle.UnpicklingError):
        # no usable checkpoint: build a new network below
        dataframe = {'simulation': get_interaction_network(network_type = network_type, minority_size=minority_size), 'tracker': {'players': [], 'answers': [], 'outcome': []}}
    print("---------- CREATING NEW INITIALISED DATAFRAME ----------")
    set_initial_state(dataframe['simulation'], rewards, options, memory_size)
    # NOTE(review): the committed answer is hard-coded to options[1], while
    # `committment_index` is read from the config and not used here --
    # confirm which one is intended.
    dataframe['convergence'] = {'converged_index': 0, 'committed_to': options[1]}

    #print(dataframe[0]['simulation'][1]['my_history'])
    test_if_initialisation_worked(dataframe, memory_size, options)

    return dataframe

def get_random_initialisation(fname, rewards, options, minority_size, memory_size):
    """Load the checkpoint at `fname`, or build a dataframe seeded by set_initial_state.

    A loaded checkpoint is returned untouched. A newly built dataframe gets a
    'convergence' entry with no committed answer and is returned as well
    (previously the new dataframe was built but never returned).

    Returns:
        dict with 'simulation', 'tracker' and (for a new dataframe)
        'convergence'.
    """
    try:
        # NOTE: unpickling can execute arbitrary code; only load files that
        # this notebook wrote itself.
        with open(fname, 'rb') as f:
            return pickle.load(f)
    except (OSError, EOFError, pickle.UnpicklingError):
        # no usable checkpoint: build a new network below
        dataframe = {'simulation': get_interaction_network(network_type = network_type, minority_size=minority_size), 'tracker': {'players': [], 'answers': [], 'outcome': []}}

    set_initial_state(dataframe['simulation'], rewards, options, memory_size)
    dataframe['convergence'] = {'converged_index': 0, 'committed_to': None}
    return dataframe


In [None]:
def swap_committed(df, minority_size):
    """Turn `minority_size` randomly sampled existing players into committed agents.

    Sets 'committed_tag' to True on the sampled players, in place, and returns
    `df`. Nothing is changed unless minority_size is greater than 0.
    """
    if not minority_size > 0:
        return df

    population = df['simulation']
    for agent_id in random.sample(list(population.keys()), k = minority_size):
        population[agent_id]['committed_tag'] = True
    return df
    
def add_committed(df, minority_size):
    """Append `minority_size` new committed agents and fully connect the population.

    New agents get ids N+1 .. N+minority_size, empty histories and
    'committed_tag' True. Every player with an id in 1 .. N+minority_size then
    has its 'neighbours' replaced by the list of all other ids in that range.
    Modifies and returns `df`.
    """
    population = df['simulation']
    all_ids = list(range(1, N + minority_size + 1))
    for agent_id in all_ids:
        if agent_id > N:
            # ids above the original population size are the injected agents
            population[agent_id] = {'my_history': [], 'partner_history': [], 'interactions': [], 'score': 0, 'score_history': [], 'outcome': [], 'committed_tag': True, 'neighbours': []}
        population[agent_id]['neighbours'] = [other for other in all_ids if other != agent_id]
    return df

In [None]:
# Baseline runs (no committed minority). For every parameter combination with
# cm == 0, run `runs` simulations and collect them in one results pickle.
# Runs that are already in that file are skipped; an interrupted run resumes
# from its "temporary_" checkpoint.
for rewards in rewards_set:
    for memory_size in memory_size_set:
        for cm in minority_size_set:
            for options in options_set:
                if cm == 0:
                    print("""
                          ##########################
                          ##########################
                          ---- CONVERGENCE----------
                          ##########################
                          ##########################
                          """)

                    # first, we load a baseline model
                    # load a converged baseline
                    if initial == 'None':
                        mainfname = f"llama31_70b_converged_baseline_{'_'.join(options)}_{rewards[0]}_{rewards[1]}_{memory_size}mem_{config.network.network_type}_{N}ps_{temperature}tmp.pkl"
                    else:
                        mainfname = f"llama31_70b_evolved_from_{initial}_{'_'.join(options)}_{rewards[0]}_{rewards[1]}_{memory_size}mem_{config.network.network_type}_{N}ps_{total_interactions}ints_{temperature}tmp.pkl"
                    print(mainfname)
                    if config.sim.stochastic == False:
                        print("---------- NON STOCHASTIC SIMULATION ----------")
                        mainfname = "ns_"+mainfname
                        print(mainfname)
                    mainframe = load_mainframe(mainfname)
                    mainframe['rules'] = get_rules(rewards, options = options)

                    # run until sim converges
                    for run in range(runs):
                        temp_fname = "temporary_" + mainfname
                        # Skip runs that are already stored. The -1 discounts
                        # the 'rules' entry, which is not a run.
                        if len(mainframe.keys())-1 > run:
                            continue

                        if initial == 'None':
                            print("---------- BASELINE CONVERGENCE ----------")
                            df = get_empty_dataframe(fname=temp_fname)
                            converge(dataframe=df, run=run, memory_size=memory_size, rewards=rewards, options=options, fname=temp_fname)
                        else:
                            df = get_prepared_dataframe(fname=temp_fname, rewards=rewards, options=options, minority_size=cm, memory_size=memory_size)
                            print("---------- CONTINUING EVOLUTION ----------")
                            print(f"--- STARTING RUN {run} ---")
                            simulate_CM(dataframe=df, run=run, memory_size=memory_size, rewards=rewards, options=options, fname=temp_fname, total_interactions=total_interactions)
                        print(run)

                        # save in main dataframe
                        mainframe[run] = df
                        with open(mainfname, 'wb') as f:
                            pickle.dump(mainframe, f)

                        # delete temporary file
                        file_to_rem = Path(temp_fname)
                        file_to_rem.unlink(missing_ok=True)

# Committed-minority runs. For every parameter combination with cm > 0, start
# from a prepared (seeded) population, turn `cm` agents into committed agents
# ('swap') or add `cm` committed agents ('inject'), and simulate. Finished runs
# are collected in one results pickle per combination; an interrupted run
# resumes from its "temporary_" checkpoint.
for rewards in rewards_set:
    for memory_size in memory_size_set:
        for run in range(runs):
            for cm in minority_size_set:
                for options in options_set:
                    if cm > 0:
                        if initial != 'None':
                            mainframe = get_prepared_dataframe(fname='.pkl', rewards=rewards, options=options, minority_size=0, memory_size=memory_size)
                        else:
                            raise ValueError("baseline does not exist")

                        cmfname = f"llama31_70b_{version}_{initial}_{cm}cmtd_{'_'.join(options)}_{rewards[0]}_{rewards[1]}_{memory_size}mem_{config.network.network_type}_{N}ps_{temperature}tmp.pkl"
                        print(cmfname)
                        cmframe = load_mainframe(fname=cmfname)
                        temp_fname = "temporary_" + cmfname
                        print("cmframe keys:", cmframe.keys())
                        # check if we already simulated this run
                        if len(cmframe.keys()) > run:
                            df = cmframe[run]
                        # if not, use old dataframe to run convergence.
                        else:
                            # load temporary dataframe
                            df = load_mainframe(fname = temp_fname)

                            # An empty dict means there is no checkpoint for
                            # this run yet, so start from the prepared population.
                            if len(df.keys()) == 0:
                                print(f'----------STARTING RUN {run} FROM SCRATCH----------')
                                df = mainframe

                                # add committed agents to baseline dataframe
                                if version == 'swap':
                                    print("---------- SWAPPING COMMITTED AGENTS ----------")
                                    df = swap_committed(df, cm)

                                if version == 'inject':
                                    print("---------- ADDING COMMITTED AGENTS ----------")
                                    df = add_committed(df, cm)

                            print(f"Run: {run}")
                            print(f"Initial population: {N}")
                            print(f"There are {len(df['simulation'].keys())} players in the game")
                            print(f"minority size: {cm}")
                            word =  df['convergence']['committed_to']
                            print(f'committment word is: {word}')
                            # .get: players that were never tagged count as not committed
                            committed_agent_ids = [player for player in df['simulation'].keys() if df['simulation'][player].get('committed_tag', False)]
                            print(f"There are {len(committed_agent_ids)} committed agents: {committed_agent_ids}")
                            # run committed minorities
                            print("---------- RUNNING COMMITTED AGENTS ----------")
                            simulate_CM(dataframe=df, run=run, memory_size=memory_size, rewards=rewards, options=options, fname=temp_fname, total_interactions=total_interactions)

                            cmframe[run] = df
                            # save in main dataframe
                            with open(cmfname, 'wb') as f:
                                pickle.dump(cmframe, f)

                            # delete temporary file
                            file_to_rem = Path(temp_fname)
                            file_to_rem.unlink(missing_ok=True)