<a href="https://colab.research.google.com/github/yarakyrychenko/llm-abm/blob/main/LLM_ABM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Before running, change the Colab runtime type to GPU (T4): the model-loading cell below offloads the LLM's layers to the GPU.

# Imports

In [None]:
!pip install Mesa
!pip install convokit
!pip install networkx[default]
!pip install sentence-transformers
!pip install seaborn
!pip install ctransformers[cuda]
!pip install --upgrade git+https://github.com/huggingface/transformers

Collecting Mesa
  Downloading Mesa-2.1.5-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
Collecting cookiecutter (from Mesa)
  Downloading cookiecutter-2.5.0-py3-none-any.whl (39 kB)
Collecting mesa-viz-tornado>=0.1.3,~=0.1.0 (from Mesa)
  Downloading Mesa_Viz_Tornado-0.1.3-py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
Collecting solara (from Mesa)
  Downloading solara-1.25.0-py2.py3-none-any.whl (15.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.6/15.6 MB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
Collecting binaryornot>=0.4.4 (from cookiecutter->Mesa)
  Downloading binaryornot-0.4.4-py2.py3-none-any.whl (9.0 kB)
Collecting arrow (from cookiecutter->Mesa)
  Downloading arrow-1.3.0-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [3

In [None]:
import mesa, convokit
import networkx as nx
import numpy as np, pandas as pd
from convokit import Corpus
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_theme()
import warnings
warnings.filterwarnings("ignore")

# ABM Set-Up

In [None]:
class LLMAgent(mesa.Agent):
    """
    LLM-driven social-media agent that reads its home feed and writes posts/replies.

    Each agent owns a persona (``self_description``), a private ConvoKit corpus
    acting as its home feed, and a history of persona descriptions with their
    sentence embeddings. Text is produced by the generation pipelines held on
    the model (``model.generator_short`` / ``model.generator_long``).

    Resharing and liking are not implemented yet.

    TO IMPLEMENT
        # Unidirectional Following networks
        # Rules for following other agents
        # Reactions metrics
        # Updating descriptions
        # OnlineCorpus class for Convokit Corpus convenience online updating
    """

    def __init__(self, self_description, model):
        """
        Args:
            self_description: dict with keys ``"handle"`` (used as the Mesa
                unique id and the ConvoKit speaker id) and ``"description"``
                (the persona text inserted into prompts).
            model: the owning model; must already expose ``agent_descriptions``,
                ``num_posts``, ``topics``, ``sentence_model`` and ``random``.
        """
        super().__init__(self_description["handle"], model)
        self.handle = self_description["handle"]
        self.self_description = self_description["description"]

        # Seed the home feed with one placeholder utterance per agent (text "-1",
        # timestamp -num_posts) so every speaker exists in the corpus up front.
        n_agents = len(self.model.agent_descriptions)
        self.homefeed = Corpus.from_pandas(pd.DataFrame({'id': range(n_agents),
                                'timestamp': [-self.model.num_posts] * n_agents,
                                'text': ["-1"] * n_agents,
                                'speaker': [agent["handle"] for agent in self.model.agent_descriptions],
                                'reply_to': [0] * n_agents,
                                'conversation_id': [0] * n_agents,
                                "meta.emb": [0] * n_agents,
                                "meta.likes": [0] * n_agents,
                                "meta.retweets": [0] * n_agents}))

        self.summaries = []
        self.descriptions = [{"description": self.self_description,
                              "emb": self.model.sentence_model.encode(self.self_description, show_progress_bar=False)}]

        self.current_topic = self.random.choice(self.model.topics)
        # Flips to True (and stays True) once the home feed holds at least
        # num_posts real posts on top of the placeholder utterances; see step().
        self.enough_posts = False

    @staticmethod
    def _tweet_text(tweet):
        """Return the text of a tweet given either a dict (``["text"]``) or an object with ``.text``."""
        return tweet["text"] if isinstance(tweet, dict) else tweet.text

    def generate_post(self, prompt, reply_to=None):
        """
        Generate a post from ``prompt``, add it to the model corpus and return it.

        Args:
            prompt: fully substituted prompt string; it is stripped from the
                pipeline's ``generated_text`` to obtain the post itself.
            reply_to: ``None`` for a new conversation, otherwise a dict with
                ``"id"`` (id of the utterance being answered) and
                ``"conversation_id"``.

        Returns:
            The new ``convokit.Utterance``.
        """
        generated = self.model.generator_short(prompt)[0]["generated_text"]
        post = generated.replace(prompt, "")
        emb = self.model.sentence_model.encode(post, show_progress_bar=False)
        # Random numeric id; a fresh post also uses it as its conversation id.
        post_id = str(self.random.integers(low=0, high=100000000, size=1)[0])
        is_reply = reply_to is not None

        full_post = convokit.Utterance(
            id=post_id,
            timestamp=self.model.schedule.steps,
            text=post,
            speaker=self.model.corpus.get_speaker(self.handle),
            reply_to=reply_to["id"] if is_reply else None,
            conversation_id=reply_to["conversation_id"] if is_reply else post_id,
            meta={"emb": emb, "likes": 0, "retweets": 0},
        )

        self.model.corpus.add_utterances([full_post])
        return full_post

    def push_to_followers(self, post):
        """
        Deliver ``post`` to home feeds.

        The follower network is not implemented yet, so the post is added to
        the home feed of every scheduled agent (including the author).
        """
        for follower in self.model.schedule.agents:
            follower.homefeed.add_utterances([post])

    def get_tweet_opinion(self, tweet):
        """Ask the short generator for this agent's opinion of ``tweet`` and return the generated text."""
        # need to redo
        prompt = self.model.prompts["opinion_prompt"]
        prompt = prompt.replace("[DESCRIPTION]", self.self_description)
        prompt = prompt.replace("[TWEET]", self._tweet_text(tweet))
        # The pipelines are attributes of the model, not of the agent.
        opinion = self.model.generator_short(prompt)[0]["generated_text"].replace(prompt, "")
        return opinion

    def summarize_tweets(self, tweets_opinions):
        """
        Summarize tweets and the agent's opinions of them with the long generator.

        Args:
            tweets_opinions: iterable of ``(tweet_text, opinion_text)`` pairs.

        Returns:
            The generated summary string.
        """
        # need to redo
        prompt = self.model.prompts["summarize_prompt"]
        prompt = prompt.replace("[DESCRIPTION]", self.self_description)
        to_replace = "".join(
            f"Tweet: {tweet}\nOpinion: {opinion}\n" for tweet, opinion in tweets_opinions
        )
        prompt = prompt.replace("[TWEETS]", to_replace)
        summary = self.model.generator_long(prompt)[0]["generated_text"].replace(prompt, "")
        return summary

    def model_update_description(self, summary):
        """Rewrite ``self.self_description`` with the long generator, conditioned on ``summary``."""
        # need to redo
        prompt = self.model.prompts["upd_desc_prompt"]
        prompt = prompt.replace("[DESCRIPTION]", self.self_description)
        prompt = prompt.replace("[SUMMARY]", summary)
        self.self_description = self.model.generator_long(prompt)[0]["generated_text"].replace(prompt, "")

    def update_description(self):
        """
        Update the persona from the most recent ``num_posts`` home-feed posts.

        Does nothing until ``enough_posts`` is set. Otherwise forms an opinion
        on each recent post, summarizes them, rewrites the description, and
        appends to ``self.summaries`` and ``self.descriptions``.
        """
        # need to redo
        if not self.enough_posts:
            return

        # The feed is read through iter_utterances() (as in step()) rather than
        # by indexing the corpus; ordering by timestamp makes "most recent"
        # explicit. The sort is stable, so ties keep their iteration order.
        feed = sorted(self.homefeed.iter_utterances(), key=lambda utt: utt.timestamp)
        recent = feed[max(0, len(feed) - self.model.num_posts):]

        # Opinions are kept local to this call instead of being written onto the
        # utterances. NOTE(review): utterance objects may be shared with other
        # agents' feeds, which would mix opinions - confirm before storing there.
        tweets_opinions = [(utt.text, self.get_tweet_opinion(utt)) for utt in recent]

        summary = self.summarize_tweets(tweets_opinions)
        self.model_update_description(summary)
        self.summaries.append(summary)
        self.descriptions.append({"description": self.self_description,
                                  "emb": self.model.sentence_model.encode(self.self_description, show_progress_bar=False)})

    def _write_first_post(self):
        """Generate a fresh post about ``self.current_topic`` (used before the feed has enough posts)."""
        prompt = self.model.prompts["first_prompt"].replace("[DESCRIPTION]", self.self_description)
        prompt = prompt.replace("[TOPIC]", self.current_topic)
        return self.generate_post(prompt)

    def _write_reply(self):
        """
        Reply to a randomly chosen recently active conversation from the home feed.

        Returns the new utterance, or ``None`` when no conversation has an
        utterance newer than ``steps - num_posts``.
        """
        prompt = self.model.prompts["tweet_prompt"].replace("[DESCRIPTION]", self.self_description)
        horizon = self.model.schedule.steps - self.model.num_posts

        def is_recent(convo):
            return any(self.homefeed.get_utterance(utt_id).timestamp > horizon
                       for utt_id in convo._utterance_ids)

        candidate_convos = self.homefeed.get_conversation_ids(is_recent)
        if len(candidate_convos) == 0:
            # Nothing recent to answer; random.choice would raise on an empty list.
            return None

        convo_id = self.model.random.choice(candidate_convos)
        thread = self.homefeed.get_conversation(convo_id).get_chronological_utterance_list()[-self.model.num_posts:]

        thread_text = ""
        for position, utt in enumerate(thread):
            verb = "wrote" if position == 0 else "replied"
            thread_text += f"@{utt.speaker.id} {verb} {utt.text}\n"
        prompt = prompt.replace("[TWEET]", thread_text)

        # reply_to must carry the *id* of the last utterance, not the object itself.
        return self.generate_post(prompt, reply_to={"conversation_id": convo_id, "id": thread[-1].id})

    def step(self):
        """
        One agent activation.

        Draws ``p`` from the model RNG. If ``p <= model.P1`` the agent posts: a
        reply when its home feed already has enough posts, otherwise a first
        post on its topic. The new post is pushed to all feeds and printed.
        For ``p > model.P1`` nothing happens yet.
        """
        p = self.model.random.random()

        if not self.enough_posts:
            n_in_feed = sum(1 for _ in self.homefeed.iter_utterances())
            self.enough_posts = self.model.num_posts + len(self.model.agent_descriptions) <= n_in_feed

        if p > self.model.P1:
            # TODO: for P1 < p <= P2, reshare recent home-feed posts whose embedding is
            # similar (cosine similarity > .3) to the agent's opinion, with probability
            # model.retoot_prob, then push them to followers.
            return

        post = self._write_reply() if self.enough_posts else self._write_first_post()
        if post is None:
            return

        self.push_to_followers(post)
        print("TWEET")
        print(post.speaker.id + " " + post.text)
        print()


In [None]:
from ctransformers import AutoModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the quantised GGUF build of Zephyr-7B-alpha (file zephyr-7b-alpha.Q4_K_M.gguf) via ctransformers.
# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
# NOTE(review): hf=True presumably wraps the model so it can be passed to a transformers
# `pipeline` (as done in the next cell) - confirm against the ctransformers docs.
# NOTE: the module-level name `model` here is the language model, not the Mesa model.
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/zephyr-7B-alpha-GGUF",
    model_file="zephyr-7b-alpha.Q4_K_M.gguf",
    model_type="mistral",
    gpu_layers=128,
    hf=True
)
# The tokenizer is loaded from the original (non-GGUF) Zephyr repository.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-alpha")

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

zephyr-7b-alpha.Q4_K_M.gguf:   0%|          | 0.00/4.37G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

In [None]:
def _make_generator(max_new_tokens):
    """
    Build a text-generation pipeline on the shared `model`/`tokenizer`.

    Both generators use identical decoding settings and differ only in the
    number of new tokens they may produce.

    `do_sample=True` is set explicitly: `temperature` is only used in
    sampling-based decoding, so without it generation is greedy and the same
    prompt always yields the same text.
    NOTE(review): sampling presumably draws from the generation backend's own
    RNG, which the ABM `seed` parameter (a NumPy generator) does not control -
    seed that backend separately if reproducible runs are needed.
    """
    return pipeline(
        model=model, tokenizer=tokenizer,
        task='text-generation',
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.1,
        do_sample=True,
        temperature=0.8,
    )

# Pipeline for tweet generation (also used for per-tweet opinions)
generator_short = _make_generator(max_new_tokens=30)

# Pipeline for longer outputs: feed summaries and description rewrites
generator_long = _make_generator(max_new_tokens=90)

# Sentence encoder used to embed posts and agent descriptions
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [None]:
def compute_mean_model_similarity(model):
    """
    Mean cosine similarity of each agent's current description to all agents.

    Args:
        model: a model whose ``schedule.agents`` each expose ``descriptions``,
            a list of dicts with an ``"emb"`` embedding; the latest entry is used.

    Returns:
        1-D array with one value per agent: the mean of that agent's column in
        the pairwise cosine-similarity matrix (self-similarity included).
    """
    # The original body passed an undefined name (`agent_opinions`) to
    # cosine_similarity; the embeddings collected here are what it needs.
    agent_embeddings = np.array([agent.descriptions[-1]["emb"] for agent in model.schedule.agents])
    return np.mean(cosine_similarity(agent_embeddings), axis=0)

def compute_mean_agent_similarity(agent):
    """
    Average cosine similarity between one agent's latest description embedding
    and the latest description embeddings of every scheduled agent.

    The agent itself is part of ``agent.model.schedule.agents``, so its
    self-similarity contributes to the mean.
    """
    population = np.array(
        [peer.descriptions[-1]["emb"] for peer in agent.model.schedule.agents]
    )
    own_embedding = np.array([agent.descriptions[-1]["emb"]])
    return np.mean(cosine_similarity(own_embedding, population))

In [None]:
class LLMModel(mesa.Model):
    """
    Agent-based model of LLM personas posting to a shared social-media corpus.

    Inputs are plain strings so they can be passed as single parameter values:
    ``agent_descriptions`` has one ``handle::description`` entry per line,
    ``topics`` one topic per line, and ``llmprompts`` holds five prompt
    templates separated by ``:::`` in the order tweet, first, opinion,
    summarize, update-description.
    """

    # Order in which the ":::"-separated sections of `llmprompts` are read.
    _PROMPT_KEYS = ("tweet_prompt", "first_prompt", "opinion_prompt",
                    "summarize_prompt", "upd_desc_prompt")

    def __init__(self, agent_descriptions, llmprompts, topics,
                 update_descriptions = False,
                 generator_short=generator_short,
                 generator_long=generator_long,
                 sentence_model=sentence_model,
                 num_posts = 5, P_tweet=.5, P_update=1, n_followers=1, seed=None,):
        """
        Args:
            agent_descriptions: newline-separated ``handle::description`` entries.
            llmprompts: five prompt templates joined by ``:::``.
            topics: newline-separated topics for agents' first posts.
            update_descriptions: ``False`` to never update personas, otherwise
                the update interval in steps.
            generator_short, generator_long: text-generation callables returning
                ``[{"generated_text": ...}]``.
            sentence_model: encoder exposing ``encode``.
            num_posts: number of posts users see in their timelines.
            P_tweet: an agent posts when its random draw is ``<= P_tweet``.
            P_update: upper threshold of the (not yet implemented) reshare branch.
            n_followers: currently unused; reserved for the follower network.
            seed: seed for the model's NumPy random generator.
        """
        super().__init__()

        # Split each entry on the first "::" only, so a description may itself
        # contain "::"; blank lines (e.g. a trailing newline) are ignored.
        self.agent_descriptions = []
        for entry in agent_descriptions.split("\n"):
            if not entry.strip():
                continue
            parts = entry.split("::", 1)
            self.agent_descriptions.append({"handle": parts[0], "description": parts[1]})

        self.topics = [topic for topic in topics.split("\n") if topic.strip()]

        prompt_sections = llmprompts.split(":::")
        self.prompts = {key: prompt_sections[i] for i, key in enumerate(self._PROMPT_KEYS)}

        self.num_posts = num_posts # number of posts users see in their timelines (both local and homefeed)

        # Seed the shared corpus with one placeholder utterance per agent so
        # that every speaker can be looked up with corpus.get_speaker().
        n_agents = len(self.agent_descriptions)
        self.corpus = Corpus.from_pandas(pd.DataFrame({'id': range(n_agents),
                                'timestamp': [-self.num_posts] * n_agents,
                                'text': ["-1"] * n_agents,
                                'speaker': [agent["handle"] for agent in self.agent_descriptions],
                                'reply_to': [0] * n_agents,
                                'conversation_id': [0] * n_agents,
                                "meta.emb": [0] * n_agents,
                                "meta.likes": [0] * n_agents,
                                "meta.retweets": [0] * n_agents}))
        self.update_descriptions = update_descriptions

        self.P1 = P_tweet # probability of posting by a citizen agent
        self.P2 = P_update # upper threshold for the (not yet implemented) reshare branch

        self.retoot_prob = .05

        self.generator_short = generator_short
        self.generator_long = generator_long
        self.sentence_model = sentence_model

        # TODO: follower network. The intended set-up was a Barabasi-Albert or
        # scale-free graph over the agents wrapped in mesa.space.NetworkGrid,
        # with one agent per node; until then `n_followers` is unused and
        # every post reaches every agent.

        self.schedule = mesa.time.RandomActivation(self)
        # The agents call NumPy Generator methods (integers, choice) on this RNG,
        # so it must be set before any agent is created.
        self.random = np.random.default_rng(seed)
        self.running = True

        # Initializing all agents and bots
        for description in self.agent_descriptions:
            self.schedule.add(LLMAgent(self_description=description, model=self))

        # Logging in the data
        # TODO: add model-level reporters and a per-agent "mean_agent_sim"
        # reporter (compute_mean_agent_similarity).
        self.datacollector = mesa.DataCollector(
            agent_reporters={
                            "handle": "handle",
                            "summaries": "summaries",
                            "descriptions": "descriptions",
                            }
        )

    def step(self):
        """
        Advance the model by one step.

        Collects data, activates all agents, prints corpus statistics every 10
        steps and, when ``update_descriptions`` is set, lets every agent update
        its description every ``update_descriptions`` steps.
        """
        self.datacollector.collect(self)
        self.schedule.step()
        if self.schedule.steps % 10 == 0:
            self.corpus.print_summary_stats()

        if self.update_descriptions and self.schedule.steps % self.update_descriptions == 0:
            print(f"Step {self.schedule.steps}: Updating Agent Descriptions")
            # Iterate this model's agents; the global name `model` refers to the
            # language model, not to this object.
            for agent in self.schedule.agents:
                agent.update_description()


# Run ABM

In [None]:
# Agent personas, one per line in the form `handle::description`.
# LLMModel splits this string on newlines and then on "::"; the description
# replaces the [DESCRIPTION] placeholder in the prompts.
agent_descriptions = """Lisa::You are Lisa, a staunch Republican voter from Texas, residing in a conservative town. With a deep belief in limited government, traditional values, and free-market principles, you actively engage in local politics. You often listens to conservative talk radio and values candidates who prioritize national security and individual liberties.
Alex::You are Alex, a passionate Democrat voter in your early 40s. Living in New York City, you advocate for social justice, environmental issues, and healthcare reform. Engaged in grassroots activism, you attend protests and support candidates promoting equality, affordable education, and progressive policies. You value inclusivity and government intervention for societal betterment.
David::You are David, a dedicated Republican voter in Ohio. You are a middle-aged manufacturing professional residing in a suburban neighborhood. You emphasize traditional family values, Second Amendment rights, and economic conservatism. You appreciate candidates who prioritize job growth, support the energy industry, and advocate for a strong national defense."""

In [None]:
# Five prompt templates separated by ":::"; LLMModel reads them in this order:
#   1. tweet_prompt      - reply to a thread     ([DESCRIPTION], [TWEET])
#   2. first_prompt      - first post on a topic ([DESCRIPTION], [TOPIC])
#   3. opinion_prompt    - opinion about a tweet ([DESCRIPTION], [TWEET])
#   4. summarize_prompt  - summarize tweets and opinions ([DESCRIPTION], [TWEETS])
#   5. upd_desc_prompt   - rewrite the description ([DESCRIPTION], [SUMMARY])
# Bracketed placeholders are filled in with str.replace by the agents.
llmprompts = """<|system|>
[DESCRIPTION]</s>
<|user|>
You just read this tweet: "[TWEET]"
Write a tweet replying to the tweet above.</s>
<|assistant|>:::<|system|>
[DESCRIPTION]</s>
<|user|>
Write a tweet about [TOPIC].</s>
<|assistant|>:::<|system|>
[DESCRIPTION]</s>
<|user|>
You just read this tweet: "[TWEET]"
What is your opinion about this tweet?</s>
<|assistant|>:::<|system|>
You are an impartial assistant helping to summarize tweets and user opinions about them. </s>
<|user|>
A character is described as: "[DESCRIPTION]".
The character reads these tweets made by someone else and has the following opinions about them:
"[TWEETS]"
Please summarize the tweets and opinions of the character.</s>
<|assistant|>:::<|system|>
You are an impartial assistant helping to update character descriptions. </s>
<|user|>
A character is described as: "[DESCRIPTION]".
The character reads tweets made by someone else and has these opinions about them summariezed as: "[SUMMARY]"
Rewrite the character's description to reflect their updated beliefs without changing their life circumstances too much in response to the tweets. Return the description and nothing else.</s>
<|assistant|>"""


In [None]:
# Candidate topics, one per line; each agent picks one at random for its
# first post (it replaces the [TOPIC] placeholder in first_prompt).
topics = """Democrats
Republicans
Trump
Putin
Biden"""

In [None]:
# Batch-run settings: number of repetitions and steps per run.
n_iters = 1
max_steps = 10

# Keyword arguments forwarded to LLMModel for every run.
params = dict(
    agent_descriptions=agent_descriptions,
    llmprompts=llmprompts,
    topics=topics,
    update_descriptions=False,
    n_followers=2,
    num_posts=5,
    P_tweet=.8,
    P_update=1,
    seed=123,
)

# Run the model in a single process, collecting data at every step.
results = mesa.batch_run(
    LLMModel,
    parameters=params,
    iterations=n_iters,
    max_steps=max_steps,
    number_processes=1,
    data_collection_period=1,
    display_progress=True,
)

# Tabular view of the collected records.
results_df = pd.DataFrame(results)

  0%|          | 0/1 [00:00<?, ?it/s]


3it [00:00, 2847.46it/s]

3it [00:00, 3858.61it/s]

3it [00:00, 3561.54it/s]

3it [00:00, 3760.58it/s]


TWEET
Alex 
"Putin's authoritarian regime continues to suppress dissent, violate human rights, and undermine democracy. The world must hold him

TWEET
Lisa 
"Democrats may claim to care about the working class, but their policies of high taxes, excessive regulation, and socialist programs only hurt

TWEET
David 
"Proud to stand with @realDonaldTrump! His unwavering commitment to protecting our Second Amendment rights, promoting economic growth

TWEET
Lisa 
"Democrats may claim to care about the working class, but their policies of high taxes, excessive regulation, and socialist programs only hurt

TWEET
Lisa 
"Democrats may claim to care about the working class, but their policies of high taxes, excessive regulation, and socialist programs only hurt

TWEET
David 
"As a dedicated Republican voter, I couldn't agree more with @Lisa's statement. Democrats' policies harm our economy and

TWEET
Alex 
@Alex "Absolutely! We cannot stand idly by as Putin's regime tramples on basic human rights 

In [None]:
# Display the raw list of record dicts returned by mesa.batch_run
# (the same data is available as a DataFrame in `results_df`).
results

[{'RunId': 0,
  'iteration': 0,
  'Step': 0,
  'agent_descriptions': 'Lisa::You are Lisa, a staunch Republican voter from Texas, residing in a conservative town. With a deep belief in limited government, traditional values, and free-market principles, you actively engage in local politics. You often listens to conservative talk radio and values candidates who prioritize national security and individual liberties.\nAlex::You are Alex, a passionate Democrat voter in your early 40s. Living in New York City, you advocate for social justice, environmental issues, and healthcare reform. Engaged in grassroots activism, you attend protests and support candidates promoting equality, affordable education, and progressive policies. You value inclusivity and government intervention for societal betterment.\nDavid::You are David, a dedicated Republican voter in Ohio. You are a middle-aged manufacturing professional residing in a suburban neighborhood. You emphasize traditional family values, Second