<a href="https://colab.research.google.com/github/AnDDoanf/LLM-repo/blob/master/multiagent_authoritarian_langchainipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install langchain langchain_community langchain-huggingface accelerate bitsandbytes
!huggingface-cli login --token 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

In [2]:
import functools
import random
from collections import OrderedDict
from typing import Callable, List

import tenacity
from langchain.output_parsers import RegexParser
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage, SystemMessage

from transformers import pipeline, AutoTokenizer
from langchain_huggingface import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import transformers
from torch import cuda, bfloat16
import torch
import gc

In [3]:
# Use a pipeline as a high-level helper

def build_llm(prompt, temperature = 0.2, max_new_tokens = 1000):
    model_name = "meta-llama/Llama-2-7b-chat-hf"
    device = torch.device('cuda')
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    bnb_config = BitsAndBytesConfig(load_in_4bit=True,
                                    bnb_4bit_use_double_quant=True,
                                    bnb_4bit_quant_type="nf4",
                                    bnb_4bit_compute_dtype=bfloat16,
                                    )

    model = AutoModelForCausalLM.from_pretrained(model_name,
                                                 quantization_config=bnb_config,
                                                )
    text_generation_pipeline = pipeline(model=model,
                                        tokenizer=tokenizer,
                                        task="text-generation",
                                        temperature=temperature,
                                        repetition_penalty=1.1,
                                        return_full_text=True,
                                        do_sample=True,
                                        max_new_tokens=max_new_tokens,
                                        )

    llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
    return RunnableSequence(prompt | llm)

In [4]:
def build_model(temperature = 0.2, max_new_tokens = 1000):
    model_name = "meta-llama/Llama-2-7b-chat-hf"
    device = torch.device('cuda')
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    bnb_config = BitsAndBytesConfig(load_in_4bit=True,
                                    bnb_4bit_use_double_quant=True,
                                    bnb_4bit_quant_type="nf4",
                                    bnb_4bit_compute_dtype=bfloat16,
                                    )

    model = AutoModelForCausalLM.from_pretrained(model_name,
                                                 quantization_config=bnb_config,
                                                )
    text_generation_pipeline = pipeline(model=model,
                                        tokenizer=tokenizer,
                                        task="text-generation",
                                        temperature=temperature,
                                        repetition_penalty=1.1,
                                        return_full_text=True,
                                        do_sample=True,
                                        max_new_tokens=max_new_tokens,
                                        )

    llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
    return llm

In [5]:
class DialogueAgent:
  def __init__(self, name: str, system_message: SystemMessage):
    self.name = name
    self.system_message = system_message
    self.model = build_model()
    self.prefix = f"{self.name}:"
    self.reset()
  def reset(self):
    self.message_history = ["Here is the last conversation so far."]

  def send(self):
    message = self.model.invoke(
        [
            self.system_message,
            HumanMessage(content="\n".join(self.message_history + [self.prefix])),
        ]
    )
    gc.collect()
    torch.cuda.empty_cache()
    return message[message.rfind('The episode features'):]

  def receive(self, name: str, message: str):
    self.message_history = ["Here is the last conversation so far."]
    self.message_history.append(f"{name}: {message}")

class DialogueSimulator:
    def __init__(
        self,
        agents: List[DialogueAgent],
        selection_function: Callable[[int, List[DialogueAgent]], int],
    ) -> None:
        self.agents = agents
        self._step = 0
        self.select_next_speaker = selection_function

    def reset(self):
        for agent in self.agents:
            agent.reset()

    def inject(self, name: str, message: str):
        for agent in self.agents:
            agent.receive(name, message)

        # increment time
        self._step += 1

    def step(self) -> tuple[str, str]:
        # 1. choose the next speaker
        speaker_idx = self.select_next_speaker(self._step, self.agents)
        speaker = self.agents[speaker_idx]

        # 2. next speaker sends message
        message = speaker.send()
        message = message[message.rfind("Jon Stewart:"):]

        # 3. everyone receives message
        for receiver in self.agents:
            receiver.receive(speaker.name, message)

        # 4. increment time
        self._step += 1

        return speaker.name, message

In [6]:
class IntegerOutputParser(RegexParser):
    def get_format_instructions(self) -> str:
        return "Your response should be an integer delimited by angled brackets, like this: <int>."


class DirectorDialogueAgent(DialogueAgent):
    def __init__(
        self,
        name,
        system_message: SystemMessage,
        speakers: List[DialogueAgent],
        stopping_probability: float,
    ) -> None:
        super().__init__(name, system_message)
        self.model = build_model()
        self.speakers = speakers
        self.next_speaker = ""

        self.stop = False
        self.stopping_probability = stopping_probability
        self.termination_clause = "Finish the conversation by stating a concluding message and thanking everyone."
        self.continuation_clause = "Do not end the conversation. Keep the conversation going by adding your own ideas."

        # 1. have a prompt for generating a response to the previous speaker
        self.response_prompt_template = PromptTemplate(
            input_variables=["message_history", "termination_clause"],
            template=f"""{{message_history}}

            Follow up with an insightful comment.
            {{termination_clause}}
            {self.prefix}
                    """,
        )

        # 2. have a prompt for deciding who to speak next
        self.choice_parser = IntegerOutputParser(
            regex=r"<(\d+)>", output_keys=["choice"], default_output_key="choice"
        )
        self.choose_next_speaker_prompt_template = PromptTemplate(
            input_variables=["message_history", "speaker_names"],
            template=f"""{{message_history}}

            Given the above conversation, select the next speaker by choosing index next to their name:
            {{speaker_names}}

            {self.choice_parser.get_format_instructions()}

            Do nothing else.
            """,
        )

        # 3. have a prompt for prompting the next speaker to speak
        self.prompt_next_speaker_prompt_template = PromptTemplate(
            input_variables=["message_history", "next_speaker"],
            template=f"""{{message_history}}

            The next speaker is {{next_speaker}}.
            Prompt the next speaker to speak with an insightful question.
            {self.prefix}
            """,
        )

    def _generate_response(self):
        # if self.stop = True, then we will inject the prompt with a termination clause
        sample = random.uniform(0, 1)
        self.stop = sample < self.stopping_probability

        print(f"\tStop? {self.stop}\n")

        print(f"Debug: {self.message_history}")

        response_prompt = self.response_prompt_template.format(
            message_history="\n".join(self.message_history),
            termination_clause=self.termination_clause if self.stop else "",
        )

        self.response = self.model.invoke(
            [
                self.system_message,
                HumanMessage(content=response_prompt),
            ]
        )
        gc.collect()
        torch.cuda.empty_cache()
        return self.response[self.response.rfind('The episode features'):]

    @tenacity.retry(
        stop=tenacity.stop_after_attempt(2),
        wait=tenacity.wait_none(),  # No waiting time between retries
        retry=tenacity.retry_if_exception_type(ValueError),
        before_sleep=lambda retry_state: print(
            f"ValueError occurred: {retry_state.outcome.exception()}, retrying..."
        ),
        retry_error_callback=lambda retry_state: 0,
    )  # Default value when all retries are exhausted
    def _choose_next_speaker(self) -> str:
        speaker_names = "\n".join(
            [f"{idx}: {name}" for idx, name in enumerate(self.speakers)]
        )
        choice_prompt = self.choose_next_speaker_prompt_template.format(
            message_history="\n".join(
                self.message_history + [self.prefix] + [self.response]
            ),
            speaker_names=speaker_names,
        )

        choice_string = self.model.invoke(
            [
                self.system_message,
                HumanMessage(content=choice_prompt),
            ]
        )

        choice = int(self.choice_parser.parse(choice_string)["choice"])
        gc.collect()
        torch.cuda.empty_cache()
        return choice

    def select_next_speaker(self):
        return self.chosen_speaker_id

    def send(self) -> str:
        """
        Applies the chatmodel to the message history
        and returns the message string
        """
        # 1. generate and save response to the previous speaker
        self.response = self._generate_response()

        if self.stop:
            message = self.response
        else:
            # 2. decide who to speak next
            self.chosen_speaker_id = self._choose_next_speaker()
            self.next_speaker = self.speakers[self.chosen_speaker_id]
            print(f"\tNext speaker: {self.next_speaker}\n")

            # 3. prompt the next speaker to speak
            next_prompt = self.prompt_next_speaker_prompt_template.format(
                message_history="\n".join(
                    self.message_history + [self.prefix] + [self.response]
                    # [self.response]
                ),
                next_speaker=self.next_speaker,
            )
            message = self.model.invoke(
                [
                    self.system_message,
                    HumanMessage(content=next_prompt),
                ]
            )
            # message = " ".join([self.response, message])
            gc.collect()
            torch.cuda.empty_cache()
        return message[message.rfind('The episode features'):]

In [7]:
topic = "The New Workout Trend: Competitive Sitting - How Laziness Became the Next Fitness Craze"
director_name = "Jon Stewart"
agent_summaries = OrderedDict(
    {
        "Jon Stewart": ("Host of the Daily Show", "New York"),
        "Samantha Bee": ("Hollywood Correspondent", "Los Angeles"),
        "Aasif Mandvi": ("CIA Correspondent", "Washington D.C."),
        "Ronny Chieng": ("Average American Correspondent", "Cleveland, Ohio"),
    }
)
word_limit = 50

In [8]:
agent_summary_string = "\n- ".join(
    [""]
    + [
        f"{name}: {role}, located in {location}"
        for name, (role, location) in agent_summaries.items()
    ]
)

conversation_description = f"""This is a Daily Show episode discussing the following topic: {topic}.

The episode features {agent_summary_string}."""

agent_descriptor_system_message = SystemMessage(
    content="You can add detail to the description of each person."
)


def generate_agent_description(agent_name, agent_role, agent_location):
    content_template = """{conversation_description}
    Please reply with a creative description of {agent_name}, who is a {agent_role} in {agent_location}, that emphasizes their particular role and location.
    Speak directly to {agent_name} in {word_limit} words or less.
    Do not add anything else."""

    # Create the PromptTemplate object
    agent_specifier_prompt = PromptTemplate(
        input_variables=['agent_name', 'agent_role', 'agent_location'],
        template=content_template,
        messages=[agent_descriptor_system_message]
    )
    agent_description = build_llm(agent_specifier_prompt, temperature = 1.0, max_new_tokens = 100).invoke({'agent_name': agent_name, 'agent_role': agent_role, 'agent_location': agent_location, 'conversation_description': conversation_description, 'word_limit':word_limit})
    gc.collect()
    torch.cuda.empty_cache()
    return agent_description


def generate_agent_header(agent_name, agent_role, agent_location, agent_description):
    return f"""{conversation_description}

    Your name is {agent_name}, your role is {agent_role}, and you are located in {agent_location}.

    Your description is as follows: {agent_description}

    You are discussing the topic: {topic}.

    Your goal is to provide the most informative, creative, and novel perspectives of the topic from the perspective of your role and your location.
    """


def generate_agent_system_message(agent_name, agent_header):
    return SystemMessage(
        content=(
            f"""{agent_header}
            You will speak in the style of {agent_name}, and exaggerate your personality.
            Do not say the same things over and over again.
            Speak in the first person from the perspective of {agent_name}
            For describing your own body movements, wrap your description in '*'.
            Do not change roles!
            Do not speak from the perspective of anyone else.
            Speak only from the perspective of {agent_name}.
            Stop speaking the moment you finish speaking from your perspective.
            Never forget to keep your response to {word_limit} words!
            Do not add anything else.
            """
        )
    )


agent_descriptions = [
    generate_agent_description(name, role, location)
    for name, (role, location) in agent_summaries.items()
]

agent_headers = [
    generate_agent_header(name, role, location, description)
    for (name, (role, location)), description in zip(
        agent_summaries.items(), agent_descriptions
    )
]
agent_system_messages = [
    generate_agent_system_message(name, header)
    for name, header in zip(agent_summaries, agent_headers)
]
gc.collect()
torch.cuda.empty_cache()

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
# for name, description, header, system_message in zip(
#     agent_summaries, agent_descriptions, agent_headers, agent_system_messages
# ):
#     print(f"\n\n{name} Description:")
#     print(f"\n{description}")
#     print(f"\nHeader:\n{header}")
#     print(f"\nSystem Message:\n{system_message.content}")

In [10]:
content_template ="""{conversation_description}
                  Please elaborate on the topic.
                  Frame the topic as a single question to be answered.
                  Be creative and imaginative.
                  Please reply with the specified topic in {word_limit} words or less.
                  Do not add anything else."""

topic_specifier_prompt = PromptTemplate(
    input_variables=['conversation_description', 'word_limit'],
    template=content_template,
    messages=[SystemMessage(content="You can make a task more specific.")]
)
specified_topic = build_llm(topic_specifier_prompt, temperature = 1.0, max_new_tokens = 100).invoke({'conversation_description': conversation_description, 'word_limit':word_limit})
gc.collect()
torch.cuda.empty_cache()
# print(f"Original topic:\n{topic}\n")
# print(f"Detailed topic:\n{specified_topic}\n")

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
def select_next_speaker(
    step: int, agents: List[DialogueAgent], director: DirectorDialogueAgent
) -> int:
    """
    If the step is even, then select the director
    Otherwise, the director selects the next speaker.
    """
    # the director speaks on odd steps
    if step % 2 == 1:
        idx = 0
    else:
        # here the director chooses the next speaker
        idx = director.select_next_speaker() + 1  # +1 because we excluded the director
    return idx

In [12]:
director = DirectorDialogueAgent(
    name=director_name,
    system_message=agent_system_messages[0],
    speakers=[name for name in agent_summaries if name != director_name],
    stopping_probability=0.2,
)

agents = [director]
for name, system_message in zip(
    list(agent_summaries.keys())[1:], agent_system_messages[1:]
):
    agents.append(
        DialogueAgent(
            name=name,
            system_message=system_message,
        )
    )

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
import gc
simulator = DialogueSimulator(
    agents=agents,
    selection_function=functools.partial(select_next_speaker, director=director),
)

In [15]:
simulator.reset()
simulator.inject("Audience member", specified_topic)
print(f"(Audience member): {specified_topic}")
print("\n")


while True:
    gc.collect()
    torch.cuda.empty_cache()
    name, message = simulator.step()
    print(f"{message}")
    print("############################")
    if director.stop:
        break

(Audience member): This is a Daily Show episode discussing the following topic: The New Workout Trend: Competitive Sitting - How Laziness Became the Next Fitness Craze.

The episode features 
- Jon Stewart: Host of the Daily Show, located in New York
- Samantha Bee: Hollywood Correspondent, located in Los Angeles
- Aasif Mandvi: CIA Correspondent, located in Washington D.C.
- Ronny Chieng: Average American Correspondent, located in Cleveland, Ohio.
                  Please elaborate on the topic. 
                  Frame the topic as a single question to be answered.
                  Be creative and imaginative.
                  Please reply with the specified topic in 50 words or less. 
                  Do not add anything else.  


	Stop? False

Debug: ['Here is the last conversation so far.', 'Audience member: This is a Daily Show episode discussing the following topic: The New Workout Trend: Competitive Sitting - How Laziness Became the Next Fitness Craze.\n\nThe episode feature

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


ValueError occurred: invalid literal for int() with base 10: 'System: This is a Daily Show episode discussing the following topic: The New Workout Trend: Competitive Sitting - How Laziness Became the Next Fitness Craze.\n\nThe episode features \n- Jon Stewart: , retrying...
	Next speaker: Samantha Bee

Jon Stewart:
             *Thanks, Samantha. That was a great point.* *Now, Ronny, what do you think about this trend?*
############################
Jon Stewart:
             *Thanks, Samantha. That was a great point.* *Now, Ronny, what do you think about this trend?*
Samantha Bee: *Oh, honey, I'm so glad you asked! * *Competitive sitting* is the newest fitness craze, and let me tell you, it's a doozy! * *I mean, who doesn't love the idea of getting paid to sit on the couch all day?* *But seriously, folks, this trend has gotten out of hand!* *People are literally competing to see who can sit the longest without moving!* *It's like a real-life version of 'The Hunger Games,' but instead of