In [None]:
!pip uninstall -y --quiet torch
!pip install vllm
!git clone https://github.com/Green0-0/light_prompter

In [None]:
# Import required modules
from light_prompter.model import VLLMModel
from light_prompter import responder
from light_prompter.responders.prompt_basic import Prompt_Basic
from light_prompter.responders.aggregate_llm import Aggregate_LLM
from light_prompter.responders.critique_llm import Critique_LLM
from light_prompter.sane_defaults import *

In [None]:
from vllm import LLM, SamplingParams

# Initialize model
# Engine options handed to vLLM's LLM constructor, collected in one place so
# they are easy to tune.
ENGINE_KWARGS = {
    "model": "Qwen/Qwen2.5-7B-Instruct",
    "dtype": "half",
    "enforce_eager": True,
    "gpu_memory_utilization": 0.95,
    "swap_space": 4,
    "max_model_len": 4096,
    "tensor_parallel_size": 2,
}
llm = LLM(**ENGINE_KWARGS)

# Stop strings covering the end-of-turn markers of several chat templates.
STOP_SEQUENCES = ["<｜end▁of▁sentence｜>", "</s>", "<|eot_id|>", "<|im_end|>", "[/INST]"]

# NOTE(review): each generation may be up to 1500 tokens while the context
# window is 4096; if later stages embed several earlier responses in a single
# prompt, confirm that those prompts still fit.
sampling_params = SamplingParams(
    temperature=0.75,
    min_p=0.1,
    max_tokens=1500,
    stop=STOP_SEQUENCES,
)

# Wrap the engine and its sampling settings in the light_prompter model adapter.
model = VLLMModel(llm, sampling_params)

In [None]:
# Set up MOA responders
#
# Structure built in this cell (bottom-up):
#   3 x Prompt_Basic  -> Aggregate_LLM   (layer 1, built three times)
#   Aggregate_LLM     -> Critique_LLM    (critique layer, one per aggregator)
#   3 x Critique_LLM  -> Aggregate_LLM   (final aggregator)


def _make_aggregator(responders):
    """Build an Aggregate_LLM over ``responders`` using the shared aggregation prompt.

    Args:
        responders: Collection of responder objects, passed through unchanged
            as the ``responders`` argument.

    Returns:
        A new Aggregate_LLM whose aggregation responder is a fresh Prompt_Basic.
    """
    return Aggregate_LLM(
        responders=responders,
        aggregation_responder=Prompt_Basic(),
        aggregation_prompt=aggregator_prompt,
    )


def _make_critique(responder):
    """Wrap ``responder`` in a Critique_LLM using the shared critique/rewrite prompts.

    Args:
        responder: The responder passed as the ``responder`` argument.

    Returns:
        A new Critique_LLM with fresh Prompt_Basic instances as its critique
        and refine responders.
    """
    return Critique_LLM(
        responder=responder,
        critique_responder=Prompt_Basic(),
        refine_responder=Prompt_Basic(),
        prompt_critique=critique_prompt,
        prompt_refine=rewrite_prompt,
    )


aggregator_prompt = get_aggregate_prompt()

# Layer 1 aggregators
# NOTE(review): responders are passed as a set, exactly as before. A set has
# no defined iteration order and requires Prompt_Basic to be hashable; it is
# kept because the collection type Aggregate_LLM expects is not visible here.
# Confirm whether a list would be accepted or preferred.
aggregator_1, aggregator_2, aggregator_3 = [
    _make_aggregator({Prompt_Basic(), Prompt_Basic(), Prompt_Basic()})
    for _ in range(3)
]

# Critique layer
critique_prompt = get_critique_prompt()
rewrite_prompt = get_rewrite_prompt()

critique_1, critique_2, critique_3 = [
    _make_critique(layer_1_aggregator)
    for layer_1_aggregator in (aggregator_1, aggregator_2, aggregator_3)
]

# Final aggregator
final_aggregator = _make_aggregator({critique_1, critique_2, critique_3})

In [None]:
# Define and execute query
# The question sent through the responder tree rooted at `final_aggregator`.
query = "What are the most important facts about George Washington?"
# Run `final_aggregator` on `query` with `model`. The returned object is read
# by the following cells through its `verbose_output` and `output` attributes.
resp = responder.execute(model, final_aggregator, query)

In [None]:
# Show the run's `verbose_output`. An f-string is used so that a non-string
# value is still printed instead of raising TypeError on string concatenation.
print(f"Verbose Output:\n{resp.verbose_output}")

In [None]:
# Show the run's final `output`. An f-string is used so that a non-string
# value is still printed instead of raising TypeError on string concatenation.
print(f"Output:\n{resp.output}")