## Finetune Installs

In [None]:
!pip install triton # required by TorchInductor (backend compiler in PyTorch)
!pip install bitsandbytes # needed in model_loader for quantization
!pip install cloud-tpu-client # for tpu connection

# Jupyter Notebook
! pip install notebook
! pip install jupyterlab
! pip install ipywidgets

! pip install tqdm # for progress bars

# Data Science packages
! pip install pandas
! pip install numpy
! pip install matplotlib

# PyTorch packages
! pip install torch
! pip install torchvision
! pip install torchaudio
! pip install triton # PyTorch backend for TorchInductor
! pip install bitsandbytes # for model quantization
! pip install peft # need Parameter-Efficient Fine-Tuning to finetune quantized models

# Huggingface ecosystem packages
! pip install transformers
! pip install datasets
! pip install accelerate

# Machine Learning packages
! pip install scikit-learn
! pip install tensorboard

# Google Colab-specific packages
! pip install google-colab
! pip install cloud-tpu-client

Collecting triton
  Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.5/209.5 MB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: triton
Successfully installed triton-3.1.0
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.0
Collecting cloud-tpu-client
  Downloading cloud_tpu_client-0.10-py3-none-any.whl.metadata (1.2 kB)
Collecting google-api-python-client==1.8.0 (from cloud-tpu-client)
  Downloading goog

Collecting jedi>=0.16 (from ipython>=5.0.0->ipykernel->notebook)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jedi
Successfully installed jedi-0.19.2
Collecting jupyterlab
  Downloading jupyterlab-4.3.3-py3-none-any.whl.metadata (16 kB)
Collecting async-lru>=1.0.0 (from jupyterlab)
  Downloading async_lru-2.0.4-py3-none-any.whl.metadata (4.5 kB)
Collecting ipykernel>=6.5.0 (from jupyterlab)
  Downloading ipykernel-6.29.5-py3-none-any.whl.metadata (6.3 kB)
Collecting jupyter-lsp>=2.0.0 (from jupyterlab)
  Downloading jupyter_lsp-2.2.5-py3-none-any.whl.metadata (1.8 kB)
Collecting jupyter-server<3,>=2.4.0 (from jupyterlab)
  Downloading jupyter_server-2.14.2-py3-none-any.whl.metadata (8.4 kB)
Collecting jupyterlab-server<3,>=2.27.1 (from jupyterlab)
  Downloading j



# **Download Data**

In [None]:
import pandas as pd
from google.colab import drive
import ast

# Mount Google Drive so the shared project folder is reachable from this runtime.
drive.mount('/content/drive')
folder_path = "/content/drive/MyDrive/CMSC723-Final-Project/"

# Read the training and testing CSV files from the project folder.
train_csv = folder_path + 'craigslist_bargains_train.csv'
test_csv = folder_path + 'craigslist_bargains_test.csv'
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

# The `action_utterance_pairs` column holds the text of a Python literal in the CSV;
# evaluate each cell back into the object it spells out (training split only).
train_df['action_utterance_pairs'] = train_df['action_utterance_pairs'].apply(ast.literal_eval)

train_df.head()

Mounted at /content/drive


Unnamed: 0,scenario_id,title,description,category,listing_price,buyer_target,seller_target,action_utterance_pairs,transcripts
0,09aea678-c5cd-4967-8a2f-b650f3520c55,Verizon Car Charger with Dual Output Micro USB...,Charge two devices simultaneously on the go. T...,phone,10.0,7.0,10.0,"[Action: init-price\nUtterance: Hi, not sure i...","Buyer: Hi, not sure if the charger would work..."
1,e5cd53dc-424c-4197-8eb8-ed8861b19e0d,Long board custom bilt from the board up,Hey there cl I have a long board for sale pret...,bike,200.0,120.0,200.0,"[Action: intro\nUtterance: Hi,I am interested ...","Buyer: Hi,I am interested in your board!\n Se..."
2,02cfdc52-9530-4ac7-97fa-e4055cd0b6d0,Sectional Couch,I am remodeling my space and selling a beautif...,furniture,585.0,444.0,585.0,"[Action: intro\nUtterance: hello, Action: unkn...",Buyer: hello\n Seller: Hi\n Buyer: How old is...
3,8ba14ae4-1de2-4551-9959-d95d54141be5,2006 Toyota 4Runner 4WD - Only 106k Miles - Cl...,Selling my 2006 Toyota 4 Runner with only 106k...,car,14500.0,8700.0,14500.0,[Action: intro\nUtterance: man thats a nice tr...,Buyer: man thats a nice trak\n Seller: yes it...
4,cbd117d3-ebb8-4b5d-9817-905e351a2368,Wooden FULL SIZED Futon w/ 2 Slip Covers $125 obo,Just bought on craigslist a few months ago. wa...,furniture,125.0,95.0,125.0,"[Action: init-price\nUtterance: Hi, I'd love t...","Buyer: Hi, I'd love to have this sofa, but I ..."


# **Negotiation Runner**

In [None]:
import re
import textwrap
import numpy as np

class Scenario:
    """One negotiation scenario read from a single row of a scenario table.

    The row's fields are copied onto the instance, and two knowledge-base dicts
    (``buyer_kb`` and ``seller_kb``) are derived from them. Both contain the same
    item information and differ only in ``role`` and ``target_price``.

    Args:
        i: Positional index of the row to load (used with ``.iloc``).
        df: Table to read the row from. It must provide the columns
            ``scenario_id``, ``title``, ``description``, ``category``,
            ``listing_price``, ``buyer_target`` and ``seller_target``. When
            omitted, the notebook-level ``train_df`` is used, which preserves
            the original behaviour.

    Raises:
        IndexError: If ``i`` is outside the table.
        KeyError: If a required column is missing.
    """

    def __init__(self, i = 0, df = None):
        # Fall back to the global training frame only when no table is supplied.
        source = train_df if df is None else df
        scenario_info = source.iloc[i]

        self.id = scenario_info['scenario_id']
        self.title = scenario_info['title']
        self.description = scenario_info['description']
        self.category = scenario_info['category']
        self.listing_price = scenario_info['listing_price']
        self.buyer_target = scenario_info['buyer_target']
        self.seller_target = scenario_info['seller_target']

        self.buyer_kb = self._build_kb("buyer", self.buyer_target)
        self.seller_kb = self._build_kb("seller", self.seller_target)

    def _build_kb(self, role, target_price):
        """Return the knowledge-base dict for ``role`` with its own target price."""
        # Key order is kept stable: the dict is rendered as text inside the prompts.
        return {
            "role": role,
            "title": self.title,
            "description": self.description,
            "category": self.category,
            "listing_price": self.listing_price,
            "target_price": target_price
        }

class Dialogue:
    """A single turn of a negotiation: who spoke, what was said, and the action taken.

    Args:
        utterance: Text spoken in this turn.
        action: Action label of the turn (e.g. ``"offer"``, ``"inquire"``).
        price: Price attached to the turn. It is kept only when ``action`` is one
            of ``PRICE_ACTIONS``; for every other action ``self.price`` is ``None``.
        agent: Label of the speaker; defaults to ``"unknown"``.
    """

    # Actions whose turn may carry a price. "counter" is listed so that this filter
    # agrees with BaselineAgent.parse_response, which extracts a price for that
    # action as well; previously such a price was parsed and then discarded here.
    PRICE_ACTIONS = frozenset({
        "init-price", "offer", "counter-price", "counter", "insist", "agree", "accept"
    })

    def __init__(self, utterance, action, price=None, agent = "unknown"):
        self.utterance = utterance
        self.action = action
        self.price = price if action in self.PRICE_ACTIONS else None
        self.agent = agent
        # Initialised to None here and not updated anywhere in this class.
        self.current_price = None

    def __str__(self):
        return f"{self.agent}: {self.utterance}"

class Negotiation:
    """Runs one buyer/seller negotiation for a scenario and scores the outcome.

    ``buyer`` and ``seller`` are callables invoked as
    ``agent(prepared_history, turns_left, parser_model)``. The object they return
    must expose ``agent``, ``utterance``, ``action`` and ``price`` attributes.

    Attributes:
        conversation_history: One ``"<Agent>: <utterance>"`` string per turn.
        prepared_conversation_history: The same turns as a single string, each turn
            prefixed with a tab and terminated by a newline (passed to the agents).
        actions: Action label of every turn, in order.
        prices: Price of every turn, or ``NO_PRICE`` when the turn carried none.
        accepted / accepted_price: Outcome of the negotiation.
    """

    # Sentinel stored in ``prices`` for turns without a price.
    NO_PRICE = float(-1)

    def __init__(self, scenario, buyer = None, seller = None, parser_model = None):
        self.scenario = scenario
        self.buyer = buyer
        self.seller = seller
        self.prepared_conversation_history = ""
        self.conversation_history = []

        self.actions = []
        self.prices = []

        self.accepted = False
        self.accepted_price = None

        self.parser_model = parser_model

    def add_to_history(self, dialogue):
        """Append one turn to both the list and the prompt-ready transcript."""
        line = dialogue.agent.capitalize() + ": " + dialogue.utterance
        self.conversation_history.append(line)
        self.prepared_conversation_history += "\t" + line + "\n"

    def run(self, max_turns = 15):
        """Alternate buyer and seller turns until a deal is struck or turns run out.

        The buyer speaks first. The loop stops as soon as a turn's action is
        ``"accept"`` or ``"agree"``; the agreed price is the most recent price
        seen (initially the listing price). A ``"disagree"``/``"reject"`` turn
        clears that standing price but does not end the negotiation.

        Args:
            max_turns: Maximum number of turns (buyer and seller combined).

        Returns:
            The one-row DataFrame produced by ``collect_results``.
        """
        turn = 0
        current_price = self.scenario.listing_price

        while turn < max_turns:
            agent = self.buyer if (turn % 2) == 0 else self.seller
            turn += 1

            # Second argument: the value passed to the agent as its remaining turns.
            dialogue = agent(self.prepared_conversation_history, (max_turns - turn) // 2, self.parser_model)
            self.add_to_history(dialogue)
            self.actions.append(dialogue.action)

            # Compare against None rather than truthiness so that a price of 0
            # is recorded as a real price instead of being treated as missing.
            if dialogue.price is not None:
                current_price = dialogue.price
                self.prices.append(dialogue.price)
            else:
                self.prices.append(self.NO_PRICE)

            if dialogue.action in ("accept", "agree"):
                self.accepted = True
                self.accepted_price = current_price
                break
            if dialogue.action in ("disagree", "reject"):
                self.accepted = False
                current_price = None

        return self.collect_results()

    @staticmethod
    def _safe_metric(compute):
        """Evaluate ``compute()``; return NaN when the metric is undefined.

        Only the failures a metric formula can legitimately hit are absorbed:
        division by zero and arithmetic on a missing (None) price.
        """
        try:
            return compute()
        except (TypeError, ArithmeticError):
            return np.nan

    def collect_results(self):
        """Summarise the negotiation as a one-row DataFrame.

        Deal-quality metrics (``bias``, ``aggressiveness``, ``fairness`` and the
        two per-turn ratios) are NaN unless the negotiation was accepted.
        ``probing_ratio`` and ``concession_rate`` are computed for every
        negotiation and are NaN only when their denominator is zero.

        Returns:
            pandas.DataFrame with a single row holding the scenario fields, the
            outcome, the transcript and the metrics.
        """
        outcome = "Accepted" if self.accepted else "Rejected"
        accepted_price = self.accepted_price if self.accepted else "N/A"
        transcript = self.prepared_conversation_history
        prices = self.prices
        actions = self.actions
        dialogue_len = len(self.conversation_history)

        # The text below is kept at its original six-space indentation on purpose:
        # the leading spaces are part of the stored string.
        transcript_display = \
        f"""
      =================================================================================================
      Negotiation for: <{self.scenario.title}>
      =================================================================================================
      {transcript.rstrip()}
      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      Outcome: {outcome}
      Agreed Price: {accepted_price}
      =================================================================================================
      """

        listing_price = self.scenario.listing_price
        buyer_target = self.scenario.buyer_target
        seller_target = self.scenario.seller_target
        mid_target = (buyer_target + seller_target) / 2
        target_gap = seller_target - buyer_target

        safe = self._safe_metric

        fairness = bias = aggressiveness = np.nan
        bias_per_turn = rel_efficiency = np.nan
        if self.accepted:
            # 1 when the deal sits exactly between both targets, 0 at either target.
            fairness = safe(lambda: 1 - (2 * abs(accepted_price - mid_target)) / target_gap)
            # +1 when the price equals the buyer target, -1 at the seller target.
            bias = safe(lambda: 2 * ((seller_target - accepted_price) / target_gap) - 1)
            # Relative distance between the agreed price and the listing price.
            aggressiveness = safe(lambda: abs(accepted_price - listing_price) / listing_price)
            bias_per_turn = safe(lambda: bias / dialogue_len)
            rel_efficiency = safe(lambda: fairness / dialogue_len)

        # Share of turns whose action was "inquire".
        probing_ratio = safe(lambda: actions.count("inquire") / dialogue_len)

        # Sum of consecutive price changes divided by the number of priced turns.
        offer_prices = [x for x in prices if x != -1]
        concession_rate = safe(
            lambda: sum(b - a for a, b in zip(offer_prices, offer_prices[1:])) / len(offer_prices)
        )

        return pd.DataFrame([{
            "scenario_id": self.scenario.id,
            "title": self.scenario.title,
            "description": self.scenario.description,
            "category": self.scenario.category,
            "listing_price": self.scenario.listing_price,
            "buyer_target": self.scenario.buyer_target,
            "seller_target": self.scenario.seller_target,
            "outcome": outcome,
            "accepted_price": accepted_price,
            "transcript": transcript,
            "prices": prices,
            "actions": actions,
            "transcript_display": transcript_display,

            "bias": bias,
            "aggressiveness": aggressiveness,
            "fairness": fairness,
            "dialogue_length": dialogue_len,
            "bias_cond_dialogue_length": bias_per_turn,
            "rel_efficiency": rel_efficiency,
            "probing_ratio": probing_ratio,
            "concession_rate": concession_rate
        }])


# **Tester Class**

In [None]:
import random

class Tester:
    """Runs a buyer agent against a seller agent over several scenarios and saves the results.

    Args:
        buyer_agent: Buyer agent; its ``kb`` attribute is overwritten per scenario.
        seller_agent: Seller agent; its ``kb`` attribute is overwritten per scenario.
        buyer_name: Label of the buyer, used in the results file name.
        seller_name: Label of the seller, used in the results file name.
        parser_model: Passed through unchanged to every ``Negotiation``.
    """

    def __init__(self, buyer_agent, seller_agent, buyer_name=None, seller_name=None, parser_model = None):
        self.buyer_agent = buyer_agent
        self.seller_agent = seller_agent

        self.buyer_name = buyer_name
        self.seller_name = seller_name

        self.scenarios = []
        self.results_df = pd.DataFrame()

        self.parser_model = parser_model

    def select_scenarios(self, num_scenarios = 5):
        """Load scenarios 0 .. num_scenarios-1 into ``self.scenarios``."""
        self.scenarios = [Scenario(i) for i in range(num_scenarios)]

    def run_negotiations(self, num_scenarios):
        """Negotiate the first ``num_scenarios`` scenarios, then save all results.

        Result rows are appended to ``self.results_df``. If a negotiation raises,
        the rows gathered so far are still stored before the error propagates;
        nothing is written to disk in that case.
        """
        self.select_scenarios(num_scenarios)

        print("Starting Scenarios . . .")

        # Collect the per-scenario frames and concatenate once instead of
        # rebuilding the whole results table after every scenario.
        frames = []
        try:
            for i, scenario in enumerate(self.scenarios):
                self.buyer_agent.kb = scenario.buyer_kb
                self.seller_agent.kb = scenario.seller_kb
                negotiation = Negotiation(scenario, buyer=self.buyer_agent, seller=self.seller_agent, parser_model=self.parser_model)

                frames.append(negotiation.run())
                print(f"Scenarios Completed: {i+1} / {num_scenarios}")
        finally:
            if frames:
                # Skip the initial empty frame so it cannot influence column dtypes.
                previous = [] if self.results_df.empty else [self.results_df]
                self.results_df = pd.concat(previous + frames, ignore_index=True)

        print("Finished Scenarios!")
        self.save_results()

    def save_results(self):
        """Write ``self.results_df`` to ``<folder_path>results/<buyer>+<seller>.csv``.

        The ``results`` directory is created when it does not exist yet;
        without that, writing the CSV fails on a fresh project folder.
        """
        import os

        path = f'results/{self.buyer_name}+{self.seller_name}.csv'
        destination = folder_path + path
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        self.results_df.to_csv(destination)
        print(f"Results Saved to: {destination}")

# **Baseline**

## Baseline Generators

In [None]:
from huggingface_hub import InferenceClient
import os

# SECURITY: the API token must never be written into the notebook. It is read from
# the HF_TOKEN environment variable instead. The token that was previously committed
# here has to be treated as leaked and revoked in the Hugging Face account settings.
# NOTE(review): when HF_TOKEN is unset, api_key=None is passed; huggingface_hub is
# then expected to fall back to its own token lookup — confirm for the installed version.
client = InferenceClient(api_key=os.environ.get("HF_TOKEN"))


def get_baseline_system_prompt_3b_personality(role="", kb=None, conversation_history="", turns_left=0, personality = None, personality_instructs = None):
    """Build the baseline system prompt with an extra personality instruction.

    Args:
        role: Role inserted into the opening sentence of the prompt.
        kb: Knowledge base; rendered with its default string form under
            "# Knowledge Base".
        conversation_history: Accepted for signature compatibility; it is not
            inserted into the prompt text.
        turns_left: Number inserted into the "turns left" instruction.
        personality: Personality name inserted into the personality instruction.
        personality_instructs: Behavioural instruction following the name.

    Returns:
        The prompt text.
    """
    # The turns instruction reads "{turns_left} turns left", matching the wording of
    # the chain-of-thought prompts; it previously read "You have {turns_left} to
    # finalize", which leaves the number without a unit.
    prompt = f"""
    You are a strategic {role} engaged in a negotiation scenario.
    Your objective is to close the negotiation by reaching a fair agreement as close as possible to your target price or by rejecting the negotiation while maintaining a smooth and logical conversation flow.

    # Instructions
    - Accept the current offer if it is reasonable and aligns closely with your target price.
    - If the current offer is not acceptable, continue the negotiation by countering with another offer.
    - The negotiation should mostly be about the price.
    - You have {turns_left} turns left to finalize the negotiation.
    - Keep your statements short.
    - If you mention a price, make sure the only numerical value in your utterance is that price.
    - You have a {personality} personality, so {personality_instructs}

    # Knowledge Base
    {kb}

    # Response Format
    ```
    Action: <action>
    Utterance: <utterance>
    ```

    ## Options for Action:
      - `intro`: Introduce the negotiation or express interest in the item.
      - `init-price`: Propose an initial price to start the negotiation.
      - `offer`:  Propose a new price for the item.
      - `counter-price`: Suggest an alternative price in response to an offer.
      - `insist`: Insist the current price without making changes.
      - `agree`: Agree on the current offer.
      - `disagree`: Disagree on the current offer.
      - `accept`: Accept the current offer and end the negotiation.
      - `inform`: Provide information about the item.
      - `inquire`: Ask questions about the item.
      - `unknown`: Any information unrelated to the price negotiation.

    # Here are examples of how the format should look like:
    ## Example 1:
    ```
    Action: init-price
    Utterance: Hi, not sure if the charger would work for my car. Can you sell it to me for $5?
    ```

    ## Example 2:
    ```
    Action: insist
    Utterance: Still, can I buy it for $5? I'm on a tight budge
    ```

    ## Example 3:
    ```
    Action: counter-price
    Utterance: I think the lowest I would want to go is $8.
    ```

    ## Example 7:
    ```
    Action: agree
    Utterance: Eh, fine. $7.
    ```

    ## Example 8:
    ```
    Action: offer
    Utterance: How about $10?
    ```
    """
    return prompt



def get_baseline_system_prompt_3b(role="", kb=None, conversation_history="", turns_left=0):
    """Build the baseline system prompt for a negotiating agent.

    Args:
        role: Role inserted into the opening sentence of the prompt.
        kb: Knowledge base; rendered with its default string form under
            "# Knowledge Base".
        conversation_history: Accepted for signature compatibility; it is not
            inserted into the prompt text.
        turns_left: Number inserted into the "turns left" instruction.

    Returns:
        The prompt text.
    """
    # The turns instruction reads "{turns_left} turns left", matching the wording of
    # the chain-of-thought prompts; it previously read "You have {turns_left} to
    # finalize", which leaves the number without a unit.
    prompt = f"""
    You are a strategic {role} engaged in a negotiation scenario.
    Your objective is to close the negotiation by reaching a fair agreement as close as possible to your target price or by rejecting the negotiation while maintaining a smooth and logical conversation flow.

    # Instructions
    - Accept the current offer if it is reasonable and aligns closely with your target price.
    - If the current offer is not acceptable, continue the negotiation by countering with another offer.
    - The negotiation should mostly be about the price.
    - You have {turns_left} turns left to finalize the negotiation.
    - Keep your statements short.
    - If you mention a price, make sure the only numerical value in your utterance is that price.

    # Knowledge Base
    {kb}

    # Response Format
    ```
    Action: <action>
    Utterance: <utterance>
    ```

    ## Options for Action:
      - `intro`: Introduce the negotiation or express interest in the item.
      - `init-price`: Propose an initial price to start the negotiation.
      - `offer`:  Propose a new price for the item.
      - `counter-price`: Suggest an alternative price in response to an offer.
      - `insist`: Insist the current price without making changes.
      - `agree`: Agree on the current offer.
      - `disagree`: Disagree on the current offer.
      - `accept`: Accept the current offer and end the negotiation.
      - `inform`: Provide information about the item.
      - `inquire`: Ask questions about the item.
      - `unknown`: Any information unrelated to the price negotiation.

    # Here are examples of how the format should look like:
    ## Example 1:
    ```
    Action: init-price
    Utterance: Hi, not sure if the charger would work for my car. Can you sell it to me for $5?
    ```

    ## Example 2:
    ```
    Action: insist
    Utterance: Still, can I buy it for $5? I'm on a tight budge
    ```

    ## Example 3:
    ```
    Action: counter-price
    Utterance: I think the lowest I would want to go is $8.
    ```

    ## Example 7:
    ```
    Action: agree
    Utterance: Eh, fine. $7.
    ```

    ## Example 8:
    ```
    Action: offer
    Utterance: How about $10?
    ```
    """
    return prompt

def get_baseline_system_prompt_3b_COT_buyer(role="", kb=None, conversation_history="", turns_left=0):
    """Return the chain-of-thought system prompt whose examples are written from the buyer's side.

    ``role``, ``turns_left`` and ``kb`` are interpolated into the text;
    ``conversation_history`` is accepted but not used in the prompt. The response
    format asks for an ``Action``, an ``Utterance`` and a ``Reasoning`` line.
    """
    return f"""
    You are a strategic {role} engaged in a negotiation scenario.
    Your objective is to secure the best deal possible as close to your target price while maintaining a smooth and logical conversation flow.

    # Instructions
    - Accept the current offer if it is reasonable and aligns closely with your target price.
    - If the current offer is not acceptable, continue the negotiation by countering with another offer.
    - The negotiation should mostly be about the price.
    - You have {turns_left} turns left to finalize the negotiation.
    - Keep your statements concise but provide reasoning for your decision.
    - If you mention a price, make sure the only numerical value in your utterance is that price.
    - Think through the reasoning step by step before responding.
    - Explain why you are choosing a specific action and what factors influence your decision.

    # Knowledge Base
    {kb}

    # Response Format
    ```
    Action: <action>
    Utterance: <utterance>
    Reasoning: <step-by-step reasoning about the decision>
    ```

    ## Options for Action:
      - `intro`: Introduce the negotiation or express interest in the item.
      - `init-price`: Propose an initial price to start the negotiation.
      - `offer`: Propose a new price for the item.
      - `counter-price`: Suggest an alternative price in response to an offer.
      - `insist`: Insist on the current price without making changes.
      - `agree`: Agree on the current offer.
      - `disagree`: Disagree on the current offer.
      - `accept`: Accept the current offer and end the negotiation.
      - `inform`: Provide information about the item.
      - `inquire`: Ask questions about the item.
      - `unknown`: Any information unrelated to the price negotiation.

    # Here are examples of how the format should look like:
    ## Example 1:
    ```
    Action: accept
    Utterance: I think $8 is fair. Let's proceed with it.
    Reasoning: The current offer is reasonable and aligns with my budget.
    ```

    ## Example 2:
    ```
    Action: counter-price
    Utterance: Could you do $6 instead?
    Reasoning: The seller's offer is slightly above my target price, but I want to see if they can go any lower.
    ```

    ## Example 3:
    ```
    Action: inquire
    Utterance: Could you confirm the condition of the item? Is it new or used?
    Reasoning: The seller is insisting on their price. To better understand if their offer is justified, I will inquire more about the item.
    ```

    ## Example 4:
    ```
    Action: init-price
    Utterance: I’m interested. Would you consider $5?
    Reasoning: I want to start the negotiation closer to my target price by making an initial offer.
    ```
    """

def get_baseline_system_prompt_3b_COT_seller(role="", kb=None, conversation_history="", turns_left=0):
    """Return the chain-of-thought system prompt whose examples are written from the seller's side.

    ``role``, ``turns_left`` and ``kb`` are interpolated into the text;
    ``conversation_history`` is accepted but not used in the prompt. The response
    format asks for an ``Action``, an ``Utterance`` and a ``Reasoning`` line.
    """
    return f"""
    You are a strategic {role} engaged in a negotiation scenario.
    Your objective is to close the deal while securing the best price possible close to your target price, maintaining a smooth and logical conversation flow.

    # Instructions
    - Accept the current offer if it is reasonable and aligns closely with your target price.
    - If the current offer is not acceptable, continue the negotiation by countering with another offer.
    - The negotiation should mostly be about the price.
    - You have {turns_left} turns left to finalize the negotiation.
    - Keep your statements concise but provide reasoning for your decision.
    - If you mention a price, make sure the only numerical value in your utterance is that price.
    - Think through the reasoning step by step before responding.
    - Explain why you are choosing a specific action.

    # Knowledge Base
    {kb}

    # Response Format
    ```
    Action: <action>
    Utterance: <utterance>
    Reasoning: <step-by-step reasoning about the decision>
    ```

    ## Options for Action:
      - `intro`: Introduce the negotiation or express interest in the item.
      - `init-price`: Propose an initial price to start the negotiation.
      - `offer`: Propose a new price for the item.
      - `counter-price`: Suggest an alternative price in response to an offer.
      - `insist`: Insist on the current price without making changes.
      - `agree`: Agree on the current offer.
      - `disagree`: Disagree on the current offer.
      - `accept`: Accept the current offer and end the negotiation.
      - `inform`: Provide information about the item.
      - `inquire`: Ask questions about the item.
      - `unknown`: Any information unrelated to the price negotiation.

    # Here are examples of how the format should look like:
    ## Example 1:
    ```
    Action: accept
    Utterance: Agreed. $10 works for me.
    Reasoning: The buyer's current offer is close to my target price, therefore, I think it is better to accept the offer and finalize the deal.
    ```

    ## Example 2:
    ```
    Action: counter-price
    Utterance: I can't do $6, but how about $8 instead?
    Reasoning: The buyer's counter offer is too low. I will counter with a price slightly closer to their offer.
    ```

    ## Example 3:
    ```
    Action: insist
    Utterance: I'm firm on $12. It's a great deal for this item.
    Reasoning: The buyer seems hesitant, so I will insist on my price to emphasize its value.
    ```

    ## Example 4:
    ```
    Action: init-price
    Utterance: I can sell it for $15. What do you think?
    Reasoning: I want to start the negotiation by anchoring the price closer to my target price.
    ```
    """

# Model identifiers selectable through Generator's `size` argument.
baseline_model_3b = "meta-llama/Llama-3.2-3B-Instruct"
baseline_model_8b = "meta-llama/Llama-3.1-8B-Instruct"
baseline_model_70b = "meta-llama/Llama-3.3-70B-Instruct"

class Generator:
  """Produces negotiation responses by sending a system prompt plus the history to `client`.

  Args:
    role: Stored on the instance; the public methods pass their own role explicitly.
    cot: Stored on the instance.
    size: One of '3B', '8B' or '70B'; selects the model identifier.
    personality_dict: Dict with 'name' and 'instructions' keys, required only by
      the `*_personality` methods.

  Raises:
    ValueError: If `size` is not one of the supported labels. Previously the
      instance was created without `baseline_model` and failed only later,
      on the first request.
  """

  def __init__(self, role, cot = False, size = '3B', personality_dict = None):
    models_by_size = {
        '3B': baseline_model_3b,
        '8B': baseline_model_8b,
        '70B': baseline_model_70b,
    }
    if size not in models_by_size:
      raise ValueError(f"Unsupported model size {size!r}; expected one of {sorted(models_by_size)}")

    self.role = role
    self.cot = cot
    self.personality = personality_dict
    self.baseline_model = models_by_size[size]

  def _complete(self, prompt, conversation_history, temperature):
    """Send `prompt` (system) and `conversation_history` (user) and return the reply text."""
    messages = [{"role": "system", "content": prompt}, {"role": "user", "content": conversation_history}]
    completion = client.chat.completions.create(
        model=self.baseline_model,
        messages=messages,
        max_tokens=100,
        temperature=temperature,
        frequency_penalty=0.3,
    )
    return completion.choices[0].message["content"]

  def _generate_response(self, role, kb, conversation_history, turns_left):
    """Reply using the plain baseline prompt (temperature 0.8)."""
    prompt = get_baseline_system_prompt_3b(role, kb, conversation_history, turns_left)
    return self._complete(prompt, conversation_history, 0.8)

  def _generate_response_personality(self, role, kb, conversation_history, turns_left):
    """Reply using the personality prompt (temperature 0.8).

    Raises:
      ValueError: If the generator was created without `personality_dict`.
    """
    if self.personality is None:
      raise ValueError("A personality_dict with 'name' and 'instructions' is required for personality responses")
    personality_name = self.personality['name']
    personality_instructs = self.personality['instructions']

    prompt = get_baseline_system_prompt_3b_personality(role, kb, conversation_history, turns_left, personality_name, personality_instructs)
    return self._complete(prompt, conversation_history, 0.8)

  def _generate_response_COT(self, role, kb, conversation_history, turns_left):
    """Reply using the chain-of-thought prompt for `role` (temperature 0.7).

    Raises:
      ValueError: If `role` is neither 'Buyer' nor 'Seller'. Previously this
        surfaced as an unbound-variable error for `prompt`.
    """
    if (role == 'Buyer'):
      prompt = get_baseline_system_prompt_3b_COT_buyer(role, kb, conversation_history, turns_left)
    elif (role == 'Seller'):
      prompt = get_baseline_system_prompt_3b_COT_seller(role, kb, conversation_history, turns_left)
    else:
      raise ValueError(f"Unsupported role {role!r}; expected 'Buyer' or 'Seller'")
    return self._complete(prompt, conversation_history, 0.7)

  def generate_response_buyer(self, kb, conversation_history, turns_left):
    return self._generate_response('Buyer', kb, conversation_history, turns_left)

  def generate_response_seller(self, kb, conversation_history, turns_left):
    return self._generate_response('Seller', kb, conversation_history, turns_left)

  def generate_response_buyer_COT(self, kb, conversation_history, turns_left):
    return self._generate_response_COT('Buyer', kb, conversation_history, turns_left)

  def generate_response_seller_COT(self, kb, conversation_history, turns_left):
    return self._generate_response_COT('Seller', kb, conversation_history, turns_left)

  def generate_response_buyer_personality(self, kb, conversation_history, turns_left):
    return self._generate_response_personality('Buyer', kb, conversation_history, turns_left)

  def generate_response_seller_personality(self, kb, conversation_history, turns_left):
    return self._generate_response_personality('Seller', kb, conversation_history, turns_left)


def get_buyer_generator(cot = False, size = '3B'):
  """Create a buyer Generator and return its response method.

  The chain-of-thought method is returned when `cot` is truthy, the plain
  baseline method otherwise.
  """
  buyer = Generator('Buyer', cot, size)
  return buyer.generate_response_buyer_COT if cot else buyer.generate_response_buyer

def get_seller_generator(cot = False, size = '3B'):
  """Create a seller Generator and return its response method.

  The chain-of-thought method is returned when `cot` is truthy, the plain
  baseline method otherwise.
  """
  seller = Generator('Seller', cot, size)
  return seller.generate_response_seller_COT if cot else seller.generate_response_seller

def get_buyer_generator_personality(cot = False, size = '3B', personality_dict = None):
  """Create a buyer Generator carrying `personality_dict` and return its personality response method.

  `cot` is forwarded to the Generator but does not affect which method is returned.
  """
  return Generator('Buyer', cot, size, personality_dict).generate_response_buyer_personality

def get_seller_generator_personality(cot = False, size = '3B', personality_dict = None):
  """Create a seller Generator carrying `personality_dict` and return its personality response method.

  `cot` is forwarded to the Generator but does not affect which method is returned.
  """
  return Generator('Seller', cot, size, personality_dict).generate_response_seller_personality

## Baseline Agent Classes

In [None]:
class BaselineAgent:
    """Shared state and response parsing for the baseline buyer and seller agents."""

    def __init__(self, role, kb, generator, parser_model):
        self.role, self.kb = role, kb
        self.generator = generator
        self.parser_model = parser_model

    def parse_response(self,response):
        """Extract ``(utterance, action, price)`` from a generated response.

        The text is scanned line by line. A line containing ``"Action: "`` sets the
        action (lower-cased text after the first colon); otherwise a line containing
        ``"Utterance: "`` sets the utterance. After every line, if the current action
        is a price-bearing one, the current utterance is searched for a dollar amount
        and, when one is found, it becomes the price. A price found earlier is kept
        when a later search finds nothing.
        """
        priced_actions = {'init-price', "offer", "counter-price", "insist", 'accept', 'agree', 'counter'}
        dollar_amount = r'\$\s?([0-9]+(?:,[0-9]{3})*(\.[0-9]{1,2})?)'

        action, utterance, price = "", "", None
        for raw_line in response.split("\n"):
            if "Action: " in raw_line:
                action = raw_line.split(":", 1)[1].strip().lower()
            elif "Utterance: " in raw_line:
                utterance = raw_line.split(":", 1)[1].strip()

            if action not in priced_actions:
                continue
            found = re.search(dollar_amount, utterance)
            if found:
                # Drop thousands separators before converting, e.g. "1,200" -> 1200.0
                price = float(found.group(1).replace(",",""))

        return utterance, action, price

class BaselineBuyer(BaselineAgent):
    """Baseline agent that negotiates in the buyer role."""

    def __init__(self, kb, generator, parser_model):
        super().__init__("buyer", kb, generator, parser_model)

    def __call__(self, conversation_history,turns_left, parser_model):
        """Generate the buyer's next turn and return it as a ``Dialogue``.

        The ``parser_model`` argument is accepted but not used here.
        """
        raw = self.generator(self.kb, conversation_history,turns_left)
        # Blank lines are dropped before the response is parsed.
        non_blank = [ln for ln in raw.splitlines() if ln.strip()]
        utterance, action, price = self.parse_response("\n".join(non_blank))
        return Dialogue(utterance, action, price, "buyer")

class BaselineSeller(BaselineAgent):
    """Baseline agent that negotiates in the seller role."""

    def __init__(self, kb, generator, parser_model):
        super().__init__("seller", kb, generator, parser_model)

    def __call__(self, conversation_history,turns_left, parser_model):
        """Generate the seller's next turn and return it as a ``Dialogue``.

        The ``parser_model`` argument is accepted but not used here.
        """
        raw = self.generator(self.kb, conversation_history,turns_left)
        # Blank lines are dropped before the response is parsed.
        non_blank = [ln for ln in raw.splitlines() if ln.strip()]
        utterance, action, price = self.parse_response("\n".join(non_blank))
        return Dialogue(utterance, action, price, "seller")

## Run Baseline Test Scenarios

In [None]:
import pandas as pd

# combination_config = {
#     'buyers': ['baseline-3B-buyer', 'baseline-3B-buyer-aggressive', 'baseline-3B-buyer-fair', 'baseline-3B-buyer-passive'],
#     'sellers': ['baseline-3B-seller', 'baseline-3B-seller-aggressive', 'baseline-3B-seller-fair', 'baseline-3B-seller-passive']
# }

# buyer_names = combination_config['buyers']
# seller_names = combination_config['sellers']

# for buyer_name in buyer_names:
#     for seller_name in seller_names:
#       buyer_is_cot = 'COT' in buyer_name
#       buyer_size = next(part for part in buyer_name.split('-') if 'B' in part)
#       seller_is_cot = 'COT' in seller_name
#       seller_size = next(part for part in seller_name.split('-') if 'B' in part)

#       buyer_generator = get_buyer_generator(cot=buyer_is_cot, size=buyer_size)
#       seller_generator = get_seller_generator(cot=seller_is_cot, size=seller_size)

#       buyer_agent = BaselineBuyer(kb=None, generator=buyer_generator, parser_model=None)
#       seller_agent = BaselineSeller(kb=None, generator=seller_generator, parser_model=None)

#       Tester(buyer_agent, seller_agent, buyer_name, seller_name).run_negotiations(30)
#       print()


# Agent names follow the pattern "baseline-<size>-<role>[-<personality>]".
combination_config = {
    'buyers': ['baseline-3B-buyer', 'baseline-3B-buyer-aggressive', 'baseline-3B-buyer-fair', 'baseline-3B-buyer-passive'],
    'sellers': ['baseline-3B-seller', 'baseline-3B-seller-aggressive', 'baseline-3B-seller-fair', 'baseline-3B-seller-passive']
}

# Extra instructions handed to the personality-aware generators, keyed by the
# personality suffix of the agent name.
personality_configs = {
    'aggressive': {
        'name': 'aggressive',
        'instructions': 'you should assertively push for reaching your target price'
    },
    'fair': {
        'name': 'fair',
        'instructions': "you should prioritize fairness and try to find a balanced aggreement between both parties."
    },
    'passive': {
        'name': 'passive',
        'instructions': "you should minimize escalating conflict as you want to close the deal as smootly as possible."
    }
}

buyer_names = combination_config['buyers']
seller_names = combination_config['sellers']

for buyer_name in buyer_names:
    for seller_name in seller_names:
        # the plain baseline-vs-baseline pairing is not run by this cell
        if (buyer_name, seller_name) == ('baseline-3B-buyer', 'baseline-3B-seller'):
            continue

        # last name segment is either the role itself or a personality
        buyer_personality = buyer_name.rsplit('-', 1)[-1]
        seller_personality = seller_name.rsplit('-', 1)[-1]

        # first name segment containing "B" is the model size (e.g. "3B")
        buyer_size = next(part for part in buyer_name.split('-') if 'B' in part)
        seller_size = next(part for part in seller_name.split('-') if 'B' in part)

        buyer_generator = (
            get_buyer_generator(cot=False, size=buyer_size)
            if buyer_personality == 'buyer'
            else get_buyer_generator_personality(cot=False, size=buyer_size, personality_dict=personality_configs[buyer_personality])
        )
        seller_generator = (
            get_seller_generator(cot=False, size=seller_size)
            if seller_personality == 'seller'
            else get_seller_generator_personality(cot=False, size=seller_size, personality_dict=personality_configs[seller_personality])
        )

        buyer_agent = BaselineBuyer(kb=None, generator=buyer_generator, parser_model=None)
        seller_agent = BaselineSeller(kb=None, generator=seller_generator, parser_model=None)

        Tester(buyer_agent, seller_agent, buyer_name, seller_name).run_negotiations(30)
        print()

Starting Scenarios . . .


KeyboardInterrupt: 

# **Finetuned**

## Finetuned Generators

In [None]:
def get_buyer_instructions(kb: dict) -> str:
    """Build the buyer's role section of the prompt.

    Args:
        kb: Scenario knowledge base; only ``kb['target_price']`` is read.

    Returns:
        The instruction text with the target price interpolated.
    """
    target_price = kb['target_price']
    instructions = f"""
    ### Your Role as the Buyer:
    - Goal: Negotiate as close to your target price (${target_price}) as possible.
    - Strategy:
        1. Begin with offers below your target price but reasonable enough to engage the seller.
        2. Highlight flaws or alternatives to justify lower offers.
        3. Avoid exceeding your target price, even if pressured.
    - Be respectful but firm. Do not overcommit.

    ### Example Buyer Responses:
    1. Thought: I believe the item is worth $150 based on its flaws.
       Action: offer
       Utterance: I can offer $150 considering the wear and tear.
    2. Thought: The seller's offer aligns with my target price.
       Action: accept
       Utterance: $200 works for me. Let's finalize this deal.
    """
    return instructions

def get_seller_instructions(kb: dict) -> str:
    """Build the seller's role section of the prompt.

    Args:
        kb: Scenario knowledge base; ``kb['listing_price']`` and
            ``kb['target_price']`` are read.

    Returns:
        The instruction text with both prices interpolated.
    """
    listing_price = kb['listing_price']
    target_price = kb['target_price']
    instructions = f"""
    ### Your Role as the Seller:
    - Goal: Maximize the final price while keeping the buyer engaged.
    - Strategy:
        1. Anchor the negotiation around the listing price (${listing_price}).
        2. Justify your price by emphasizing the item's quality or rarity.
        3. Counter offers strategically, staying above your target price (${target_price}).

    ### Example Seller Responses:
    1. Thought: The buyer's offer is too low, but I want to keep negotiating.
       Action: counter
       Utterance: I can't do $150, but I can offer $180 given the item's excellent condition.
    2. Thought: This is my lowest acceptable price; I'm ready to conclude.
       Action: emphasize
       Utterance: This item is priced competitively at $200 due to its rarity.
    """
    return instructions


def get_base_prompt(kb: dict, conversation_history: str, turns_left: int) -> str:
    """Build the role-independent part of the agent prompt.

    Args:
        kb: Scenario knowledge base; ``title``, ``description``,
            ``listing_price`` and ``role`` are read.
        conversation_history: Dialogue so far, inserted verbatim.
        turns_left: Number of turns shown as remaining.

    Returns:
        The scenario summary, the previous conversation and the response
        format instructions as one string.
    """
    item_title = kb['title']
    item_description = kb['description']
    list_price = kb['listing_price']
    role_label = kb['role'].capitalize()
    prompt = f"""
    ### Negotiation Scenario:
    - Item: {item_title}
    - Description: {item_description}
    - List Price: ${list_price}
    - Your Role: {role_label}
    - Turns Left to Conclude: {turns_left}

    ### Previous Conversation:
    {conversation_history}

    ### Response Instructions:
    1. Consider the negotiation context and role-specific goals.
    2. Think aloud about your reasoning before deciding your action.
    3. Respond strictly in the following structured format:
    ---
    Thought: <your reasoning>
    Action: <offer|counter|accept|reject>
    Utterance: <your message>
    (Optional) Price: $<price>
    ---
    Respond below:
    """
    return prompt


def get_parser_prompt(response: str) -> str:
    """Wrap a free-form agent reply in instructions asking for a structured rewrite.

    Args:
        response: The agent's raw reply, inserted verbatim.

    Returns:
        A prompt asking for ``Action`` / ``Utterance`` / optional ``Price`` lines.
    """
    prompt = f"""
    ### Agent's Response (Free-form):
    ---
    {response}
    ---

    ### Parsing Instructions:
    - Extract and structure the response using the following format:
    ---
    Action: <offer|counter|accept|reject>
    Utterance: <the agent's message>
    (Optional) Price: $<price>
    ---
    Respond in this exact format without any additional information.

    Structured Output:
    """
    return prompt

## Load Finetuned Models

In [None]:
from dataclasses import dataclass
from typing import Optional

import os, gc, psutil
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True"
import torch
from pathlib import Path
from typing import Optional, Tuple, Dict
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import PretrainedConfig, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

### ModelLoader Class

In [None]:
class ModelLoader:
    """
    Simplified model loader specifically for Llama-3.2-3B-Instruct finetuning.
    Handles basic model loading, caching, and testing functionality.
    """

    # Share of total device memory offered to the loader through `max_memory`.
    GPU_MEMORY_FRACTION = 0.85
    CPU_MEMORY_FRACTION = 0.70

    def __init__(self, cache_dir: str):
        """
        Args:
            cache_dir: Existing local directory of the base model; used both as
                the model id and as the Hugging Face cache directory.

        Raises:
            ValueError: If ``cache_dir`` does not exist.
        """
        # Ensure the provided path exists
        if not os.path.exists(cache_dir):
            raise ValueError(f"Model path does not exist: {cache_dir}")

        self.MODEL_ID = cache_dir
        self.cache_dir = Path(cache_dir)
        print(f"Using model path: {self.cache_dir}")

        # Track loaded components
        self.model = None
        self.tokenizer = None


    def format_memory_size(self, size_in_bytes):
        """Converts raw bytes into a string with appropriate units (e.g., GB).

        The value is divided by 1024 per step and capped at the ``TB`` unit, so
        anything of 1024 TB or more is still expressed in TB.
        """
        units = ['B', 'KB', 'MB', 'GB', 'TB']
        # stop dividing once the largest unit is reached; dividing again while
        # keeping the "TB" label would under-report the size by a factor of 1024
        for unit in units[:-1]:
            if size_in_bytes < 1024:
                return f"{size_in_bytes:.1f}{unit}"
            size_in_bytes /= 1024
        return f"{size_in_bytes:.1f}{units[-1]}"


    def _build_max_memory(self) -> Dict:
        """Build the `max_memory` mapping: 85% of GPU 0 (if present) and 70% of system RAM."""
        max_memory = {}
        # only GPU index 0 is budgeted; a `0: None` entry on CPU-only hosts is
        # avoided by adding the key only when CUDA is actually available
        if torch.cuda.is_available():
            max_memory[0] = self.format_memory_size(
                torch.cuda.get_device_properties(0).total_memory * self.GPU_MEMORY_FRACTION
            )
        max_memory["cpu"] = self.format_memory_size(
            psutil.virtual_memory().total * self.CPU_MEMORY_FRACTION
        )
        return max_memory


    def load_model_and_tokenizer(
        self,
        device: str = "cuda" if torch.cuda.is_available() else "cpu",
        torch_dtype: torch.dtype = torch.float32,
        local_only: bool = False
    ) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
        """Load base Llama model from web or local (sharded or single) and tokenizer.

        Both objects are cached on the instance, so repeated calls reuse them.

        Args:
            device: Currently unused; placement is decided by ``device_map="auto"``.
            torch_dtype: dtype the model weights are loaded in.
            local_only: When True, only local files are used (no download).

        Returns:
            ``(model, tokenizer)``.

        Raises:
            RuntimeError: If any step of the loading fails.
        """

        try:
            # Load tokenizer first with minimal configuration, without model reference
            if self.tokenizer is None:
                # load minimal config first (`trust_remote_code` is only honoured
                # by the Auto classes, so it is not passed here)
                minimal_config = PretrainedConfig.from_pretrained(
                    self.MODEL_ID,
                    local_files_only=local_only
                )

                # extract only the essential tokenizer attributes
                tokenizer_config = {
                    "model_max_length": getattr(minimal_config, "max_position_embeddings", 4096),
                    "padding_side": "right",
                    "truncation_side": "right",
                    "clean_up_tokenization_spaces": True
                }

                # initialize tokenizer with essential config
                self.tokenizer = AutoTokenizer.from_pretrained(
                    self.MODEL_ID,
                    cache_dir=self.cache_dir,
                    trust_remote_code=True,
                    local_files_only=local_only,
                    use_fast=True,
                    # keep essential configs
                    **tokenizer_config
                )

                # ensure essential token IDs are set
                if self.tokenizer.pad_token is None:
                    self.tokenizer.pad_token = self.tokenizer.eos_token
                    self.tokenizer.pad_token_id = self.tokenizer.eos_token_id

                print("✓ Tokenizer loaded successfully without model reference")

            if self.model is None: # download or load model if not already loaded
                # load model with memory constraints and offloading
                self.model = AutoModelForCausalLM.from_pretrained(
                    pretrained_model_name_or_path=self.MODEL_ID,
                    cache_dir=self.cache_dir,
                    torch_dtype=torch_dtype,
                    device_map="auto", # auto-distribute across devices
                    max_memory=self._build_max_memory(), # apply memory constraints
                    offload_folder="./offload", # enable offloading unused layers to disk
                    trust_remote_code=True,
                    local_files_only=local_only
                )
                print("✓ Model loaded successfully")

            # ensure no lingering cross-references between model and tokenizer
            if hasattr(self.tokenizer, 'config'):
                if hasattr(self.tokenizer.config, 'architectures'):
                    delattr(self.tokenizer.config, 'architectures')
                if hasattr(self.tokenizer.config, '_name_or_path'):
                    delattr(self.tokenizer.config, '_name_or_path')

            return self.model, self.tokenizer

        except Exception as e:
            raise RuntimeError(f"Error loading Llama model: {str(e)}") from e

    def reload_model(self, model_path: str, torch_dtype: torch.dtype = torch.float16) -> AutoModelForCausalLM:
        """Reload a local model (sharded or single) from its specified path, with 4-bit quantization and LoRA config.

        The result is NOT cached on the instance; the caller owns it.

        Args:
            model_path: Path of the checkpoint to load.
            torch_dtype: Compute dtype used by the 4-bit quantization.

        Returns:
            The quantized model wrapped by ``get_peft_model``.

        Raises:
            RuntimeError: If loading, preparation or LoRA wrapping fails.
        """
        try:
            # quantization config (4-bit with double quantization)
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch_dtype,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4"
            )

            # reload model with 4-bit quantization
            model = AutoModelForCausalLM.from_pretrained(
                pretrained_model_name_or_path=model_path,
                quantization_config=quantization_config,
                device_map="auto",
                max_memory=self._build_max_memory(),
                trust_remote_code=True
            )

            # prepare model for k-bit training
            model = prepare_model_for_kbit_training(model)

            # LoRA config parameters
            # NOTE(review): the original note says these were approximated for
            # an 8B model; confirm they are intended for the 3B checkpoint
            peft_config = LoraConfig(
                lora_alpha=16, # scaling factor
                lora_dropout=0.1, # dropout probability
                r=32, # rank of update matrices
                bias="none", # don't train biases
                task_type="CAUSAL_LM", # for language modeling
                target_modules=[
                    "q_proj", "k_proj", "v_proj", "o_proj", # attention
                    "gate_proj", "up_proj", "down_proj" # mlp
                ]
            )

            # apply LoRA
            model = get_peft_model(model, peft_config)

            # print trainable parameters info
            model.print_trainable_parameters()

            return model

        except Exception as e:
            raise RuntimeError(f"Error reloading model from {model_path}: {str(e)}") from e

    def unload_model(self):
        """Unload the base model from memory."""
        if self.model is not None:
            print("Unloading model from memory...")
            self.model = None

        # collect garbage first so the freed tensors can actually be released,
        # and only touch the CUDA API when a GPU is present
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize() # ensure all GPU operations are finished
        print("Memory cleanup completed.")


    def test_model(self) -> bool:
        """Run basic model test to verify functionality.

        Returns:
            True when the greedy reply contains "test successful"; False
            otherwise or when any step raises (the error is printed).
        """
        try:
            # load model if not already loaded
            if self.model is None or self.tokenizer is None:
                self.load_model_and_tokenizer()

            # test message with clear instruction format
            test_message = [{
                "role": "system",
                "content": "You are a helpful assistant. Please respond exactly as instructed."
            }, {
                "role": "user",
                "content": "Please respond with exactly these words: 'test successful'"
            }]

            # tokenize with proper attention mask
            chat_input = self.tokenizer.apply_chat_template(
                test_message,
                tokenize=True,
                add_generation_prompt=True,
                return_tensors="pt"
            )

            # create attention mask
            attention_mask = torch.ones_like(chat_input)

            # move to correct device
            inputs = {
                "input_ids": chat_input.to(self.model.device),
                "attention_mask": attention_mask.to(self.model.device)
            }

            # generate response
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=20,
                    temperature=None, # removed temperature since we're not sampling
                    do_sample=False, # kept deterministic for test
                    top_p=None, # removed top_p since we're not sampling
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # decode response (only the tokens generated after the prompt)
            response = self.tokenizer.batch_decode(
                outputs[:, chat_input.shape[1]:],
                skip_special_tokens=True
            )[0]

            print(f"Test response: {response}")
            return "test successful" in response.lower()

        except Exception as e:
            print(f"Test failed: {str(e)}")
            return False

### ModelManager Class

In [None]:
@dataclass
class ModelConfig:
    """Configuration for a model including path and type information.

    Instances are stored in a name-keyed dict and mutated in place by
    ``ModelManager``: ``model`` is filled when the model is loaded and reset
    to ``None`` when it is unloaded.
    """
    # label of this entry (may differ from its dict key, e.g. "baseline_parser")
    name: str
    # free-form model family/type string, e.g. "llama-3.2-3B-instruct"
    model_type: str
    # checkpoint location handed to the loader's `reload_model`
    path: str
    # the loaded model object, or None while nothing is loaded
    model: Optional[object] = None

class ModelManager:
    """
    Manages model loading/unloading efficiently to minimize VRAM usage.
    Ensures no more than 2 agent models are loaded at any time.

    The entry named ``"parser"`` is exempt from that limit: it is never added
    to ``currently_loaded`` and therefore never evicted, but it can still be
    fetched once and unloaded explicitly.
    """

    # config key that does not count towards the two-agent limit
    PARSER_NAME = "parser"

    def __init__(self, model_configs: Dict[str, ModelConfig], model_loader: ModelLoader):
        """
        Args:
            model_configs: Name -> config mapping; the configs are mutated in
                place as models are loaded and unloaded.
            model_loader: Loader used to obtain the tokenizer and the models.
        """
        self.model_configs = model_configs
        self.model_loader = model_loader
        self.tokenizer = None
        self.currently_loaded = set() # track currently loaded agent models

        # the loader only exposes a combined model+tokenizer call, so the base
        # model is loaded here just to obtain the tokenizer
        _, self.tokenizer = self.model_loader.load_model_and_tokenizer(local_only=True)
        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id

        # unload the fp32 base model immediately
        self.model_loader.unload_model()
        self.cleanup_memory()

    def cleanup_memory(self):
        """Aggressive memory cleanup."""
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.reset_peak_memory_stats()
            torch.cuda.synchronize()

    def _is_loaded(self, model_name: str) -> bool:
        """True when `model_name` has a live model this manager can hand out."""
        config = self.model_configs.get(model_name)
        if config is None or config.model is None:
            return False
        # the parser is never tracked in `currently_loaded`, so its config is
        # the only source of truth for it
        return model_name == self.PARSER_NAME or model_name in self.currently_loaded

    def load_model(self, model_name: str) -> None:
        """
        Loads a model while maintaining the two-model limit.
        Unloads other models if necessary.

        Raises:
            KeyError: If ``model_name`` has no config.
            Exception: Whatever the loader raises; the config is left unloaded.
        """
        if self._is_loaded(model_name):
            return # model already loaded

        # check if we need to unload any agent models
        while len(self.currently_loaded) >= 2:
            to_unload = next(iter(self.currently_loaded)) # get an arbitrary loaded agent model
            self.unload_model(to_unload)

        config = self.model_configs[model_name]

        try: # load model with quantization
            config.model = self.model_loader.reload_model(config.path)
            config.model.eval().to("cuda")
        except Exception:
            # do not leave a half-initialised model attached to the config
            config.model = None
            self.cleanup_memory()
            raise

        if model_name != self.PARSER_NAME:
            self.currently_loaded.add(model_name)

    def unload_model(self, model_name: str) -> None:
        """Unloads a specific model and clean up memory.

        Unknown or already-unloaded names are ignored. The parser can be
        unloaded here even though it is not tracked in ``currently_loaded``.
        """
        # always drop the bookkeeping entry so eviction cannot loop on a stale name
        self.currently_loaded.discard(model_name)

        config = self.model_configs.get(model_name)
        if config is None or config.model is None:
            return

        if hasattr(config.model, 'cpu'): # move to CPU first
            config.model.cpu()
        config.model = None
        self.cleanup_memory()

    def get_model(self, model_name: str) -> torch.nn.Module:
        """Get a model, loading it if necessary."""
        self.load_model(model_name) # no-op when the model is already loaded
        return self.model_configs[model_name].model

### Model Configs

In [None]:
# define directories and model configs
folder_path = '/content/drive/My Drive/CMSC723-Final-Project/'
baseline_model_path = folder_path + 'models/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95'
buyer_finetuned_model_path = folder_path + 'models/models--buyer-finetuned--Llama-3.2-3B-Instruct'
seller_finetuned_model_path = folder_path + 'models/models--seller-finetuned--Llama-3.2-3B-Instruct'
generalist_finetuned_model_path = folder_path + 'models/models--generalist-finetuned--Llama-3.2-3B-Instruct'

# every entry shares the same model type; the "parser" entry reuses the
# baseline checkpoint under its own name
model_configs = {
    key: ModelConfig(name=name, model_type="llama-3.2-3B-instruct", path=path)
    for key, name, path in (
        ("baseline", "baseline", baseline_model_path),
        ("buyer_finetuned", "buyer_finetuned", buyer_finetuned_model_path),
        ("seller_finetuned", "seller_finetuned", seller_finetuned_model_path),
        ("generalist_finetuned", "generalist_finetuned", generalist_finetuned_model_path),
        ("parser", "baseline_parser", baseline_model_path),
    )
}

model_loader = ModelLoader(cache_dir=model_configs["baseline"].path)
device = torch.device("cuda")
loaded_models = []

Using model path: /content/drive/My Drive/CMSC723-Final-Project/models/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95


## Finetuned Agent Classes

In [None]:
class FinetuneAgent:
    """Base agent class for negotiation participants."""
    def __init__(self, config: ModelConfig, role: str, kb: dict, tokenizer: object):
        """
        Args:
            config: Model config whose ``model`` is already loaded.
            role: ``"buyer"`` selects the buyer instructions; any other value
                selects the seller instructions.
            kb: Scenario knowledge base read by the prompt builders.
            tokenizer: Tokenizer shared by the agent model and the parser model.
        """
        self.role = role
        self.config = config
        self.kb = kb
        self.tokenizer = tokenizer
        self.config.model.to("cuda")

    #! PROMPTING
    def truncate_conversation(self, conversation_history: str, max_response_tokens: int = 100) -> str:
        """Truncates conversation history dynamically to fit within model token limits.

        The oldest tokens are dropped first, so the most recent part of the
        history is kept.

        Raises:
            ValueError: If the fixed prompt parts alone leave no room for history.
        """
        # get model's max token limit
        max_tokens = getattr(self.config.model.config, "max_position_embeddings", 8192)

        # tokenize base prompt and role instructions to calculate their size
        base_prompt_tokens = len(self.tokenizer.encode(get_base_prompt(self.kb, "", 0), add_special_tokens=False))
        role_instructions_tokens = len(self.tokenizer.encode(
            get_buyer_instructions(self.kb) if self.role == "buyer" else get_seller_instructions(self.kb),
            add_special_tokens=False
        ))

        # calculate available space for conversation history
        reserved_tokens = 5 # BOS + EOS + any other special tokens (safe margin)
        available_tokens = max_tokens - reserved_tokens - base_prompt_tokens - role_instructions_tokens - max_response_tokens
        if available_tokens <= 0:
            raise ValueError("Prompt components exceed the model's token limit!")

        conversation_tokens = self.tokenizer.encode(conversation_history, add_special_tokens=False) # tokenize history

        if len(conversation_tokens) > available_tokens: # truncate conversation history if necessary and return
            return self.tokenizer.decode(conversation_tokens[-available_tokens:], skip_special_tokens=True)

        return conversation_history # return original history if it fits

    def get_prompt(self, conversation_history: str, turns_left: int) -> str:
        """Stitches together role-specific prompt."""
        truncated_history = self.truncate_conversation(conversation_history) # truncate history if needed

        # get base prompt and role-specific instructions
        base_prompt = get_base_prompt(self.kb, truncated_history, turns_left)
        role_instructions = (get_buyer_instructions if self.role == "buyer"
                                                    else get_seller_instructions)(self.kb)

        # combine role-specific instructions with base prompt
        return f"{role_instructions}\n{base_prompt}"


    #! RESPONSE GENERATION AND PARSING
    def generate_response(self, prompt: str, max_tokens: int = 150) -> str:
        """Generates response to prompt using the model.

        Returns only the newly generated text; the prompt is not echoed.
        """
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            padding=True
        ).to("cuda")

        with torch.no_grad():
            outputs = self.config.model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                do_sample=True,
                temperature=0.7,
                pad_token_id=self.tokenizer.eos_token_id
            )

        response = self.tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:],
                                                          skip_special_tokens=True)
        return response

    @staticmethod
    def _extract_components(text: str) -> dict:
        """Read ``action``/``utterance``/``price`` lines out of `text`; missing ones stay None."""
        components = {"action": None, "utterance": None, "price": None}
        optional_tag = "(optional)"
        for line in text.split("\n"):
            line = line.strip()
            if not line:
                continue
            # the format shown to the model writes the price as "(Optional) Price: ..."
            if line.lower().startswith(optional_tag):
                line = line[len(optional_tag):].strip()
            lowered = line.lower() # used for key matching only, values keep their case
            for key in components:
                if lowered.startswith(f"{key}:"):
                    value = line.split(":", 1)[1].strip()
                    if key == "action":
                        value = value.lower()
                    elif key == "price":
                        try:
                            value = float(value.replace("$", "").replace(",", ""))
                        except ValueError:
                            value = None
                    components[key] = value
                    break
        return components

    def parse_response(self, raw_response: str, parser_model, max_new_tokens: int = 20) -> tuple:
        """Parse raw agent response into structured components (action, utterance, price) using a parser model.

        Only the tokens generated by the parser are parsed, never the echoed
        prompt: the prompt contains the format template, whose placeholder
        lines would otherwise be read as values. Fields the parser did not
        produce are taken from ``raw_response`` itself when it already
        contains them.

        Args:
            raw_response: Free-form reply produced by the agent model.
            parser_model: Model used to restructure the reply.
            max_new_tokens: Generation budget for the parser.

        Returns:
            ``(utterance, action, price)``; any element may be None.
        """

        # prepare for parsing
        parser_prompt = get_parser_prompt(raw_response)

        # generate a structured response using the base model
        inputs = self.tokenizer(
            parser_prompt,
            return_tensors="pt",
            padding=True
        ).to("cuda")

        with torch.no_grad():
            outputs = parser_model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                pad_token_id=self.tokenizer.eos_token_id
            )

        # the returned sequence starts with the prompt tokens; skip them
        prompt_length = inputs["input_ids"].shape[-1]
        parsed_response = self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        # extract components directly from the structured response
        components = self._extract_components(parsed_response)

        # fall back to the agent's own reply for anything the parser left out
        if any(value is None for value in components.values()):
            for key, value in self._extract_components(raw_response).items():
                if components[key] is None:
                    components[key] = value

        # if not components["action"] or not components["utterance"]:
        #     raise ValueError(f"Invalid response format: {parsed_response}")
        return (components["utterance"], components["action"], components["price"])


    #! TURN
    def __call__(self, conversation_history: str, turns_left: int, parser_model) -> Dialogue:
        """Generate and parse agent response."""
        # generate agent response
        prompt = self.get_prompt(conversation_history, turns_left)
        raw_response = self.generate_response(prompt)

        # parse into structured components
        utterance, action, price = self.parse_response(raw_response, parser_model)

        return Dialogue(utterance, action, price, self.role)


class FinetuneBuyer(FinetuneAgent):
    """Finetuned negotiation agent fixed to the "buyer" role."""
    def __init__(self, config: ModelConfig, kb: dict, tokenizer: object):
        super().__init__(config=config, role="buyer", kb=kb, tokenizer=tokenizer)

class FinetuneSeller(FinetuneAgent):
    """Finetuned negotiation agent fixed to the "seller" role."""
    def __init__(self, config: ModelConfig, kb: dict, tokenizer: object):
        super().__init__(config=config, role="seller", kb=kb, tokenizer=tokenizer)

## Test Negotiation Scenario

In [None]:
def cleanup_memory():
    """Run the garbage collector, then flush CUDA caches when a GPU is present."""
    gc.collect()
    if not torch.cuda.is_available():
        return # nothing GPU-side to clean
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.synchronize()

def unload_model(model_name: str) -> None:
    """Detach the model stored on `model_configs[model_name]` and clean up memory.

    Works directly on the module-level `model_configs`; a config whose model
    is already None is handled without error.
    """
    entry = model_configs[model_name]

    # move to CPU first (skipped when there is no model object)
    if hasattr(entry.model, 'cpu'):
        entry.model.cpu()

    del entry.model
    entry.model = None

    cleanup_memory()

# Start from a clean slate: collect garbage, then drop any model objects still
# attached to these two shared config entries.
cleanup_memory()
unload_model('generalist_finetuned')
unload_model('parser')

In [None]:
model_manager = ModelManager(model_configs, model_loader)

try:
    # drop anything a previous run left attached to the shared configs
    unload_model('generalist_finetuned')
    unload_model('parser')
    cleanup_memory()

    # load required models for this negotiation
    generalist_model = model_manager.get_model('generalist_finetuned')
    parser_model = model_manager.get_model('parser')


    # create minimal configs for Agent instances (both roles share one model)
    buyer_agent_config = ModelConfig(
        name='generalist_finetuned',
        model_type=model_manager.model_configs['generalist_finetuned'].model_type,
        path="",
        model=generalist_model
    )

    seller_agent_config = ModelConfig(
        name='generalist_finetuned',
        model_type=model_manager.model_configs['generalist_finetuned'].model_type,
        path="",
        model=generalist_model
    )

    buyer_agent = FinetuneBuyer(config=buyer_agent_config, kb=None, tokenizer=model_manager.tokenizer)
    seller_agent = FinetuneSeller(config=seller_agent_config, kb=None, tokenizer=model_manager.tokenizer)

    tester = Tester(
        buyer_agent,
        seller_agent,
        buyer_name="generalist-3B-buyer",
        seller_name="generalist-3B-seller",
        parser_model=parser_model
    )

    tester.run_negotiations(30)

finally: # cleanup all models, also when the run fails or is interrupted
    for model_name in model_manager.currently_loaded.copy():
        model_manager.unload_model(model_name)

    # "parser" is not part of `currently_loaded`, so the loop above never
    # reaches it; release it through the module-level helper instead
    unload_model('parser')
    model_manager.cleanup_memory()

    # NOTE(review): `generalist_model`, `parser_model` and the agent configs
    # still reference the model objects after this cell; delete them as well
    # if the memory must actually be returned.



The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
You are using a model of type llama to instantiate a model of type . This is not supported for all configurations of models and can yield errors.


✓ Tokenizer loaded successfully without model reference


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



✓ Model loaded successfully
Unloading model from memory...
Memory cleanup completed.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 48,627,712 || all params: 3,261,377,536 || trainable%: 1.4910


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 48,627,712 || all params: 3,261,377,536 || trainable%: 1.4910
Starting Scenarios . . .
Scenarios Completed: 1 / 30
Scenarios Completed: 2 / 30
Scenarios Completed: 3 / 30
Scenarios Completed: 4 / 30
Scenarios Completed: 5 / 30
Scenarios Completed: 6 / 30
Scenarios Completed: 7 / 30
Scenarios Completed: 8 / 30
Scenarios Completed: 9 / 30
Scenarios Completed: 10 / 30
Scenarios Completed: 11 / 30
Scenarios Completed: 12 / 30
Scenarios Completed: 13 / 30
Scenarios Completed: 14 / 30
Scenarios Completed: 15 / 30
Scenarios Completed: 16 / 30
Scenarios Completed: 17 / 30
Scenarios Completed: 18 / 30
Scenarios Completed: 19 / 30
Scenarios Completed: 20 / 30
Scenarios Completed: 21 / 30
Scenarios Completed: 22 / 30
Scenarios Completed: 23 / 30
Scenarios Completed: 24 / 30
Scenarios Completed: 25 / 30
Scenarios Completed: 26 / 30
Scenarios Completed: 27 / 30
Scenarios Completed: 28 / 30
Scenarios Completed: 29 / 30
Scenarios Completed: 30 / 30
Finished Scenarios!
Results Saved 