## NegotiableAI: Enabling Agents to Negotiate Deals using Multi-Agent Reinforcement Learning

## Submitted by:
Sada Kakarla - 50605634 - sadakaka
<br> Shivansh Gupta - 50604127 - sgupta67
<br> Aditi Sinha - 50593917 - asinha25

### Importing libraries

In [97]:
import gymnasium as gym
from gymnasium import spaces 
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


### RL-Driven Automated Buyer-Seller Negotiations

#### Simulates a buyer–seller interaction environment over a fixed number of rounds & observes the rewards received by each agent

In [111]:
class Negotiation_Agent(gym.Env):
    """Turn-based buyer/seller price negotiation environment.

    The episode opens at ``initial_selling_price`` with the buyer to move.
    Each call to ``step`` takes one action from whichever party holds the
    turn: ``1`` accepts the current offer, ``2`` rejects it, and
    ``("Counteroffer", new_price)`` proposes a new price. Rewards are returned
    as a dict with ``'buyer'`` and ``'seller'`` entries.

    ``reset`` must be called before ``step``, ``get_observation`` or
    ``render``; the per-episode state is created there.
    """

    def __init__(self, seller_min_amt, max_rounds=20, initial_selling_price=None,
                 gamma_seller=1.0, gamma_buyer=1.0, shaping_lambda=1.0):
        """Configure the negotiation.

        Args:
            seller_min_amt: Lowest price a buyer counteroffer may propose.
            max_rounds: Number of accepted counteroffers after which the
                episode is terminated with a penalty.
            initial_selling_price: Opening price. When ``None`` it is drawn
                with ``np.random.randint(2000, 10000)``.
            gamma_seller: Multiplier on the seller's reward when a deal is
                accepted.
            gamma_buyer: Multiplier on the buyer's reward when a deal is
                accepted.
            shaping_lambda: Stored on the instance; not read by any method of
                this class.
        """
        super().__init__()
        self.max_rounds = max_rounds
        self.seller_min_amt = seller_min_amt
        if initial_selling_price is None:
            initial_selling_price = np.random.randint(2000, 10000)
        self.initial_selling_price = initial_selling_price
        self.gamma_seller = gamma_seller
        self.gamma_buyer = gamma_buyer
        self.shaping_lambda = shaping_lambda

        # Observation space: [current_offer, round, turn, deal_status]
        low = np.array([seller_min_amt, 0, 0, 0], dtype=np.float32)
        high = np.array([self.initial_selling_price, max_rounds, 1, 1], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # Accept: 1, Reject: 2, Counteroffer: ("Counteroffer", new_price)
        # NOTE(review): step() expects a tuple for counteroffers, which a
        # Discrete(3) sample cannot produce; a sampled 0 is treated as an
        # invalid action.
        self.action_space = spaces.Discrete(3)

    def reset(self):
        """Start a new negotiation and return the initial observation."""
        self.round = 0

        # The opening offer is the initial selling price, which also counts as
        # the seller's last offer (the ceiling for later seller counteroffers).
        self.current_offer = self.initial_selling_price
        self.last_seller_offer = self.initial_selling_price

        # The buyer has not made a counteroffer yet.
        self.last_buyer_offer = None

        self.turn = 0  # 0: Buyer and 1: Seller
        self.deal_status = False
        return self.get_observation()

    def _margin(self):
        """Return initial price minus seller minimum, or 1 when that is 0.

        The substitution keeps the normalised rewards and potentials from
        dividing by zero when both prices coincide.
        """
        margin = self.initial_selling_price - self.seller_min_amt
        return margin if margin != 0 else 1

    def _buyer_potential(self, offer):
        """Return the buyer's savings at ``offer`` as a fraction of the margin."""
        return (self.initial_selling_price - offer) / self._margin()

    def _seller_potential(self, offer):
        """Return the seller's profit at ``offer`` as a fraction of the margin."""
        return (offer - self.seller_min_amt) / self._margin()

    def step(self, action):
        """Apply one action for the party whose turn it is.

        Args:
            action: ``1`` (accept), ``2`` (reject) or
                ``("Counteroffer", new_price)``. Anything else is penalised as
                an invalid action without changing the negotiation state.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``reward`` is a
            dict with ``'buyer'`` and ``'seller'`` keys and ``info`` is an
            empty dict.
        """
        done = False
        info = {}
        reward = {'buyer': 0, 'seller': 0}

        # The round budget was already exhausted before this call.
        if self.round >= self.max_rounds:
            done = True
            reward['buyer'] = -10
            reward['seller'] = -10
            return self.get_observation(), reward, done, info

        if action == 1:
            # Accept: the deal closes at the current offer. The message names
            # the party that actually holds the turn.
            accepting_party = "Buyer" if self.turn == 0 else "Seller"
            print(f"{accepting_party} accepts the offer")
            done = True
            self.deal_status = True
            final_price = self.current_offer
            reward['seller'] = self.gamma_seller * self._seller_potential(final_price)
            reward['buyer'] = self.gamma_buyer * self._buyer_potential(final_price)

        elif action == 2:
            # Reject: negotiation ends with a penalty for both parties.
            done = True
            reward['buyer'] = -5
            reward['seller'] = -5

        elif isinstance(action, tuple) and len(action) >= 2 and action[0] == "Counteroffer":
            new_offer = action[1]
            if self.turn == 0:
                # Buyer's counteroffer must be strictly below the current
                # offer, not below seller_min_amt, and not below the buyer's
                # own previous counteroffer.
                out_of_range = new_offer >= self.current_offer or new_offer < self.seller_min_amt
                backtracking = self.last_buyer_offer is not None and new_offer < self.last_buyer_offer
                if out_of_range or backtracking:
                    reward['buyer'] = -3
                else:
                    self.current_offer = new_offer
                    self.last_buyer_offer = new_offer
                    self.turn = 1  # Pass turn to seller.
                    reward['buyer'] = -1
                    reward['seller'] = -1
                    self.round += 1
            elif self.turn == 1:
                # Seller's counteroffer must be strictly above the buyer's
                # offer and must not exceed the seller's previous offer.
                if new_offer <= self.current_offer or new_offer > self.last_seller_offer:
                    reward['seller'] = -3
                else:
                    self.current_offer = new_offer
                    self.last_seller_offer = new_offer
                    self.turn = 0  # Pass turn to buyer.
                    reward['buyer'] = -1
                    reward['seller'] = -1
                    self.round += 1
        else:
            # Unrecognised action (including malformed tuples).
            reward['buyer'] = -2
            reward['seller'] = -2

        # The counteroffer just applied used up the last round.
        # NOTE(review): the seller penalty here (-20) differs from the early
        # return at the top of this method (-10); confirm which is intended.
        if self.round >= self.max_rounds:
            done = True
            reward['buyer'] = -10
            reward['seller'] = -20

        return self.get_observation(), reward, done, info

    def get_observation(self):
        """Return ``[current_offer, round, turn, deal_status]`` as float32."""
        return np.array([self.current_offer, self.round, self.turn, int(self.deal_status)], dtype=np.float32)

    def render(self, mode='human'):
        """Print the round, whose turn it is, the current offer and deal status."""
        turn_str = "Buyer" if self.turn == 0 else "Seller"
        print(f"Round: {self.round}, Turn: {turn_str}, Current Offer: {self.current_offer}, Deal Status: {self.deal_status}")


# Automated buyer and seller policies for self-play.
def buyer_policy(state, env):
    """Scripted buyer: accept near the seller's floor, otherwise bid lower.

    Args:
        state: Observation whose first entry is the current offer.
        env: Environment exposing ``seller_min_amt`` and ``last_buyer_offer``.

    Returns:
        ``1`` (accept) when the current offer is within 10 of
        ``env.seller_min_amt``; otherwise ``("Counteroffer", price)`` with
        ``price`` drawn below the current offer and no lower than the buyer's
        previous counteroffer (or ``env.seller_min_amt`` if there is none).
    """
    asking_price = int(state[0])

    # Little room left above the seller's minimum: take the deal.
    if asking_price - env.seller_min_amt <= 10:
        return 1

    previous_bid = env.last_buyer_offer
    floor = env.seller_min_amt if previous_bid is None else previous_bid
    return ("Counteroffer", np.random.randint(floor, asking_price))

def seller_policy(state, env):
    """Scripted seller: accept when the gap is small, otherwise counter higher.

    Args:
        state: Observation whose first entry is the current (buyer's) offer.
        env: Environment exposing ``last_seller_offer``.

    Returns:
        ``1`` (accept) when the current offer is within 10 of the seller's
        last offer (this also covers a current offer at or above it);
        otherwise ``("Counteroffer", price)`` with ``price`` drawn strictly
        above the current offer and no higher than the seller's last offer.
    """
    current_offer = int(state[0])
    last_offer = int(env.last_seller_offer)

    # Accept if the gap is too small. Any current_offer >= last_offer has a
    # non-positive gap and is accepted here, so no separate check is needed.
    if last_offer - current_offer <= 10:
        return 1

    # Past the check above the gap exceeds 10, so the range is never empty.
    new_price = np.random.randint(current_offer + 1, last_offer + 1)
    return ("Counteroffer", new_price)


In [106]:
if __name__ == "__main__":

    # Draw the opening price with np.random.randint(800, 1000).
    random_initial_price = np.random.randint(800, 1000)
    print(f"Initial Selling Price: {random_initial_price}")

    Negotiation_RL_Agent_env = Negotiation_Agent(
        seller_min_amt=700,
        max_rounds=20,
        initial_selling_price=random_initial_price,
        gamma_seller=1.0,
        gamma_buyer=1.0,
        shaping_lambda=1.0,
    )

    state = Negotiation_RL_Agent_env.reset()
    Negotiation_RL_Agent_env.render()

    # Self-play: state[2] says who moves (0 = buyer, anything else = seller);
    # the matching scripted policy picks the action until the episode ends.
    done = False
    while not done:
        buyer_to_move = state[2] == 0
        if buyer_to_move:
            action = buyer_policy(state, Negotiation_RL_Agent_env)
        else:
            action = seller_policy(state, Negotiation_RL_Agent_env)
        print(("Buyer" if buyer_to_move else "Seller") + f" action: {action}")

        state, reward, done, info = Negotiation_RL_Agent_env.step(action)
        Negotiation_RL_Agent_env.render()
        print("Reward:", reward)

    print("Negotiation ended.")

Initial Selling Price: 997
Round: 0, Turn: Buyer, Current Offer: 997, Deal Status: False
Buyer action: ('Counteroffer', 924)
Round: 1, Turn: Seller, Current Offer: 924, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller action: ('Counteroffer', 982)
Round: 2, Turn: Buyer, Current Offer: 982, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Buyer action: ('Counteroffer', 944)
Round: 3, Turn: Seller, Current Offer: 944, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller action: ('Counteroffer', 966)
Round: 4, Turn: Buyer, Current Offer: 966, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Buyer action: ('Counteroffer', 959)
Round: 5, Turn: Seller, Current Offer: 959, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller action: 1
Seller accepts the offer
Round: 5, Turn: Seller, Current Offer: 959, Deal Status: True
Reward: {'buyer': 0.12794612794612795, 'seller': 0.8720538720538721}
Negotiation ended.


### Testing the Negotiation Agent by performing random actions

In [113]:
class Negotiation_Random_Agent(gym.Env):
    """Buyer/seller negotiation environment used for manual and random testing.

    The episode opens at ``initial_selling_price`` with the buyer to move.
    ``step`` accepts ``1`` (accept), ``2`` (reject) or a counteroffer tuple
    tagged with either ``0`` or ``"Counteroffer"``, e.g. ``(0, new_price)``.
    Rewards are returned as a dict with ``'buyer'`` and ``'seller'`` entries.

    ``reset`` must be called before ``step``, ``get_observation`` or
    ``render``; the per-episode state is created there.
    """

    def __init__(self, seller_min_amt, max_rounds=20, initial_selling_price=None,
                 gamma_seller=1.0, gamma_buyer=1.0, shaping_lambda=1.0):
        """Configure the negotiation.

        Args:
            seller_min_amt: Lowest price a buyer counteroffer may propose.
            max_rounds: Number of accepted counteroffers after which the
                episode is terminated with a penalty.
            initial_selling_price: Opening price. When ``None`` it is drawn
                with ``np.random.randint(2000, 10000)``.
            gamma_seller: Multiplier on the seller's reward when a deal is
                accepted.
            gamma_buyer: Multiplier on the buyer's reward when a deal is
                accepted.
            shaping_lambda: Stored on the instance; not read by any method of
                this class.
        """
        super().__init__()
        self.max_rounds = max_rounds
        self.seller_min_amt = seller_min_amt
        if initial_selling_price is None:
            initial_selling_price = np.random.randint(2000, 10000)
        self.initial_selling_price = initial_selling_price
        self.gamma_seller = gamma_seller
        self.gamma_buyer = gamma_buyer

        # Observation space: [current_offer, round, turn, deal_status]
        low = np.array([seller_min_amt, 0, 0, 0], dtype=np.float32)
        high = np.array([self.initial_selling_price, max_rounds, 1, 1], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # Accept: 1, Reject: 2, Counteroffer: (0, new_price)
        self.action_space = spaces.Discrete(3)
        self.shaping_lambda = shaping_lambda

    def reset(self):
        """Start a new negotiation and return the initial observation."""
        self.round = 0

        self.current_offer = self.initial_selling_price
        # The opening price doubles as the seller's last offer, i.e. the
        # ceiling for any later seller counteroffer.
        self.last_seller_offer = self.initial_selling_price
        self.turn = 0  # 0: Buyer, 1: Seller.
        self.deal_status = False
        return self.get_observation()

    def _margin(self):
        """Return initial price minus seller minimum, or 1 when that is 0.

        The substitution keeps the normalised rewards and potentials from
        dividing by zero when both prices coincide.
        """
        margin = self.initial_selling_price - self.seller_min_amt
        return margin if margin != 0 else 1

    def _buyer_potential(self, offer):
        """Normalized potential for buyer: higher is better for buyer."""
        return (self.initial_selling_price - offer) / self._margin()

    def _seller_potential(self, offer):
        """Normalized potential for seller: higher is better for seller."""
        return (offer - self.seller_min_amt) / self._margin()

    def step(self, action):
        """Apply one action for the party whose turn it is.

        Args:
            action: ``1`` (accept), ``2`` (reject), or a tuple
                ``(0, new_price)`` / ``("Counteroffer", new_price)``. Anything
                else is penalised as an invalid action without changing the
                negotiation state.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``reward`` is a
            dict with ``'buyer'`` and ``'seller'`` keys and ``info`` is an
            empty dict.
        """
        done = False
        info = {}
        reward = {'buyer': 0, 'seller': 0}

        # The round budget was already exhausted before this call.
        if self.round >= self.max_rounds:
            done = True
            reward['buyer'] = -10
            reward['seller'] = -10
            return self.get_observation(), reward, done, info

        is_counteroffer = (
            isinstance(action, tuple)
            and len(action) >= 2
            and (action[0] == 0 or action[0] == "Counteroffer")
        )

        if action == 1:
            # Accept: the deal closes at the current offer and each side is
            # rewarded with its share of the margin.
            done = True
            self.deal_status = True
            final_price = self.current_offer
            reward['seller'] = self.gamma_seller * self._seller_potential(final_price)
            reward['buyer'] = self.gamma_buyer * self._buyer_potential(final_price)

        elif action == 2:
            # Reject: negotiation ends with penalty.
            done = True
            reward['buyer'] = -5
            reward['seller'] = -5

        elif is_counteroffer:
            new_offer = action[1]
            if self.turn == 0:
                # Buyer's counteroffer: must be strictly lower than
                # current_offer and not below seller_min_amt.
                if new_offer >= self.current_offer or new_offer < self.seller_min_amt:
                    reward['buyer'] = -3  # invalid counteroffer by buyer.
                else:
                    self.current_offer = new_offer
                    self.turn = 1  # Pass turn to seller
                    reward['buyer'] = -1
                    reward['seller'] = -1
                    self.round += 1
            elif self.turn == 1:
                # Seller's counteroffer: must be strictly higher than the
                # buyer's offer and cannot exceed the seller's previous offer.
                if new_offer <= self.current_offer or new_offer > self.last_seller_offer:
                    reward['seller'] = -3  # invalid seller counteroffer.
                else:
                    self.current_offer = new_offer
                    self.last_seller_offer = new_offer
                    self.turn = 0  # Pass turn to buyer
                    reward['buyer'] = -1
                    reward['seller'] = -1
                    self.round += 1
        else:
            # Unrecognised action (including malformed tuples).
            reward['buyer'] = -2
            reward['seller'] = -2

        # The counteroffer just applied used up the last round.
        # NOTE(review): the seller penalty here (-20) differs from the early
        # return at the top of this method (-10); confirm which is intended.
        if self.round >= self.max_rounds:
            done = True
            reward['buyer'] = -10
            reward['seller'] = -20

        return self.get_observation(), reward, done, info

    def get_observation(self):
        """Return ``[current_offer, round, turn, deal_status]`` as float32."""
        return np.array([self.current_offer, self.round, self.turn, int(self.deal_status)], dtype=np.float32)

    def render(self, mode='human'):
        """Print the round, whose turn it is, the current offer and deal status."""
        turn_str = "Buyer" if self.turn == 0 else "Seller"
        print(f"Round: {self.round}, Turn: {turn_str}, Current Offer: {self.current_offer}, Deal Status: {self.deal_status}")

#### Case (i) When the buyer accepts the current offer without any negotiation

In [108]:
if __name__ == "__main__":
    # Draw the initial selling price with np.random.randint(800, 1000).
    random_initial_price = np.random.randint(800, 1000)
    print(f"Initial Selling Price: {random_initial_price}")

    Negotiation_RL_Random_Agent_env = Negotiation_Random_Agent(seller_min_amt=700, max_rounds=20, initial_selling_price=random_initial_price,
                     gamma_seller=1.0, gamma_buyer=1.0)

    state = Negotiation_RL_Random_Agent_env.reset()
    Negotiation_RL_Random_Agent_env.render()

    human_buyer_mode = True  # Buyer is controlled manually by human input
    done = False

    while not done:
        if state[2] == 0 and human_buyer_mode:
            inp = input("Buyer - Enter action: accept (1), reject (2), or counteroffer (0 new_price): ")
            parts = inp.strip().split()
            if not parts:
                # Blank line: re-prompt instead of failing on parts[0].
                print("Invalid input. Try again.")
                continue
            if parts[0] == "0":
                if len(parts) < 2:
                    print("Please provide a new price for your counteroffer.")
                    continue
                try:
                    new_price = int(float(parts[1]))
                except (ValueError, OverflowError):
                    # Non-numeric text, "nan" or "inf".
                    print("Invalid price. Try again.")
                    continue
                action = (0, new_price)
            elif parts[0] in ["1", "2"]:
                action = int(parts[0])
            else:
                print("Invalid input. Try again.")
                continue
        else:
            if state[2] == 1:
                # Seller concedes by 1..delta from its last offer, but stays
                # strictly above the buyer's offer and at or below its own
                # last offer.
                delta = 10
                low_bound = int(state[0]) + 1
                proposed_offer = Negotiation_RL_Random_Agent_env.last_seller_offer - np.random.randint(1, delta+1)
                new_price = max(low_bound, proposed_offer)
                new_price = min(new_price, Negotiation_RL_Random_Agent_env.last_seller_offer)
                # Submit the computed price; without this assignment the
                # buyer's previous action would be replayed on the seller's
                # turn and the loop would never advance.
                action = ("Counteroffer", new_price)
                print(f"Seller proposes counteroffer: {new_price}")
            else:
                action = Negotiation_RL_Random_Agent_env.action_space.sample()

        state, reward, done, info = Negotiation_RL_Random_Agent_env.step(action)
        Negotiation_RL_Random_Agent_env.render()
        print("Reward:", reward)

    print("Negotiation ended.")

Initial Selling Price: 828
Round: 0, Turn: Buyer, Current Offer: 828, Deal Status: False
Round: 0, Turn: Buyer, Current Offer: 828, Deal Status: True
Reward: {'buyer': 0.0, 'seller': 1.0}
Negotiation ended.


#### Case (ii) When the buyer rejects the current offer without any negotiation

In [109]:
if __name__ == "__main__":
    # Draw the initial selling price with np.random.randint(800, 1000).
    random_initial_price = np.random.randint(800, 1000)
    print(f"Initial Selling Price: {random_initial_price}")

    Negotiation_RL_Random_Agent_env = Negotiation_Random_Agent(seller_min_amt=700, max_rounds=20, initial_selling_price=random_initial_price,
                     gamma_seller=1.0, gamma_buyer=1.0)

    state = Negotiation_RL_Random_Agent_env.reset()
    Negotiation_RL_Random_Agent_env.render()

    human_buyer_mode = True  # Buyer is controlled manually by human input
    done = False

    while not done:
        if state[2] == 0 and human_buyer_mode:
            inp = input("Buyer - Enter action: accept (1), reject (2), or counteroffer (0 new_price): ")
            parts = inp.strip().split()
            if not parts:
                # Blank line: re-prompt instead of failing on parts[0].
                print("Invalid input. Try again.")
                continue
            if parts[0] == "0":
                if len(parts) < 2:
                    print("Please provide a new price for your counteroffer.")
                    continue
                try:
                    new_price = int(float(parts[1]))
                except (ValueError, OverflowError):
                    # Non-numeric text, "nan" or "inf".
                    print("Invalid price. Try again.")
                    continue
                action = (0, new_price)
            elif parts[0] in ["1", "2"]:
                action = int(parts[0])
            else:
                print("Invalid input. Try again.")
                continue
        else:
            if state[2] == 1:
                # Seller concedes by 1..delta from its last offer, but stays
                # strictly above the buyer's offer and at or below its own
                # last offer.
                delta = 10
                low_bound = int(state[0]) + 1
                proposed_offer = Negotiation_RL_Random_Agent_env.last_seller_offer - np.random.randint(1, delta+1)
                new_price = max(low_bound, proposed_offer)
                new_price = min(new_price, Negotiation_RL_Random_Agent_env.last_seller_offer)
                # Submit the computed price; without this assignment the
                # buyer's previous action would be replayed on the seller's
                # turn and the loop would never advance.
                action = ("Counteroffer", new_price)
                print(f"Seller proposes counteroffer: {new_price}")
            else:
                action = Negotiation_RL_Random_Agent_env.action_space.sample()

        state, reward, done, info = Negotiation_RL_Random_Agent_env.step(action)
        Negotiation_RL_Random_Agent_env.render()
        print("Reward:", reward)

    print("Negotiation ended.")

Initial Selling Price: 972
Round: 0, Turn: Buyer, Current Offer: 972, Deal Status: False
Round: 0, Turn: Buyer, Current Offer: 972, Deal Status: False
Reward: {'buyer': -5, 'seller': -5}
Negotiation ended.


#### Case (iii) When the buyer counteroffers the seller's price through negotiation

In [117]:
if __name__ == "__main__":
    # Draw the initial selling price with np.random.randint(800, 1000).
    random_initial_price = np.random.randint(800, 1000)
    print(f"Initial Selling Price: {random_initial_price}")

    Negotiation_RL_Random_Agent_env = Negotiation_Random_Agent(seller_min_amt=700, max_rounds=20, initial_selling_price=random_initial_price,
                     gamma_seller=1.0, gamma_buyer=1.0)

    state = Negotiation_RL_Random_Agent_env.reset()
    Negotiation_RL_Random_Agent_env.render()

    human_buyer_mode = True  # Buyer is controlled manually by human input
    done = False

    while not done:
        if state[2] == 0 and human_buyer_mode:
            inp = input("Buyer - Enter action: accept (1), reject (2), or counteroffer (0 new_price): ")
            parts = inp.strip().split()
            if not parts:
                # Blank line: re-prompt instead of failing on parts[0].
                print("Invalid input. Try again.")
                continue
            if parts[0] == "0":
                if len(parts) < 2:
                    print("Please provide a new price for your counteroffer.")
                    continue
                try:
                    new_price = int(float(parts[1]))
                except (ValueError, OverflowError):
                    # Non-numeric text, "nan" or "inf".
                    print("Invalid price. Try again.")
                    continue
                action = (0, new_price)
            elif parts[0] in ["1", "2"]:
                action = int(parts[0])
            else:
                print("Invalid input. Try again.")
                continue
        else:
            if state[2] == 1:
                # Seller concedes by 1..delta from its last offer, but stays
                # strictly above the buyer's offer and at or below its own
                # last offer.
                delta = 10
                low_bound = int(state[0]) + 1
                proposed_offer = Negotiation_RL_Random_Agent_env.last_seller_offer - np.random.randint(1, delta+1)
                new_price = max(low_bound, proposed_offer)
                new_price = min(new_price, Negotiation_RL_Random_Agent_env.last_seller_offer)
                action = ("Counteroffer", new_price)
                print(f"Seller proposes counteroffer: {new_price}")
            else:
                action = Negotiation_RL_Random_Agent_env.action_space.sample()

        state, reward, done, info = Negotiation_RL_Random_Agent_env.step(action)
        Negotiation_RL_Random_Agent_env.render()
        print("Reward:", reward)

    print("Negotiation ended.")

Initial Selling Price: 850
Round: 0, Turn: Buyer, Current Offer: 850, Deal Status: False
Round: 1, Turn: Seller, Current Offer: 750, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller proposes counteroffer: 848
Round: 2, Turn: Buyer, Current Offer: 848, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Round: 3, Turn: Seller, Current Offer: 780, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller proposes counteroffer: 840
Round: 4, Turn: Buyer, Current Offer: 840, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Round: 5, Turn: Seller, Current Offer: 800, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller proposes counteroffer: 837
Round: 6, Turn: Buyer, Current Offer: 837, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Round: 7, Turn: Seller, Current Offer: 820, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller proposes counteroffer: 836
Round: 8, Turn: Buyer, Current Offer: 836, Deal Status: False
Reward: {'buyer': -1, '

#### As the negotiation above shows, the seller proposes 831 towards the end of the rounds and the buyer accepts it; the seller's reward (0.8733) is higher than the buyer's (0.1266).