In [6]:
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


In [7]:
from gymnasium import spaces 

Negotiation environment: simulates a turn-based buyer–seller interaction lasting at most `max_rounds` rounds (it ends earlier on accept or reject), and tracks the essential elements: whose turn it is, the current offer, and each party's reward.

In [8]:
class nego(gym.Env):
    """Turn-based buyer/seller price negotiation environment.

    The episode opens with the seller's target price on the table and the
    buyer to move. The acting party may accept the standing offer (action
    ``1``), walk away (action ``2``) or counter with a new price (action
    ``(0, new_price)``). Rewards are reported as a dict with ``'buyer'`` and
    ``'seller'`` entries.

    Note: ``reset`` returns only the observation and ``step`` returns the
    4-tuple ``(obs, reward, done, info)``; the driver code in this notebook
    relies on those shapes.
    """

    def __init__(self, seller_min_amt, max_rounds=20, initial_selling_price=None,
                 gamma_seller=1.0, gamma_buyer=1.0, shaping_lambda=1.0):
        """
        Parameters:
          seller_min_amt: The seller's true cost (floor); the seller won't sell for less.
          max_rounds: Maximum rounds of negotiation.
          initial_selling_price: The seller's target price (ceiling for seller counteroffers).
              When None, a random integer in [2000, 9999] is drawn.
          gamma_seller: Reward multiplier for seller on acceptance.
          gamma_buyer: Reward multiplier for buyer on acceptance.
          shaping_lambda: Stored on the instance; no method of this class reads it.
        """
        # Zero-argument super() keeps working when the notebook re-declares the class.
        super().__init__()
        self.max_rounds = max_rounds
        self.seller_min_amt = seller_min_amt
        if initial_selling_price is None:
            # randint's upper bound is exclusive, so the draw is in [2000, 9999].
            initial_selling_price = np.random.randint(2000, 10000)
        self.initial_selling_price = initial_selling_price
        self.gamma_seller = gamma_seller
        self.gamma_buyer = gamma_buyer
        self.shaping_lambda = shaping_lambda

        # Observation space: [current_offer, round, turn, deal_status]
        low = np.array([seller_min_amt, 0, 0, 0], dtype=np.float32)
        high = np.array([self.initial_selling_price, max_rounds, 1, 1], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # Action space (conceptually):
        # Accept: 1, Reject: 2, Counteroffer: (0, new_price)
        # Discrete(3) is a placeholder: a sampled bare 0 is not a valid
        # counteroffer and is penalised by step() as an unrecognised action.
        self.action_space = spaces.Discrete(3)

    def reset(self):
        """Start a new negotiation and return the initial observation."""
        self.round = 0
        # Start at the target price.
        self.current_offer = self.initial_selling_price
        # Record seller's last counteroffer (initially, the target).
        self.last_seller_offer = self.initial_selling_price
        self.turn = 0  # 0: Buyer, 1: Seller.
        self.deal_status = False
        return self.get_observation()

    def _max_margin(self):
        """Return target price minus seller floor, or 1 when the two are equal.

        Substituting 1 keeps every normalisation in this class free of
        division by zero.
        """
        margin = self.initial_selling_price - self.seller_min_amt
        return 1 if margin == 0 else margin

    def _buyer_potential(self, offer):
        """Normalized potential for buyer: higher is better for buyer."""
        return (self.initial_selling_price - offer) / self._max_margin()

    def _seller_potential(self, offer):
        """Normalized potential for seller: higher is better for seller."""
        return (offer - self.seller_min_amt) / self._max_margin()

    def step(self, action):
        """Apply one action for the party whose turn it is.

        Parameters:
          action: ``1`` (accept the standing offer), ``2`` (reject and end),
              or a tuple ``(0, new_price)`` (counteroffer). Anything else,
              including a tuple with no price, is an unrecognised action.

        Returns:
          ``(observation, reward, done, info)`` where ``reward`` is a dict
          with ``'buyer'`` and ``'seller'`` entries and ``info`` is an empty dict.
        """
        done = False
        info = {}
        reward = {'buyer': 0, 'seller': 0}

        # The round budget was already exhausted before this call.
        if self.round >= self.max_rounds:
            done = True
            reward['buyer'] = -10
            reward['seller'] = -10
            return self.get_observation(), reward, done, info

        # Accept action: negotiation ends at the standing offer.
        if action == 1:
            done = True
            self.deal_status = True
            final_price = self.current_offer

            max_margin = self._max_margin()
            seller_profit_norm = (final_price - self.seller_min_amt) / max_margin
            buyer_savings_norm = (self.initial_selling_price - final_price) / max_margin
            reward['seller'] = self.gamma_seller * seller_profit_norm
            reward['buyer'] = self.gamma_buyer * buyer_savings_norm

        # Reject action: negotiation ends with penalty.
        elif action == 2:
            done = True
            reward['buyer'] = -5
            reward['seller'] = -5

        # Counteroffer action: a tuple (0, new_price). The length check stops
        # `()` or `(0,)` from raising IndexError; they are penalised below.
        elif isinstance(action, tuple) and len(action) >= 2 and action[0] == 0:
            new_offer = action[1]
            if self.turn == 0:
                # Buyer's counteroffer: strictly below the standing offer and
                # not below the seller's floor. The positive range test also
                # rejects NaN, which would slip through negated comparisons.
                if self.seller_min_amt <= new_offer < self.current_offer:
                    self.current_offer = new_offer
                    self.turn = 1  # Pass turn to seller.
                    reward['buyer'] = -1
                    reward['seller'] = -1
                    self.round += 1
                else:
                    reward['buyer'] = -3  # invalid counteroffer by buyer.
            elif self.turn == 1:
                # Seller's counteroffer: strictly above the buyer's standing
                # bid and no higher than the seller's own previous offer.
                if self.current_offer < new_offer <= self.last_seller_offer:
                    self.current_offer = new_offer
                    self.last_seller_offer = new_offer  # update seller's last counteroffer.
                    self.turn = 0  # Pass turn to buyer.
                    reward['buyer'] = -1
                    reward['seller'] = -1
                    self.round += 1
                else:
                    reward['seller'] = -3  # invalid seller counteroffer.
        else:
            # Unrecognised action: both sides are penalised, state is unchanged.
            reward['buyer'] = -2
            reward['seller'] = -2

        # The counteroffer just made used up the last round.
        # NOTE(review): the seller gets -20 here but -10 in the early-exit
        # branch above; confirm the asymmetry is intentional.
        if self.round >= self.max_rounds:
            done = True
            reward['buyer'] = -10
            reward['seller'] = -20

        return self.get_observation(), reward, done, info

    def get_observation(self):
        """Return ``[current_offer, round, turn, deal_status]`` as a float32 array."""
        return np.array([self.current_offer, self.round, self.turn, int(self.deal_status)], dtype=np.float32)

    def render(self, mode='human'):
        """Print a one-line summary of the negotiation state (``mode`` is unused)."""
        turn_str = "Buyer" if self.turn == 0 else "Seller"
        print(f"Round: {self.round}, Turn: {turn_str}, Current Offer: {self.current_offer}, Deal Status: {self.deal_status}")

In [4]:
def parse_buyer_action(raw):
    """Turn one line of buyer input into an environment action.

    Parameters:
      raw: Text typed by the user: ``"1"`` (accept), ``"2"`` (reject) or
          ``"0 <price>"`` (counteroffer).

    Returns:
      ``(action, error)``. On success ``action`` is ``1``, ``2`` or
      ``(0, price)`` and ``error`` is None; on failure ``action`` is None and
      ``error`` is the message to show the user.
    """
    parts = raw.strip().split()
    # Blank input used to crash on parts[0]; treat it like any other bad input.
    if not parts:
        return None, "Invalid input. Try again."
    if parts[0] == "0":
        if len(parts) < 2:
            return None, "Please provide a new price for your counteroffer."
        try:
            new_price = int(float(parts[1]))
        except (ValueError, OverflowError):
            # ValueError: not a number (or "nan"); OverflowError: "inf".
            return None, "Invalid price. Try again."
        return (0, new_price), None
    if parts[0] in ("1", "2"):
        return int(parts[0]), None
    return None, "Invalid input. Try again."


if __name__ == "__main__":
    # Draw the seller's target price at random from [800, 999].
    random_initial_price = np.random.randint(800, 1000)
    print(f"Initial Selling Price (target): {random_initial_price}")
    # The seller's floor is 700, below every possible target price.
    env = nego(seller_min_amt=700, max_rounds=20, initial_selling_price=random_initial_price,
                     gamma_seller=1.0, gamma_buyer=1.0)

    state = env.reset()
    env.render()

    human_buyer_mode = True  # Buyer controlled by human input.
    done = False

    while not done:
        if state[2] == 0 and human_buyer_mode:
            inp = input("Buyer - Enter action: accept (1), reject (2), or counteroffer (0 new_price): ")
            action, problem = parse_buyer_action(inp)
            if problem is not None:
                print(problem)
                continue
        else:
            if state[2] == 1:
                # Scripted seller: shave 1..delta off its previous offer, but
                # stay strictly above the buyer's bid and never exceed that
                # previous offer.
                delta = 10
                low_bound = int(state[0]) + 1
                proposed_offer = env.last_seller_offer - np.random.randint(1, delta+1)
                new_price = max(low_bound, proposed_offer)
                new_price = min(new_price, env.last_seller_offer)
                action = (0, new_price)
                print(f"Seller proposes counteroffer: {new_price}")
            else:
                # Buyer's turn with human mode off: sample from the placeholder space.
                action = env.action_space.sample()

        state, reward, done, info = env.step(action)
        env.render()
        print("Reward:", reward)

    print("Negotiation ended.")

Initial Selling Price (target): 911
Round: 0, Turn: Buyer, Current Offer: 911, Deal Status: False
Round: 1, Turn: Seller, Current Offer: 850, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller proposes counteroffer: 902
Round: 2, Turn: Buyer, Current Offer: 902, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Round: 3, Turn: Seller, Current Offer: 860, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller proposes counteroffer: 898
Round: 4, Turn: Buyer, Current Offer: 898, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Round: 5, Turn: Seller, Current Offer: 870, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller proposes counteroffer: 892
Round: 6, Turn: Buyer, Current Offer: 892, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Round: 7, Turn: Seller, Current Offer: 875, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller proposes counteroffer: 889
Round: 8, Turn: Buyer, Current Offer: 889, Deal Status: False
Reward: {'buye

In [31]:
class nego(gym.Env):
    """Turn-based buyer/seller price negotiation environment.

    NOTE: this cell re-declares ``nego``; running it rebinds the name and
    shadows the class declared in an earlier cell of this notebook.

    The episode opens with the seller's target price on the table and the
    buyer to move. The acting party may accept the standing offer (action
    ``1``), walk away (action ``2``) or counter with a new price (action
    ``(0, new_price)``). Rewards are reported as a dict with ``'buyer'`` and
    ``'seller'`` entries.

    ``reset`` returns only the observation and ``step`` returns the 4-tuple
    ``(obs, reward, done, info)``; the driver code in this notebook relies on
    those shapes.
    """

    def __init__(self, seller_min_amt, max_rounds=20, initial_selling_price=None,
                 gamma_seller=1.0, gamma_buyer=1.0, shaping_lambda=1.0):
        """
        Parameters:
          seller_min_amt: The seller's true cost (floor); the seller won't sell for less.
          max_rounds: Maximum rounds of negotiation.
          initial_selling_price: The seller's target price (ceiling for seller counteroffers).
              When None, a random integer in [2000, 9999] is drawn.
          gamma_seller: Reward multiplier for seller on acceptance.
          gamma_buyer: Reward multiplier for buyer on acceptance.
          shaping_lambda: Stored on the instance; no method of this class reads it.
        """
        # Zero-argument super() keeps working when the notebook re-declares the class.
        super().__init__()
        self.max_rounds = max_rounds
        self.seller_min_amt = seller_min_amt
        if initial_selling_price is None:
            # randint's upper bound is exclusive, so the draw is in [2000, 9999].
            initial_selling_price = np.random.randint(2000, 10000)
        self.initial_selling_price = initial_selling_price
        self.gamma_seller = gamma_seller
        self.gamma_buyer = gamma_buyer
        self.shaping_lambda = shaping_lambda

        # Observation space: [current_offer, round, turn, deal_status]
        low = np.array([seller_min_amt, 0, 0, 0], dtype=np.float32)
        high = np.array([self.initial_selling_price, max_rounds, 1, 1], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # Action space (conceptually):
        # Accept: 1, Reject: 2, Counteroffer: (0, new_price)
        # Discrete(3) is a placeholder for the integer actions: a sampled bare
        # 0 is not a valid counteroffer and step() penalises it as unrecognised.
        self.action_space = spaces.Discrete(3)

    def reset(self):
        """Start a new negotiation and return the initial observation."""
        self.round = 0
        # Start at the target price.
        self.current_offer = self.initial_selling_price
        # Record seller's last counteroffer (initially, the target).
        self.last_seller_offer = self.initial_selling_price
        self.turn = 0  # 0: Buyer, 1: Seller.
        self.deal_status = False
        return self.get_observation()

    def _max_margin(self):
        """Return target price minus seller floor, or 1 when the two are equal.

        Substituting 1 keeps every normalisation in this class free of
        division by zero.
        """
        margin = self.initial_selling_price - self.seller_min_amt
        return 1 if margin == 0 else margin

    def _buyer_potential(self, offer):
        """Normalized potential for buyer: higher is better for buyer."""
        return (self.initial_selling_price - offer) / self._max_margin()

    def _seller_potential(self, offer):
        """Normalized potential for seller: higher is better for seller."""
        return (offer - self.seller_min_amt) / self._max_margin()

    def step(self, action):
        """Apply one action for the party whose turn it is.

        Parameters:
          action: ``1`` (accept the standing offer), ``2`` (reject and end),
              or a tuple ``(0, new_price)`` (counteroffer). Anything else,
              including a tuple with no price, is an unrecognised action.

        Returns:
          ``(observation, reward, done, info)`` where ``reward`` is a dict
          with ``'buyer'`` and ``'seller'`` entries and ``info`` is an empty dict.
        """
        done = False
        info = {}
        reward = {'buyer': 0, 'seller': 0}

        # The round budget was already exhausted before this call.
        if self.round >= self.max_rounds:
            done = True
            reward['buyer'] = -10
            reward['seller'] = -10
            return self.get_observation(), reward, done, info

        # Accept action: negotiation ends at the standing offer.
        if action == 1:
            done = True
            self.deal_status = True
            final_price = self.current_offer

            max_margin = self._max_margin()
            seller_profit_norm = (final_price - self.seller_min_amt) / max_margin
            buyer_savings_norm = (self.initial_selling_price - final_price) / max_margin
            reward['seller'] = self.gamma_seller * seller_profit_norm
            reward['buyer'] = self.gamma_buyer * buyer_savings_norm

        # Reject action: negotiation ends with penalty.
        elif action == 2:
            done = True
            reward['buyer'] = -5
            reward['seller'] = -5

        # Counteroffer action: a tuple (0, new_price). The length check stops
        # `()` or `(0,)` from raising IndexError; they are penalised below.
        elif isinstance(action, tuple) and len(action) >= 2 and action[0] == 0:
            new_offer = action[1]
            if self.turn == 0:
                # Buyer's counteroffer: strictly below the standing offer and
                # not below the seller's floor. The positive range test also
                # rejects NaN, which would slip through negated comparisons.
                if self.seller_min_amt <= new_offer < self.current_offer:
                    self.current_offer = new_offer
                    self.turn = 1  # Pass turn to seller.
                    reward['buyer'] = -1
                    reward['seller'] = -1
                    self.round += 1
                else:
                    reward['buyer'] = -3  # invalid counteroffer by buyer.
            elif self.turn == 1:
                # Seller's counteroffer: strictly above the buyer's standing
                # bid and no higher than the seller's own previous offer.
                if self.current_offer < new_offer <= self.last_seller_offer:
                    self.current_offer = new_offer
                    self.last_seller_offer = new_offer  # update seller's last counteroffer.
                    self.turn = 0  # Pass turn to buyer.
                    reward['buyer'] = -1
                    reward['seller'] = -1
                    self.round += 1
                else:
                    reward['seller'] = -3  # invalid seller counteroffer.
        else:
            # Unrecognised action: both sides are penalised, state is unchanged.
            reward['buyer'] = -2
            reward['seller'] = -2

        # The counteroffer just made used up the last round.
        # NOTE(review): the seller gets -20 here but -10 in the early-exit
        # branch above; confirm the asymmetry is intentional.
        if self.round >= self.max_rounds:
            done = True
            reward['buyer'] = -10
            reward['seller'] = -20

        return self.get_observation(), reward, done, info

    def get_observation(self):
        """Return ``[current_offer, round, turn, deal_status]`` as a float32 array."""
        return np.array([self.current_offer, self.round, self.turn, int(self.deal_status)], dtype=np.float32)

    def render(self, mode='human'):
        """Print a one-line summary of the negotiation state (``mode`` is unused)."""
        turn_str = "Buyer" if self.turn == 0 else "Seller"
        print(f"Round: {self.round}, Turn: {turn_str}, Current Offer: {self.current_offer}, Deal Status: {self.deal_status}")

In [32]:
def next_buyer_bid(current_offer, last_buyer_bid, buyer_target, update_fraction):
    """Compute the scripted buyer's next bid.

    Parameters:
      current_offer: The price currently on the table.
      last_buyer_bid: The buyer's previous bid, or None if it has not bid yet.
      buyer_target: The price the buyer is aiming for.
      update_fraction: Fraction of the remaining gap to close per move.

    Returns:
      The opening bid (buyer_target moved ``update_fraction`` of the way
      toward current_offer, truncated to int) when there is no previous bid.
      Otherwise the previous bid raised by at least 1, capped at
      ``current_offer - 1``.
    """
    if last_buyer_bid is None:
        return int(buyer_target + update_fraction * (current_offer - buyer_target))
    raise_by = max(1, int(update_fraction * (current_offer - last_buyer_bid)))
    bid = last_buyer_bid + raise_by
    if bid >= current_offer:
        bid = current_offer - 1
    return bid


def next_seller_offer(last_seller_offer, buyer_bid, update_fraction):
    """Compute the scripted seller's next offer.

    The seller lowers its previous offer by ``update_fraction`` of the gap to
    the buyer's bid (at least 1), but never to or below that bid.
    """
    cut_by = max(1, int(update_fraction * (last_seller_offer - buyer_bid)))
    offer = last_seller_offer - cut_by
    if offer <= buyer_bid:
        offer = buyer_bid + 1
    return offer


if __name__ == "__main__":
    # Set parameters for smaller updates.
    update_fraction = 0.2    # Fraction to update bids (lower value means smaller steps).
    accept_threshold = 5     # Auto-accept if the gap is less than or equal to this value.
    buyer_target_fraction = 0.3  # Buyer aims this far above the seller's floor.

    # Draw the seller's target price at random from [800, 999].
    random_initial_price = np.random.randint(800, 1000)
    print(f"Initial Selling Price (target): {random_initial_price}")
    # Example: set seller_min_amt to 700.
    env = nego(seller_min_amt=700, max_rounds=20, initial_selling_price=random_initial_price,
               gamma_seller=1.0, gamma_buyer=1.0)

    state = env.reset()
    env.render()

    done = False

    # Keep track of the buyer's last bid.
    last_buyer_bid = None
    # Precompute buyer's target price.
    buyer_target = env.seller_min_amt + buyer_target_fraction * (env.initial_selling_price - env.seller_min_amt)

    while not done:
        if state[2] == 0:  # Buyer's turn.
            candidate = next_buyer_bid(env.current_offer, last_buyer_bid, buyer_target, update_fraction)
            if env.current_offer - candidate <= accept_threshold:
                print("Buyer accepts because offers are close.")
                action = 1  # Accept the deal.
            else:
                print(f"Buyer proposes counteroffer: {candidate}")
                action = (0, candidate)
                last_buyer_bid = candidate
        elif state[2] == 1:  # Seller's turn.
            # On the seller's turn the standing offer is the buyer's accepted
            # bid, so read it from the environment instead of from a local
            # that may be None or out of sync.
            buyer_bid = env.current_offer
            candidate = next_seller_offer(env.last_seller_offer, buyer_bid, update_fraction)
            if candidate - buyer_bid <= accept_threshold:
                print("Seller accepts because offers are close.")
                action = 1  # Accept the deal.
            else:
                print(f"Seller proposes counteroffer: {candidate}")
                # env.step() records the seller's offer once it has validated
                # it; the script no longer writes env.last_seller_offer itself.
                action = (0, candidate)
        else:
            # Fallback for an unexpected turn value.
            action = env.action_space.sample()

        state, reward, done, info = env.step(action)
        env.render()
        print("Reward:", reward)

    print("Negotiation ended.")

Initial Selling Price (target): 812
Round: 0, Turn: Buyer, Current Offer: 812, Deal Status: False
Buyer proposes counteroffer: 749
Round: 1, Turn: Seller, Current Offer: 749, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller proposes counteroffer: 800
Round: 2, Turn: Buyer, Current Offer: 800, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Buyer proposes counteroffer: 759
Round: 3, Turn: Seller, Current Offer: 759, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller proposes counteroffer: 792
Round: 4, Turn: Buyer, Current Offer: 792, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Buyer proposes counteroffer: 765
Round: 5, Turn: Seller, Current Offer: 765, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Seller proposes counteroffer: 787
Round: 6, Turn: Buyer, Current Offer: 787, Deal Status: False
Reward: {'buyer': -1, 'seller': -1}
Buyer proposes counteroffer: 769
Round: 7, Turn: Seller, Current Offer: 769, Deal Status: False
Reward: {'buye