In [2]:
pip install pandas

Defaulting to user installation because normal site-packages is not writeable
Collecting pandas
  Downloading pandas-2.2.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.3-cp312-cp312-win_amd64.whl (11.5 MB)
   ---------------------------------------- 0.0/11.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.5 MB ? eta -:--:--
    --------------------------------------- 0.3/11.5 MB ? eta -:--:--
    --------------------------------------- 0.3/11.5 MB ? eta -:--:--
    --------------------------------------- 0.3/11.5 MB ? eta -:--:--
   - -------------------------------------- 0.5/11.5 MB 493.7 kB/s eta 0:00:23
   - -------------------------------------- 0.5/11.5 MB 493.7 kB/s eta 0:00:23
   -- ------------------------------------- 0.8/11.5 MB 

In [3]:
import pandas as pd

In [None]:
# Load the three experiment tables into DataFrames.
# The same files are re-read further down as the wholesale (curr_data),
# buyback (curr_data_bb) and revenue-sharing (curr_data_rs) histories.
curr_data = pd.read_csv('../Notebooks/experiment/curr_data.csv')
curr_data_bb = pd.read_csv('../Notebooks/experiment/curr_data_bb.csv')
curr_data_rs = pd.read_csv('../Notebooks/experiment/curr_data_rs.csv')

In [11]:
# Preview the first rows of the table loaded from curr_data_rs.csv.
curr_data_rs.head()

Unnamed: 0,Experiment,Period,Player ID,Demand,Wholesale p.,Revenue Share,Player ID.1,Stock,Sales,Leftovers,...,Expected Sales,Expected_Leftovers,Expected Retailer Profit,Expected Mfg Profit,Expected Mfg. Profit Share,Predicted Sales,Predicted Leftovers,Predicted Retailer Profit,Predicted Mfg Profit,Predicted Mfg. Profit Share
0,1,1,Manufacturer 1,122,3,5,Retailer 1,90,90,0,...,82.2,24.8,305.4,411.0,0.573702,91.04,15.96,316.28,455.2,0.590035
1,1,2,Manufacturer 1,144,0,8,Retailer 1,150,144,6,...,100.5,49.5,402.0,354.0,0.468254,100.5,49.5,402.0,354.0,0.468254
2,1,3,Manufacturer 1,57,2,7,Retailer 1,0,0,0,...,0.0,110.0,0.0,0.0,,92.3,17.7,241.5,536.1,0.689429
3,1,4,Manufacturer 1,149,2,6,Retailer 1,100,100,0,...,87.75,29.25,326.5,426.5,0.566401,94.89,22.11,335.34,452.34,0.574269
4,1,5,Manufacturer 1,135,3,4,Retailer 1,110,110,0,...,92.3,19.7,408.4,369.2,0.474794,93.09,18.91,408.72,372.36,0.476725


In [15]:


# Define categorization thresholds
def categorize_behavior(row, threshold=15):
    """
    Label one record by how far the ordered stock deviates from the optimum.

    :param row: mapping-like record (for example a DataFrame row passed by
        ``DataFrame.apply(..., axis=1)``) that provides the keys 'Stock' and
        'Under/Overstock_wrt_Optimal'.
    :param threshold: half-width of the band around zero deviation that still
        counts as 'Optimal'. Defaults to 15, the value that was previously
        hard-coded, so existing callers are unaffected.
    :return: 'Reject' when no stock was ordered, otherwise
        'Significant Underorder', 'Optimal' or 'Overorder' depending on the
        deviation, or 'Unknown' when the deviation cannot be compared.
    :raises KeyError: if either required key is missing from ``row``.
    """
    deviation = row["Under/Overstock_wrt_Optimal"]  # Deviation from optimal (expected sales)

    # An order of zero is treated as a rejected contract, whatever the deviation.
    if row['Stock'] == 0:
        return 'Reject'
    if deviation < -threshold:
        return 'Significant Underorder'
    if -threshold <= deviation <= threshold:
        return 'Optimal'
    if deviation > threshold:
        return 'Overorder'

    # Reached only when every comparison above is False, e.g. a NaN deviation.
    return 'Unknown'

# Apply categorization to each dataset
# categorize_behavior is evaluated once per row (axis=1) and the resulting label
# is stored in a new 'Behavioral Category' column on each frame.
curr_data['Behavioral Category'] = curr_data.apply(categorize_behavior, axis=1)
curr_data_bb['Behavioral Category'] = curr_data_bb.apply(categorize_behavior, axis=1)
curr_data_rs['Behavioral Category'] = curr_data_rs.apply(categorize_behavior, axis=1)

# Write the labelled frames back to the same CSV paths they were loaded from,
# without the DataFrame index; the files on disk are overwritten in place.
curr_data.to_csv('../Notebooks/experiment/curr_data.csv', index = False)
curr_data_bb.to_csv('../Notebooks/experiment/curr_data_bb.csv', index = False)
curr_data_rs.to_csv('../Notebooks/experiment/curr_data_rs.csv', index = False)


In [None]:
# Redefine the environment class and test setup
import numpy as np
from gym import Env, spaces

class SupplyChainEnv(Env):
    """
    Custom Environment for the supply chain game.
    - Manufacturer sets contract parameters.
    - Retailer decides order quantities.
    - Rewards are profits based on contract type.

    The state is always the 6-element array
    [demand, w, b, r, retailer profit, manufacturer profit], both right after
    ``reset()`` (contract terms and profits are zero) and after ``step()``.
    """
    def __init__(self, contract_type="wholesale"):
        """
        Build the action/observation spaces for the chosen contract and reset.

        :param contract_type: 'wholesale', 'buyback' or 'revenue-sharing'.
        :raises ValueError: for any other contract type.
        """
        super(SupplyChainEnv, self).__init__()
        self.contract_type = contract_type
        self.max_stock = 150  # Maximum stock level allowed
        self.max_price = 12  # Maximum price allowed
        self.max_rounds = 40  # Maximum number of rounds

        # Manufacturer action: a single wholesale price, or a
        # (wholesale, buyback) / (wholesale, revenue share) pair.
        if self.contract_type == "wholesale":
            self.manufacturer_action_space = spaces.Discrete(self.max_price + 1)
        elif self.contract_type in ("buyback", "revenue-sharing"):
            self.manufacturer_action_space = spaces.MultiDiscrete([self.max_price + 1, self.max_price + 1])
        else:
            raise ValueError("Invalid contract type. Choose from 'wholesale', 'buyback', or 'revenue-sharing'.")

        # Retailer action is the stock order quantity for every contract type.
        self.retailer_action_space = spaces.Discrete(self.max_stock + 1)

        # Observation matches the 6-element state produced by reset() and step().
        # Profits may be negative or exceed max_stock, so the box is unbounded
        # instead of the former [0, 150] range with shape (5,).
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(6,), dtype=np.float32)

        # Initialize state variables
        self.reset()

    def reset(self):
        """
        Start a new episode: draw a demand in [50, 150] and zero everything else.

        :return: the initial state, laid out exactly like the states from step().
        """
        self.demand = np.random.randint(50, 151)
        # [demand, w, b, r, retailer profit, manufacturer profit]
        self.state = np.array([self.demand, 0, 0, 0, 0, 0])
        self.current_round = 0  # Initialize round counter
        return self.state

    def step(self, actions):
        """
        Perform a step in the environment.
        :param actions: (manufacturer_action, retailer_action)
        :return: observation, reward, done, info
            - observation: [demand, w, b, r, retailer profit, manufacturer profit]
              for the round just played
            - reward: (manufacturer profit, retailer profit)
            - done: True once max_rounds rounds have been played
            - info: always an empty dict
        """
        manufacturer_action, retailer_action = actions
        Q = retailer_action  # Retailer order quantity
        sales = min(Q, self.demand)  # Actual sales
        leftovers = Q - sales  # Unsold stock
        c = 5  # Manufacturer's production cost (example value)
        p = self.max_price  # Retail price

        if self.contract_type == "wholesale":
            # Extract wholesale price
            w = manufacturer_action
            b = 0
            r = 0

            # Retailer payoff
            retailer_profit = p * sales - w * Q

            # Manufacturer payoff
            manufacturer_profit = (w - c) * Q

        elif self.contract_type == "buyback":
            # Extract wholesale price and buyback price
            w, b = manufacturer_action
            r = 0

            # Enforce constraint: buyback price must not exceed wholesale price
            if b > w:
                b = w  # Adjust buyback price to wholesale price

            # Retailer payoff
            retailer_profit = p * sales - w * Q + b * leftovers

            # Manufacturer payoff
            manufacturer_profit = (w - c) * Q - b * leftovers

        elif self.contract_type == "revenue-sharing":
            # Extract wholesale price and revenue share
            w, r = manufacturer_action
            b = 0

            # Enforce constraint: revenue share must not exceed (retail price - wholesale price)
            max_revenue_share = p - w
            if r > max_revenue_share:
                r = max_revenue_share

            # Retailer payoff
            retailer_profit = p * sales - w * Q - r * sales

            # Manufacturer payoff
            manufacturer_profit = (w - c) * Q + r * sales

        else:
            raise ValueError("Invalid contract type.")

        # Update state (still carries the demand that was just realised)
        self.state = np.array([self.demand, w, b, r, retailer_profit, manufacturer_profit])

        # Define rewards
        manufacturer_reward = manufacturer_profit
        retailer_reward = retailer_profit

        # Update round counter
        self.current_round += 1

        # Randomize next demand; it only becomes visible in the state after the next step
        self.demand = np.random.randint(50, 151)

        # Check if the maximum number of rounds has been reached
        done = self.current_round >= self.max_rounds

        return self.state, (manufacturer_reward, retailer_reward), done, {}




Round: 1, State: [  54   11    5    0 -432  405], Rewards: (405, -432)
Round: 2, State: [144  11   5   0  64 384], Rewards: (384, 64)
Round: 3, State: [142  11   5   0 133 798], Rewards: (798, 133)
Round: 4, State: [103  11   5   0  91 620], Rewards: (620, 91)
Round: 5, State: [  53   11    5    0 -283  374], Rewards: (374, -283)
Round: 6, State: [ 53  11   5   0 -13 329], Rewards: (329, -13)
Round: 7, State: [143  11   5   0 140 840], Rewards: (840, 140)
Round: 8, State: [134  11   5   0 124 744], Rewards: (744, 124)
Round: 9, State: [141  11   5   0  65 390], Rewards: (390, 65)
Round: 10, State: [144  11   5   0 140 840], Rewards: (840, 140)
Round: 11, State: [144  11   5   0  59 354], Rewards: (354, 59)
Round: 12, State: [125  11   5   0  53 318], Rewards: (318, 53)
Round: 13, State: [101  11   5   0  61 366], Rewards: (366, 61)
Round: 14, State: [106  11   5   0  51 306], Rewards: (306, 51)
Round: 15, State: [ 92  11   5   0  51 306], Rewards: (306, 51)
Round: 16, State: [148  11  

In [41]:
import numpy as np
import random
from collections import defaultdict

class QLearningAgent:
    """
    Tabular epsilon-greedy Q-learning agent with optional personality traits.

    Works with action spaces exposing ``n`` (single discrete action) as well as
    spaces exposing ``nvec`` (a vector of discrete components); in the latter
    case every joint action is stored in the Q-table as a tuple of ints.
    """
    def __init__(self, action_space, personality_traits, learning_rate=0.1, discount_factor=0.99, epsilon=1.0, epsilon_decay=0.995, min_epsilon=0.01):
        """
        :param action_space: object with ``sample()`` and either ``n`` or ``nvec``.
        :param personality_traits: dictionary of traits (e.g., risk aversion, fairness);
            only the 'risk_aversion' key is consulted by ``update``.
        :param learning_rate: step size of the Q-value update.
        :param discount_factor: weight of the bootstrapped next-state value.
        :param epsilon: initial exploration probability.
        :param epsilon_decay: multiplicative decay applied after every update.
        :param min_epsilon: floor for the exploration probability.
        """
        self.action_space = action_space
        self.personality_traits = personality_traits  # Dictionary of traits (e.g., risk aversion, fairness)
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.min_epsilon = min_epsilon
        self.q_table = defaultdict(float)
        # Running mean of the raw rewards seen so far; baseline for the risk penalty.
        self._reward_count = 0
        self._reward_mean = 0.0

    def _candidate_actions(self):
        """Enumerate every action of the space in a hashable form."""
        if hasattr(self.action_space, 'n'):
            return list(range(self.action_space.n))
        # Vector-valued space: one tuple per combination of component values.
        sizes = np.asarray(self.action_space.nvec).ravel().tolist()
        return list(np.ndindex(*sizes))

    @staticmethod
    def _action_to_key(action):
        """Convert an action (possibly an ndarray or list) into a hashable key."""
        if isinstance(action, np.ndarray):
            return action.item() if action.ndim == 0 else tuple(action.ravel().tolist())
        if isinstance(action, list):
            return tuple(action)
        return action

    def get_action(self, state):
        """
        Choose an action with the epsilon-greedy rule.

        :param state: current observation (any iterable of hashable values).
        :return: a random sample while exploring, otherwise the first action
            with the highest Q-value for ``state``.
        """
        if np.random.rand() < self.epsilon:
            return self.action_space.sample()  # Explore

        state_key = self._state_to_key(state)
        actions = self._candidate_actions()
        q_values = [self.q_table[(state_key, a)] for a in actions]
        return actions[int(np.argmax(q_values))]  # Exploit

    def update(self, state, action, reward, next_state, done):
        """
        Apply one Q-learning update and decay epsilon.

        :param state: observation the action was taken in.
        :param action: action that was taken (scalar, tuple or ndarray).
        :param reward: scalar reward received.
        :param next_state: observation after the action.
        :param done: True when the transition ended the episode (no bootstrapping).
        """
        state_key = self._state_to_key(state)
        next_state_key = self._state_to_key(next_state)
        action_key = self._action_to_key(action)

        # Modify reward based on personality traits.
        # The penalty is the distance of this reward from the running mean of all
        # rewards seen so far (including this one). Taking the mean of the single
        # scalar reward, as before, made the penalty identically zero.
        traits = self.personality_traits or {}
        if 'risk_aversion' in traits:
            self._reward_count += 1
            self._reward_mean += (reward - self._reward_mean) / self._reward_count
            reward -= abs(reward - self._reward_mean) * traits['risk_aversion']

        # Q-Learning update
        if done:
            target = reward
        else:
            max_next_q = max(self.q_table[(next_state_key, a)] for a in self._candidate_actions())
            target = reward + self.discount_factor * max_next_q

        self.q_table[(state_key, action_key)] += self.learning_rate * (target - self.q_table[(state_key, action_key)])
        self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)

    def _state_to_key(self, state):
        """Return the state as a tuple so it can be used as a dictionary key."""
        return tuple(state)





In [110]:
# Load the survey responses and the three experiment tables (one per contract type).
# NOTE(review): the survey filename is spelled 'reponse' — confirm it matches the file on disk.
behavioral_data = pd.read_csv('../adjusted_reponse_survey.csv')
historical_data_wholesale = pd.read_csv('../Notebooks/experiment/curr_data.csv')
historical_data_buyback = pd.read_csv('../Notebooks/experiment/curr_data_bb.csv')
historical_data_revenue_sharing = pd.read_csv('../Notebooks/experiment/curr_data_rs.csv')

In [111]:
# Remove leading/trailing whitespace from every column label of all four frames.
behavioral_data.columns = behavioral_data.columns.str.strip()
historical_data_wholesale.columns = historical_data_wholesale.columns.str.strip()
historical_data_buyback.columns = historical_data_buyback.columns.str.strip()
historical_data_revenue_sharing.columns = historical_data_revenue_sharing.columns.str.strip()

In [112]:
# Replace spaces with underscores in the experiment column labels
# (e.g. 'Wholesale p.' becomes 'Wholesale_p.'); the survey frame keeps its spaces.
historical_data_wholesale.columns = historical_data_wholesale.columns.str.replace(' ', '_')
historical_data_buyback.columns = historical_data_buyback.columns.str.replace(' ', '_')
historical_data_revenue_sharing.columns = historical_data_revenue_sharing.columns.str.replace(' ', '_')

In [None]:

# Extract behavioral data
# Keeps only the eight manufacturer/retailer trait columns and indexes the frame
# by 'PLAYER NAME'.
# NOTE(review): this rebinds behavioral_data, so running the cell a second time
# fails because 'PLAYER NAME' is then the index and no longer a column.
behavioral_data = behavioral_data[['PLAYER NAME', 
                                   'Manufacturer_Self Esteem Average', 
                                   'Manufacturer_Regret Scale Average', 
                                   'Manufacturer_Risk Averse Coefficient', 
                                   'Manufacturer_Fairness Index', 
                                   'Retailer_Self Esteem Average', 
                                   'Retailer_Regret Scale Average', 
                                   'Retailer_Risk Averse Coefficient', 
                                   'Retailer_Fairness Index']].set_index('PLAYER NAME')

# Function to extract historical data for Q-learning
def preprocess_historical_data(df, contract_type):
    """
    Convert every row of an experiment table into a transition tuple.

    :param df: DataFrame with the columns 'Demand', 'Wholesale_p.', 'Stock',
        'Realized_Mfg_Profit' and 'Realized_Retailer_Profit'; 'Buyback_p.' and
        'Revenue_Share' are optional and read as 0 when absent.
    :param contract_type: accepted for symmetry at the call sites; not consulted.
    :return: list of
        (state, manufacturer_action, retailer_action,
         (manufacturer_reward, retailer_reward), next_state)
        where state is [demand, wholesale, buyback, revenue share] and
        next_state is the very same list object as state.
    """
    def _as_transition(record):
        # Observed situation for this round.
        observed = [
            record['Demand'],
            record['Wholesale_p.'],
            record.get('Buyback_p.', 0),
            record.get('Revenue_Share', 0),
        ]

        # Manufacturer's offer: wholesale price plus whichever second contract
        # term exists (buyback first, then revenue share, else 0).
        contract_offer = (
            record['Wholesale_p.'],
            record.get('Buyback_p.', record.get('Revenue_Share', 0)),
        )
        order_quantity = record['Stock']

        # Realized payoffs, manufacturer first.
        payoffs = (record['Realized_Mfg_Profit'], record['Realized_Retailer_Profit'])

        # No successor is recorded, so the state doubles as the next state.
        return (observed, contract_offer, order_quantity, payoffs, observed)

    return [_as_transition(record) for _, record in df.iterrows()]

# Process historical data for each contract type
# NOTE: each name is rebound from a DataFrame to a list of transition tuples,
# so this cell can only be run once per load of the CSV files.
historical_data_wholesale = preprocess_historical_data(historical_data_wholesale, "wholesale")
historical_data_buyback = preprocess_historical_data(historical_data_buyback, "buyback")
historical_data_revenue_sharing = preprocess_historical_data(historical_data_revenue_sharing, "revenue-sharing")

# Combine historical data
historical_data = historical_data_wholesale + historical_data_buyback + historical_data_revenue_sharing

# Create the environment here so the agents below have action spaces to use.
# Without this the cell depends on an `env` left over from an out-of-order run
# and raises NameError on a fresh kernel.
env = SupplyChainEnv(contract_type="wholesale")

# Initialize agents with personality traits
manufacturer_agent = QLearningAgent(env.manufacturer_action_space, personality_traits={'risk_aversion': 0.7, 'fairness': 0.5})
retailer_agent = QLearningAgent(env.retailer_action_space, personality_traits={'risk_aversion': 0.3, 'self_esteem': 0.8})

# Pre-train Q-table using historical data
# Each recorded reward is written directly as the Q-value of its (state, action) pair;
# later records with the same key overwrite earlier ones.
# NOTE(review): these keys use the 4-element historical state and a
# (wholesale, second term) tuple as manufacturer action, whereas the wholesale
# environment emits longer states and integer manufacturer actions — confirm the
# pre-trained entries are ever looked up during training.
for record in historical_data:
    state, manufacturer_action, retailer_action, rewards, next_state = record
    manufacturer_reward, retailer_reward = rewards

    # Update Q-table for manufacturer
    manufacturer_agent.q_table[(manufacturer_agent._state_to_key(state), manufacturer_action)] = manufacturer_reward

    # Update Q-table for retailer
    retailer_agent.q_table[(retailer_agent._state_to_key(state), retailer_action)] = retailer_reward

In [116]:
def extract_traits(behavioral_data):
    """
    Split the survey frame into per-role trait dictionaries.

    :param behavioral_data: DataFrame holding the four 'Manufacturer_*' and the
        four 'Retailer_*' trait columns.
    :return: (manufacturer_traits, retailer_traits); each maps an index label of
        the frame to a dictionary of that role's traits, with the role prefix
        removed from the trait names.
    """
    def _without_prefix(frame, prefix):
        # Strip the role prefix from every column label.
        return frame.rename(columns=lambda label: label.replace(prefix, ''))

    manufacturer_columns = [
        'Manufacturer_Self Esteem Average',
        'Manufacturer_Regret Scale Average',
        'Manufacturer_Risk Averse Coefficient',
        'Manufacturer_Fairness Index',
    ]
    retailer_columns = [
        'Retailer_Self Esteem Average',
        'Retailer_Regret Scale Average',
        'Retailer_Risk Averse Coefficient',
        'Retailer_Fairness Index',
    ]

    manufacturer_frame = _without_prefix(behavioral_data[manufacturer_columns], 'Manufacturer_')
    retailer_frame = _without_prefix(behavioral_data[retailer_columns], 'Retailer_')

    # One {trait: value} dictionary per index label.
    return (
        manufacturer_frame.to_dict(orient='index'),
        retailer_frame.to_dict(orient='index'),
    )


# Extract traits
# Both dictionaries are keyed by the index labels of behavioral_data.
manufacturer_traits, retailer_traits = extract_traits(behavioral_data)

# Sample traits for inspection
# Takes the first key of each dictionary; since both come from the same frame
# index, the two sample keys refer to the same survey row.
sample_manufacturer = list(manufacturer_traits.keys())[0]
sample_retailer = list(retailer_traits.keys())[0]

sample_manufacturer_traits = manufacturer_traits[sample_manufacturer]
sample_retailer_traits = retailer_traits[sample_retailer]

In [117]:
# Display the sampled keys together with their trait dictionaries.
# NOTE(review): the captured output shows keys with trailing whitespace
# (e.g. 'manufacturer1  '); lookups by player name must use the padded values
# unless the index is stripped first.
(sample_manufacturer, sample_manufacturer_traits, sample_retailer, sample_retailer_traits)

(('manufacturer1  ', 'retailer1   '),
 {'Self Esteem Average': 1.9,
  'Regret Scale Average': 3.6,
  'Risk Averse Coefficient': -0.5,
  'Fairness Index': 0.4333333333333333},
 ('manufacturer1  ', 'retailer1   '),
 {'Self Esteem Average': 2.8,
  'Regret Scale Average': 5.4,
  'Risk Averse Coefficient': 2.5,
  'Fairness Index': 0.3333333333333333})

In [118]:
class QLearningAgent:
    """
    Tabular epsilon-greedy Q-learning agent whose state keys include its
    personality trait values.

    Works with action spaces exposing ``n`` (single discrete action) as well as
    spaces exposing ``nvec`` (a vector of discrete components); in the latter
    case every joint action is stored in the Q-table as a tuple of ints.
    """
    def __init__(self, action_space, personality_traits=None, learning_rate=0.1, discount_factor=0.99, epsilon=1.0, epsilon_decay=0.995, min_epsilon=0.01):
        """
        Initialize the Q-Learning agent with personality traits.

        :param action_space: object with ``sample()`` and either ``n`` or ``nvec``.
        :param personality_traits: optional dictionary of trait values; ``None``
            is treated as an empty dictionary.
        :param learning_rate: step size of the Q-value update.
        :param discount_factor: weight of the bootstrapped next-state value.
        :param epsilon: initial exploration probability.
        :param epsilon_decay: multiplicative decay applied after every update.
        :param min_epsilon: floor for the exploration probability.
        """
        self.action_space = action_space
        self.personality_traits = personality_traits or {}
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.min_epsilon = min_epsilon
        self.q_table = defaultdict(float)

    def _state_to_key(self, state):
        """
        Convert the state to a hashable key, including personality traits.
        """
        # Combine state with traits to create a unique key
        trait_values = tuple(self.personality_traits.values())
        return tuple(state) + trait_values

    def _candidate_actions(self):
        """
        Enumerate every action of the space in a hashable form.
        """
        if hasattr(self.action_space, 'n'):
            return list(range(self.action_space.n))
        # Vector-valued space: one tuple per combination of component values.
        sizes = np.asarray(self.action_space.nvec).ravel().tolist()
        return list(np.ndindex(*sizes))

    @staticmethod
    def _action_to_key(action):
        """
        Convert an action (possibly an ndarray or list) into a hashable key.
        """
        if isinstance(action, np.ndarray):
            return action.item() if action.ndim == 0 else tuple(action.ravel().tolist())
        if isinstance(action, list):
            return tuple(action)
        return action

    def get_action(self, state):
        """
        Select an action based on the epsilon-greedy policy.

        :return: a random sample while exploring, otherwise the first action
            with the highest Q-value for ``state``.
        """
        if np.random.rand() < self.epsilon:
            # Explore: choose a random action
            return self.action_space.sample()

        # Exploit: choose the action with the highest Q-value
        state_key = self._state_to_key(state)
        actions = self._candidate_actions()
        q_values = [self.q_table[(state_key, a)] for a in actions]
        return actions[int(np.argmax(q_values))]

    def update_q_table(self, state, action, reward, next_state, done=False):
        """
        Update the Q-value for the given state-action pair.

        :param state: observation the action was taken in.
        :param action: action that was taken (scalar, tuple or ndarray).
        :param reward: scalar reward received.
        :param next_state: observation after the action.
        :param done: set to True for the last transition of an episode so the
            target does not bootstrap from ``next_state``; defaults to False,
            which reproduces the previous always-bootstrapping behaviour.
        """
        state_key = self._state_to_key(state)
        next_state_key = self._state_to_key(next_state)
        action_key = self._action_to_key(action)

        if done:
            # Terminal transition: nothing follows, so the target is the reward alone.
            target = reward
        else:
            # Calculate the maximum Q-value for the next state
            max_next_q = max(self.q_table[(next_state_key, a)] for a in self._candidate_actions())
            target = reward + self.discount_factor * max_next_q

        # Update Q-value using Q-learning formula
        self.q_table[(state_key, action_key)] += self.learning_rate * (
            target - self.q_table[(state_key, action_key)]
        )

        # Decay epsilon
        self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)

# Initialize manufacturer and retailer agents with personality traits
# The trait dictionaries are looked up again from the sampled keys.
sample_manufacturer_traits = manufacturer_traits[sample_manufacturer]
sample_retailer_traits = retailer_traits[sample_retailer]

# NOTE(review): in file order `env` is only assigned in a later cell, so on a
# fresh top-to-bottom run these two lines depend on an earlier cell having
# created it — confirm the intended cell order.
manufacturer_agent = QLearningAgent(env.manufacturer_action_space, personality_traits=sample_manufacturer_traits)
retailer_agent = QLearningAgent(env.retailer_action_space, personality_traits=sample_retailer_traits)

In [120]:
from gym import Env, spaces

class SupplyChainEnv(Env):
    """
    Custom Environment for the supply chain game.

    The manufacturer chooses the contract terms and the retailer chooses the
    order quantity; each round's rewards are the two parties' profits.

    The state is always the 6-element array
    [demand, w, b, r, retailer profit, manufacturer profit], both right after
    ``reset()`` (contract terms and profits are zero) and after ``step()``.
    """
    def __init__(self, contract_type="wholesale"):
        """
        Build the action/observation spaces for the chosen contract and reset.

        :param contract_type: 'wholesale', 'buyback' or 'revenue-sharing'.
        :raises ValueError: for any other contract type.
        """
        super(SupplyChainEnv, self).__init__()
        self.contract_type = contract_type
        self.max_stock = 150  # largest order quantity
        self.max_price = 12   # largest price; also used as the retail price in step()
        self.max_rounds = 40  # episode length
        self.current_round = 0

        # Manufacturer action: a single wholesale price, or a
        # (wholesale, buyback) / (wholesale, revenue share) pair.
        if self.contract_type == "wholesale":
            self.manufacturer_action_space = spaces.Discrete(self.max_price + 1)
        elif self.contract_type in ("buyback", "revenue-sharing"):
            self.manufacturer_action_space = spaces.MultiDiscrete([self.max_price + 1, self.max_price + 1])
        else:
            raise ValueError("Invalid contract type.")

        # Retailer action is the order quantity for every contract type.
        self.retailer_action_space = spaces.Discrete(self.max_stock + 1)

        # Observation matches the 6-element state produced by reset() and step().
        # Profits may be negative or exceed max_stock, so the box is unbounded
        # instead of the former [0, 150] range with shape (5,).
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(6,), dtype=np.float32)
        self.reset()

    def reset(self):
        """
        Start a new episode: draw a demand in [50, 150] and zero everything else.

        :return: the initial state, laid out exactly like the states from step().
        """
        self.demand = np.random.randint(50, 151)
        # [demand, w, b, r, retailer profit, manufacturer profit]
        self.state = np.array([self.demand, 0, 0, 0, 0, 0])
        self.current_round = 0
        return self.state

    def step(self, actions):
        """
        Play one round.

        :param actions: (manufacturer_action, retailer_action)
        :return: (state, (manufacturer profit, retailer profit), done, {}) where
            state describes the round just played and done becomes True once
            max_rounds rounds have been played.
        :raises ValueError: if contract_type was changed to an unsupported value.
        """
        manufacturer_action, retailer_action = actions
        Q = retailer_action            # order quantity
        sales = min(Q, self.demand)    # units actually sold
        leftovers = Q - sales          # unsold units
        c = 5                          # manufacturer's unit production cost
        p = self.max_price             # retail price

        # NOTE(review): unlike the earlier definition of this class in the
        # notebook, b is not capped at w and r is not capped at p - w here —
        # confirm that dropping those constraints is intentional.
        if self.contract_type == "wholesale":
            w = manufacturer_action
            b, r = 0, 0
            retailer_profit = p * sales - w * Q
            manufacturer_profit = (w - c) * Q
        elif self.contract_type == "buyback":
            w, b = manufacturer_action
            r = 0
            retailer_profit = p * sales - w * Q + b * leftovers
            manufacturer_profit = (w - c) * Q - b * leftovers
        elif self.contract_type == "revenue-sharing":
            w, r = manufacturer_action
            b = 0
            retailer_profit = p * sales - w * Q - r * sales
            manufacturer_profit = (w - c) * Q + r * sales
        else:
            raise ValueError("Invalid contract type.")

        # The state still carries the demand that was just realised; the demand
        # drawn below only becomes visible in the state after the next step.
        self.state = np.array([self.demand, w, b, r, retailer_profit, manufacturer_profit])
        self.current_round += 1
        self.demand = np.random.randint(50, 151)
        done = self.current_round >= self.max_rounds

        return self.state, (manufacturer_profit, retailer_profit), done, {}

# Initialize the environment
# (wholesale contract: the manufacturer's action space is a single Discrete price)
env = SupplyChainEnv(contract_type="wholesale")

# Initialize agents with personality traits
# Each agent receives the action space of its role and the sampled trait dictionary;
# this rebinds manufacturer_agent / retailer_agent, discarding any earlier Q-tables.
manufacturer_agent = QLearningAgent(env.manufacturer_action_space, personality_traits=sample_manufacturer_traits)
retailer_agent = QLearningAgent(env.retailer_action_space, personality_traits=sample_retailer_traits)

In [95]:
# Display the combined historical transitions (the whole list is rendered).
historical_data

[Ellipsis]