In [14]:
import pandas as pd
import numpy as np
from collections import defaultdict
from gym import Env, spaces
import random
import itertools
import pickle

In [15]:
# Read the three per-contract experiment logs and the behavioral survey, in that order.
(
    historical_data_wholesale,
    historical_data_buyback,
    historical_data_revenue_sharing,
    behavioral_data,
) = map(
    pd.read_csv,
    (
        '../Notebooks/experiment/curr_data.csv',
        '../Notebooks/experiment/curr_data_bb.csv',
        '../Notebooks/experiment/curr_data_rs.csv',
        '../adjusted_reponse_survey.csv',
    ),
)

In [16]:
# Strip leading/trailing whitespace from every column header of all four frames.
for _frame in (
    behavioral_data,
    historical_data_wholesale,
    historical_data_buyback,
    historical_data_revenue_sharing,
):
    _frame.columns = _frame.columns.str.strip()
del _frame

In [17]:
# Replace spaces in the historical-data headers with underscores
# (the behavioral survey keeps its spaced column names).
for _frame in (
    historical_data_wholesale,
    historical_data_buyback,
    historical_data_revenue_sharing,
):
    _frame.columns = _frame.columns.str.replace(' ', '_')
del _frame

In [18]:
# Function to categorize retailer behavior based on stock and optimal stock
def categorize_behavior(row):
    """
    Categorizes retailer behavior as understocking, optimal, or overstocking.

    Args:
        row: A row from the historical data DataFrame (any mapping exposing
            'Stock' and 'Optimal_Stock' works). Both values may be numbers
            or numeric strings.

    Returns:
        An integer representing the behavioral category:
        0: Understocking (Stock < Optimal * 0.8)
        1: Optimal Stocking (Optimal * 0.8 <= Stock <= Optimal * 1.2)
        2: Overstocking (Stock > Optimal * 1.2)

    Raises:
        TypeError, ValueError: If 'Stock' itself cannot be converted to a number.
    """
    # CSV cells can arrive as strings; compare numerically rather than str-vs-float.
    stock = float(row['Stock'])

    try:
        optimal_stock = float(row["Optimal_Stock"])
    except (TypeError, ValueError):
        # Missing (None) or unparseable optimum: fall back to 0, as before.
        optimal_stock = 0.0

    # NaN never compares true; treat it like any other missing optimum.
    if optimal_stock != optimal_stock:
        optimal_stock = 0.0

    if stock < optimal_stock * 0.8:
        return 0  # Understocking

    if stock <= optimal_stock * 1.2:
        return 1  # Optimal Stocking

    return 2  # Overstocking

# Tag every round of each contract dataset with its stocking-behavior category.
for _contract_frame in (
    historical_data_wholesale,
    historical_data_buyback,
    historical_data_revenue_sharing,
):
    _contract_frame['Behavioral_Category'] = _contract_frame.apply(categorize_behavior, axis=1)
del _contract_frame

# 2. Remove Extra 'Behavioral_Category' Columns
def remove_extra_columns(df, column_name):
    """Removes extra columns with the given name, keeping only the last one.

    The frame is modified in place and nothing is returned. Frames with zero
    or one matching column are left untouched.

    Args:
        df: DataFrame whose columns may contain duplicates of ``column_name``.
        column_name: Column label to de-duplicate.
    """
    original_labels = df.columns
    positions = [i for i, label in enumerate(original_labels) if label == column_name]
    if len(positions) <= 1:
        return

    surplus = set(positions[:-1])  # every occurrence except the last
    kept_positions = [i for i in range(len(original_labels)) if i not in surplus]

    # Dropping by label would remove *all* columns sharing that label (including
    # the one to keep), so temporarily relabel columns by position, drop the
    # surplus positions, then restore the surviving original labels.
    df.columns = range(len(original_labels))
    df.drop(columns=sorted(surplus), inplace=True)
    df.columns = original_labels[kept_positions]

_frames_and_paths = (
    (historical_data_wholesale, '../Notebooks/experiment/curr_data.csv'),
    (historical_data_buyback, '../Notebooks/experiment/curr_data_bb.csv'),
    (historical_data_revenue_sharing, '../Notebooks/experiment/curr_data_rs.csv'),
)

# Collapse any duplicated 'Behavioral_Category' columns first ...
for _frame, _ in _frames_and_paths:
    remove_extra_columns(_frame, "Behavioral_Category")

# ... then write each frame back over the CSV it was loaded from.
for _frame, _csv_path in _frames_and_paths:
    _frame.to_csv(_csv_path, index=False)

del _frames_and_paths, _frame, _csv_path, _

In [19]:
# Extract Traits
def extract_traits(behavioral_data):
    """Build per-player trait lookups for manufacturers and retailers.

    Args:
        behavioral_data: Survey DataFrame holding the four 'Manufacturer_*' and
            the four 'Retailer_*' trait columns plus 'PLAYER NAME'.
            'PLAYER NAME' may appear once (the same name is then used for both
            roles) or more than once (first occurrence names the manufacturer,
            second occurrence names the retailer).

    Returns:
        A ``(manufacturer_traits, retailer_traits)`` tuple. Each is a dict that
        maps a player name to a dict with the keys 'Self Esteem Average',
        'Regret Scale Average', 'Risk Averse Coefficient' and 'Fairness Index'.
        If a name occurs in several rows, the last row wins.

    Raises:
        KeyError: If one of the expected columns is missing.
    """
    trait_names = (
        'Self Esteem Average',
        'Regret Scale Average',
        'Risk Averse Coefficient',
        'Fairness Index',
    )

    def _traits_for(role_prefix, name_position):
        # Collect {player name: {trait: value}} for one role.
        role_columns = [f'{role_prefix}_{trait}' for trait in trait_names]
        role_frame = behavioral_data[role_columns + ['PLAYER NAME']]

        traits_by_player = {}
        for _, row in role_frame.iterrows():
            name_cell = row['PLAYER NAME']
            if isinstance(name_cell, pd.Series):
                # Duplicate 'PLAYER NAME' columns: the lookup yields one entry
                # per column, so pick the one belonging to this role.
                pid = name_cell.iloc[name_position]
            else:
                # Single 'PLAYER NAME' column: the cell is already the name.
                pid = name_cell
            traits_by_player[pid] = {
                trait: row[column] for trait, column in zip(trait_names, role_columns)
            }
        return traits_by_player

    manufacturer_traits = _traits_for('Manufacturer', 0)
    retailer_traits = _traits_for('Retailer', 1)
    return manufacturer_traits, retailer_traits

# Build the player-name -> traits lookups once; they are passed to
# preprocess_historical_data for every contract type further down.
manufacturer_traits_dict, retailer_traits_dict = extract_traits(behavioral_data)

In [20]:
# Demand realizations and candidate stock levels, both 50..150 inclusive.
potential_demand = np.arange(50, 151)
stock_list = np.arange(50, 151)

# expected_sales[stock] = average of min(demand, stock) over all demand values,
# each demand value weighted equally.
expected_sales = {}
_n_demand_values = len(potential_demand)
for stock in stock_list:
    expected_sales[stock] = np.minimum(potential_demand, stock).sum() / _n_demand_values

In [21]:
class SupplyChainEnv(Env):
    """Manufacturer/retailer supply-chain game for a single contract type.

    Each round the manufacturer posts contract terms (``manufacturer_step``)
    and the retailer then chooses a stock level relative to the optimal
    quantity (``retailer_step``). The state is the triple ``[w, b, r]``:
    wholesale price, buyback price and revenue share.

    Supported contract types: "wholesale", "buyback", "revenue-sharing".
    """

    def __init__(self, contract_type="wholesale", expected_sales=None):
        """Set up the action/observation spaces for ``contract_type``.

        Args:
            contract_type: One of "wholesale", "buyback", "revenue-sharing".
            expected_sales: Optional mapping stock level -> expected units sold.
                Stock levels missing from the mapping (or all of them when the
                mapping is None) are evaluated against a demand that is equally
                likely to be any integer in ``min_demand..max_demand``.

        Raises:
            ValueError: If ``contract_type`` is not supported.
        """
        super(SupplyChainEnv, self).__init__()
        self.contract_type = contract_type
        self.max_stock = 150
        self.min_w = 3
        self.max_price = 12
        self.max_rounds = 40
        self.current_round = 0
        self.demand = 0
        self.expected_sales = expected_sales
        # Demand range used by the expected-sales fallback; 50 and 150 match the
        # offset/width baked into the optimal-stock formulas below.
        self.min_demand = 50
        self.max_demand = 150

        n_wholesale_prices = self.max_price - self.min_w + 1
        if self.contract_type == "wholesale":
            # Single decision: wholesale price offset from min_w.
            self.manufacturer_action_space = spaces.Discrete(n_wholesale_prices)
        elif self.contract_type in ("buyback", "revenue-sharing"):
            # Two decisions: wholesale price offset, then buyback price / revenue share.
            self.manufacturer_action_space = spaces.MultiDiscrete(
                [n_wholesale_prices, self.max_price + 1]
            )
        else:
            raise ValueError("Invalid contract type.")

        # Retailer always picks one of three stocking behaviors (0.8x, 1x, 1.2x optimal).
        self.retailer_action_space = spaces.Discrete(3)

        self.observation_space = spaces.Box(
            low=0, high=150, shape=(3,), dtype=np.float32
        )
        self.reset()

    def reset(self):
        """Start a new game: zero the state, round counter and demand; return the state."""
        self.state = np.array([0, 0, 0])
        self.current_round = 0
        self.demand = 0
        return self.state

    def manufacturer_step(self, action):
        """Record the manufacturer's contract terms as the new state ``[w, b, r]``.

        Args:
            action: For "wholesale", the wholesale price offset from ``min_w``.
                For the other contracts, a pair ``(offset, b_or_r)``.

        Returns:
            The new state array ``[w, b, r]``.

        Raises:
            ValueError: If ``contract_type`` was changed to an unsupported value.
        """
        if self.contract_type == "wholesale":
            w, b, r = action + self.min_w, 0, 0
        elif self.contract_type == "buyback":
            w, b, r = action[0] + self.min_w, action[1], 0
        elif self.contract_type == "revenue-sharing":
            w, b, r = action[0] + self.min_w, 0, action[1]
        else:
            raise ValueError("Invalid contract type.")

        self.state = np.array([w, b, r])
        return self.state

    def get_optimal_stock(self):
        """Return the optimal stock quantity implied by the current ``[w, b, r]`` state."""
        w, b, r = self.state
        if self.contract_type == "wholesale":
            optimal_stock = 100 * ((12 - w) / 12) + 50
        elif self.contract_type == "buyback":
            if b == 12:
                # Avoid dividing by zero when leftovers are bought back at the full price of 12.
                optimal_stock = 150
            else:
                optimal_stock = 100 * ((12 - w) / (12 - b)) + 50
        elif self.contract_type == "revenue-sharing":
            if r == 12:
                # Avoid dividing by zero when the whole price of 12 is shared away.
                optimal_stock = 0
            else:
                optimal_stock = 100 * ((12 - w - r) / (12 - r)) + 50
        else:
            optimal_stock = 100
        return optimal_stock

    def _expected_sales_for(self, quantity):
        """Expected units sold when stocking ``quantity``.

        Uses the supplied table when it covers ``quantity``; otherwise averages
        ``min(demand, quantity)`` over every integer demand in
        ``min_demand..max_demand``. In particular a stock below ``min_demand``
        sells out completely instead of being counted as zero sales.
        """
        table = self.expected_sales
        if table is not None and quantity in table:
            return table[quantity]
        demand_values = range(self.min_demand, self.max_demand + 1)
        return sum(min(d, quantity) for d in demand_values) / len(demand_values)

    def retailer_step(self, action):
        """Apply the retailer's stocking behavior and compute expected profits.

        Args:
            action: 0 -> stock 80% of optimal, 2 -> 120% of optimal,
                anything else -> exactly optimal.

        Returns:
            ``(state, (manufacturer_profit, retailer_profit), done, info)`` where
            ``done`` becomes True once ``max_rounds`` retailer steps were taken
            and ``info`` is an empty dict.

        Raises:
            ValueError: If ``contract_type`` was changed to an unsupported value.
        """
        w, b, r = self.state
        optimal_stock = self.get_optimal_stock()

        if action == 0:
            Q = optimal_stock * 0.8
        elif action == 2:
            Q = optimal_stock * 1.2
        else:
            Q = optimal_stock

        # Whole units only, clamped to the feasible stock range.
        Q = int(round(Q))
        Q = max(0, min(Q, self.max_stock))

        sales = self._expected_sales_for(Q)
        leftovers = Q - sales
        c = 3   # unit production cost
        p = 12  # retail price

        if self.contract_type == "wholesale":
            retailer_profit = p * sales - w * Q
            manufacturer_profit = (w - c) * Q
        elif self.contract_type == "buyback":
            # Buyback price is capped at the wholesale price.
            if b > w:
                b = w
            retailer_profit = p * sales - w * Q + b * leftovers
            manufacturer_profit = (w - c) * Q - b * leftovers
        elif self.contract_type == "revenue-sharing":
            # Revenue share is capped at the retailer's per-unit margin.
            max_revenue_share = p - w
            if r > max_revenue_share:
                r = max_revenue_share
            retailer_profit = (p - r) * sales - w * Q
            manufacturer_profit = (w - c) * Q + r * sales
        else:
            raise ValueError("Invalid contract type.")

        self.current_round += 1
        done = self.current_round >= self.max_rounds
        return self.state, (manufacturer_profit, retailer_profit), done, {}

In [22]:
# Q-Learning Agent without fixed personality, but using opponent traits
class QLearningAgent:
    """Tabular Q-learning agent whose Q-state includes the opponent's traits.

    The Q-table is keyed by ``(state_key, action)`` where ``state_key`` is the
    environment state followed by the sorted opponent-trait items.
    """

    def __init__(
        self,
        action_space,
        learning_rate=0.1,
        discount_factor=0.99,
        epsilon=1.0,
        epsilon_decay=0.995,
        min_epsilon=0.01,
    ):
        """Create an agent acting in ``action_space`` (gym Discrete or MultiDiscrete).

        Args:
            action_space: Space the agent's actions are drawn from.
            learning_rate: Step size of the Q-update.
            discount_factor: Weight of the best next-state Q-value.
            epsilon: Initial exploration probability.
            epsilon_decay: Per-call multiplicative decay applied in ``get_action``.
            min_epsilon: Lower bound for the exploration probability.
        """
        self.action_space = action_space
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.initial_epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.min_epsilon = min_epsilon
        self.q_table = defaultdict(float)

        # When playing against random humans (unknown traits), we will estimate them.
        # Start with some default (neutral) trait estimates:
        self.estimated_opponent_traits = {
            "Risk Averse Coefficient": 0.5,
            "Fairness Index": 0.5,
            "Regret Scale Average": 0.0,
            "Self Esteem Average": 0.0,
        }

        self.human_actions = []
        self.opponent_action_history = []
        self.behavior_change_threshold = 2

    def _state_to_key(self, state, opponent_traits):
        """Return a hashable Q-state key: state values followed by sorted trait items."""
        trait_items = tuple(sorted(opponent_traits.items()))
        return tuple(state) + trait_items

    def get_possible_actions(self):
        """Enumerate every action of the agent's action space.

        Returns:
            A ``range`` for Discrete spaces, or a list of tuples (one per
            combination) for MultiDiscrete spaces.

        Raises:
            TypeError: If the action space is neither Discrete nor MultiDiscrete.
        """
        if isinstance(self.action_space, spaces.Discrete):
            return range(self.action_space.n)
        if isinstance(self.action_space, spaces.MultiDiscrete):
            ranges = [range(n) for n in self.action_space.nvec]
            return list(itertools.product(*ranges))
        raise TypeError(
            f"Unsupported action space type: {type(self.action_space).__name__}"
        )

    def get_action(self, state, opponent_traits=None):
        """Pick an action epsilon-greedily and decay epsilon as a side effect.

        Args:
            state: Current environment state (iterable of numbers).
            opponent_traits: Trait dict of the opponent; defaults to the
                agent's own running estimate.

        Returns:
            A sampled action while exploring, otherwise one of the actions
            with the highest Q-value (ties broken at random). MultiDiscrete
            actions are returned as tuples.
        """
        if opponent_traits is None:
            opponent_traits = self.estimated_opponent_traits

        # The more risk-averse the opponent, the faster epsilon shrinks. For
        # risk aversion below 0.5 the multiplier exceeds 1, so the result is
        # clamped to [min_epsilon, 1.0]; otherwise epsilon could grow without
        # bound and would then never decay back into a meaningful range.
        risk_aversion = opponent_traits.get("Risk Averse Coefficient", 0.5)
        scaled_epsilon = self.epsilon * self.epsilon_decay * (1.0 - (risk_aversion - 0.5))
        self.epsilon = min(1.0, max(self.min_epsilon, scaled_epsilon))

        if np.random.rand() < self.epsilon:
            sampled = self.action_space.sample()
            if isinstance(self.action_space, spaces.MultiDiscrete):
                return tuple(sampled)
            return sampled

        state_key = self._state_to_key(state, opponent_traits)
        possible_actions = self.get_possible_actions()
        q_values = [self.q_table[(state_key, a)] for a in possible_actions]
        max_q = max(q_values)
        max_actions = [a for a, q in zip(possible_actions, q_values) if q == max_q]
        return random.choice(max_actions)

    def update_q_table(self, state, action, reward, next_state, current_opponent_traits, next_opponent_traits):
        """Apply one standard Q-learning update (no trait-based reward shaping)."""
        state_key = self._state_to_key(state, current_opponent_traits)
        next_state_key = self._state_to_key(next_state, next_opponent_traits)
        possible_actions = self.get_possible_actions()
        max_next_q = max([self.q_table[(next_state_key, a)] for a in possible_actions], default=0)
        self.q_table[(state_key, action)] += self.learning_rate * (
            reward + self.discount_factor * max_next_q - self.q_table[(state_key, action)]
        )

    def update_opponent_model(self, opponent_action):
        """Record an opponent action and re-estimate the opponent's risk aversion.

        Heuristic: low variance over the 10 most recent actions is read as high
        risk aversion. Nothing is re-estimated until more than 10 actions were
        recorded.
        """
        self.human_actions.append(opponent_action)
        if len(self.human_actions) > 10:
            recent_actions = self.human_actions[-10:]
            action_variance = np.var(recent_actions)
            # NOTE(review): the normaliser comes from this agent's OWN action
            # space although the recorded actions are the opponent's - confirm
            # this is intended.
            max_variance = ((self.action_space.n - 1) ** 2 / 12 if isinstance(self.action_space, spaces.Discrete) else 1)
            new_risk = max(0, min(1, 1 - action_variance / max_variance))
            self.estimated_opponent_traits["Risk Averse Coefficient"] = new_risk

    def save_agent(self, filename):
        """Pickle the Q-table and hyperparameters to ``filename``.

        The opponent-trait estimate and the action history are not persisted.
        """
        agent_data = {
            "q_table": self.q_table,
            "learning_rate": self.learning_rate,
            "discount_factor": self.discount_factor,
            "epsilon": self.epsilon,
            "initial_epsilon": self.initial_epsilon,
            "epsilon_decay": self.epsilon_decay,
            "min_epsilon": self.min_epsilon,
        }
        with open(filename, "wb") as f:
            pickle.dump(agent_data, f)

    def load_agent(self, filename):
        """Restore the Q-table and hyperparameters written by ``save_agent``.

        Raises:
            OSError: If ``filename`` cannot be opened.
            KeyError: If the file lacks one of the expected entries.
        """
        # SECURITY: unpickling can execute arbitrary code - only load files
        # that were produced by save_agent from a trusted source.
        with open(filename, "rb") as f:
            agent_data = pickle.load(f)
        self.q_table = agent_data["q_table"]
        self.learning_rate = agent_data["learning_rate"]
        self.discount_factor = agent_data["discount_factor"]
        self.epsilon = agent_data["epsilon"]
        self.initial_epsilon = agent_data["initial_epsilon"]
        self.epsilon_decay = agent_data["epsilon_decay"]
        self.min_epsilon = agent_data["min_epsilon"]


In [23]:
def preprocess_historical_data(df, contract_type, manufacturer_traits_dict, retailer_traits_dict):
    """Turn historical game rows into Q-learning transition tuples.

    Args:
        df: Historical data with at least 'Wholesale_p.', 'Behavioral_Category',
            'Realized_Mfg_Profit' and 'Realized_Retailer_Profit' columns.
            'Buyback_p.', 'Revenue_Share', 'Manufacturer_ID' and 'Retailer_ID'
            are used when present.
        contract_type: "wholesale", "buyback" or "revenue-sharing"; decides the
            shape of the manufacturer action.
        manufacturer_traits_dict: Manufacturer id -> trait dict.
        retailer_traits_dict: Retailer id -> trait dict.

    Returns:
        A list with one tuple per row, in row order:
        ``(state, manufacturer_action, retailer_action,
        (manufacturer_reward, retailer_reward), next_state, mfg_traits, ret_traits)``.
        ``state`` and ``next_state`` are both ``[wholesale, buyback, revenue_share]``.
        Players without a traits entry get neutral default traits.
    """

    def _optional_price(row, column):
        # Price from an optional column; absent column or empty cell counts as 0.0.
        if column in row and not pd.isnull(row[column]):
            return float(row[column])
        return 0.0

    def _reward(value):
        # Realized profit as float; unparseable, None or NaN values count as 0
        # so that a single bad cell cannot inject NaN into the Q-table.
        try:
            reward = float(value)
        except (TypeError, ValueError):
            return 0
        return 0 if pd.isnull(reward) else reward

    # Neutral traits used when a player id has no survey entry.
    default_traits = {
        'Self Esteem Average': 0.0,
        'Regret Scale Average': 0.0,
        'Risk Averse Coefficient': 0.5,
        'Fairness Index': 0.5
    }

    data = []
    for _, row in df.iterrows():
        wholesale_price = float(row["Wholesale_p."])
        buyback_price = _optional_price(row, "Buyback_p.")
        revenue_share = _optional_price(row, "Revenue_Share")

        state = [wholesale_price, buyback_price, revenue_share]

        # Wholesale actions are stored as an offset from the minimum price of 3.
        if contract_type == "wholesale":
            manufacturer_action = wholesale_price - 3
        elif contract_type == "buyback":
            manufacturer_action = (wholesale_price - 3, buyback_price)
        elif contract_type == "revenue-sharing":
            manufacturer_action = (wholesale_price - 3, revenue_share)
        else:
            manufacturer_action = (wholesale_price - 3, 0.0)

        retailer_action = int(row["Behavioral_Category"])

        manufacturer_reward = _reward(row["Realized_Mfg_Profit"])
        retailer_reward = _reward(row["Realized_Retailer_Profit"])

        # The contract terms do not change within a row, so next_state mirrors state.
        next_state = [wholesale_price, buyback_price, revenue_share]

        mfg_id = row["Manufacturer_ID"] if "Manufacturer_ID" in row else None
        ret_id = row["Retailer_ID"] if "Retailer_ID" in row else None

        # Keep both trait sets: which one is "the opponent" depends on the
        # agent being trained. Defaults are copied so rows never share a dict.
        mfg_traits = manufacturer_traits_dict.get(mfg_id)
        if mfg_traits is None:
            mfg_traits = dict(default_traits)
        ret_traits = retailer_traits_dict.get(ret_id)
        if ret_traits is None:
            ret_traits = dict(default_traits)

        data.append(
            (
                state,
                manufacturer_action,
                retailer_action,
                (manufacturer_reward, retailer_reward),
                next_state,
                mfg_traits,
                ret_traits,
            )
        )
    return data

In [24]:
# Convert each contract's raw rows into transition tuples (same order as before:
# wholesale, buyback, revenue-sharing).
(
    historical_data_wholesale_processed,
    historical_data_buyback_processed,
    historical_data_rs_processed,
) = (
    preprocess_historical_data(_frame, _contract, manufacturer_traits_dict, retailer_traits_dict)
    for _frame, _contract in (
        (historical_data_wholesale, "wholesale"),
        (historical_data_buyback, "buyback"),
        (historical_data_revenue_sharing, "revenue-sharing"),
    )
)

# All transitions concatenated, contract by contract.
historical_data = [
    *historical_data_wholesale_processed,
    *historical_data_buyback_processed,
    *historical_data_rs_processed,
]

# Create environments
env_wholesale, env_buyback, env_revenue_sharing = (
    SupplyChainEnv(contract_type=_contract, expected_sales=expected_sales)
    for _contract in ("wholesale", "buyback", "revenue-sharing")
)

In [25]:
# # Initialize 6 agents:
# # For simplicity, we train:
# # - manufacturer and retailer agents for each of the three contract types.
# manufacturer_agent_wholesale = QLearningAgent(env_wholesale.manufacturer_action_space)
# retailer_agent_wholesale = QLearningAgent(env_wholesale.retailer_action_space)
# manufacturer_agent_buyback = QLearningAgent(env_buyback.manufacturer_action_space)
# retailer_agent_buyback = QLearningAgent(env_buyback.retailer_action_space)
# manufacturer_agent_revenue_sharing = QLearningAgent(env_revenue_sharing.manufacturer_action_space)
# retailer_agent_revenue_sharing = QLearningAgent(env_revenue_sharing.retailer_action_space)

# num_episodes = 1000000

# # Training logic:
# # We need to separate training sets for each scenario and train each pair of agents on their historical data.
# # Let's assume historical_data_* for each scenario is used respectively to train those scenario's agents.

# def train_agents(env, manufacturer_agent, retailer_agent, historical_data, contract_type, num_episodes):
#     for episode in range(num_episodes):
#         # Each episode: we go through historical_data randomly or sequentially
#         # For simplicity, we just go through a random sample of 40 steps (one game)
#         # If historical_data is large, you may sample from it.
#         start_idx = random.randint(0, len(historical_data) - 40)
#         game_data = historical_data[start_idx:start_idx+40]
#         env.reset()
#         done = False
#         for i, (state, mfg_action, ret_action, (mfg_reward, ret_reward), next_state, mfg_traits, ret_traits) in enumerate(game_data):
#             # Determine who is agent and who is opponent:
#             # If we are training the manufacturer_agent, then the retailer is the opponent (ret_traits)
#             # If we are training the retailer_agent, then the manufacturer is the opponent (mfg_traits)

#             # Manufacturer update:
#             # State is known, manufacturer_action chosen historically = mfg_action
#             # Opponent traits for manufacturer agent: ret_traits (opponent is retailer)
#             manufacturer_agent.update_q_table(
#                 state,
#                 mfg_action,
#                 mfg_reward,
#                 next_state,
#                 current_opponent_traits=ret_traits,
#                 next_opponent_traits=ret_traits
#             )

#             # Retailer update:
#             # Opponent traits for retailer agent: mfg_traits (opponent is manufacturer)
#             retailer_agent.update_q_table(
#                 state,
#                 ret_action,
#                 ret_reward,
#                 next_state,
#                 current_opponent_traits=mfg_traits,
#                 next_opponent_traits=mfg_traits
#             )

#         if (episode+1) % 100 == 0:
#             print(f"{contract_type.capitalize()} - Episode {episode+1} completed.")


# hist_wholesale = historical_data_wholesale_processed
# hist_buyback = historical_data_buyback_processed
# hist_revenue_sharing = historical_data_rs_processed

# train_agents(env_wholesale, manufacturer_agent_wholesale, retailer_agent_wholesale, hist_wholesale, "wholesale", num_episodes)
# train_agents(env_buyback, manufacturer_agent_buyback, retailer_agent_buyback, hist_buyback, "buyback", num_episodes)
# train_agents(env_revenue_sharing, manufacturer_agent_revenue_sharing, retailer_agent_revenue_sharing, hist_revenue_sharing, "revenue-sharing", num_episodes)

# # Save trained agents
# manufacturer_agent_wholesale.save_agent("manufacturer_agent_wholesale.pkl")
# retailer_agent_wholesale.save_agent("retailer_agent_wholesale.pkl")
# manufacturer_agent_buyback.save_agent("manufacturer_agent_buyback.pkl")
# retailer_agent_buyback.save_agent("retailer_agent_buyback.pkl")
# manufacturer_agent_revenue_sharing.save_agent("manufacturer_agent_revenue_sharing.pkl")
# retailer_agent_revenue_sharing.save_agent("retailer_agent_revenue_sharing.pkl")

In [None]:
import sys
import random
import PySimpleGUI as sg

def check_escape(value):
    """Quit the program when the user typed "escape" (any case, surrounding spaces ignored).

    For any other input the function returns None without side effects.
    Otherwise it shows a popup and terminates via ``sys.exit()``.
    """
    wants_to_quit = value.strip().lower() == "escape"
    if not wants_to_quit:
        return
    sg.popup("Exiting game.")
    sys.exit()

# Load trained agents (adjust as needed).
# One manufacturer agent and one retailer agent per contract type, each built
# on the matching environment's action space.
manufacturer_agent_wholesale = QLearningAgent(env_wholesale.manufacturer_action_space)
retailer_agent_wholesale = QLearningAgent(env_wholesale.retailer_action_space)
manufacturer_agent_buyback = QLearningAgent(env_buyback.manufacturer_action_space)
retailer_agent_buyback = QLearningAgent(env_buyback.retailer_action_space)
manufacturer_agent_revenue_sharing = QLearningAgent(env_revenue_sharing.manufacturer_action_space)
retailer_agent_revenue_sharing = QLearningAgent(env_revenue_sharing.retailer_action_space)

# Restore Q-tables and hyperparameters from pickle files resolved relative to
# the current working directory. load_agent unpickles the file, so only load
# files that come from a trusted source.
manufacturer_agent_wholesale.load_agent("manufacturer_agent_wholesale.pkl")
retailer_agent_wholesale.load_agent("retailer_agent_wholesale.pkl")
manufacturer_agent_buyback.load_agent("manufacturer_agent_buyback.pkl")
retailer_agent_buyback.load_agent("retailer_agent_buyback.pkl")
manufacturer_agent_revenue_sharing.load_agent("manufacturer_agent_revenue_sharing.pkl")
retailer_agent_revenue_sharing.load_agent("retailer_agent_revenue_sharing.pkl")

# Default (neutral) traits assumed for the human player; passed to the agents
# as opponent_traits when they choose an action.
human_traits_estimate = {
    'Self Esteem Average': 0.0,
    'Regret Scale Average': 0.0,
    'Risk Averse Coefficient': 0.5,
    'Fairness Index': 0.5
}

# Setup screen: contract-type selector, role selector, Start/Exit buttons.
layout = [
    [sg.Text("Choose a contract type:")],
    [sg.Combo(["wholesale", "buyback", "revenue-sharing"], default_value="wholesale", key="-CONTRACT-")],
    [sg.Text("Do you want to be the manufacturer or the retailer?")],
    [sg.Combo(["manufacturer", "retailer"], default_value="manufacturer", key="-ROLE-")],
    [sg.Button("Start Game"), sg.Button("Exit")]
]

window = sg.Window("Supply Chain Game", layout)

# Game state shared with the event loop below; the placeholders are filled in
# when "Start Game" is pressed.
contract_type = None
role = None
env = None
manufacturer_agent = None
retailer_agent = None
game_started = False
round_num = 0
done = False
total_human_profit = 0
total_agent_profit = 0
state = None

def update_round_and_profits(window, round_num, human_profit, agent_profit):
    """Refresh the round counter and the cumulative-profit banner of ``window``.

    Args:
        window: Object indexable by element key ("-ROUND-", "-PROFITS-") whose
            elements expose ``update(text)``.
        round_num: Round number to display.
        human_profit: Human player's running total.
        agent_profit: Agent's running total.
    """
    round_label = f"Round: {round_num}"
    profit_label = f"Your Total Profit: {human_profit} | Agent's Total Profit: {agent_profit}"
    window["-ROUND-"].update(round_label)
    window["-PROFITS-"].update(profit_label)

while True:
    event, values = window.read()
    if event in (sg.WIN_CLOSED, "Exit"):
        break

    if event == "Start Game" and not game_started:
        contract_type = values["-CONTRACT-"].lower()
        role = values["-ROLE-"].lower()

        if contract_type == "wholesale":
            env = env_wholesale
            manufacturer_agent = manufacturer_agent_wholesale
            retailer_agent = retailer_agent_wholesale
        elif contract_type == "buyback":
            env = env_buyback
            manufacturer_agent = manufacturer_agent_buyback
            retailer_agent = retailer_agent_buyback
        elif contract_type == "revenue-sharing":
            env = env_revenue_sharing
            manufacturer_agent = manufacturer_agent_revenue_sharing
            retailer_agent = retailer_agent_revenue_sharing
        else:
            sg.popup("Invalid contract type.")
            continue

        state = env.reset()
        done = False
        total_human_profit = 0
        total_agent_profit = 0
        round_num = 1
        game_started = True

        window.close()

        layout_game = [
            [sg.Text(size=(25,1), key="-ROUND-", font=("Helvetica", 14, "bold")),
             sg.Text(size=(50,1), key="-PROFITS-", font=("Helvetica", 14, "bold"))],
            [sg.Multiline(size=(80, 20), key="-OUTPUT-", disabled=True, autoscroll=True, font=('Courier', 10))],
            [sg.Input(key="-INPUT-", size=(20,1)), sg.Button("Submit"), sg.Button("Exit")]
        ]

        window = sg.Window("Supply Chain Game", layout_game, finalize=True)
        window.metadata = {"current_step": None}
        update_round_and_profits(window, round_num, total_human_profit, total_agent_profit)

        def gui_print(text):
            current = window["-OUTPUT-"].get()
            window["-OUTPUT-"].update(current + text + "\n")

        gui_print(f"--- Round {round_num} ---")

        if role == "manufacturer":
            window.metadata["current_step"] = "ask_wholesale"
            gui_print("Enter wholesale price:")
        else:
            # Human is retailer, do manufacturer move and show their decisions
            manufacturer_action = manufacturer_agent.get_action(state, opponent_traits=human_traits_estimate)
            state = env.manufacturer_step(manufacturer_action)
            w, b, r = state
            if contract_type == "wholesale":
                gui_print(f"Manufacturer's wholesale price: {w}")
            elif contract_type == "buyback":
                gui_print(f"Manufacturer's wholesale price: {w}, Buyback price: {b}")
            elif contract_type == "revenue-sharing":
                gui_print(f"Manufacturer's wholesale price: {w}, Revenue share: {r}")

            window.metadata["current_step"] = "ask_stock"
            gui_print(f"Enter retailer stock (0 to {env.max_stock}):")

        continue

    if game_started:
        if event in (sg.WIN_CLOSED, "Exit"):
            break

        if done:
            continue

        if event == "Submit":
            user_input = values["-INPUT-"]
            check_escape(user_input)
            window["-INPUT-"].update("")

            def gui_print(text):
                window["-OUTPUT-"].update(text + "\n", append=True)

            current_step = window.metadata["current_step"]

            if role == "manufacturer":
                # Manufacturer logic
                if current_step == "ask_wholesale":
                    try:
                        w = int(user_input)
                        if not (env.min_w <= w <= env.max_price):
                            gui_print("Invalid wholesale price. Try again:")
                        else:
                            if contract_type == "wholesale":
                                human_action = w - env.min_w
                                state = env.manufacturer_step(human_action)
                                agent_action = retailer_agent.get_action(state, opponent_traits=human_traits_estimate)
                                optimal_stock = env.get_optimal_stock()
                                agent_stock_choice = int(round(
                                    optimal_stock * (0.8 if agent_action == 0 else (1.2 if agent_action == 2 else 1.0))
                                ))

                                next_state, rewards, done, _ = env.retailer_step(agent_action)
                                env.demand = random.randint(50, 150)
                                w_used = w
                                human_profit = (w_used - 3) * min(agent_stock_choice, env.demand)
                                agent_profit = 12 * min(agent_stock_choice, env.demand) - w_used * agent_stock_choice

                                total_human_profit += human_profit
                                total_agent_profit += agent_profit
                                retailer_agent.update_opponent_model(human_action)

                                gui_print(f"  Demand this round: {env.demand}")
                                gui_print(f"  Your profit this round: {human_profit}")
                                gui_print(f"  Agent's profit this round: {agent_profit}")

                                state = next_state
                                round_num += 1
                                if round_num > env.max_rounds:
                                    done = True
                                    gui_print("Game Over!")
                                    gui_print(f"Final Score - You: {total_human_profit}, Agent: {total_agent_profit}")
                                else:
                                    gui_print(f"--- Round {round_num} ---")
                                    window.metadata["current_step"] = "ask_wholesale"
                                    gui_print("Enter wholesale price:")

                                update_round_and_profits(window, round_num, total_human_profit, total_agent_profit)

                            elif contract_type == "buyback":
                                window.metadata["wholesale_price"] = w
                                window.metadata["current_step"] = "ask_buyback"
                                gui_print("Enter buyback price:")
                            elif contract_type == "revenue-sharing":
                                window.metadata["wholesale_price"] = w
                                window.metadata["current_step"] = "ask_revenue"
                                gui_print("Enter revenue share:")
                    except ValueError:
                        gui_print("Invalid input. Please enter an integer for wholesale price:")

                elif current_step == "ask_buyback":
                    # Buyback-contract step for a human manufacturer: the wholesale price
                    # was stored in window.metadata by the previous step; this input
                    # supplies the buyback price, after which the round is played out
                    # against the retailer agent.
                    w = window.metadata["wholesale_price"]
                    try:
                        b = int(user_input)
                        if not (0 <= b <= env.max_price):
                            gui_print("Invalid buyback price. Try again:")
                        else:
                            # Action = (wholesale price as an offset from env.min_w, buyback price).
                            human_action = (w - env.min_w, b)
                            # Both components must fall inside the bounds given by
                            # env.manufacturer_action_space.nvec before the env is stepped.
                            if (0 <= human_action[0] <= env.manufacturer_action_space.nvec[0]-1 and
                                0 <= human_action[1] <= env.manufacturer_action_space.nvec[1]-1):

                                state = env.manufacturer_step(human_action)
                                agent_action = retailer_agent.get_action(state, opponent_traits=human_traits_estimate)
                                optimal_stock = env.get_optimal_stock()
                                # Translate the agent's categorical action into a stock quantity:
                                # 0 -> 0.8 x optimal, 2 -> 1.2 x optimal, anything else -> optimal.
                                agent_stock_choice = int(round(
                                    optimal_stock * (0.8 if agent_action == 0 else (1.2 if agent_action == 2 else 1.0))
                                ))

                                next_state, rewards, done, _ = env.retailer_step(agent_action)
                                # Demand is redrawn after env.retailer_step; the profits below use
                                # this draw, and the `rewards` returned by the env are not used here.
                                env.demand = random.randint(50, 150)
                                w_used = w
                                # Cap the buyback price at the wholesale price for the profit math.
                                # NOTE(review): the cap is applied after human_action was already
                                # passed to env.manufacturer_step, so the env and the agent saw the
                                # uncapped value - confirm this is intended.
                                if b > w_used:
                                    b = w_used
                                # Manufacturer (human): (w - 3) per unit sold, minus b refunded per unsold unit.
                                # Retailer (agent): 12 per unit sold, minus w per unit stocked, plus the refund.
                                # Presumably 3 is the unit production cost and 12 the retail price - TODO confirm.
                                human_profit = (w_used - 3)*min(agent_stock_choice, env.demand) - b*(agent_stock_choice - min(agent_stock_choice, env.demand))
                                agent_profit = 12*min(agent_stock_choice, env.demand) - w_used*agent_stock_choice + b*(agent_stock_choice - min(agent_stock_choice, env.demand))

                                total_human_profit += human_profit
                                total_agent_profit += agent_profit
                                retailer_agent.update_opponent_model(human_action)

                                gui_print(f"  Demand this round: {env.demand}")
                                gui_print(f"  Your profit this round: {human_profit}")
                                gui_print(f"  Agent's profit this round: {agent_profit}")

                                # Advance to the next round, or finish once env.max_rounds is exceeded.
                                state = next_state
                                round_num += 1
                                if round_num > env.max_rounds:
                                    done = True
                                    gui_print("Game Over!")
                                    gui_print(f"Final Score - You: {total_human_profit}, Agent: {total_agent_profit}")
                                else:
                                    # A new round always restarts at the wholesale-price prompt.
                                    gui_print(f"--- Round {round_num} ---")
                                    window.metadata["current_step"] = "ask_wholesale"
                                    gui_print("Enter wholesale price:")

                                # Called on both the game-over and the next-round path.
                                update_round_and_profits(window, round_num, total_human_profit, total_agent_profit)
                            else:
                                # Price passed the env.max_price check but is outside the action space.
                                gui_print("Invalid buyback price. Try again:")
                    except ValueError:
                        # Raised by int(user_input) on non-integer text; any other ValueError
                        # raised inside the try block is reported with the same message.
                        gui_print("Invalid input. Please enter an integer for buyback price:")

                elif current_step == "ask_revenue":
                    # Revenue-sharing step for a human manufacturer: the wholesale price was
                    # stored in window.metadata by the previous step; this input supplies the
                    # revenue share, after which the round is played out against the retailer agent.
                    w = window.metadata["wholesale_price"]
                    try:
                        r = int(user_input)
                        if not (0 <= r <= env.max_price):
                            gui_print("Invalid revenue share. Try again:")
                        else:
                            # Action = (wholesale price as an offset from env.min_w, revenue share).
                            human_action = (w - env.min_w, r)
                            # Both components must fall inside the bounds given by
                            # env.manufacturer_action_space.nvec before the env is stepped.
                            if (0 <= human_action[0] <= env.manufacturer_action_space.nvec[0]-1 and
                                0 <= human_action[1] <= env.manufacturer_action_space.nvec[1]-1):

                                state = env.manufacturer_step(human_action)
                                agent_action = retailer_agent.get_action(state, opponent_traits=human_traits_estimate)
                                optimal_stock = env.get_optimal_stock()
                                # Translate the agent's categorical action into a stock quantity:
                                # 0 -> 0.8 x optimal, 2 -> 1.2 x optimal, anything else -> optimal.
                                agent_stock_choice = int(round(
                                    optimal_stock * (0.8 if agent_action == 0 else (1.2 if agent_action == 2 else 1.0))
                                ))

                                next_state, rewards, done, _ = env.retailer_step(agent_action)
                                # Demand is redrawn after env.retailer_step; the profits below use
                                # this draw, and the `rewards` returned by the env are not used here.
                                env.demand = random.randint(50, 150)
                                w_used = w
                                # Cap r so that w + r never exceeds 12, the per-unit sales multiplier
                                # used in agent_profit below (presumably the retail price - TODO confirm).
                                # NOTE(review): the cap is applied after human_action was already passed
                                # to env.manufacturer_step, and it goes negative if w_used > 12 - confirm.
                                max_revenue_share = 12 - w_used
                                if r > max_revenue_share:
                                    r = max_revenue_share
                                # Manufacturer (human): (w - 3) plus the share r on every unit sold.
                                # Retailer (agent): 12 per unit sold, minus w per unit stocked, minus r per unit sold.
                                human_profit = (w_used - 3)*min(agent_stock_choice, env.demand) + r*min(agent_stock_choice, env.demand)
                                agent_profit = 12*min(agent_stock_choice, env.demand) - w_used*agent_stock_choice - r*min(agent_stock_choice, env.demand)

                                total_human_profit += human_profit
                                total_agent_profit += agent_profit
                                retailer_agent.update_opponent_model(human_action)

                                gui_print(f"  Demand this round: {env.demand}")
                                gui_print(f"  Your profit this round: {human_profit}")
                                gui_print(f"  Agent's profit this round: {agent_profit}")

                                # Advance to the next round, or finish once env.max_rounds is exceeded.
                                state = next_state
                                round_num += 1
                                if round_num > env.max_rounds:
                                    done = True
                                    gui_print("Game Over!")
                                    gui_print(f"Final Score - You: {total_human_profit}, Agent: {total_agent_profit}")
                                else:
                                    # A new round always restarts at the wholesale-price prompt.
                                    gui_print(f"--- Round {round_num} ---")
                                    window.metadata["current_step"] = "ask_wholesale"
                                    gui_print("Enter wholesale price:")

                                # Called on both the game-over and the next-round path.
                                update_round_and_profits(window, round_num, total_human_profit, total_agent_profit)
                            else:
                                # Share passed the env.max_price check but is outside the action space.
                                gui_print("Invalid revenue share. Try again:")
                    except ValueError:
                        # Raised by int(user_input) on non-integer text; any other ValueError
                        # raised inside the try block is reported with the same message.
                        gui_print("Invalid input. Please enter an integer for revenue share:")

            else:
                # Retailer logic
                if current_step == "ask_stock":
                    try:
                        stock_choice = int(user_input)
                        if not (0 <= stock_choice <= env.max_stock):
                            gui_print("Invalid stock. Enter a number between 0 and 150:")
                        else:
                            w, b, r = state
                            optimal_stock = env.get_optimal_stock()
                            if stock_choice <= optimal_stock * 0.8:
                                human_action = 0
                            elif stock_choice <= optimal_stock * 1.2:
                                human_action = 1
                            else:
                                human_action = 2

                            next_state, rewards, done, _ = env.retailer_step(human_action)
                            env.demand = random.randint(50, 150)

                            if contract_type == "wholesale":
                                human_profit = 12*min(stock_choice, env.demand) - w*stock_choice
                                agent_profit = (w - 3)*min(stock_choice, env.demand)
                            elif contract_type == "buyback":
                                if b > w:
                                    b = w
                                human_profit = (12*min(stock_choice, env.demand)
                                                - w*stock_choice
                                                + b*(stock_choice - min(stock_choice, env.demand)))
                                agent_profit = ((w - 3)*min(stock_choice, env.demand)
                                                - b*(stock_choice - min(stock_choice, env.demand)))
                            elif contract_type == "revenue-sharing":
                                max_revenue_share = 12 - w
                                if r > max_revenue_share:
                                    r = max_revenue_share
                                human_profit = (12*min(stock_choice, env.demand)
                                                - w*stock_choice
                                                - r*(stock_choice - min(stock_choice, env.demand)))
                                agent_profit = ((w - 3)*min(stock_choice, env.demand)
                                                + r*min(stock_choice, env.demand))

                            total_human_profit += human_profit
                            total_agent_profit += agent_profit
                            manufacturer_agent.update_opponent_model(human_action)

                            gui_print(f"  Demand this round: {env.demand}")
                            gui_print(f"  Your profit this round: {human_profit}")
                            gui_print(f"  Agent's profit this round: {agent_profit}")

                            state = next_state
                            round_num += 1
                            if round_num > env.max_rounds:
                                done = True
                                gui_print("Game Over!")
                                gui_print(f"Final Score - You: {total_human_profit}, Agent: {total_agent_profit}")
                            else:
                                gui_print(f"--- Round {round_num} ---")
                                # Show manufacturer decisions again for next round
                                manufacturer_action = manufacturer_agent.get_action(state, opponent_traits=human_traits_estimate)
                                state = env.manufacturer_step(manufacturer_action)
                                w, b, r = state
                                if contract_type == "wholesale":
                                    gui_print(f"Manufacturer's wholesale price: {w}")
                                elif contract_type == "buyback":
                                    gui_print(f"Manufacturer's wholesale price: {w}, Buyback price: {b}")
                                elif contract_type == "revenue-sharing":
                                    gui_print(f"Manufacturer's wholesale price: {w}, Revenue share: {r}")

                                window.metadata["current_step"] = "ask_stock"
                                gui_print(f"Enter retailer stock (0 to {env.max_stock}):")

                            update_round_and_profits(window, round_num, total_human_profit, total_agent_profit)
                    except ValueError:
                        gui_print("Invalid input. Please enter an integer between 0 and 150:")

# Runs at top level once the indented block above has finished: close the GUI window.
window.close()
