In [13]:
import numpy as np
import random
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy import optimize

# Environment Setup

In [19]:

class ValuationMatchingEnv:
    def __init__(self, valuation_means, valuation_stds, prices, T, seed=None):
        self.means = np.array(valuation_means)
        self.stds = np.array(valuation_stds)
        self.prices = prices
        self.K = len(prices)
        self.n_products = len(valuation_means)
        self.T = T
        self.rng = np.random.default_rng(seed)

        # Pre-generate valuations for efficiency
        self.valuations = self.rng.normal(
            loc=self.means[:, None],
            scale=self.stds[:, None],
            size=(self.n_products, T)
        )
        self.t = 0

    def round(self, matching):
        """
        matching: list of (product_idx, price_idx)
        returns:
          - reward_list: [p_j if v_i >= p_j else 0]
          - purchases: list of booleans
          - actual_prices: list of prices
        """
        rewards = []
        purchases = []
        actual_prices = []

        for i, j in matching:
            val = self.valuations[i, self.t]
            price = self.prices[j]
            if price <= val:
                rewards.append(price)
                purchases.append(True)
            else:
                rewards.append(0.0)
                purchases.append(False)
            actual_prices.append(price)

        self.t += 1
        return rewards, purchases, actual_prices


# Agent Setup

In [None]:
class UCBMatchingAgentWithInventory:
    def __init__(self, n_products, prices, T, total_inventory, confidence_bound=2, seed=None):
        self.n_products = n_products # number of products
        self.prices = prices # list of prices for each product
        self.K = len(prices)  # number of price indices
        self.T = T # total number of time steps
        self.P = total_inventory # total inventory across all products
        self.rho = total_inventory / T # average inventory per time step
        self.remaining_inventory = total_inventory 
        self.confidence_bound = confidence_bound
        self.rng = np.random.default_rng(seed)

        self.N_pulls = np.zeros((n_products, self.K)) # number of pulls

        self.W_avg = np.zeros((n_products, self.K))  # avg reward
        self.C_avg = np.zeros((n_products, self.K))  # avg demand

        self.t = 0
        self.history = {'price_indices': [], 'actual_prices': [], 'rewards': [], 'purchases': [], 'inventory': []}

    def _get_ucb_lcb(self):
        bonus = self.confidence_bound * np.sqrt(2 * np.log(self.T) / np.maximum(1, self.N_pulls))
        W_ucb = self.W_avg + bonus
        C_lcb = np.maximum(0, self.C_avg - bonus)
        W_ucb[self.N_pulls == 0] = 10
        return W_ucb, C_lcb

    def _solve_distribution(self, W_ucb, C_lcb):
        n, k = self.n_products, self.K
        N = n * k
        c = -W_ucb.flatten()

        A_ub = [C_lcb.flatten()]
        b_ub = [self.rho]

        A_eq = np.zeros((n, N))
        for i in range(n):
            A_eq[i, i * k:(i + 1) * k] = 1
        b_eq = [1] * n

        bounds = [(0, 1) for _ in range(N)]

        res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bounds, method="highs")
        if res.success:
            gamma = res.x.reshape((n, k))
        else:
            gamma = np.ones((n, k)) / k
        return gamma

    def pull_arm(self):
        if self.remaining_inventory <= 0:
            print("⚠️ Inventory exhausted")
            return [None] * self.n_products

        W_ucb, C_lcb = self._get_ucb_lcb()
        gamma = self._solve_distribution(W_ucb, C_lcb)

        # Costruisci matrice di pesi
        W_match = gamma * W_ucb

        rows, cols = linear_sum_assignment(W_match, maximize=True)
        return list(cols)

    def update(self, rewards, purchases, price_indices):
        if self.remaining_inventory <= 0:
            return

        for i in range(self.n_products):
            j = price_indices[i]
            if j is None:
                continue
            price = self.prices[j]
            purchased = purchases[i]
            reward = price if purchased else 0

            self.N_pulls[i, j] += 1
            self.W_avg[i, j] += (reward - self.W_avg[i, j]) / self.N_pulls[i, j]
            self.C_avg[i, j] += (purchased - self.C_avg[i, j]) / self.N_pulls[i, j]

        self.remaining_inventory = max(0, self.remaining_inventory - sum(purchases))

        actual_prices = [self.prices[j] if j is not None else np.nan for j in price_indices]
        self.history['price_indices'].append(price_indices)
        self.history['actual_prices'].append(actual_prices)
        self.history['rewards'].append(sum([a * b for a, b in zip(purchases, actual_prices)]))
        self.history['purchases'].append(purchases)
        self.history['inventory'].append(self.remaining_inventory)
        self.t += 1


# Theoretical analysis

In [16]:
def compute_theoretical_optimum(prices, valuation_means, valuation_stds, n_products):
    opt_prices = []
    expected_revenue = 0
    for mu, sigma in zip(valuation_means, valuation_stds):
        best_rev = 0
        best_price = prices[0]
        for p in prices:
            prob = 1 - norm.cdf(p, loc=mu, scale=sigma)
            rev = p * prob
            if rev > best_rev:
                best_rev = rev
                best_price = p
        opt_prices.append(best_price)
        expected_revenue += best_rev
    return opt_prices, expected_revenue



# Simulation


In [18]:
from scipy.stats import norm

# Settings
T = 1000
P = 1000
n_products = 3
prices = [1.0, 1.5, 2.0]
valuation_means = [1.2, 1.4, 1.6]
valuation_stds = [0.3, 0.3, 0.3]

# Init
env = MultiProductEnvironment(n_products, prices, valuation_means, valuation_stds, seed=42)
agent = CombinatorialUCBLikeAgent(prices, T, P, n_products)

for t in range(T):
    offered_prices = agent.select_prices()
    if np.isnan(offered_prices[0]):
        break
    valuations = env.simulate_customer()
    purchases, reward = env.simulate_purchase(valuations, offered_prices)
    agent.update(offered_prices, purchases, reward)

# Print summary
print("Total reward:", np.sum(agent.history['rewards']))
print("Remaining inventory:", agent.remaining_inventory)


⚠️ Inventory exhausted
Total reward: 1101.5
Remaining inventory: 0
