In [None]:
import numpy as np
import random
import matplotlib.pyplot as plt
import scipy.stats as stats

# Environment Setup

In [None]:
class NonStationaryStochasticPricingEnvironment:
    """
    Non-stationary stochastic pricing environment for a single product.

    The distribution of customer valuations is allowed to differ at every
    round: round ``t`` uses ``valuation_distributions[t]``. The round counter
    only advances when ``simulate_round`` is called.
    """

    def __init__(self, valuation_distributions, demand_noise_std=0.1):
        """
        Args:
            valuation_distributions: Indexable collection with one entry per
                round. Each entry must expose ``cdf(price)`` and ``rvs()``,
                e.g. a frozen scipy.stats distribution.
            demand_noise_std: Standard deviation of the Gaussian noise added
                to the demand probability.

        Attributes:
            valuation_dist: The collection of per-round distributions.
            noise_std: The demand noise standard deviation.
            current_round: Zero-based index of the round about to be played.
        """
        self.valuation_dist = valuation_distributions
        self.noise_std = demand_noise_std
        self.current_round = 0

    def _current_distribution(self):
        """
        Return the valuation distribution associated with the current round.

        Raises:
            IndexError: If no distribution is defined for the current round
                (for example, every configured round was already played).
        """
        try:
            return self.valuation_dist[self.current_round]
        except IndexError as err:
            # Same exception type as the raw lookup, but with a message that
            # tells the caller which round ran past the configured horizon.
            raise IndexError(
                "no valuation distribution defined for round "
                f"{self.current_round}"
            ) from err

    def demand_probability(self, price):
        """
        Calculate the noisy probability that a customer buys at ``price``.

        The base value is ``1 - cdf(price)`` under the current round's
        distribution; zero-mean Gaussian noise with standard deviation
        ``noise_std`` is added and the result is clipped to [0, 1].
        This method does not advance the round counter.

        Raises:
            IndexError: If no distribution is defined for the current round.
        """
        current_dist = self._current_distribution()

        # Base probability: customers buy if their valuation >= price
        base_prob = 1 - current_dist.cdf(price)

        # Add some noise to make it stochastic
        noise = np.random.normal(0, self.noise_std)

        # Ensure probability is in [0, 1]
        return np.clip(base_prob + noise, 0, 1)

    def simulate_round(self, price):
        """
        Simulate one pricing round and advance to the next round.

        A single customer valuation is drawn from the current round's
        distribution; the customer buys when the valuation is at least
        ``price``.

        Returns:
            (sale_made, revenue): ``sale_made`` is 1 if the sale happened and
            0 otherwise; ``revenue`` is ``sale_made * price``.

        Raises:
            IndexError: If no distribution is defined for the current round.
                The round counter is left unchanged in that case.
        """
        current_dist = self._current_distribution()

        # Draw a random customer valuation from the distribution
        valuation = current_dist.rvs()

        # Customer purchases if their valuation >= price
        sale_made = 1 if valuation >= price else 0

        # Revenue is price if sale was made, 0 otherwise
        revenue = sale_made * price

        # Move on to the next round only after a successful simulation
        self.current_round += 1

        return sale_made, revenue

Poi, quando vado a definire la configurazione dell'environment, definisco una funzione lambda per la media e una per la deviazione standard, e creo un vettore di distribuzioni (una per round) da passare all'environment.
Forse ha addirittura senso cambiare il tipo di distribuzione (e non usare solo distribuzioni normali), per creare un cambiamento più netto.

# Agent

Problema primal-dual -> secondo me dovrebbe andare bene il Combinatorial UCB, visto l'hint.
Domani ci lavoro meglio.