In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import scipy

In [2]:
class NonstationaryAdvEnvironment:
    def __init__(self, T, n_users, n_competitors, auctions_per_day):
        np.random.seed(1)
        
        # Pricing Env
        self.t = 0 
        self.prices = np.linspace(0,1,100) # price discretization
        user_effect = np.random.normal(0,0.2,size = (n_users, T)) # user specific effect for every day
        
        prob_func = lambda p,t: scipy.special.expit(5*p*t/T) # design choice
        self.prob_history = np.vectorize(prob_func)(self.prices[:, np.newaxis], range(1,T+1))
        self.current_prob = np.zeros(n_users)
        
        
        # Auction Env
        self.auc_t = 0 
        self.n_auctions = auctions_per_day*T
        
        sin = np.sin(np.linspace(0,10, self.n_auctions)) # design choice
        self.bids_history = np.tile(sin, (n_competitors,1)) + np.random.normal(0,1,size=(n_competitors,self.n_auctions))
        self.bids_history = np.maximum(0,self.bids_history)
        
        self.current_reward = 0
        self.cumulative_reward = 0
        
    def get_prob(self, a_t):
        self.current_prob = self.prob_history[a_t, self.t] + + np.random.normal(0, 0.2, self.current_prob.shape)
        self.current_prob = np.maximum(0, np.minimum(1, self.current_prob))
        self.t += 1
        return self.current_prob
    
    def get_bid(self):
        bid = self.bids_history[:,self.auc_t]
        self.auc_t += 1
        return bid
    
    def next_round(self, slots_won, a_t):
        prob = self.current_prob 
        self.current_reward = slots_won * np.dot(prob, np.ones(prob.shape) * self.prices[a_t])
        self.cumulative_reward += self.current_reward
        pass

### Usage Example

In [3]:
auctions_per_day = 2
T = 10
n_users = 2
n_competitors = 3

env = NonstationaryAdvEnvironment(T, n_users, n_competitors, auctions_per_day)

i = 0
while i < T:
    # Assess price (ex: p = 0.5 fixed)
    arm = 50
    print(f'Day {i+1} started. Price = {0.5}.')
    prob = env.get_prob(arm)
    print(f'Current Purchase Probability for every user: {prob}.\n')
    
    for j in range(i*auctions_per_day, (i+1)*auctions_per_day):
        bid = env.get_bid()
        print(f'Auction {j+1} started. Competitors Bids: {bid}.')
        
        # Assess auction (ex: 1 slot won)
        slot_won = 1
        print(f'Number of AdvSlots won: {slot_won}.')
        
        env.next_round(slot_won, arm)
        print(f'Round Reward: {env.current_reward}.\n')
    
    print(f'Cumulative Reward at the end of day {i}: {env.cumulative_reward}.\n')
    print('------------------------------------------------------------------------')
    
    i += 1

Day 1 started. Price = 0.5.
Current Purchase Probability for every user: [0.51833233 0.52264634].

Auction 1 started. Competitors Bids: [0. 0. 0.].
Number of AdvSlots won: 1.
Round Reward: 0.5257468005458048.

Auction 2 started. Competitors Bids: [1.64707486 0.         1.75521931].
Number of AdvSlots won: 1.
Round Reward: 0.5257468005458048.

Cumulative Reward at the end of day 0: 1.0514936010916096.

------------------------------------------------------------------------
Day 2 started. Price = 0.5.
Current Purchase Probability for every user: [0.66095776 0.70565581].

Auction 3 started. Competitors Bids: [1.77032034 0.12157132 1.38165944].
Number of AdvSlots won: 1.
Round Reward: 0.6902088750736839.

Auction 4 started. Competitors Bids: [1.50246112 2.69242138 0.70187395].
Number of AdvSlots won: 1.
Round Reward: 0.6902088750736839.

Cumulative Reward at the end of day 1: 2.4319113512389774.

------------------------------------------------------------------------
Day 3 started. Price