In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from collections import Counter
from scipy.stats import norm, beta, bernoulli

In [None]:
import sys
# Make the project's `src` package importable from this notebook.
# NOTE(review): this is an absolute, user-specific Windows path, so the imports
# below only resolve on the original author's machine. Consider a path relative
# to the repository or an environment variable instead.
sys.path.append("C:/Users/clementine.rosier/OneDrive - Ekimetrics/Documents/GitHub/dynamic_pricing")

# IPython autoreload extension: in mode 2, imported modules are reloaded before
# each cell execution, so edits to the `src` modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
#simple simulations
from src.simulations import SimpleSimulation
from src.simulation_melange_gaussien import GMixSimulation
from src.simulation_uniforme import UniformSimulation
#simulations with context
from src.context_generator import ContextGenerator
from src.simulation_with_context import ContextualDemandSimulation
#bandit algo
from src.binomial_bandit import BinomialBandit
from src.simple_bandit_greedy import GreedyBandit
from src.simple_bandit_ucb import UCBBandit
from src.context_bandit import ContextBandit
#evaluation algo (regret)
from src.evaluate_model import EvaluateBandit
from src.evaluate_with_context import EvaluateBanditContext

In [None]:
def plot_demand_context(simulation, context_generator, min_price=0, max_price=150, n_points=50, n_estimates_per_point=100):
    """Plot simulated revenue against price for a contextual demand simulation.

    For each of ``n_points`` prices evenly spaced in ``[min_price, max_price]``,
    a single context is drawn with ``context_generator.simulate()`` and
    ``simulation.evaluate(price, context_c, context_d)`` is called
    ``n_estimates_per_point`` times with that same context. Each outcome is
    multiplied by the price to obtain a revenue sample.

    The samples are drawn with ``seaborn.lineplot`` on a new 15x6 figure, which
    stays the current matplotlib figure so the caller can add a title, axis
    labels, or save it.

    Parameters
    ----------
    simulation : object exposing ``evaluate(price, context_c, context_d)``.
    context_generator : object exposing ``simulate()`` returning a
        ``(context_c, context_d)`` pair.
    min_price, max_price : bounds of the price grid (both included).
    n_points : number of prices on the grid.
    n_estimates_per_point : number of demand draws per price.

    Returns
    -------
    None
    """
    prices = []
    revenues = []
    for price in np.linspace(min_price, max_price, n_points):
        # One context per price point, shared by all draws at that price.
        context_c, context_d = context_generator.simulate()
        revenues.extend(
            simulation.evaluate(price, context_c, context_d) * price
            for _ in range(n_estimates_per_point)
        )
        prices.extend([price] * n_estimates_per_point)

    plt.figure(figsize=(15, 6), facecolor="w")
    # x/y must be passed by keyword: seaborn >= 0.12 rejects positional data vectors.
    ax = sns.lineplot(x=np.asarray(prices), y=np.asarray(revenues))
    ax.legend("")

In [None]:
def test_bandit_thompson(simulation, k_p, alpha_0, beta_0, init_round=10, test_round=5000):
    """Run a Thompson-sampling pricing experiment with a ``BinomialBandit`` (no context).

    The experiment has two phases:

    1. Exploration: every arm ``j`` in ``range(bandit.k)`` is forced once per
       round, for ``init_round`` rounds.
    2. Test: for ``test_round`` steps the bandit picks its own arm through
       ``chose_action(method="thompson")``.

    At every step the price ``k_p[bandit.action]`` is offered, the reward is
    ``int(simulation.evaluate(price)) * price``, the bandit is updated, and
    ``evaluation.get_regret(bandit.n_obs)`` is recorded together with a snapshot
    of ``bandit.alpha_n`` / ``bandit.beta_n``.

    Parameters
    ----------
    simulation : object exposing ``evaluate(price)``.
    k_p : indexable collection of candidate prices (one per arm).
    alpha_0, beta_0 : initial parameters forwarded to ``BinomialBandit``.
    init_round : number of forced passes over all arms.
    test_round : number of Thompson-sampling steps.

    Returns
    -------
    hist : list
        Prices chosen during the test phase only.
    regret : list
        One ``get_regret`` value per step of both phases.
    parameters : numpy.ndarray
        The initial ``(alpha_0, beta_0)`` snapshot followed by one
        ``(alpha_n, beta_n)`` snapshot per step, joined along axis 1.
    """
    hist = []
    regret = []
    # Snapshots are collected in a list and concatenated once at the end;
    # calling np.append on every step re-copies the whole history each time.
    snapshots = [np.array([np.array([alpha_0]), np.array([beta_0])])]
    bandit = BinomialBandit(k_p, alpha_0, beta_0)
    evaluation = EvaluateBandit(bandit, simulation)
    print(evaluation.best_price)

    # Exploration rounds: play each arm in turn.
    for _ in range(init_round):
        for arm in range(bandit.k):
            bandit.chose_action(force_action=arm)
            price = k_p[bandit.action]
            reward = int(simulation.evaluate(price)) * price
            bandit.update(bandit.action, reward)
            regret.append(evaluation.get_regret(bandit.n_obs))
            snapshots.append(np.array([np.array([bandit.alpha_n]), np.array([bandit.beta_n])]))

    # Test rounds: the bandit chooses by Thompson sampling.
    for _ in range(test_round):
        bandit.chose_action(method="thompson")
        price = k_p[bandit.action]
        reward = int(simulation.evaluate(price)) * price
        bandit.update(bandit.action, reward)
        regret.append(evaluation.get_regret(bandit.n_obs))
        hist.append(k_p[bandit.action])
        snapshots.append(np.array([np.array([bandit.alpha_n]), np.array([bandit.beta_n])]))

    parameters = np.concatenate(snapshots, axis=1)
    return hist, regret, parameters

In [None]:
def test_simplebandit_context(simulation, k_p, alpha_0, beta_0, init_round, test_round, context_generator=None):
    """Run a context-blind ``BinomialBandit`` against a contextual demand simulation.

    A fresh context is drawn with ``context_generator.simulate()`` at every
    step and passed to the simulation and to the regret evaluation, but the
    bandit itself never sees it.

    The experiment has two phases:

    1. Exploration: every arm ``j`` in ``range(bandit.k)`` is forced once per
       round, for ``init_round`` rounds.
    2. Test: for ``test_round`` steps the bandit picks its own arm through
       ``chose_action(method="thompson")``.

    Parameters
    ----------
    simulation : object exposing ``evaluate(price, context_c, context_d)``.
    k_p : indexable collection of candidate prices (one per arm).
    alpha_0, beta_0 : initial parameters forwarded to ``BinomialBandit``.
    init_round : number of forced passes over all arms.
    test_round : number of Thompson-sampling steps.
    context_generator : object exposing ``simulate()`` returning a
        ``(context_c, context_d)`` pair. When omitted, the notebook-level
        variable named ``context_generator`` is used, as earlier versions of
        this function did implicitly.

    Returns
    -------
    hist : list
        Arm indices (``bandit.action``) chosen during the test phase only.
    regret : list
        One ``get_regret`` value per step of both phases.
    parameters : numpy.ndarray
        The initial ``(alpha_0, beta_0)`` snapshot followed by one
        ``(alpha_n, beta_n)`` snapshot per step, joined along axis 1.

    Raises
    ------
    NameError
        If no ``context_generator`` is passed and none exists at notebook level.
    """
    if context_generator is None:
        # Backward compatibility with callers relying on the global variable.
        context_generator = globals().get("context_generator")
        if context_generator is None:
            raise NameError(
                "name 'context_generator' is not defined; "
                "pass context_generator=... explicitly"
            )

    hist = []
    regret = []
    # Snapshots are collected in a list and concatenated once at the end;
    # calling np.append on every step re-copies the whole history each time.
    snapshots = [np.array([np.array([alpha_0]), np.array([beta_0])])]
    bandit = BinomialBandit(k_p, alpha_0, beta_0)
    evaluation = EvaluateBanditContext(bandit, simulation)

    # Exploration rounds: play each arm in turn.
    for _ in range(init_round):
        for arm in range(bandit.k):
            context_c, context_d = context_generator.simulate()
            bandit.chose_action(force_action=arm)
            price = k_p[bandit.action]
            reward = int(simulation.evaluate(price, context_c, context_d)) * price
            bandit.update(bandit.action, reward)
            regret.append(evaluation.get_regret(bandit.n_obs, bandit.action, context_c, context_d))
            snapshots.append(np.array([np.array([bandit.alpha_n]), np.array([bandit.beta_n])]))

    # Test rounds: the bandit chooses by Thompson sampling.
    for _ in range(test_round):
        context_c, context_d = context_generator.simulate()
        bandit.chose_action(method="thompson")
        price = k_p[bandit.action]
        reward = int(simulation.evaluate(price, context_c, context_d)) * price
        bandit.update(bandit.action, reward)
        regret.append(evaluation.get_regret(bandit.n_obs, bandit.action, context_c, context_d))
        hist.append(bandit.action)
        snapshots.append(np.array([np.array([bandit.alpha_n]), np.array([bandit.beta_n])]))

    parameters = np.concatenate(snapshots, axis=1)
    return hist, regret, parameters

In [None]:
def test_contextualbandit_context(simulation, size_context, m_0, q_0, init_round, test_round, context_generator=None, k_p=None):
    """Run a ``ContextBandit`` against a contextual demand simulation.

    A fresh context is drawn with ``context_generator.simulate()`` at every
    step and passed to the simulation and to the regret evaluation.

    The experiment has two phases:

    1. Exploration: every arm ``j`` in ``range(bandit.k)`` is forced once per
       round, for ``init_round`` rounds.
    2. Test: for ``test_round`` steps the bandit picks its own arm through
       ``chose_action(method="thompson")``.

    In both phases the regret is recorded *before* ``bandit.update`` is called.

    Parameters
    ----------
    simulation : object exposing ``evaluate(price, context_c, context_d)``.
    size_context : forwarded to ``ContextBandit``.
    m_0, q_0 : initial parameters forwarded to ``ContextBandit``.
    init_round : number of forced passes over all arms.
    test_round : number of Thompson-sampling steps.
    context_generator : object exposing ``simulate()`` returning a
        ``(context_c, context_d)`` pair. When omitted, the notebook-level
        variable named ``context_generator`` is used, as earlier versions of
        this function did implicitly.
    k_p : indexable collection of candidate prices (one per arm). When
        omitted, the notebook-level variable named ``k_p`` is used, as earlier
        versions of this function did implicitly.

    Returns
    -------
    hist : list
        Arm indices (``bandit.action``) chosen during the test phase only.
    regret : list
        One ``get_regret`` value per step of both phases.
    parameters : numpy.ndarray
        The initial ``(m_0, q_0)`` snapshot followed by one ``(m_n, q_n)``
        snapshot per step, joined along axis 1.

    Raises
    ------
    NameError
        If ``context_generator`` or ``k_p`` is neither passed nor defined at
        notebook level.
    """
    # Backward compatibility with callers relying on notebook-level variables.
    if context_generator is None:
        context_generator = globals().get("context_generator")
        if context_generator is None:
            raise NameError(
                "name 'context_generator' is not defined; "
                "pass context_generator=... explicitly"
            )
    if k_p is None:
        k_p = globals().get("k_p")
        if k_p is None:
            raise NameError("name 'k_p' is not defined; pass k_p=... explicitly")

    hist = []
    regret = []
    # Snapshots are collected in a list and concatenated once at the end;
    # calling np.append on every step re-copies the whole history each time.
    snapshots = [np.array([np.array([m_0]), np.array([q_0])])]
    bandit = ContextBandit(k_p, size_context, m_0, q_0)
    evaluation = EvaluateBanditContext(bandit, simulation)

    # Exploration rounds: play each arm in turn.
    for _ in range(init_round):
        for arm in range(bandit.k):
            context_c, context_d = context_generator.simulate()
            bandit.chose_action(force_action=arm)
            price = k_p[bandit.action]
            reward = int(simulation.evaluate(price, context_c, context_d)) * price
            regret.append(evaluation.get_regret(bandit.n_obs, bandit.action, context_c, context_d))
            bandit.update(bandit.action, reward)
            snapshots.append(np.array([np.array([bandit.m_n]), np.array([bandit.q_n])]))

    # Test rounds: the bandit chooses by Thompson sampling.
    for _ in range(test_round):
        context_c, context_d = context_generator.simulate()
        bandit.chose_action(method="thompson")
        price = k_p[bandit.action]
        reward = int(simulation.evaluate(price, context_c, context_d)) * price
        regret.append(evaluation.get_regret(bandit.n_obs, bandit.action, context_c, context_d))
        bandit.update(bandit.action, reward)
        hist.append(bandit.action)
        snapshots.append(np.array([np.array([bandit.m_n]), np.array([bandit.q_n])]))

    parameters = np.concatenate(snapshots, axis=1)
    return hist, regret, parameters

In [None]:
# Context simulations

In [None]:
# Scenario "discret": the continuous coefficient is zero, so only the beta_d
# coefficients carry non-zero values.
beta_c=np.array([0])
mu_c=np.array([0])
sigma_c=np.array([1])
# The two rows have different lengths (3 and 5, matching n below), so the array
# must be created with dtype=object: NumPy >= 1.24 raises ValueError when asked
# to build a ragged array implicitly.
beta_d=np.array([[1,-1,7],[2,100,-1,8,-40]], dtype=object)
n=np.array([3,5])
mu_e=-200
sigma_e=15
#instantiate context simulation
context_generator_discret = ContextGenerator(mu_c,sigma_c,n)

#instantiate demand simulation
demand_simulation_discret = ContextualDemandSimulation(beta_c, beta_d, mu_e, sigma_e)

In [None]:
# Revenue curve for the discrete-context scenario; the labels, title and
# savefig below act on the figure opened by plot_demand_context.
plot_demand_context(demand_simulation_discret, context_generator_discret)
plt.xlabel('prix')
plt.ylabel('revenu')
plt.title('Simulation du revenu avec contexte (variables discrètes)')
plt.savefig('demand_context_disc.png')

In [None]:
# Scenario "cont": three continuous context variables.

# Inputs of the context generator.
mu_c = np.array([50, 4, 30])
sigma_c = np.array([1, 4, 20])
n = np.array([1])
context_generator_cont = ContextGenerator(mu_c, sigma_c, n)

# Inputs of the demand simulation.
beta_c = np.array([2, -5, 2])
beta_d = np.array([[1]])
mu_e = -200
sigma_e = 15
demand_simulation_cont = ContextualDemandSimulation(beta_c, beta_d, mu_e, sigma_e)

In [None]:
# Revenue curve for the continuous-context scenario; the labels, title and
# savefig below act on the figure opened by plot_demand_context.
plot_demand_context(demand_simulation_cont, context_generator_cont)
plt.xlabel('prix')
plt.ylabel('revenu')
plt.title('Simulation du revenu avec contexte (variables continues)')
plt.savefig('demand_context_cont.png')

In [None]:
# Scenario "all": the continuous coefficients of the "cont" scenario combined
# with the beta_d coefficients of the "discret" scenario.
beta_c=np.array([2,-5,2])
mu_c=np.array([50,4,30])
sigma_c=np.array([1,4,20])
# The two rows have different lengths (3 and 5, matching n below), so the array
# must be created with dtype=object: NumPy >= 1.24 raises ValueError when asked
# to build a ragged array implicitly.
beta_d=np.array([[1,-1,7],[2,100,-1,8,-40]], dtype=object)
n=np.array([3,5])
mu_e=-200
sigma_e=15
#instantiate context simulation
context_generator_all = ContextGenerator(mu_c,sigma_c,n)

#instantiate demand simulation
demand_simulation_all = ContextualDemandSimulation(beta_c, beta_d, mu_e, sigma_e)

In [None]:
# plot_demand_context opens a new figure on every call, and plt.title /
# plt.xlabel / plt.ylabel / plt.savefig act on the most recently opened one.
# The "all" scenario is therefore plotted last so that it is the figure that
# gets titled and written to demand_context_all.png; the continuous-only
# scenario is drawn first, on its own figure, for visual comparison.
plot_demand_context(demand_simulation_cont,context_generator_cont)
plot_demand_context(demand_simulation_all,context_generator_all)
plt.title('Simulation du revenu avec contexte')
plt.xlabel('prix')
plt.ylabel('revenu')
plt.savefig('demand_context_all.png')

In [None]:
# Test of the Thompson sampling algorithms

In [None]:
# TODO(review): the right-hand side is missing, so this cell is a syntax error
# as written. k_p is indexed by bandit.action in the test_* functions and its
# length sizes m_0 / q_0 in the next cell; fill in the candidate prices here.
k_p=

In [None]:
# Initial parameters for the contextual bandit: one row per candidate price in
# k_p and one column per context dimension, filled with a constant.
norm_mean, norm_std = 0, 1
m_0 = norm_mean + np.zeros((len(k_p), size_context))
q_0 = norm_std + np.zeros((len(k_p), size_context))