In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import scipy.stats
import numpy as np
import matplotlib.pyplot as plt

from environment import *
from algorithms.zhang import Zhang_algorithm
from algorithms.rcosd import RCOSD_algorithm

from tqdm.notebook import tqdm

In [2]:
# Simulation settings handed to the environment constructor below.
nb_products, T, epsilon = 1, 50_000, 2

# Seed NumPy's legacy global RNG immediately before building the environment
# (presumably the environment draws its demands from it — TODO confirm).
np.random.seed(10)

# NOTE(review): the meaning of the positional literal 3 is not visible from
# this notebook — confirm against PerishableEnvironment's signature.
env = PerishableEnvironment(nb_products, 3, T, epsilon)

In [3]:
# Sanity check of the environment: apply the same constant decision (10 per
# product) for ten periods and print what the environment reports each period.
yt = np.zeros(nb_products)
for t in range(1, 11):
    # The state is queried with the previous period's decision (zeros at t=1).
    xt = env.get_state(t, yt)
    demand = env.demands[t]
    # Size the decision by nb_products (not a hard-coded 1) so it stays
    # consistent with the initial yt above if the number of products changes.
    # A fresh array is built every iteration, as before.
    yt = 10 * np.ones(nb_products)
    sales = env.get_sales(t, yt)
    print("t={}, true state={},\n\txt={}, yt={}, demand={}, sales={}".format(t,env.state, xt,yt,demand,sales))

t=1, true state=[[0. 0.]],
	xt=[0.], yt=[10.], demand=[1.], sales=[1.]
t=2, true state=[[0. 9.]],
	xt=[9.], yt=[10.], demand=[3.], sales=[3.]
t=3, true state=[[6. 7.]],
	xt=[7.], yt=[10.], demand=[2.], sales=[2.]
t=4, true state=[[1. 4.]],
	xt=[4.], yt=[10.], demand=[0.], sales=[0.]
t=5, true state=[[3. 9.]],
	xt=[9.], yt=[10.], demand=[2.], sales=[2.]
t=6, true state=[[6. 7.]],
	xt=[7.], yt=[10.], demand=[4.], sales=[4.]
t=7, true state=[[1. 4.]],
	xt=[4.], yt=[10.], demand=[3.], sales=[3.]
t=8, true state=[[1. 7.]],
	xt=[7.], yt=[10.], demand=[2.], sales=[2.]
t=9, true state=[[5. 8.]],
	xt=[8.], yt=[10.], demand=[3.], sales=[3.]
t=10, true state=[[3. 5.]],
	xt=[5.], yt=[10.], demand=[4.], sales=[4.]


### Simulation of CUP

In [4]:
# Run Zhang_algorithm against the shared `env` for ten periods, logging when
# the observed state is entirely zero and whenever the decision changes.
alg = Zhang_algorithm(
    nb_products,
    np.zeros(nb_products),
    10 * np.ones(nb_products),
    G=1,
    gamma=1,
)

# Initial decision and feedback signals (all zeros).
yt = np.zeros(nb_products)
gt = np.zeros(nb_products)
st = np.zeros(nb_products)

for t in range(1, 11):
    print("t={}".format(t))

    # State is requested with the decision currently held in yt.
    xt = env.get_state(t, yt)
    if np.all(xt == 0):
        print("\tEmpty system")

    # Only rebind (and report) yt when the algorithm proposes a different value.
    new_yt = alg.next_decision(t, xt, gt, st)
    if np.any(yt != new_yt):
        print("\tChanging yt from {} to {}".format(yt,new_yt))
        yt = new_yt

    # Feedback for the next call to next_decision; the call order on env is
    # kept as-is. lt is computed but not used further in this cell.
    lt = env.get_loss(t, yt)
    gt = env.get_subgradient(t, yt)
    st = env.get_sales(t, yt)

t=1
	Empty system
t=2
	Empty system
	Changing yt from [0.] to [10.]
t=3
t=4
t=5
t=6
t=7
t=8
t=9
t=10


In [5]:
# Run RCOSD_algorithm against the shared `env` for ten periods, printing each
# decision and flagging periods in which every product has positive sales.
alg = RCOSD_algorithm(
    nb_products,
    np.zeros(nb_products),
    10 * np.ones(nb_products),
    G=1,
    gamma=1,
)

# Initial decision is 0.5 per product; feedback signals start at zero.
yt = 0.5 * np.ones(nb_products)
gt = np.zeros(nb_products)
st = np.zeros(nb_products)

for t in range(1, 11):
    print("t={}".format(t))

    # State is requested with the previous decision, then the algorithm picks
    # the new one from the state and the last subgradient/sales feedback.
    xt = env.get_state(t, yt)
    yt = alg.next_decision(t, xt, gt, st)
    print("\tyt={}".format(yt))

    # Feedback for the next iteration; the call order on env is kept as-is.
    # lt is computed but not used further in this cell.
    lt = env.get_loss(t, yt)
    gt = env.get_subgradient(t, yt)
    st = env.get_sales(t, yt)
    if np.all(st > 0):
        print("\tPositive sales occurs")

t=1
	yt=[5.]
	Positive sales occurs
t=2
xt=[4.], st=[1.]
[0.]
	yt=[0.]
t=3
	yt=[0.]
t=4
	yt=[0.]
t=5
	yt=[0.]
t=6
	yt=[0.]
t=7
	yt=[0.]
t=8
	yt=[0.]
t=9
	yt=[0.]
t=10
	yt=[0.]
