In [1]:
import numpy as np
import torch
from module_space import *

In [2]:
T = 1000
n_arms = 12 # N
n_features_default = 20 # d
n_assortment = 4 # K
n_samples = 10 # M, number of samples per each round and arm, for TS
noise_std = 1 # noise of reward: xi = noise_std*N(0,1)
### noise_std = 0.01 # noise of reward: xi = noise_std*N(0,1)

n_sim = 10 # number of simulations

def F(x): # round_reward_function
    if len(np.shape(x)) == 1: # if x is a vector
        return np.sum(x)
    else: # if x is a matrix
        return np.sum(x, axis=1)  
    
reg_factor = 1.0 # lambda
delta = 0.1 # delta
exploration_variance = 1 # nu^2, for TS
confidence_scaling_factor = 1 # gamma, for UCB

hidden_size_default = 60 # m
epochs = 100 # repeat training for each period
train_every = 10 # training period
training_window = 100
learning_rate = 0.01
### p = 0.2
p = 0.0 # no dropout

use_cuda = False

#if torch.cuda.is_available():
#    use_cuda = True
#else:
#    use_cuda = False



In [3]:
n_features = n_features_default
hidden_size = hidden_size_default

In [4]:
h1 = "h1"
h2 = "h2"
h3 = "h3"

## Hidden function
SEED = 1234
np.random.seed(SEED)

a = np.random.randn(n_features)
a /= np.linalg.norm(a, ord=2)

h_str = h3

if h_str == "h1":
    h = lambda x: 100*np.dot(x, a)
    ### h = lambda x: np.dot(x, a)
elif h_str == "h2":
    h = lambda x: 100*np.dot(x, a)**2
    ### h = lambda x: np.dot(x, a)**2
elif h_str == "h3":
    h = lambda x: 100*np.cos(np.pi*np.dot(x, a))
    ### h = lambda x: np.cos(np.pi*np.dot(x, a))

In [5]:
bandit = ContextualBandit(T,
                              n_arms,
                              n_features, 
                              h,
                              noise_std=noise_std,
                              n_assortment=n_assortment,
                              n_samples=10,
                              round_reward_function=F
                             )

In [6]:
model = Neural("TS",
               bandit,
               hidden_size=hidden_size,
               reg_factor=reg_factor,
               delta=delta,
               confidence_scaling_factor=confidence_scaling_factor,
               exploration_variance=exploration_variance,
               p=p,
               training_window=training_window,
               learning_rate=learning_rate,
               epochs=epochs,train_every=train_every,
               use_cuda=use_cuda
              )

In [7]:
model.update_sample_rewards()

In [8]:
np.shape(model.sample_rewards)

(1000, 12, 10)

In [9]:
np.shape(model.optimistic_sample_rewards)

(1000, 12)

In [10]:
model.mu_hat

array([[ 0.02626817, -0.00330963, -0.01576912, ..., -0.04582045,
        -0.01545779, -0.07112938],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

In [11]:
model.sample_rewards[0]

array([[ 0.02352622,  0.01600308,  0.00815414,  0.0423145 ,  0.02561192,
         0.02895288,  0.01695412,  0.03813445,  0.01659437,  0.01278695],
       [-0.01219793, -0.01170379, -0.03329411, -0.01419706,  0.01458935,
        -0.01207098,  0.00092053, -0.00101096,  0.00103745, -0.00891334],
       [-0.01502731,  0.00599842, -0.01427748,  0.00081348, -0.01657705,
        -0.00919286, -0.03984443, -0.0264477 ,  0.00599421, -0.01371699],
       [-0.00370583,  0.01242471,  0.01158467,  0.00572465,  0.00609305,
         0.00499323,  0.00056414,  0.01637793,  0.00239356,  0.01203567],
       [-0.03737662, -0.04148508, -0.02115865, -0.04077334, -0.03575723,
        -0.04003168, -0.0310792 , -0.0607688 , -0.04072594, -0.02148726],
       [-0.08243175, -0.06155418, -0.06374963, -0.07527283, -0.08286729,
        -0.0735454 , -0.07502753, -0.06727194, -0.07979786, -0.08354984],
       [-0.00106568, -0.0092379 ,  0.00713031,  0.00189037, -0.0069726 ,
        -0.00353846, -0.00470857,  0.00754043

In [12]:
model.optimistic_sample_rewards[0]

array([ 0.0423145 ,  0.01458935,  0.00599842,  0.01637793, -0.02115865,
       -0.06155418,  0.00754043,  0.04934879,  0.01530238, -0.02509991,
        0.01645596, -0.03880649])

In [13]:
np.max(model.sample_rewards, axis=-1)[0] == model.optimistic_sample_rewards[0]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

In [8]:
type(float(np.sqrt(4)))

float

In [3]:
import torch
torch.cuda.is_available()

True

In [50]:
from varname import nameof

In [51]:
nameof(rr)

'rr'

In [55]:
def save_test(array):
    np.save(nameof(array), array)

In [58]:
np.save(nameof(rr), rr)

In [37]:
np.random.seed(5678)

In [25]:
np.random.randn(10)

array([-0.70978938, -0.01719118,  0.31941137, -2.26533107, -1.37745366,
        1.94998073, -0.56381007, -0.84373759,  0.22453858, -0.39137772])

In [26]:
np.random.randn(10)

array([ 0.60550347, -0.68615034,  1.18336005,  0.54895683, -0.39621907,
        1.3737187 , -0.21972682,  0.15974422,  0.97310239, -0.00849755])

In [28]:
class RandSeedTest:
    def __init__(self):
        # np.random.seed(5678)
        self.r = np.random.randn(10)    

In [36]:
r1 = RandSeedTest()
r1.r

array([-0.70978938, -0.01719118,  0.31941137, -2.26533107, -1.37745366,
        1.94998073, -0.56381007, -0.84373759,  0.22453858, -0.39137772])

In [54]:
np.save('regret/r1r', r1.r)

FileNotFoundError: [Errno 2] No such file or directory: 'regret/r1r.npy'

In [40]:
rr = np.load('r1r.npy')
rr

array([-0.70978938, -0.01719118,  0.31941137, -2.26533107, -1.37745366,
        1.94998073, -0.56381007, -0.84373759,  0.22453858, -0.39137772])

In [47]:
rr

AttributeError: 'numpy.ndarray' object has no attribute '__name__'

In [46]:
np.save('%s' % rr, rr)

OSError: [Errno 22] Invalid argument: '[-0.70978938 -0.01719118  0.31941137 -2.26533107 -1.37745366  1.94998073\n -0.56381007 -0.84373759  0.22453858 -0.39137772].npy'

In [32]:
r2 = RandSeedTest()
r2.r

array([ 0.60550347, -0.68615034,  1.18336005,  0.54895683, -0.39621907,
        1.3737187 , -0.21972682,  0.15974422,  0.97310239, -0.00849755])

In [14]:
class Dot:
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def norm(self):
        return np.sqrt(self.x**2 + self.y**2)
    
class Dot2:
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def norm(self):
        return np.sqrt(self.x**2 + self.y**2)    

In [11]:
a = 3
b = 4

def test(f):
    if f == Dot:
        print('good')
    dot = f(a,b)
    return dot.norm()

In [15]:
test(Dot2)

5.0

In [2]:
T = 2000
n_arms = 12 # a -> 숫자를 키워볼 것
n_features = 20 # d -> 숫자를 키워볼 것
n_assortment = 4
noise_std = 0.1

n_samples = 1 # number of samples for each round and arm, only for TS

n_sim = 2 # number of simulations

def F(x): # round_reward_function
    if len(np.shape(x)) == 1: # if x is a vector
        return np.sum(x)
    else: # if x is a matrix
        return np.sum(x, axis=1)                

In [3]:
reg_factor = 1.0 # lambda
delta = 0.1 # delta
exploration_variance = 1.0 # nu, only for TS
confidence_scaling_factor = 1.0 # gamma, only for UCB

In [4]:
p = 0.2 # dropout probability
hidden_size = 64 # m
epochs = 100 # 
train_every = 10
confidence_scaling_factor = 1.0
use_cuda = False

In [5]:
### mean reward function
a = np.random.randn(n_features)
a /= np.linalg.norm(a, ord=2)
h = lambda x: 100*np.dot(a, x)

In [6]:
bandit = ContextualBandit(T, n_arms, n_features, h, noise_std=noise_std, \
                          n_assortment=n_assortment, n_samples=n_samples, round_reward_function=F)

regrets = np.empty((n_sim, T))

bandit.reset_rewards()
model = NeuralUCB(bandit,
                  hidden_size=hidden_size,
                  reg_factor=reg_factor,
                  delta=delta,
                  confidence_scaling_factor=confidence_scaling_factor,
                  # exploration_variance=exploration_variance,
                  training_window=100,
                  p=p,
                  learning_rate=0.01,
                  epochs=epochs,
                  train_every=train_every,
                  use_cuda=use_cuda
                 )

lin_model = LinUCB(bandit,
                   reg_factor=reg_factor,
                   delta=delta,
                   confidence_scaling_factor=confidence_scaling_factor
                  )

In [8]:
inital_parameters = model.model.parameters()

<generator object Module.parameters at 0x7f55cdaa3bd0>

In [44]:
model.model.train()

Model(
  (layers): ModuleList(
    (0): Linear(in_features=20, out_features=64, bias=False)
    (1): Linear(in_features=64, out_features=1, bias=False)
  )
  (dropout): Dropout(p=0.2, inplace=False)
  (activation): ReLU()
)

In [60]:
aa = torch.empty(1)

In [69]:
aa - aa

tensor([0., 0., 0.,  ..., 0., 0., 0.])

In [68]:
aa = torch.empty(1)
for p in model.model.parameters():
    aa = torch.cat((aa, p.data.flatten()))
aa = aa[1:]

In [66]:
aa.size()

torch.Size([1344])

In [46]:
for p in model.model.get_initial_parameters():
    print(np.linalg.norm(p.data.flatten()))

4.6717014
0.5899825


In [8]:
np.random.randn()

1.302046392073708

In [7]:
model.approximator_dim

1344

In [15]:
input_size = 20
hidden_size = 64
n_layers = 2

size  = [input_size] + [hidden_size,] * (n_layers-1) + [1]
layers = [nn.Linear(size[i], size[i+1], bias = False) for i in range(n_layers)]

In [16]:
layers

[Linear(in_features=20, out_features=64, bias=False),
 Linear(in_features=64, out_features=1, bias=False)]

1409

In [11]:
lin_model.approximator_dim

20

In [11]:
np.max(bb, axis=-1)

array([ 9, 17, 10,  7, 15])

In [5]:
aa = np.array([[9,17,10,7,15]])
np.shape(aa)
bb = np.reshape(aa, (5,1))
np.shape(bb)
bbb

array([[ 9],
       [17],
       [10],
       [ 7],
       [15]])

In [2]:
## --
aa = np.array([[9,17,10,7,15], [20,22,21,27,26], [1,2,3,4,5]])

aa[1, np.array([0,2])]

array([20, 21])

In [None]:
## --
a = np.array([9,17,10,7,15])
ind = np.argpartition(a, -3)[-3:]
s_ind = ind[np.argsort(a[ind])][::-1]
b = a[s_ind]

print(s_ind)
print(b)

In [None]:
## --
def inv_sherman_morrison_iter(a, A_inv):
    temp = A_inv
    for u in a:
        print(np.shape(u))
        print(np.shape(temp))
        Au = np.dot(temp, u)
        temp = temp - np.outer(Au, Au)/(1+np.dot(u.T, Au))
    return temp

In [None]:
c = np.array( [np.array([1,0]) , np.array([0,1]), np.array([1,0]) , np.array([0,1])] )
A_inv = np.array( [[np.array([1,0]) , np.array([0,1])]] )
A_inv = np.squeeze(A_inv, axis=0)

inv_sherman_morrison_iter(c, A_inv)


In [None]:
## --
a = np.array([[9,17,10,7,15], [20,22,21,27,26]])
ind = np.argpartition(a, -3, axis=1)[:,-3:]
s_ind = np.array([list(ind[i][np.argsort(a[i][ind[i]])][::-1]) for i in range(0, np.shape(a)[0])])
b = np.array([a[i][s_ind[i]] for i in range(0,np.shape(a)[0])])

In [None]:
## --
print(s_ind)
print(b)

In [None]:
## --
bandit = ContextualBandit(T, n_arms, n_features, h, noise_std=noise_std, n_assortment=n_assortment, round_reward_function=F)

In [None]:
## --
print('super arm:', bandit.best_super_arm)
print('scores:', bandit.best_rewards)
print('round reward:', bandit.best_round_reward)

In [3]:
import numpy as np 

aa = np.array([[9,17,10,7,15]])
bb = np.array([[12,17,14,7,15]])

np.linalg.norm(aa-bb)

5.0