In [1]:
import os

os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import numpy as np
import torch
import math 
import copy
import random
import argparse
import torch.optim as optim
import torch.nn as nn
import modeldefine
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [41]:
alpha = 0.01

def UCB(A, phi):
    #### ucb term
    phi = phi.view(-1,1)
    try:
        tmp, LU = torch.linalg.solve(phi,A)
    except:
        A = A.detach().numpy()
        phi2 = phi.detach().numpy()
        tmp = torch.Tensor(np.linalg.solve(A, phi2))

    return torch.sqrt(torch.matmul(torch.transpose(phi,1,0), tmp))

def calculate_v(contextinfo_list, A, theta):
    vj_list = []
    feature_list = []
    for i in contextinfo_list:
        feature = model(i.to(device)).cpu()
        first_item =  torch.mm( feature.view(1,-1) , theta)
        second_item = alpha * UCB(A, feature)
        vj_list.append((first_item + second_item).item())
        feature_list.append(feature.detach().numpy())
    return np.array(vj_list), feature_list

def update_A(A, info_subset):
    for i in info_subset:
        feature = model(i.to(device)).cpu()
        A = A + torch.mm(feature.t(), feature)
    return A

def prob(vj_list):
    sum = np.sum(np.exp(vj_list)) + 1
    return [np.exp(vj_list[i]) / sum for i in range(len(vj_list))]  

def revenue(vj_list, reward_list):
    sum = np.sum(np.exp(vj_list)) + 1
    return np.sum(np.multiply(np.exp(vj_list), reward_list) / sum)

def assort(contextinfo_list, reward_list, vj_list):
    length = len(vj_list)
    # sort the contextinfo_list and vj with descending order of reward_list
    sorted_list = sorted(zip(contextinfo_list, vj_list, reward_list), key=lambda x: x[2], reverse=True)
    contextinfo_list = [x[0] for x in sorted_list]
    vj_list = [x[1] for x in sorted_list]
    reward_list = [x[2] for x in sorted_list]

    # calculate the optimal assortment
    optimal_assort = []
    optimal_reward = 0
    index = 1
    for i in range(length):
        if revenue(vj_list[:index], reward_list[:index]) >= optimal_reward:
            optimal_reward = revenue(vj_list[:index], reward_list[:index])
            index += 1
        else:
            break
    return contextinfo_list[:index]

In [3]:
def likelihood(theta, feature_list,y_list):
    # feature's dimension is len * dimension , theta is 1*dimension
    v_list = np.matmul(feature_list, theta.T).reshape(-1)
    ln_prob = np.log(prob(v_list))
    summation = ln_prob * y_list
    return -1 * np.sum(ln_prob)

def likelihood_derivative(theta, feature_list, y_list):
    v_list = np.matmul(feature_list, theta.T).reshape(-1)
    prob_list = prob(v_list)
    summation = np.matmul(np.array(feature_list).T, (y_list - prob_list))
    return -1 * summation

In [86]:
import numpy as np
from scipy.optimize import minimize

initial_guess = np.array([0.5 for i in range(15)])  

known_feature = a
known_y =  y

result = minimize(likelihood, initial_guess, args=(known_feature, known_y), method='SLSQP', 
                  constraints={'type':'eq', 'fun': likelihood_derivative, 'args':(known_feature, known_y)})

optimal_parameters = result.x
print("最优参数:", optimal_parameters)


  ln_prob = np.log(prob(v_list))
  summation = ln_prob * y_list


最优参数: [ -9.28791415  10.75118047 -10.35431476   3.03321144   4.85594309
   5.28008933 -13.60273543   6.45015259  -8.79067984   7.59958689
   9.13536936   3.55705702  -2.93728797   0.69546937  -6.09454989]


In [4]:
class CustomLikelihoodLoss(nn.Module):
    def __init__(self, theta):
        super(CustomLikelihoodLoss, self).__init__()
        self.theta = theta
        
    def forward(self, output_list, y_list):
        loss = 0
        index = 0
        for output in output_list:  
            y = torch.tensor(y_list[index]).to(device)
            v = torch.mm(output, self.theta.view(-1,1))
            prob = torch.exp(v) / (torch.sum(torch.exp(v)) + 1)
            loss += torch.sum(torch.log(prob) * y)
            index += 1  
        return -loss

In [106]:
# construct a test set 
a = torch.randn(100,10).numpy()
theta  = np.random.randint(1,4,(1,10)).reshape(-1)
y = np.array([1] + [0 for i in range(99)])
y = np.random.permutation(y)

a_list = []
y_list = []
for i in [20,30,50,100, 60, 80]:
    a = torch.randn(i,10).numpy()
    a_list.append(a)
    y = np.array([1] + [0 for i in range(i-1)]) 
    y = np.random.permutation(y)
    y_list.append(y)


In [5]:
import modeldefine
import torch.optim as optim
model = modeldefine.Model(5,10,10,5).to(device)
# 10 20 20 20 20 20  5
optimizer = optim.Adam(model.parameters(), lr=0.1)


tensor([ 5, 10, 10, 10, 10, 10, 10], dtype=torch.int32)


In [110]:
loss_function = CustomLikelihoodLoss(torch.Tensor(theta).to(device))
epochs = 10

for epoch in range(epochs):
    output_list = [model(torch.Tensor(a).to(device)) for a in a_list]
    loss = loss_function(output_list, y_list)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}")
      
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Epoch [1/10], Loss: 1420.2218017578125
Epoch [2/10], Loss: 1420.2218017578125
Epoch [3/10], Loss: 1420.2218017578125
Epoch [4/10], Loss: 1420.2218017578125
Epoch [5/10], Loss: 1420.2218017578125
Epoch [6/10], Loss: 1420.2218017578125
Epoch [7/10], Loss: 1420.2218017578125
Epoch [8/10], Loss: 1420.2218017578125
Epoch [9/10], Loss: 1420.2218017578125
Epoch [10/10], Loss: 1420.2218017578125


In [6]:
CONTEXT_ARRAY = np.load('linear_data/features.npy') 
REWARD_ARRAY = np.load('linear_data/rewards.npy')
TRUE_THETA = np.load('linear_data/theta.npy')

In [7]:
data_length = len(CONTEXT_ARRAY)

# define the hyperparameters
input_size = 20
hidden_size = 20
output_size = 10
num_layers = 10

beta = 0.1
lambd = 1
H = 100

# initialize the parameters

theta = np.random.randn(output_size, 1) / np.sqrt(output_size)
theta = torch.tensor(theta, dtype=torch.float32)
LAMBDA = lambd * torch.eye(output_size, dtype=torch.float32)

for t in range(0,10000):
    context = CONTEXT_ARRAY[t]
    profit = REWARD_ARRAY[t]
    v_array = calculate_v(torch.tensor(context,dtype=torch.float32), LAMBDA, theta)[0]
    assortment = assort(context, profit.tolist()[0], v_array.tolist() )
    

In [36]:
context = CONTEXT_ARRAY[1]
profit = REWARD_ARRAY[1]

v_array = calculate_v(torch.tensor(context,dtype=torch.float32), LAMBDA, theta)[0]

In [42]:
assort(context, profit.tolist()[0], v_array.tolist() )

[array([0.87288918, 0.80581414, 0.19787804, 0.4953588 , 0.74021847,
        0.46710976, 0.41584958, 0.68319892, 0.04427872, 0.10969431]),
 array([0.93023902, 0.67943639, 0.42031603, 0.26120118, 0.71224936,
        0.10566559, 0.14503889, 0.71575083, 0.09789287, 0.32400914]),
 array([0.27043911, 0.91301561, 0.17765901, 0.1509276 , 0.75013311,
        0.96394034, 0.35105353, 0.68819658, 0.91029403, 0.98579013]),
 array([0.87070441, 0.3095844 , 0.67060454, 0.33518521, 0.09222245,
        0.42369953, 0.29902919, 0.14342832, 0.69594331, 0.87490535]),
 array([0.18822713, 0.35742447, 0.36166591, 0.83437478, 0.93996396,
        0.67566146, 0.50832378, 0.5704803 , 0.32539995, 0.75333515]),
 array([0.76438591, 0.46887631, 0.07918692, 0.20564424, 0.84512476,
        0.21246723, 0.95477161, 0.1692681 , 0.33776783, 0.59680885]),
 array([0.97590103, 0.4712956 , 0.35975283, 0.84626926, 0.89347454,
        0.22016018, 0.83810063, 0.08024189, 0.75561889, 0.30867027]),
 array([0.2617273 , 0.21996422, 0.