In [63]:
import os

os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import numpy as np
import torch
import random
import math 
import copy
import random
import argparse
import torch.optim as optim
import torch.nn as nn
import modeldefine
import numpy as np
from scipy.optimize import minimize
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
alpha = 0.01

def UCB(A, phi):
    #### ucb term
    phi = phi.view(-1,1)
    try:
        tmp, LU = torch.linalg.solve(phi,A)
    except:
        A = A.detach().numpy()
        phi2 = phi.detach().numpy()
        tmp = torch.Tensor(np.linalg.solve(A, phi2))

    return torch.sqrt(torch.matmul(torch.transpose(phi,1,0), tmp))

def calculate_v(contextinfo_list, A, theta):
    vj_list = []
    feature_list = []
    for i in contextinfo_list:
        feature = model(i.to(device)).cpu()
        first_item =  torch.mm( feature.view(1,-1) , theta)
        second_item = 0*alpha * UCB(A, feature)
        vj_list.append((first_item + second_item).item())
        feature_list.append(feature.detach().numpy())
    return np.array(vj_list), feature_list

def update_A(A, info_subset):
    for i in info_subset:
        i = torch.tensor(i, dtype=torch.float32,device=device)
        feature = model(i.to(device)).view(1,-1).cpu()
        A = A + torch.mm(feature.t(), feature)
    return A

def prob(vj_list):
    sum = np.sum(np.exp(vj_list)) + 1
    return [np.exp(vj_list[i]) / sum for i in range(len(vj_list))]  

def revenue(vj_list, reward_list):
    sum = np.sum(np.exp(vj_list)) + 1
    return np.sum(np.multiply(np.exp(vj_list), reward_list) / sum)

def assort(contextinfo_list, reward_list, vj_list, feature_list):
    length = len(vj_list)
    # sort the contextinfo_list and vj with descending order of reward_list
    sorted_list = sorted(zip(contextinfo_list, vj_list, reward_list, feature_list), key=lambda x: x[2], reverse=True)
    
    contextinfo_list = [x[0] for x in sorted_list]
    vj_list = [x[1] for x in sorted_list]
    reward_list = [x[2] for x in sorted_list]
    feature_list = [x[3] for x in sorted_list]

    # calculate the optimal assortment
    optimal_assort = []
    optimal_reward = 0
    index = 1
    for i in range(length):
        if revenue(vj_list[:index], reward_list[:index]) >= optimal_reward:
            optimal_reward = revenue(vj_list[:index], reward_list[:index])
            index += 1
        else:
            break
    return contextinfo_list[:index], feature_list[:index]

# this is for the linear purchase model when v = x dot theta
def get_linear_purchase(feature_list):
    true_Vlist = [(TRUE_THETA @ feature_list[i].reshape(-1,1)).item() for  i in range(len(feature_list))]
    prob_list = prob(true_Vlist)

    # sample item according to prob_list
    if random.uniform(0,1) < 1 - np.sum(prob_list):
        return np.array([0 for i in range(len(feature_list))])
    else:
        returnlist = [0 for i in range(len(feature_list))]
        indexchoose = random.choices([i for i in range(len(prob_list))], weights = prob_list)[0]
        returnlist[indexchoose] = 1
        return np.array(returnlist)
    

def likelihood(theta, feature_list ,y_list):
    # feature's dimension is len * dimension , theta is 1*dimension
    v_list = np.matmul(feature_list, theta.T).reshape(-1)
    ln_prob = np.log(prob(v_list))
    summation = ln_prob * y_list
    return -1 * np.sum(summation)

def likelihood_derivative(theta, feature_list, y_list):
    v_list = np.matmul(feature_list, theta.T).reshape(-1)
    prob_list = prob(v_list)
    summation = np.matmul(np.array(feature_list).T, (y_list - prob_list))
    return -1 * summation

def likelihood_array(theta, feature_list_list, y_list_list):
    summation =  0.5 * lambd * np.dot(theta, theta)
    for i in range(len(feature_list_list)):
        summation += likelihood(theta, feature_list_list[i], y_list_list[i])
    return summation

def likelihood_derivative_array(theta, feature_list_list, y_list_list):
    summation = 0.5 * lambd * theta
    for i in range(len(feature_list_list)):
        summation += likelihood_derivative(theta, feature_list_list[i], y_list_list[i])
    return summation

class CustomLikelihoodLoss(nn.Module): 
    def __init__(self, theta_list):
        super(CustomLikelihoodLoss, self).__init__()
        self.theta_list = theta_list

    def forward(self, output_list, y_list):
        loss = 0
        index = 0
        for output in output_list:  
            y = torch.tensor(y_list[index]).to(device) 
            theta = torch.tensor(self.theta_list[index], dtype= torch.float32).to(device) 
            v = torch.mm(output, theta.view(-1,1)) 
            prob = torch.exp(v) / (torch.sum(torch.exp(v)) + 1)  
            loss += torch.sum(-y * torch.log(prob))  
            index += 1  
        return loss / (len(y_list)/100)

class CustomLikelihoodLoss2(nn.Module):
    def __init__(self, theta_list):
        super(CustomLikelihoodLoss2, self).__init__()
        self.theta_list = theta_list
     
    def forward(self, output_list, y_list):
        loss = 0
        index = 0
        for output in output_list:
            y = torch.tensor(y_list[index]).to(device)
            theta = torch.tensor(self.theta_list[index], dtype= torch.float32).to(device)
            v = torch.mm(output, theta.view(-1,1))
            prob = torch.exp(v) / (torch.sum(torch.exp(v)) + 1)
            # 加入方差正则项
            var = torch.var(prob)
            # ce loss between prob and y
            loss += torch.sum(-y * torch.log(prob) - (1-y) * torch.log(1-prob))
            loss -= 10*var 
            index += 1
        return loss / (len(y_list)/100)
    
CONTEXT_ARRAY = np.load('linear_data/features.npy') 
REWARD_ARRAY = np.load('linear_data/rewards.npy')
TRUE_THETA = np.load('linear_data/theta.npy')

cuda:0


In [64]:
import modeldefine
import torch.optim as optim
model = modeldefine.Model_drop(10,20,10,1).to(device)
# 10 20 20 20 20 20  5
optimizer = optim.SGD(model.parameters(), lr=0.01,weight_decay=0.01)

In [42]:
print(model)

Model_drop(
  (layers): ModuleList(
    (0): Linear(in_features=10, out_features=20, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=20, out_features=10, bias=True)
    (4): ReLU()
  )
)


In [65]:

data_length = len(CONTEXT_ARRAY)

# define the hyperparameters
input_size = 20
hidden_size = 20
output_size = 10
num_layers = 1

beta = 0.1

H = 100
lambd = 0.1
# initialize the parameters

theta = TRUE_THETA

LAMBDA = lambd * torch.eye(output_size, dtype=torch.float32)


ass_list = []
feature_list = []
purchase_list = []
theta_list = []
for t in range(0,500):
    context = CONTEXT_ARRAY[t]
    profit = REWARD_ARRAY[t]

    theta_tensor = torch.tensor(theta.reshape(-1,1), dtype=torch.float32)
    v_array,initial_feature = calculate_v(torch.tensor(context,dtype=torch.float32), LAMBDA, theta_tensor)
    assortment, ass_features = assort(context, profit.tolist()[0], v_array.tolist() , initial_feature)
    
    purchase_vector = get_linear_purchase(assortment)

    # add to list
    ass_list.append(np.array(assortment))
    feature_list.append(np.array(ass_features))
    purchase_list.append(purchase_vector)
    # update the parameters
    #  不更新lambda
    LAMBDA = update_A(LAMBDA, assortment)
    
    # 这个版本不优化theta

    #print("best parameter",theta)
    theta_list.append(theta)

    # update the neural networks

    if t % H == 99:
        #a_list = ass_list[-1*H:]
        #y_list = purchase_list[-1*H:]

        a_list = ass_list
        y_list = purchase_list

        loss_function = CustomLikelihoodLoss(theta_list)
        epochs = 5
     
        for epoch in range(epochs):
            output_list = [model(torch.tensor(a,dtype = torch.float32).to(device)) for a in a_list]
            loss = loss_function(output_list, y_list)
            if (epoch == epochs-1): print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}")
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        #theta_list = []

Epoch [5/5], Loss: 641.876220703125
Epoch [5/5], Loss: 857.7105102539062
Epoch [5/5], Loss: 834.7308959960938
Epoch [5/5], Loss: 793.2800903320312
Epoch [5/5], Loss: 739.8826293945312


In [72]:
for i in range(100):
    print(prob((ass_list[i] @ TRUE_THETA.reshape(10,1)).reshape(1,-1)))
    print(purchase_list[i])
    print(prob(calculate_v(torch.tensor(ass_list[i],dtype=torch.float32), LAMBDA, theta_tensor)[0]))
    print('--')

[array([0.27218587, 0.14482865, 0.34169434, 0.21888684])]
[0 0 1 0]
[0.24283306721170864, 0.23718694920602232, 0.23018497957533468, 0.25092932837416804]
--
[array([0.17584609, 0.18665843, 0.42217407, 0.20004035])]
[0 0 1 0]
[0.2547903904207516, 0.24646790093927604, 0.22623602625511993, 0.23430676936672562]
--
[array([0.11931847, 0.25625625, 0.19990725, 0.40696943])]
[0 0 0 1]
[0.23614769172577454, 0.24179921949146035, 0.24137477383361644, 0.24153164371495223]
--
[array([0.13744456, 0.55683174, 0.14451605, 0.14151387])]
[0 1 0 0]
[0.2517106346537353, 0.2312989009421612, 0.23290509105129592, 0.24503161721798855]
--
[array([0.41522624, 0.30061917, 0.24204334])]
[0 1 0]
[0.32654370683726375, 0.3016679950980273, 0.3209933976932274]
--
[array([0.2044439 , 0.53458395, 0.23115579])]
[1 0 0]
[0.3152590631701298, 0.3273531985637735, 0.3071046191804338]
--
[array([0.19696546, 0.11764848, 0.66154449])]
[0 0 1]
[0.316269863426288, 0.3015326421257129, 0.3313665870180361]
--
[array([0.15374852, 0.059

In [45]:
model(torch.tensor(ass_list[0],dtype=torch.float32,device = device))

tensor([[0.0000, 3.5049, 0.0000, 0.0000, 0.0000, 0.8354, 0.0000, 0.0000, 0.0000,
         2.7785],
        [0.0000, 3.4321, 0.0000, 0.0000, 0.0000, 0.8161, 0.0000, 0.0000, 0.0000,
         2.8329],
        [0.0000, 3.4293, 0.0000, 0.0000, 0.0000, 0.7963, 0.0000, 0.0000, 0.0000,
         2.8394],
        [0.0000, 3.4293, 0.0000, 0.0000, 0.0000, 0.7963, 0.0000, 0.0000, 0.0000,
         2.8394]], device='cuda:0', grad_fn=<MulBackward0>)

In [50]:
ass_list[200]

array([[0.99079081, 0.1461756 , 0.88821132, 0.45493304, 0.84505474,
        0.70337897, 0.95585258, 0.78423527, 0.15951751, 0.59713498],
       [0.88733714, 0.10644181, 0.30202356, 0.32071516, 0.64291886,
        0.96390973, 0.51084253, 0.73378024, 0.55242924, 0.65154806]])

In [9]:
print(prob((ass_list[2000] @ TRUE_THETA.reshape(10,1)).reshape(1,-1)))
print(prob(calculate_v(torch.tensor(ass_list[2000],dtype=torch.float32), LAMBDA, theta_tensor)[0]))

[array([0.47976096, 0.0776614 , 0.28741634, 0.14555404])]
[0.24479331524790462, 0.2452223105993166, 0.24633473628990002, 0.244131962167728]


In [14]:
probablity_list = [prob((ass_list[i] @ TRUE_THETA.reshape(10,1)).reshape(1,-1)) for i in range(len(ass_list))]
purchase_list = purchase_list

In [69]:
purchase_list

[array([0, 0, 1, 0]),
 array([0, 0, 1, 0]),
 array([0, 0, 0, 1]),
 array([0, 1, 0, 0]),
 array([0, 1, 0]),
 array([1, 0, 0]),
 array([0, 0, 1]),
 array([0, 0, 0, 0, 1]),
 array([1, 0, 0]),
 array([0, 0, 0, 0, 0]),
 array([0, 1, 0, 0]),
 array([0, 0, 0]),
 array([0, 1, 0, 0]),
 array([1, 0]),
 array([0, 1, 0]),
 array([0, 0, 1]),
 array([1, 0, 0, 0]),
 array([1, 0, 0, 0]),
 array([0, 0, 1]),
 array([1, 0]),
 array([0, 0, 1, 0]),
 array([0, 0, 1, 0, 0, 0, 0]),
 array([0, 0, 1, 0]),
 array([0, 0, 0, 1, 0]),
 array([0, 1, 0]),
 array([0, 0, 0, 1, 0]),
 array([0, 0, 1]),
 array([1, 0, 0, 0]),
 array([0, 0, 1, 0]),
 array([0, 0, 0, 0, 1]),
 array([0, 0, 1, 0]),
 array([1, 0, 0]),
 array([0, 1]),
 array([0, 1, 0, 0, 0]),
 array([0, 0, 0, 0, 1]),
 array([0, 0, 0, 1]),
 array([0, 0, 0, 1]),
 array([0, 1, 0, 0]),
 array([0, 0, 1, 0, 0]),
 array([0, 0, 1, 0, 0]),
 array([0, 1]),
 array([0, 1, 0, 0, 0]),
 array([0, 0, 1]),
 array([0, 0, 0, 0, 1]),
 array([0, 1, 0, 0]),
 array([1, 0, 0, 0]),
 array

[array([[0.00448243, 0.76994016, 0.81208992, 0.31374422, 0.87745478,
         0.18644208, 0.95848417, 0.55289522, 0.00402194, 0.39305549],
        [0.49786776, 0.33536584, 0.31191497, 0.65755711, 0.33916533,
         0.28000802, 0.96353922, 0.48668879, 0.2137879 , 0.05138831],
        [0.3103722 , 0.44329644, 0.59948727, 0.4270652 , 0.07183972,
         0.38027624, 0.11436435, 0.87397129, 0.30284334, 0.67260384],
        [0.98514682, 0.34194817, 0.11071858, 0.06985572, 0.66624931,
         0.78898765, 0.55769639, 0.88758613, 0.4063638 , 0.26814952]]),
 array([[0.87288918, 0.80581414, 0.19787804, 0.4953588 , 0.74021847,
         0.46710976, 0.41584958, 0.68319892, 0.04427872, 0.10969431],
        [0.93023902, 0.67943639, 0.42031603, 0.26120118, 0.71224936,
         0.10566559, 0.14503889, 0.71575083, 0.09789287, 0.32400914],
        [0.27043911, 0.91301561, 0.17765901, 0.1509276 , 0.75013311,
         0.96394034, 0.35105353, 0.68819658, 0.91029403, 0.98579013],
        [0.87070441, 0.30