In [6]:
import os

os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import numpy as np
import torch
import random
import math 
import copy
import random
import argparse
import torch.optim as optim
import torch.nn as nn
import modeldefine
import numpy as np
from scipy.optimize import minimize
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
alpha = 0.01

def UCB(A, phi):
    #### ucb term
    phi = phi.view(-1,1)
    try:
        tmp, LU = torch.linalg.solve(phi,A)
    except:
        A = A.detach().numpy()
        phi2 = phi.detach().numpy()
        tmp = torch.Tensor(np.linalg.solve(A, phi2))

    return torch.sqrt(torch.matmul(torch.transpose(phi,1,0), tmp))

def calculate_v(contextinfo_list, A, theta):
    vj_list = []
    feature_list = []
    for i in contextinfo_list:
        feature = model(i.to(device)).cpu()
        first_item =  torch.mm( feature.view(1,-1) , theta)
        second_item = alpha * UCB(A, feature)
        vj_list.append((first_item + second_item).item())
        feature_list.append(feature.detach().numpy())
    return np.array(vj_list), feature_list

def update_A(A, info_subset):
    for i in info_subset:
        i = torch.tensor(i, dtype=torch.float32,device=device)
        feature = model(i.to(device)).view(1,-1).cpu()
        A = A + torch.mm(feature.t(), feature)
    return A

def prob(vj_list):
    sum = np.sum(np.exp(vj_list)) + 1
    return [np.exp(vj_list[i]) / sum for i in range(len(vj_list))]  

def revenue(vj_list, reward_list):
    sum = np.sum(np.exp(vj_list)) + 1
    return np.sum(np.multiply(np.exp(vj_list), reward_list) / sum)

def assort(contextinfo_list, reward_list, vj_list, feature_list):
    length = len(vj_list)
    # sort the contextinfo_list and vj with descending order of reward_list
    sorted_list = sorted(zip(contextinfo_list, vj_list, reward_list, feature_list), key=lambda x: x[2], reverse=True)
    
    contextinfo_list = [x[0] for x in sorted_list]
    vj_list = [x[1] for x in sorted_list]
    reward_list = [x[2] for x in sorted_list]
    feature_list = [x[3] for x in sorted_list]

    # calculate the optimal assortment
    optimal_assort = []
    optimal_reward = 0
    index = 1
    for i in range(length):
        if revenue(vj_list[:index], reward_list[:index]) >= optimal_reward:
            optimal_reward = revenue(vj_list[:index], reward_list[:index])
            index += 1
        else:
            break
    return contextinfo_list[:index], feature_list[:index]

# this is for the linear purchase model when v = x dot theta
def get_linear_purchase(feature_list):
    true_Vlist = [(TRUE_THETA @ feature_list[i].reshape(-1,1)).item() for  i in range(len(feature_list))]
    prob_list = prob(true_Vlist)

    # sample item according to prob_list
    if random.uniform(0,1) < 1 - np.sum(prob_list):
        return np.array([0 for i in range(len(feature_list))])
    else:
        returnlist = [0 for i in range(len(feature_list))]
        indexchoose = random.choices([i for i in range(len(prob_list))], weights = prob_list)[0]
        returnlist[indexchoose] = 1
        return np.array(returnlist)
    
lambd = 1
def likelihood(theta, feature_list ,y_list):
    # feature's dimension is len * dimension , theta is 1*dimension
    v_list = np.matmul(feature_list, theta.T).reshape(-1)
    ln_prob = np.log(prob(v_list))
    summation = ln_prob * y_list
    return -1 * np.sum(summation)

def likelihood_derivative(theta, feature_list, y_list):
    v_list = np.matmul(feature_list, theta.T).reshape(-1)
    prob_list = prob(v_list)
    summation = np.matmul(np.array(feature_list).T, (y_list - prob_list))
    return -1 * summation

def likelihood_array(theta, feature_list_list, y_list_list):
    summation =  0.5 * lambd * np.dot(theta, theta)
    for i in range(len(feature_list_list)):
        summation += likelihood(theta, feature_list_list[i], y_list_list[i])
    return summation

def likelihood_derivative_array(theta, feature_list_list, y_list_list):
    summation = 0.5 * lambd * theta
    for i in range(len(feature_list_list)):
        summation += likelihood_derivative(theta, feature_list_list[i], y_list_list[i])
    return summation

class CustomLikelihoodLoss(nn.Module): 
    def __init__(self, theta_list):
        super(CustomLikelihoodLoss, self).__init__()
        self.theta_list = theta_list

    def forward(self, output_list, y_list):
        loss = 0
        index = 0
        for output in output_list:  
            y = torch.tensor(y_list[index]).to(device) 
            theta = torch.tensor(self.theta_list[index], dtype= torch.float32).to(device) 
            v = torch.mm(output, theta.view(-1,1)) 
            prob = torch.exp(v) / (torch.sum(torch.exp(v)) + 1)  
            loss += torch.sum(torch.log(prob) * y)  
            index += 1  
        return -loss 

class CustomLikelihoodLoss2(nn.Module):
    def __init__(self, theta_list):
        super(CustomLikelihoodLoss2, self).__init__()
        self.theta_list = theta_list
     
    def forward(self, output_list, y_list):
        loss = 0
        index = 0
        for output in output_list:
            y = torch.tensor(y_list[index]).to(device)
            theta = torch.tensor(self.theta_list[index], dtype= torch.float32).to(device)
            v = torch.mm(output, theta.view(-1,1))
            prob = torch.exp(v) / (torch.sum(torch.exp(v)) + 1)
            # ce loss between prob and y
            loss += torch.sum(-y * torch.log(prob) - (1-y) * torch.log(1-prob))
            index += 1
        return loss / (len(y_list)/100)
    
CONTEXT_ARRAY = np.load('linear_data/features.npy') 
REWARD_ARRAY = np.load('linear_data/rewards.npy')
TRUE_THETA = np.load('linear_data/theta.npy')

cuda:0


In [7]:
import modeldefine
import torch.optim as optim
model = modeldefine.Model(5,10,10,2).to(device)
# 10 20 20 20 20 20  5
optimizer = optim.SGD(model.parameters(), lr=0.001,weight_decay=0.1)
data_length = len(CONTEXT_ARRAY)

# define the hyperparameters
input_size = 20
hidden_size = 20
output_size = 10
num_layers = 10

beta = 0.1

H = 100

# initialize the parameters

theta = TRUE_THETA

LAMBDA = lambd * torch.eye(output_size, dtype=torch.float32)


ass_list = []
feature_list = []
purchase_list = []
theta_list = []
for t in range(0,10000):
    context = CONTEXT_ARRAY[t]
    profit = REWARD_ARRAY[t]

    theta_tensor = torch.tensor(theta.reshape(-1,1), dtype=torch.float32)
    v_array,initial_feature = calculate_v(torch.tensor(context,dtype=torch.float32), LAMBDA, theta_tensor)
    assortment, ass_features = assort(context, profit.tolist()[0], v_array.tolist() , initial_feature)
    
    purchase_vector = get_linear_purchase(assortment)

    # add to list
    ass_list.append(np.array(assortment))
    feature_list.append(np.array(ass_features))
    purchase_list.append(purchase_vector)
    # update the parameters
    LAMBDA = update_A(LAMBDA, assortment)
    
    # 这个版本不优化theta

    #print("best parameter",theta)
    theta_list.append(theta)

    # update the neural networks

    if t % H == 99:
        #a_list = ass_list[-1*H:]
        #y_list = purchase_list[-1*H:]

        a_list = ass_list
        y_list = purchase_list

        loss_function = CustomLikelihoodLoss2(theta_list)
        epochs = 5
     
        for epoch in range(epochs):
            output_list = [model(torch.tensor(a,dtype = torch.float32).to(device)) for a in a_list]
            loss = loss_function(output_list, y_list)
            if (epoch == epochs-1): print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}")
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        #theta_list = []

tensor([ 5, 10, 10, 10], dtype=torch.int32)
Epoch [5/5], Loss: 999.692138671875
Epoch [5/5], Loss: 1142.798095703125
Epoch [5/5], Loss: 1135.14453125
Epoch [5/5], Loss: 1135.2701416015625
Epoch [5/5], Loss: 1107.1767578125
Epoch [5/5], Loss: 1076.39306640625
Epoch [5/5], Loss: 1066.36083984375
Epoch [5/5], Loss: 1056.3272705078125
Epoch [5/5], Loss: 1040.22119140625
Epoch [5/5], Loss: 1027.0799560546875
Epoch [5/5], Loss: 1014.1148071289062
Epoch [5/5], Loss: 1002.590087890625
Epoch [5/5], Loss: 993.8888549804688
Epoch [5/5], Loss: 984.236083984375
Epoch [5/5], Loss: 971.8844604492188
Epoch [5/5], Loss: 958.95751953125
Epoch [5/5], Loss: 947.5950317382812
Epoch [5/5], Loss: 939.9688720703125
Epoch [5/5], Loss: 930.358154296875
Epoch [5/5], Loss: 922.6279296875
Epoch [5/5], Loss: 918.1915283203125
Epoch [5/5], Loss: 913.0737915039062
Epoch [5/5], Loss: 907.0564575195312
Epoch [5/5], Loss: 896.9989624023438
Epoch [5/5], Loss: 890.5178833007812
Epoch [5/5], Loss: 883.2529907226562
Epoch [

In [9]:
print(prob((ass_list[2000] @ TRUE_THETA.reshape(10,1)).reshape(1,-1)))
print(prob(calculate_v(torch.tensor(ass_list[2000],dtype=torch.float32), LAMBDA, theta_tensor)[0]))

[array([0.47976096, 0.0776614 , 0.28741634, 0.14555404])]
[0.24479331524790462, 0.2452223105993166, 0.24633473628990002, 0.244131962167728]


In [10]:
ass_list

[array([[0.00448243, 0.76994016, 0.81208992, 0.31374422, 0.87745478,
         0.18644208, 0.95848417, 0.55289522, 0.00402194, 0.39305549],
        [0.49786776, 0.33536584, 0.31191497, 0.65755711, 0.33916533,
         0.28000802, 0.96353922, 0.48668879, 0.2137879 , 0.05138831],
        [0.3103722 , 0.44329644, 0.59948727, 0.4270652 , 0.07183972,
         0.38027624, 0.11436435, 0.87397129, 0.30284334, 0.67260384],
        [0.98514682, 0.34194817, 0.11071858, 0.06985572, 0.66624931,
         0.78898765, 0.55769639, 0.88758613, 0.4063638 , 0.26814952],
        [0.08866118, 0.13439661, 0.62080881, 0.75090637, 0.91403987,
         0.38438196, 0.28755832, 0.28634988, 0.42181774, 0.79153211],
        [0.35858516, 0.18013178, 0.61352218, 0.56263594, 0.93919831,
         0.30437239, 0.90516922, 0.12975216, 0.57737656, 0.23194576]]),
 array([[0.87288918, 0.80581414, 0.19787804, 0.4953588 , 0.74021847,
         0.46710976, 0.41584958, 0.68319892, 0.04427872, 0.10969431],
        [0.93023902, 0.67