In [5]:
import os

os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import numpy as np
import torch
import random
import math 
import copy
import random
import argparse
import torch.optim as optim
import torch.nn as nn

import numpy as np
from scipy.optimize import minimize
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [11]:
alpha = 0.01

def UCB(A, phi):
    #### ucb term
    phi = phi.view(-1,1)
    try:
        tmp, LU = torch.linalg.solve(phi,A)
    except:
        A = A.detach().numpy()
        phi2 = phi.detach().numpy()
        tmp = torch.Tensor(np.linalg.solve(A, phi2))

    return torch.sqrt(torch.matmul(torch.transpose(phi,1,0), tmp))

def calculate_v(contextinfo_list, A, theta):
    vj_list = []
    feature_list = []
    for i in contextinfo_list:
        feature = i.cpu()
        first_item =  torch.mm( feature.view(1,-1) , theta)
        second_item = alpha * UCB(A, feature)
        vj_list.append((first_item + second_item).item())
        feature_list.append(feature.detach().numpy())
    return np.array(vj_list), feature_list

def update_A(A, info_subset):
    for i in info_subset:
        i = torch.tensor(i, dtype=torch.float32,device=device)
        feature = i.view(1,-1).cpu()
        A = A + torch.mm(feature.t(), feature)
    return A

# 这里调小utility 
def prob(vj_list):
    sum = np.sum(np.exp(vj_list)) + 1
    return [np.exp(vj_list[i]) / sum for i in range(len(vj_list))]  

def revenue(vj_list, reward_list):
    sum = np.sum(np.exp(vj_list)) + 1
    return np.sum(np.multiply(np.exp(vj_list), reward_list) / sum)

def assort(contextinfo_list, reward_list, vj_list, feature_list):
    length = len(contextinfo_list)
    # sort the contextinfo_list and vj with descending order of reward_list
    sorted_list = sorted(zip(contextinfo_list, vj_list, reward_list, feature_list), key=lambda x: x[2], reverse=True)
    
    contextinfo_list = [x[0] for x in sorted_list]
    vj_list = [x[1] for x in sorted_list]
    reward_list = [x[2] for x in sorted_list]
    feature_list = [x[3] for x in sorted_list]

    # calculate the optimal assortment
    optimal_assort = []
    optimal_reward = revenue(vj_list[:1], reward_list[:1])
    index = 1 
    for i in range(2,length):
        if revenue(vj_list[:i], reward_list[:i]) >= optimal_reward:
            optimal_reward = revenue(vj_list[:i], reward_list[:i])
        else:
            index = i - 1 
            break
    return contextinfo_list[:index], feature_list[:index], vj_list[:index], reward_list[:index]

# this is for the non_linear purchase model when v = x dot theta
def sigmoid(x):
    return 1 / (1 + np.exp(-1*x))

def get_nonlinear_purchase(feature_list):
    true_Vlist = [(TRUE_THETA @ sigmoid(feature_list[i])).reshape(-1,1).item() for  i in range(len(feature_list))]
    prob_list = prob(true_Vlist)

    # sample item according to prob_list
    if random.uniform(0,1) < 1 - np.sum(prob_list):
        return np.array([0 for i in range(len(feature_list))])
    else:
        returnlist = [0 for i in range(len(feature_list))]
        indexchoose = random.choices([i for i in range(len(prob_list))], weights = prob_list)[0]
        returnlist[indexchoose] = 1
        return np.array(returnlist)

In [9]:
lambd = 0.1
def likelihood(theta, feature_list ,y_list):
    # feature's dimension is len * dimension , theta is 1*dimension
    v_list = np.matmul(feature_list, theta.T).reshape(-1)
    ln_prob = np.log(prob(v_list))
    summation = ln_prob * y_list
    return -1 * np.sum(summation)

def likelihood_derivative(theta, feature_list, y_list):
    v_list = np.matmul(feature_list, theta.T).reshape(-1)
    prob_list = prob(v_list)
    summation = np.matmul(np.array(feature_list).T, (y_list - prob_list))
    return -1 * summation

def likelihood_array(theta, feature_list_list, y_list_list):
    summation =  0.5 * lambd * np.dot(theta, theta)
    for i in range(len(feature_list_list)):
        summation += likelihood(theta, feature_list_list[i], y_list_list[i])
    return summation

def likelihood_derivative_array(theta, feature_list_list, y_list_list):
    summation = 0.5 * lambd * theta
    for i in range(len(feature_list_list)):
        summation += likelihood_derivative(theta, feature_list_list[i], y_list_list[i])
    return summation

In [8]:
# this block is only used for linear model revenue calculation
def calculate_sigmoid_v(contextinfo_list, A, theta):
    vj_list = []
    feature_list = []
    for i in contextinfo_list:
        feature = sigmoid(i)
        first_item =  torch.mm( feature.view(1,-1) , theta)

        vj_list.append(first_item.item())
        feature_list.append(feature.detach().numpy())
    return np.array(vj_list), feature_list

# 真实情况乱下的theta，feature
def get_true_sigmoid_ass(context, profit):
    
    v_array = np.array(sigmoid(context) @ TRUE_THETA.T ).reshape(-1)
    assortment, ass_features, v_list, profit_list = assort(context, profit.tolist()[0], v_array.tolist() , sigmoid(context))
    true_probablility = np.array(prob(v_list))
    revenue = np.dot(true_probablility, np.array(profit_list))
    return revenue


def get_assortment_revenue(assortment, profit):
    v_array = np.array(sigmoid(np.array(assortment)) @ TRUE_THETA.T ).reshape(-1)
    true_probablility = np.array(prob(v_array))
    revenue = np.dot(true_probablility, np.array(profit))
    return revenue

In [10]:
# data reader
CONTEXT_ARRAY = np.load('nonlinear_data/features.npy') 
REWARD_ARRAY = np.load('nonlinear_data/rewards.npy')
TRUE_THETA = np.load('nonlinear_data/theta.npy')

In [13]:
data_length = len(CONTEXT_ARRAY)

# define the hyperparameters
input_size = 20
hidden_size = 20
output_size = 10
num_layers = 10

beta = 0.1

H = 100

# initialize the parameters

theta = np.random.randn(output_size) / np.sqrt(output_size)
#theta = TRUE_THETA
LAMBDA = lambd * torch.eye(output_size, dtype=torch.float32)

ass_list = []
feature_list = []
purchase_list = []
theta_list = []

revenue_list1 = []
revenue_list2 = []
true_profit_list = []

for t in range(0, 1000):
    context = CONTEXT_ARRAY[t]
    profit = REWARD_ARRAY[t]

    theta_tensor = torch.tensor(theta.reshape(-1,1), dtype=torch.float32)
    v_array,initial_feature = calculate_v(torch.tensor(context,dtype=torch.float32), LAMBDA, theta_tensor)
    assortment, ass_features, vv_list , reward = assort(context, profit.tolist()[0], v_array.tolist() , initial_feature)
    purchase_vector = get_nonlinear_purchase(assortment)
    
    # calculate the ideal 
    expected_revenue1 = get_true_sigmoid_ass(context, profit)
    revenue_list1.append(expected_revenue1)
    true_profit_list.append(np.dot(np.array(purchase_vector), reward))
    revenue_list2.append(get_assortment_revenue(assortment, reward))

    # add to list
    ass_list.append(np.array(assortment))
    feature_list.append(np.array(ass_features))
    purchase_list.append(purchase_vector)

    # update the parameters
    LAMBDA = update_A(LAMBDA, assortment)
    
    # update theta using MLE
    initial_guess = theta
    
    try:
        result = minimize(likelihood_array, initial_guess, args=(feature_list, purchase_list), method='SLSQP', 
                  constraints={'type':'eq', 'fun': likelihood_derivative_array, 'args':(feature_list, purchase_list)})
        theta = result.x
        if t % 100 == 1: print(theta)
    except: 
        print('error occured')
        theta = theta
    theta_list.append(theta)
    


[-0.22870528 -0.17935727 -0.65400813 -0.20605636 -0.23534029 -0.34605186
  0.33327695  0.17143666 -0.40880569 -0.07799873]
[-0.00141143  0.13714504 -0.08575865  0.10103516 -0.05731372 -0.01257677
 -0.0078417   0.02746426 -0.00866562  0.18567483]
[-0.07690536  0.03995909 -0.00101855  0.06878837  0.02134609 -0.0095121
 -0.05170432  0.04522771 -0.05705588  0.12051182]
[-0.04769646  0.05402247  0.00067224  0.08268005  0.01933419  0.00374535
 -0.04098286  0.01787088 -0.02862267  0.13319404]
[-0.00928362  0.02698569  0.00726601  0.06184916  0.05816884  0.00843404
 -0.01275227 -0.01151073 -0.01825658  0.13223609]
[-0.01760894  0.03523695  0.02333788  0.04357887  0.05670006 -0.00940703
 -0.03689488 -0.00539235  0.00510561  0.11311941]
[-0.02416429  0.0459545   0.03606601  0.05054745  0.0578339  -0.00669501
 -0.03200995  0.0037365   0.00312875  0.11492772]
[-0.01799888  0.05219038  0.0294003   0.02874532  0.07601689 -0.00589668
 -0.05404372 -0.00890669  0.00956768  0.11692867]
[-0.01473836  0.0

In [14]:
np.save('nonlinear_data/record/mle_true_revenue_list1.npy', np.array(revenue_list1))
np.save('nonlinear_data/record/mle_simulate_revenue_list2.npy', np.array(revenue_list2))
np.save('nonlinear_data/record/mle_purchase_revenue_list.npy', np.array(true_profit_list))