In [1]:
import collections
import pickle
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from matrix_factorization import MF, NCF_ours, NCF
from utils import *
from sklearn.metrics import roc_auc_score
import time
# Pin the NumPy and PyTorch global random generators to the same fixed seed
# so that repeated runs start from the same random state.
np.random.seed(seed=2020)
torch.manual_seed(seed=2020)

def mse_func(x, y):
    """Return the mean squared error between ``x`` and ``y``.

    Args:
        x: Values supporting ``x - y`` (e.g. NumPy arrays or scalars).
        y: Values compatible with ``x`` under subtraction.

    Returns:
        ``np.mean`` of the element-wise squared difference.
    """
    residual = x - y
    squared = residual ** 2
    return np.mean(squared)

# Load the pre-built experiment arrays. Six objects are read back-to-back from
# the same pickle stream, so the order of the loads below must not change.
# NOTE(review): pickle.load can execute arbitrary code; only open a
# "constructed_data" file that comes from a trusted source.
with open("constructed_data", "rb") as file:  # handle is closed even if a load fails
    constructed_data_train = pickle.load(file)
    constructed_data_test = pickle.load(file)
    x_tr = pickle.load(file)
    t_tr = pickle.load(file)
    c_tr = pickle.load(file)
    y_tr = pickle.load(file)

# Number of users: largest value in column 0 of x_tr, plus one
# (presumably ids are zero-based — confirm against the data builder).
# The array's own .max() is used instead of the builtin max(), which would
# iterate the column element by element in Python.
num_user = int(x_tr[:, 0].max() + 1)

# Number of items (videos): largest value in column 1 of x_tr, plus one.
num_item = int(x_tr[:, 1].max() + 1)

In [2]:
# Embedding dimension passed to every NCF model as `embedding_k`.
embedding_k = 64

# Penalty: rows with a penalised class label receive a realised reward of -C,
# and C also appears in the predicted-reward formulas and cost vectors below.
C = 0.4

# Budget: upper bound on the fraction of test rows selected in the
# evaluation cells.
budget = 0.4

# Batch size.
# NOTE(review): the fit() calls in this notebook pass the literal 8192 rather
# than this variable, so changing it here has no effect on training.
batch_size = 8192

# Scenario; the code below branches on 'cash' and 'coupon'.
scenario = 'coupon'

# Test-set model inputs: the first two columns of every test row
# (presumably the user id and item id — confirm against the data builder).
x_te = constructed_data_test[:, :2]

# Realised reward per test row, derived from the class label in column 6:
#   label 4          -> +1
#   penalised labels -> -C   ('cash': labels 3 and 5; 'coupon': label 5 only)
#   anything else    ->  0
# Boolean masks replace a per-row Python loop that re-sliced column 6 on
# every iteration.
labels_te = constructed_data_test[:, 6]
if scenario == 'cash':
    penalised_te = (labels_te == 3) | (labels_te == 5)
elif scenario == 'coupon':
    penalised_te = labels_te == 5
else:
    # Previously an unsupported scenario silently produced all-zero rewards.
    raise ValueError(f"unsupported scenario {scenario!r}; expected 'cash' or 'coupon'")

R = np.zeros(constructed_data_test.shape[0])
R[labels_te == 4] = 1
R[penalised_te] = -C

# Append the realised reward as a new last column of the test set.
# NOTE: this rebinds constructed_data_test, so running this cell twice appends
# the column twice; reload the data before re-running.
constructed_data_test = np.c_[constructed_data_test, R]

In [3]:
# Outcome-regression (OR) baseline: two Neural Collaborative Filtering (NCF)
# models, each trained on the training rows of one value of t_tr.
# Both use the configured `batch_size` so the constant actually controls training.

# First model (y0or): fitted on rows with t_tr == 0.
ncf_y0or = NCF(num_user, num_item, embedding_k=embedding_k)
ncf_y0or.cuda()
ncf_y0or.fit(x_tr[t_tr == 0], y_tr[t_tr == 0], lamb=1e-5, lr=0.01, batch_size=batch_size)
y0_or, _ = ncf_y0or.predict(x_tr)      # predictions on all training rows
y0_or_te, _ = ncf_y0or.predict(x_te)   # predictions on the test rows

# Second model (y1or): fitted on rows with t_tr == 1.
ncf_y1or = NCF(num_user, num_item, embedding_k=embedding_k)
ncf_y1or.cuda()
ncf_y1or.fit(x_tr[t_tr == 1], y_tr[t_tr == 1], lamb=1e-5, lr=0.01, batch_size=batch_size)
y1_or, _ = ncf_y1or.predict(x_tr)
y1_or_te, _ = ncf_y1or.predict(x_te)

# Predicted reward per test row: (1 + C) * (y1 - y0) - C.
reward_or = (1 + C) * (y1_or_te - y0_or_te) - C
temp_or = np.c_[constructed_data_test, reward_or]

# Sort rows by predicted reward, highest first (last column is the score).
temp_or = temp_or[np.argsort(-temp_or[:, -1])]

[NCF] epoch:30, xent:11.713759675621986
[NCF] epoch:22, xent:21.279595658183098


In [4]:
# Naive baseline: score each test row by the t == 1 outcome prediction minus
# the penalty C, append that score as the last column, and order the rows
# from highest to lowest score.
reward_naive = y1_or_te - C
temp_naive = np.column_stack((constructed_data_test, reward_naive))
temp_naive = temp_naive[(-temp_naive[:, -1]).argsort()]

In [5]:
# NCF model for c, fitted on the training rows with t_tr == 1.
ncf = NCF(num_user, num_item, embedding_k=embedding_k)
ncf.cuda()
ncf.fit(x_tr[t_tr == 1], c_tr[t_tr == 1], lamb=1e-5, lr=0.01, batch_size=batch_size)

# Predicted c on the test rows.
c1_te, _ = ncf.predict(x_te)

# Products of the c prediction (or its complement) with the OR y1 prediction
# (or its complement) for every test row.
y1_or_c1_te = c1_te * y1_or_te
y1_or_c0_te = (1 - c1_te) * y1_or_te
y0_or_c1_te = c1_te * (1 - y1_or_te)
y0_or_c0_te = (1 - c1_te) * (1 - y1_or_te)

# Per-class probabilities, in the order a, b, c, d, e.
pa_or = y0_or_c0_te
pb_or = y1_or_c0_te
pc_or = y0_or_c1_te
pe_or = y0_or_te - y1_or_c0_te
pd_or = y1_or_c1_te - pe_or

# Reward attached to each of the five classes (a, b, c, d, e).
if scenario == 'cash':
    cost = np.array([0, 0, -C, 1, -C])
elif scenario == 'coupon':
    cost = np.array([0, 0, 0, 1, -C])
else:
    # Without this branch `cost` would be undefined, or stale from an
    # earlier cell, when the scenario is not one of the two handled values.
    raise ValueError(f"unsupported scenario {scenario!r}; expected 'cash' or 'coupon'")

p_or = np.c_[pa_or, pb_or, pc_or, pd_or, pe_or]

# Total predicted reward: class probabilities weighted by their rewards.
reward_or = np.sum(p_or * cost, axis=1)

# Sort by predicted reward, descending, so the best rows come first.
temp_or_ours = np.c_[constructed_data_test, reward_or]
temp_or_ours = temp_or_ours[np.argsort(-temp_or_ours[:, -1])]

[NCF] epoch:15, xent:22.684517860412598


In [6]:
# Evaluate the temp_or_ours ranking (rows sorted by predicted reward, best first).
# Select at most a `budget` fraction of the rows and never more rows than have
# a positive predicted reward. The cap is computed in whole rows: scaling the
# fraction k / n back up by n can land just below k and truncate to k - 1.
n_rows = temp_or_ours.shape[0]
n_positive = np.count_nonzero(temp_or_ours[:, -1] > 0)
temp_5 = temp_or_ours[:min(int(budget * n_rows), n_positive)]

# Selected rows per class label (column 6); vectorised instead of builtin sum().
a = np.count_nonzero(temp_5[:, 6] == 1)
b = np.count_nonzero(temp_5[:, 6] == 2)
c = np.count_nonzero(temp_5[:, 6] == 3)
d = np.count_nonzero(temp_5[:, 6] == 4)
e = np.count_nonzero(temp_5[:, 6] == 5)

# Realised reward of the selection: second-to-last column holds the true reward.
Real_reward = temp_5[:, -2].sum()
print(a, b, c, d, e, Real_reward)

74426 15778 18467 70627 22730 61534.999999971005


In [7]:
# Evaluate the temp_or ranking (rows sorted by predicted reward, best first).
# Take at most a `budget` fraction of the rows, capped at the number of rows
# with a positive predicted reward. Integer row counts avoid the k / n * n
# float round trip, which can truncate to k - 1.
n_rows = temp_or.shape[0]
n_positive = np.count_nonzero(temp_or[:, -1] > 0)
temp_5 = temp_or[:min(int(budget * n_rows), n_positive)]

# Selected rows per class label (column 6); vectorised instead of builtin sum().
a = np.count_nonzero(temp_5[:, 6] == 1)
b = np.count_nonzero(temp_5[:, 6] == 2)
c = np.count_nonzero(temp_5[:, 6] == 3)
d = np.count_nonzero(temp_5[:, 6] == 4)
e = np.count_nonzero(temp_5[:, 6] == 5)

# Realised reward of the selection: second-to-last column holds the true reward.
Real_reward = temp_5[:, -2].sum()
print(a, b, c, d, e, Real_reward)

36698 4196 5784 37296 7780 34183.999999994245


In [8]:
# Evaluate the temp_naive ranking (rows sorted by predicted reward, best first).
# Take at most a `budget` fraction of the rows, capped at the number of rows
# with a positive predicted reward. Integer row counts avoid the k / n * n
# float round trip, which can truncate to k - 1.
n_rows = temp_naive.shape[0]
n_positive = np.count_nonzero(temp_naive[:, -1] > 0)
temp_5 = temp_naive[:min(int(budget * n_rows), n_positive)]

# Selected rows per class label (column 6); vectorised instead of builtin sum().
a = np.count_nonzero(temp_5[:, 6] == 1)
b = np.count_nonzero(temp_5[:, 6] == 2)
c = np.count_nonzero(temp_5[:, 6] == 3)
d = np.count_nonzero(temp_5[:, 6] == 4)
e = np.count_nonzero(temp_5[:, 6] == 5)

# Realised reward of the selection: second-to-last column holds the true reward.
Real_reward = temp_5[:, -2].sum()
print(a, b, c, d, e, Real_reward)

14322 75719 50364 4172 57451 -18808.40000000552


In [9]:
# Propensity model: NCF fitted to predict t_tr from x_tr on all training rows.
# Uses the configured `batch_size` rather than a repeated literal.
ncf_ps = NCF(num_user, num_item, embedding_k=embedding_k)
ncf_ps.cuda()
ncf_ps.fit(x_tr, t_tr, lamb=1e-5, lr=0.01, batch_size=batch_size)
ps_tr, _ = ncf_ps.predict(x_tr)

# Clip to [0.1, 0.9] so the later divisions by ps_tr and (1 - ps_tr)
# stay bounded.
ps_tr = np.clip(ps_tr, 0.1, 0.9)

[NCF] epoch:37, xent:22.491099908947945


In [10]:
# Inverse-propensity-scored (IPS) outcome models, trained on all training rows
# with propensity-reweighted targets. Both use the configured `batch_size`.

# y0 model: target is (1 - t) * y / (1 - ps).
ncf_y0ips = NCF(num_user, num_item, embedding_k=embedding_k)
ncf_y0ips.cuda()
ncf_y0ips.fit(x_tr, (1 - t_tr) * (y_tr / (1 - ps_tr)), lamb=1e-5, lr=0.01, batch_size=batch_size)
y0_ips, _ = ncf_y0ips.predict(x_tr)
y0_ips_te, _ = ncf_y0ips.predict(x_te)

# y1 model: target is t * y / ps.
ncf_y1ips = NCF(num_user, num_item, embedding_k=embedding_k)
ncf_y1ips.cuda()
ncf_y1ips.fit(x_tr, t_tr * y_tr / (ps_tr), lamb=1e-5, lr=0.01, batch_size=batch_size)
y1_ips, _ = ncf_y1ips.predict(x_tr)
y1_ips_te, _ = ncf_y1ips.predict(x_te)

# Predicted reward per test row, then sort rows by it, highest first.
reward_ips = (1 + C) * (y1_ips_te - y0_ips_te) - C
temp_ips = np.c_[constructed_data_test, reward_ips]
temp_ips = temp_ips[np.argsort(-temp_ips[:, -1])]

[NCF] epoch:23, xent:126.20267647504807
[NCF] epoch:20, xent:149.22846603393555


In [11]:
# Products of the c prediction (or its complement) with the IPS y1 prediction
# (or its complement) for every test row.
y1_ips_c1_te = c1_te * y1_ips_te
y1_ips_c0_te = (1 - c1_te) * y1_ips_te
y0_ips_c1_te = c1_te * (1 - y1_ips_te)
y0_ips_c0_te = (1 - c1_te) * (1 - y1_ips_te)

# Five per-row class scores, in the order a, b, c, d, e.
pa_ips = y0_ips_c0_te
pb_ips = y1_ips_c0_te
pc_ips = y0_ips_c1_te
pe_ips = y0_ips_te - y1_ips_c0_te
pd_ips = y1_ips_c1_te - pe_ips

# Reward attached to each of the five classes (a, b, c, d, e).
if scenario == 'cash':
    cost = np.array([0, 0, -C, 1, -C])
elif scenario == 'coupon':
    cost = np.array([0, 0, 0, 1, -C])
else:
    # Without this branch `cost` would be undefined, or stale from an
    # earlier cell, when the scenario is not one of the two handled values.
    raise ValueError(f"unsupported scenario {scenario!r}; expected 'cash' or 'coupon'")

p_ips = np.c_[pa_ips, pb_ips, pc_ips, pd_ips, pe_ips]

# Predicted reward: class scores weighted by their rewards.
reward_ips = np.sum(p_ips * cost, axis=1)

# Append the score as the last column and sort rows by it, highest first.
temp_ips_ours = np.c_[constructed_data_test, reward_ips]
temp_ips_ours = temp_ips_ours[np.argsort(-temp_ips_ours[:, -1])]

In [12]:
# Evaluate the temp_ips_ours ranking (rows sorted by predicted reward, best first).
# Take at most a `budget` fraction of the rows, capped at the number of rows
# with a positive predicted reward. Integer row counts avoid the k / n * n
# float round trip, which can truncate to k - 1.
n_rows = temp_ips_ours.shape[0]
n_positive = np.count_nonzero(temp_ips_ours[:, -1] > 0)
temp_5 = temp_ips_ours[:min(int(budget * n_rows), n_positive)]

# Selected rows per class label (column 6); vectorised instead of builtin sum().
a = np.count_nonzero(temp_5[:, 6] == 1)
b = np.count_nonzero(temp_5[:, 6] == 2)
c = np.count_nonzero(temp_5[:, 6] == 3)
d = np.count_nonzero(temp_5[:, 6] == 4)
e = np.count_nonzero(temp_5[:, 6] == 5)

# Realised reward of the selection: second-to-last column holds the true reward.
Real_reward = temp_5[:, -2].sum()
print(a, b, c, d, e, Real_reward)

82899 7042 13417 81492 17178 74620.80000003369


In [13]:
# Evaluate the temp_ips ranking (rows sorted by predicted reward, best first).
# Take at most a `budget` fraction of the rows, capped at the number of rows
# with a positive predicted reward. Integer row counts avoid the k / n * n
# float round trip, which can truncate to k - 1.
n_rows = temp_ips.shape[0]
n_positive = np.count_nonzero(temp_ips[:, -1] > 0)
temp_5 = temp_ips[:min(int(budget * n_rows), n_positive)]

# Selected rows per class label (column 6); vectorised instead of builtin sum().
a = np.count_nonzero(temp_5[:, 6] == 1)
b = np.count_nonzero(temp_5[:, 6] == 2)
c = np.count_nonzero(temp_5[:, 6] == 3)
d = np.count_nonzero(temp_5[:, 6] == 4)
e = np.count_nonzero(temp_5[:, 6] == 5)

# Realised reward of the selection: second-to-last column holds the true reward.
Real_reward = temp_5[:, -2].sum()
print(a, b, c, d, e, Real_reward)

81461 8558 14075 79336 18598 71896.80000001728


In [14]:
# Doubly-robust (DR) outcome models: the OR prediction plus a
# propensity-weighted residual correction is used as the training target.
# Both models take the configured `embedding_k` and `batch_size`; the second
# model previously hard-coded 64 for the embedding size.

# y0 model: target is (1 - t) * (y - y0_or) / (1 - ps) + y0_or.
ncf_y0dr = NCF(num_user, num_item, embedding_k=embedding_k)
ncf_y0dr.cuda()
ncf_y0dr.fit(x_tr, ((1 - t_tr) * (y_tr / (1 - ps_tr) - y0_or / (1 - ps_tr)) + y0_or),
             lamb=1e-5, lr=0.01, batch_size=batch_size)
y0_dr_te, _ = ncf_y0dr.predict(x_te)

# y1 model: target is t * (y - y1_or) / ps + y1_or.
ncf_y1dr = NCF(num_user, num_item, embedding_k=embedding_k)
ncf_y1dr.cuda()
ncf_y1dr.fit(x_tr, (t_tr * (y_tr / (ps_tr) - y1_or / (ps_tr)) + y1_or),
             lamb=1e-5, lr=0.01, batch_size=batch_size)
y1_dr_te, _ = ncf_y1dr.predict(x_te)

# Predicted reward per test row, then sort rows by it, highest first.
reward_dr = (1 + C) * (y1_dr_te - y0_dr_te) - C
temp_dr = np.c_[constructed_data_test, reward_dr]
temp_dr = temp_dr[np.argsort(-temp_dr[:, -1])]

[NCF] epoch:25, xent:89.36724120378494
[NCF] epoch:15, xent:80.07591837644577


In [15]:
# Products of the c prediction (or its complement) with the DR y1 prediction
# (or its complement) for every test row.
y1_dr_c1_te = c1_te * y1_dr_te
y1_dr_c0_te = (1 - c1_te) * y1_dr_te
y0_dr_c1_te = c1_te * (1 - y1_dr_te)
y0_dr_c0_te = (1 - c1_te) * (1 - y1_dr_te)

# Five per-row class scores, in the order a, b, c, d, e.
pa_dr = y0_dr_c0_te
pb_dr = y1_dr_c0_te
pc_dr = y0_dr_c1_te
pe_dr = y0_dr_te - y1_dr_c0_te
pd_dr = y1_dr_c1_te - pe_dr

# Reward attached to each of the five classes (a, b, c, d, e).
if scenario == 'cash':
    cost = np.array([0, 0, -C, 1, -C])
elif scenario == 'coupon':
    cost = np.array([0, 0, 0, 1, -C])
else:
    # Without this branch `cost` would be undefined, or stale from an
    # earlier cell, when the scenario is not one of the two handled values.
    raise ValueError(f"unsupported scenario {scenario!r}; expected 'cash' or 'coupon'")

p_dr = np.c_[pa_dr, pb_dr, pc_dr, pd_dr, pe_dr]

# Predicted reward: class scores weighted by their rewards.
reward_dr = np.sum(p_dr * cost, axis=1)

# Append the score as the last column and sort rows by it, highest first.
temp_dr_ours = np.c_[constructed_data_test, reward_dr]
temp_dr_ours = temp_dr_ours[np.argsort(-temp_dr_ours[:, -1])]

In [16]:
# Evaluate the temp_dr_ours ranking (rows sorted by predicted reward, best first).
# Take at most a `budget` fraction of the rows, capped at the number of rows
# with a positive predicted reward. Integer row counts avoid the k / n * n
# float round trip, which can truncate to k - 1.
n_rows = temp_dr_ours.shape[0]
n_positive = np.count_nonzero(temp_dr_ours[:, -1] > 0)
temp_5 = temp_dr_ours[:min(int(budget * n_rows), n_positive)]

# Selected rows per class label (column 6); vectorised instead of builtin sum().
a = np.count_nonzero(temp_5[:, 6] == 1)
b = np.count_nonzero(temp_5[:, 6] == 2)
c = np.count_nonzero(temp_5[:, 6] == 3)
d = np.count_nonzero(temp_5[:, 6] == 4)
e = np.count_nonzero(temp_5[:, 6] == 5)

# Realised reward of the selection: second-to-last column holds the true reward.
Real_reward = temp_5[:, -2].sum()
print(a, b, c, d, e, Real_reward)

83219 6862 14685 81560 15702 75279.20000004367


In [17]:
# Evaluate the temp_dr ranking (rows sorted by predicted reward, best first).
# Take at most a `budget` fraction of the rows, capped at the number of rows
# with a positive predicted reward. Integer row counts avoid the k / n * n
# float round trip, which can truncate to k - 1.
n_rows = temp_dr.shape[0]
n_positive = np.count_nonzero(temp_dr[:, -1] > 0)
temp_5 = temp_dr[:min(int(budget * n_rows), n_positive)]

# Selected rows per class label (column 6); vectorised instead of builtin sum().
a = np.count_nonzero(temp_5[:, 6] == 1)
b = np.count_nonzero(temp_5[:, 6] == 2)
c = np.count_nonzero(temp_5[:, 6] == 3)
d = np.count_nonzero(temp_5[:, 6] == 4)
e = np.count_nonzero(temp_5[:, 6] == 5)

# Realised reward of the selection: second-to-last column holds the true reward.
Real_reward = temp_5[:, -2].sum()
print(a, b, c, d, e, Real_reward)

72513 4532 11023 73631 11183 69157.80000000223


In [18]:
# Joint model (NCF_ours) fitted on t, c and y together over all training rows.
# NOTE: this rebinds `ncf`, which an earlier cell used for a plain NCF model.
ncf = NCF_ours(num_user, num_item, embedding_k=embedding_k)
ncf.cuda()
ncf.fit(x_tr, t_tr, c_tr, y_tr, eta=0.5, alpha1=5, alpha=1, beta=1, theta=5, gamma=5, rho=5, thr=0.1,
        lr=0.01,
        batch_size=batch_size,  # configured constant instead of a repeated literal
        lamb=1e-5,
        tol=1e-5,
        verbose=False)

# Reward attached to each of the five predicted classes.
if scenario == 'cash':
    cost = np.array([0, 0, -C, 1, -C])
elif scenario == 'coupon':
    cost = np.array([0, 0, 0, 1, -C])
else:
    # Without this branch `cost` would be undefined, or stale from an
    # earlier cell, when the scenario is not one of the two handled values.
    raise ValueError(f"unsupported scenario {scenario!r}; expected 'cash' or 'coupon'")

# Predictions for the test rows, reshaped to five values per row, then
# weighted by the class rewards to give one predicted reward per row.
pred_class = ncf.predict(constructed_data_test[:, :2]).reshape([-1, 5])
reward_ours = np.sum(pred_class * cost, axis=1)

# Append the score as the last column and sort rows by it, highest first.
temp_ours = np.c_[constructed_data_test, reward_ours]
temp_ours = temp_ours[np.argsort(-temp_ours[:, -1])]

[NCF] epoch:37, xent:10288872.5078125


In [19]:
# Evaluate the temp_ours ranking (rows sorted by predicted reward, best first).
# Take at most a `budget` fraction of the rows, capped at the number of rows
# with a positive predicted reward. Integer row counts avoid the k / n * n
# float round trip, which can truncate to k - 1.
n_rows = temp_ours.shape[0]
n_positive = np.count_nonzero(temp_ours[:, -1] > 0)
temp_5 = temp_ours[:min(int(budget * n_rows), n_positive)]

# Selected rows per class label (column 6); vectorised instead of builtin sum().
a = np.count_nonzero(temp_5[:, 6] == 1)
b = np.count_nonzero(temp_5[:, 6] == 2)
c = np.count_nonzero(temp_5[:, 6] == 3)
d = np.count_nonzero(temp_5[:, 6] == 4)
e = np.count_nonzero(temp_5[:, 6] == 5)

# Realised reward of the selection: second-to-last column holds the true reward.
Real_reward = temp_5[:, -2].sum()
print(a, b, c, d, e, Real_reward)

89874 42 9582 90201 12329 85269.40000006242
