In [8]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as opt
# from tqdm import tqdm
from tqdm import tqdm_notebook as tqdm
from heapq import heappush, heappop
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import math
import evaluation
import data_loader
# from model import MF, MUD
# import pdb
# import torchsnooper

In [9]:
class MF(nn.Module):
    """
        Biased matrix factorization rating model:

            score(u, i) = globalBias + uBias[u] + itemBias[i] + <uEmbed[u], itemEmbed[i]>

        - userLen: the number of users
        - itemLen: the number of items
        - avg_rating: constant global bias; a tensor or a plain Python number
        - params: the parameters dict used for constructing model
            - l_size: latent dimension size
            - gpu: True/False, whether using GPU (optional; CPU when absent)

    """
    def __init__(self, userLen, itemLen, avg_rating, params):
        super(MF, self).__init__()
        self.userNum = userLen
        self.itemNum = itemLen
        self.params = params
        if 'gpu' in params and params['gpu'] == True:
            self.device = 'cuda:0'
        else:
            self.device = 'cpu'

        l_size = params['l_size']

        # Global bias is a fixed constant, not a learned parameter.
        # Accept plain numbers as well as tensors so callers need not wrap it.
        if not torch.is_tensor(avg_rating):
            avg_rating = torch.tensor(float(avg_rating))
        self.globalBias = avg_rating.to(self.device)

        # Per-user and per-item scalar biases start at zero.
        self.uBias = nn.Embedding(userLen, 1).to(self.device)
        self.uBias.weight.data.zero_()

        self.itemBias = nn.Embedding(itemLen, 1).to(self.device)
        self.itemBias.weight.data.zero_()

        # Latent factors are drawn from U(-1, 1).
        # NOTE(review): with this range the initial dot product has variance
        # l_size / 9, which is large relative to a 1-5 rating scale; a smaller
        # range may train more smoothly -- confirm before changing.
        self.uEmbed = nn.Embedding(userLen, l_size).to(self.device)
        self.uEmbed.weight.data.uniform_(-1, 1)

        self.itemEmbed = nn.Embedding(itemLen, l_size).to(self.device)
        self.itemEmbed.weight.data.uniform_(-1, 1)

    def forward(self, users, items):
        """
            Predict scores for paired user/item indices.

            - users: LongTensor of user indices, any shape S
            - items: LongTensor of item indices, same shape S

            Returns a tensor of shape S + (1,); for the usual 1-D batch of
            size B this is (B, 1).
        """
        uE = self.uEmbed(users)
        iE = self.itemEmbed(items)
        # Reduce over the latent axis and keep it as a size-1 axis so the
        # result lines up with the (..., 1) bias lookups for any index shape.
        interaction = torch.mul(uE, iE).sum(-1, keepdim=True)
        score = self.globalBias + self.uBias(users) + self.itemBias(items) + interaction
        return score
        

In [10]:
# Experiment configuration, read by the model, optimizer and data loaders.
params = {
    'lr': 1e-1,            # SGD learning rate
    'negNum_train': 3,     # passed to trainset.set_negN
    'negNum_test': 200,    # passed to valset/testset.set_negN
    'epoch_limit': 20,     # number of training epochs
    'w_decay': 0,          # SGD weight_decay
    'batch_size': 64,      # training mini-batch size
    'gpu': False,          # MF places its tensors on cuda:0 when True
    'l_size': 16,          # latent dimension of the MF embeddings
    'epsilon': 0.01,       # not referenced by the cells shown in this notebook
}

In [11]:
# Dataset category; passed to every data_loader call that reads data.
category = 'Baby'

In [12]:
# Read the train/validation/test splits and the per-item side information
# for the selected category.
# NOTE(review): the return formats are defined in data_loader, which is not
# shown here -- check that module before relying on any particular structure.
train, val, test = data_loader.read_data(category)
item_price = data_loader.get_price(category)
item_related = data_loader.get_related(category)
distribution = data_loader.get_distribution(category)

In [13]:
# Wrap the raw splits in dataset objects. The validation and test sets are
# built with the training set's itemNum and userHist.
print('Loading training, validation set...')
trainset = data_loader.TransactionData(train, item_related, \
            item_price, distribution)
valset = data_loader.UserTransactionData(val, item_price, \
            trainset.itemNum, trainset.userHist)
print('Finish loading training and validation set.')
print('Loading Testing set...')
testset = data_loader.UserTransactionData(test, item_price, \
            trainset.itemNum, trainset.userHist)
print('Finish loading testing set.')
# Used below as the MF model's constant global bias.
avg_rating = trainset.get_avgRating()
print('Average rating score of training set: %.2f' %avg_rating)



Loading training, validation set...
Finish loading training and validation set.
Loading Testing set...
Finish loading testing set.
Average rating score of training set: 4.20


In [14]:
# Build the model, the optimizer/loss pair, and one DataLoader per split.
print('Finish data loading, start model preparing...')
model = MF(userLen = trainset.userNum, itemLen = trainset.itemNum,\
        avg_rating = avg_rating, params = params)
optimizer = opt.SGD(model.parameters(), lr = params['lr'], weight_decay = params['w_decay'])
criterion = nn.MSELoss()

# set_negN is called on each dataset before its loader is created.
# NOTE(review): presumably this sets the number of sampled negatives per
# example/user -- confirm in data_loader.
trainset.set_negN(params['negNum_train'])
trainLoader = DataLoader(trainset, batch_size = params['batch_size'], \
        shuffle = True, num_workers = 0)

# Validation and test use batch_size = 1 and no shuffling; the evaluation
# cells rely on each batch holding exactly one user.
valset.set_negN(params["negNum_test"])
valLoader = DataLoader(valset, batch_size = 1, shuffle = False, num_workers = 0)

testset.set_negN(params["negNum_test"])
testLoader = DataLoader(testset, batch_size = 1, shuffle = False, num_workers = 0)

Finish data loading, start model preparing...


In [15]:
# Train the MF model with mini-batch SGD on the MSE between predicted and
# observed ratings.
runningLoss = []  # loss of every mini-batch, accumulated across all epochs
printLoss = []    # mean of the latest 50 losses, recorded every 50th batch of an epoch
epoch = 0
# The evaluation cells call model.eval(); restore training mode so this cell
# behaves the same when it is re-run after them.
model.train()
while epoch < params['epoch_limit']:
    epoch += 1
    L = len(trainLoader.dataset)
    # The context manager closes the progress bar even if a batch raises.
    with tqdm(total = L) as pbar:
        for i, batchData in enumerate(trainLoader):
            # Inputs must live on the same device as the embeddings, otherwise
            # training fails as soon as params['gpu'] is switched on.
            users = torch.LongTensor(batchData['user']).to(model.device)
            items = torch.LongTensor(batchData['item']).to(model.device)
            r = torch.FloatTensor(batchData['rating']).to(model.device)

            # The model returns (batch, 1); flatten to match the rating vector.
            pre_r = model(users, items).view(-1)
            loss = criterion(pre_r, r)

            runningLoss.append(loss.item())
            if (i+1) >= 50:
                # Smoothed loss over the latest 50 batches (the window may
                # reach back into the previous epoch).
                recentLoss = np.mean(np.array(runningLoss[-50:]))
                if (i+1) % 50 == 0:
                    printLoss.append(recentLoss)
                pbar.set_postfix({'loss' : '{0:1.5f}'.format(recentLoss)})

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            pbar.update(users.shape[0])
    
#     with torch.no_grad():
#         L = len(valLoader.dataset)
#         pbar = tqdm(total = L)
#         scoreDict = dict()
#         for i, batchData in enumerate(valLoader):
#             if i > 1000:
#                 break
#             user = torch.LongTensor(batchData['user']).to(model.device)
#             posItems = torch.LongTensor(batchData['posItem']).to(model.device)
#             negItems = torch.LongTensor(batchData['negItem']).to(model.device)
#             items = torch.cat((posItems, negItems),1).view(-1)
#             users = user.expand(items.shape[0])
#             out = model.forward(users,items)
#             # print("user: \n" + str(user))
#             # print("posItems: \n" + str(posItems))
#             # print("negItems: \n" + str(negItems))
#             # print("items: \n" + str(items))
#             # print("users: \n" + str(users))
#             # print("out: \n" + str(out))
#             # print("model: \n" + str(model.))
#             # input()
#             # pdb.set_trace()
#             scoreHeap = list()
#             for j in range(out.shape[0]):
#                 gt = False
#                 if j < posItems.shape[1]:
#                     gt = True
#                 # if prices[j] > budget:
#                 #     heappush(scoreHeap, (100, (0 + items[j].cpu().numpy(), gt)))
#                 # else:
#                 #     heappush(scoreHeap, (1 - out[j].cpu().numpy(), (0 + items[j].cpu().numpy(), gt)))
#                 heappush(scoreHeap, (1 - out[j].cpu().numpy(), (0 + items[j].cpu().numpy(), gt)))
#             scores = list()
#             candidate = len(scoreHeap)
#             for k in range(candidate):
#                 scores.append(heappop(scoreHeap))
#             pbar.update(1)
#             scoreDict[user[0]] = (scores, posItems.shape[1])
#         pbar.close()
#     valResult = evaluation.ranking_performance(scoreDict,10)































































[19853, 3819, 4], [12437, 12559, 5], [18452, 12559, 5], 
[18713, 12559, 5], [6165, 12559, 5], [2471, 12559, 4],
[16993, 34058, 2]

In [17]:
# Spot-check the trained model on the seven [user, item, rating] triples
# listed just above this cell: the first tensor holds the user ids and the
# second the item ids.
model.forward(torch.LongTensor([19853,12437,18452,18713,6165,2471,16993]),
              torch.LongTensor([3819,12559,12559,12559,12559,12559,34058]))

tensor([[4.3069],
        [4.1740],
        [4.9030],
        [4.8148],
        [3.4789],
        [4.5673],
        [3.8936]], grad_fn=<AddBackward0>)

In [36]:
import analyse

# Rank each test user's positive items against that user's negative items,
# then pass the per-user rankings to analyse.ranking_analysis.
L = len(testLoader.dataset)
model.eval()
scoreDict = dict()
# The context manager closes the progress bar even if a batch raises.
with torch.no_grad(), tqdm(total = L) as pbar:
    for i, batchData in enumerate(testLoader):
        # testLoader uses batch_size = 1, so every batch is a single user.
        users = torch.LongTensor(batchData['user']).to(model.device) # users
        items = torch.LongTensor(batchData['posItem']).to(model.device).view(-1) # positive items
        negItems = torch.LongTensor(batchData['negItem']).to(model.device).view(-1) # negative items

        # Repeat the user id once per candidate. Sizing by the tensors
        # themselves (not params["negNum_test"]) keeps this correct when a
        # user has fewer negatives than requested.
        user = users.view(-1)
        pOut = model(user.expand(items.shape[0]), items).view(-1) # scores of positive samples
        nOut = model(user.expand(negItems.shape[0]), negItems).view(-1) # scores of negative samples

        # Move everything to host memory once instead of once per candidate.
        posCost = 1 - pOut.cpu().numpy()
        negCost = 1 - nOut.cpu().numpy()
        entries = [(cost, (item, True))
                   for cost, item in zip(posCost, items.cpu().numpy())]
        entries += [(cost, (item, False))
                    for cost, item in zip(negCost, negItems.cpu().numpy())]
        # Ascending (1 - score) puts the highest-scored item first; sorting
        # yields the same order as pushing onto a heap and popping it all.
        scores = sorted(entries)

        # progress report
        pbar.update(1)
        # NOTE(review): the key is a 0-d tensor, which hashes by identity, so
        # scoreDict[some_int] will not find it. Left unchanged because the
        # consumers in analyse/evaluation are not visible here -- consider
        # int(users[0]) once they are confirmed to accept plain ints.
        scoreDict[users[0]] = (scores, items.shape[0])

# save validation result
analyse.ranking_analysis(scoreDict, 100)

	Precision@: {1:0.00364108144304; 5: 0.00429396501214; 10: 0.00443626014899}
	Recall@: {1:0.00246324374453; 5: 0.0149880222879; 10: 0.0312779252044}
	NDCG@: {1:0.00364108144304; 5: 0.0107420319783; 10: 0.0163403002463}


{'avg_ndcg': array([ 0.00364108,  0.00615217,  0.00774492,  0.00930256,  0.01074203,
         0.01205546,  0.01321699,  0.01430996,  0.01539065,  0.0163403 ,
         0.01732194,  0.01854086,  0.01957377,  0.02049086,  0.02145732,
         0.02255016,  0.02369756,  0.02462053,  0.02559351,  0.02655857,
         0.02745765,  0.02830011,  0.02926516,  0.03029609,  0.03111665,
         0.03223294,  0.03322141,  0.03409447,  0.03490758,  0.03577644,
         0.03668003,  0.0375892 ,  0.03856583,  0.03938466,  0.0402672 ,
         0.04110878,  0.04204995,  0.04298151,  0.04390029,  0.04479513,
         0.04572542,  0.04671051,  0.04774645,  0.04858103,  0.04945726,
         0.05038287,  0.05137249,  0.05235649,  0.05349036,  0.0544058 ,
         0.05529926,  0.05632381,  0.05729758,  0.05830593,  0.0593414 ,
         0.06026597,  0.06135209,  0.06244597,  0.06342913,  0.06447865,
         0.06549584,  0.06651716,  0.06750972,  0.0684365 ,  0.06945526,
         0.0705972 ,  0.07166506,  0.07

In [37]:
# Rank each test user's positive items against that user's negative items,
# then pass the per-user rankings to evaluation.ranking_performance.
L = len(testLoader.dataset)
model.eval()
scoreDict = dict()
# The context manager closes the progress bar even if a batch raises.
with torch.no_grad(), tqdm(total = L) as pbar:
    for i, batchData in enumerate(testLoader):
        # testLoader uses batch_size = 1, so every batch is a single user.
        user = torch.LongTensor(batchData['user']).to(model.device)
        posItems = torch.LongTensor(batchData['posItem']).to(model.device)
        negItems = torch.LongTensor(batchData['negItem']).to(model.device)
        # Positives come first, so candidate j is a positive iff j < nPos.
        items = torch.cat((posItems, negItems),1).view(-1)
        users = user.expand(items.shape[0])
        out = model(users, items)

        # Move everything to host memory once instead of once per candidate.
        # out is (N, 1), so each costs[j] is a 1-element array, exactly what
        # the per-element conversion produced before.
        costs = 1 - out.cpu().numpy()
        itemIds = items.cpu().numpy()
        nPos = posItems.shape[1]
        # Ascending (1 - score) puts the highest-scored item first; sorting
        # yields the same order as pushing onto a heap and popping it all.
        # (A disabled variant pushed items priced above a budget to the
        # bottom of the ranking with a fixed cost of 100.)
        scores = sorted((costs[j], (itemIds[j], j < nPos))
                        for j in range(itemIds.shape[0]))

        pbar.update(1)
        # NOTE(review): the key is a 0-d tensor, which hashes by identity;
        # left unchanged because evaluation.ranking_performance is not
        # visible here.
        scoreDict[user[0]] = (scores, nPos)
testResult = evaluation.ranking_performance(scoreDict,10)

	Precision@: {1:0.00397589352976; 5: 0.00424374319913; 10: 0.00458274043693}
	Recall@: {1:0.00302533278594; 5: 0.0150665711985; 10: 0.0321772308109}
	NDCG@: {1:0.00397589352976; 5: 0.0110482850398; 10: 0.0169196694632}


In [None]:
# Print the first ten values stored in scoreDict (each is a
# (sorted score list, number of positives) tuple), then stop.
i = 0
for k,v in scoreDict.items():
    print (v)
    i += 1
    if i == 10:
        break

In [17]:
# Two small 3x5 integer tensors (three identical rows each) used by the
# scratch cells below to check element-wise products and broadcasting.
b = torch.LongTensor([[1, 10, 1100, 23000, 20000]] * 3)
a = torch.LongTensor([[20000, 9999, 776, 28, 1000]] * 3)

In [18]:
# Sanity check of the expression MF.forward uses for the interaction term:
# element-wise product, sum over dim 1, reshaped to a column vector.
print(torch.mul(a, b).sum(1).view(-1,1))

tensor([[21617590],
        [21617590],
        [21617590]])


In [19]:
# Confirm the scratch tensor is 3 x 5.
b.shape

torch.Size([3, 5])

In [20]:
# The same 15 values seen as a column (15, 1) and as a flat vector (15,).
c = b.view(-1,1)
d = b.view(-1)

In [22]:
# (15, 1) combined with (15,) broadcasts to (15, 15) instead of operating
# element-wise -- mixing a column with a flat vector is a silent shape bug.
torch.mul(c,d).shape

torch.Size([15, 15])

In [23]:
# The * operator broadcasts the same way as torch.mul.
(c*d).shape

torch.Size([15, 15])

In [26]:
# Addition broadcasts the same way, so adding a (N, 1) bias to a flat (N,)
# vector would also yield (N, N).
(c+d).shape

torch.Size([15, 15])