In [1]:
import numpy as np

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

%load_ext autoreload
%autoreload 2
import my_baselines
import utils

In [2]:
with open('train.json') as f:
    data = f.readlines()

import ast
data = [ast.literal_eval(x) for x in data]

In [3]:
data[0]

{'categories': [['Clothing, Shoes & Jewelry', 'Women'],
  ['Clothing, Shoes & Jewelry',
   'Novelty, Costumes & More',
   'Novelty',
   'Clothing',
   'Women',
   'Leggings']],
 'categoryID': 0,
 'helpful': {'nHelpful': 0, 'outOf': 0},
 'itemID': 'I402344648',
 'rating': 4.0,
 'reviewHash': 'R798569390',
 'reviewText': "The model in this picture has them rolled up at the top because they are actually very high waisted! that's my only complaint though, because they are very good quality, and fit really well! I am 5'2&#34; 120lbs with thick thighs and i love them i can't wait to wear them out!",
 'reviewTime': '09 26, 2013',
 'reviewerID': 'U490934656',
 'summary': 'High Waisted',
 'unixReviewTime': 1380153600}

In [4]:
item_popularity = {}
user_buy_count = {}
item2cat = {}

alpha = 0.
beta_user = {}
beta_item = {}
beta_category = {}

for d in data:
    reviewer = d['reviewerID']
    item = d['itemID']
    rating = d['rating']
    cat = d['categoryID']
    
    item2cat[item] = cat
    
    p = item_popularity.get(item, 0)
    item_popularity[item] = p + 1
    
    uc = user_buy_count.get(reviewer, 0)
    user_buy_count[reviewer] = uc + 1
    
    alpha += rating
    
    bu = beta_user.get(reviewer, (0., 0))
    bu = (bu[0]+rating, bu[1]+1)
    beta_user[reviewer] = bu
    
    bi = beta_item.get(item, (0., 0))
    bi = (bi[0]+rating, bi[1]+1)
    beta_item[item] = bi
    
    bc = beta_category.get(cat, (0., 0))
    bc = (bc[0]+rating, bc[1]+1)
    beta_category[cat] = bc

item_rank = sorted(item_popularity.items(), key=lambda x: x[1])
total_items = len(list(item_rank))
item_rank = {x[0]:rank/total_items for rank, x in enumerate(item_rank)}

user_rank = sorted(user_buy_count.items(), key=lambda x: x[1])
total_users = len(list(user_rank))
user_rank = {x[0]:rank/total_users for rank, x in enumerate(user_rank)}

alpha /= len(data)
beta_user = {r:(b[0]/b[1]) for r,b in beta_user.items()}
beta_item = {r:(b[0]/b[1]) for r,b in beta_item.items()}
beta_category = {r:(b[0]/b[1]) for r,b in beta_category.items()}

In [5]:
def getFeature(u, i):
    x = [1.]
    
    try:
        c = item2cat[i]
        x.append(item_rank[i]) # Item popularity rank
        x.append(beta_item[i]) # Item average rating
        x.append(beta_category[c]) # User rating on this category
    except:
        x.append(0.5)
        x.append(alpha)
        x.append(alpha)
        
    try:
        x.append(0.5) # User buy rank
        x.append(beta_user[u]) # User rating on this item
    except:
        x.append(total_users//2)
        x.append(alpha)
    
    return x

def getFeatures(X):
    return np.array([getFeature(x[0], x[1]) for x in X])

def create_batch(X, Y, batch_size):
    m = X.shape[0]
    n_batch = int(m / batch_size)

    X_batches = []
    Y_batches = []

    permutation = np.random.permutation(m)
    X_shuffle = X[permutation, :]
    Y_shuffle = Y[permutation, :]

    for i in range(n_batch):
        X_batch = X_shuffle[i * batch_size: (i+1) * batch_size, :]
        Y_batch = Y_shuffle[i * batch_size: (i+1) * batch_size, :]
        X_batches.append(X_batch)
        Y_batches.append(Y_batch)

    if m % n_batch != 0:
        X_batch = X_shuffle[n_batch * batch_size:, :]
        Y_batch = Y_shuffle[n_batch * batch_size:, :]
        X_batches.append(X_batch)
        Y_batches.append(Y_batch)
        n_batch += 1

    return X_batches, Y_batches, n_batch

In [6]:
trainX, trainY, valX, valY = utils.create_purchase_dataset(data)

In [7]:
trainX = getFeatures(trainX)
trainY = np.array(trainY).reshape(-1, 1)
valX = getFeatures(valX)
valY = np.array(valY).reshape(-1, 1)
trainX.shape, trainY.shape, valX.shape, valY.shape

((320000, 6), (320000, 1), (80000, 6), (80000, 1))

In [8]:
class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(6, 6)
        self.fc2 = nn.Linear(6, 20)
        self.fc3 = nn.Linear(20, 1)
        self.dropout = nn.Dropout(0.2)
        self.act = nn.Sigmoid()
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.act(x)
        return x

In [9]:
net = Net().to(device)

In [10]:
# criterion = nn.BCEWithLogitsLoss()
# optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5)
optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.9, 0.999))
criterion = nn.BCELoss()

In [11]:
def targetTensor(target):
    return torch.tensor(target, dtype=torch.float, device=device).view(-1, 1)

def inputTensor(x):
    return torch.tensor(x, dtype=torch.float, device=device)

In [12]:
def train(x, y):
    optimizer.zero_grad()
    output = net(x)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()
    return loss.item()

def trainIter(X, Y, n_iter, batch_size=16):
    for it in range(n_iter):
        X_batches, Y_batches, n_batch = create_batch(X, Y, batch_size)

        loss = 0.
        for i in range(n_batch):
            x = inputTensor(X_batches[i])
            y = targetTensor(Y_batches[i])
            loss += train(x, y)

        loss /= X.shape[0]
        print('#%d iters, loss:%f' % (it, loss))

In [13]:
trainIter(trainX, trainY, 2)

#0 iters, loss:0.043284
#1 iters, loss:0.043222


In [20]:
valX[0]

array([1.        , 0.31972482, 3.4       , 4.18738596, 0.5       ,
       4.        ])

In [26]:
with torch.no_grad():
    
#     x = inputTensor(np.expand_dims(valX[0], axis=0))
    x = inputTensor(trainX)
    output = net(x)
    print(output)

tensor([[0.5130],
        [0.4652],
        [0.5130],
        ...,
        [0.4231],
        [0.5130],
        [0.5130]], device='cuda:0')


In [27]:
x

tensor([[1.0000, 0.9356, 4.5833, 4.1874, 0.5000, 5.0000],
        [1.0000, 0.4709, 4.6667, 4.1874, 0.5000, 4.2000],
        [1.0000, 0.5393, 4.2857, 4.3452, 0.5000, 4.5000],
        ...,
        [1.0000, 0.9661, 4.5882, 4.3425, 0.5000, 3.4000],
        [1.0000, 0.9247, 3.9545, 4.1874, 0.5000, 4.3333],
        [1.0000, 0.6608, 4.4444, 4.1874, 0.5000, 4.6000]], device='cuda:0')

In [28]:
pred = output.cpu().numpy() >= 0

In [29]:
np.sum(pred == trainY) / len(trainY)

0.500740625

In [25]:
for name, param in net.named_parameters():
    if param.requires_grad:
        print (name, param.data)

fc1.weight tensor([[-0.3143,  0.3624, -0.1684, -0.0791, -0.1149, -0.1217],
        [-0.0982, -0.2248,  0.0130, -0.1965,  0.1113,  0.0917],
        [-0.2405,  0.0016, -0.3753, -0.0447,  0.3163,  0.4471],
        [ 0.3351, -0.1912, -0.0877, -0.0853, -0.1396, -0.3205],
        [-0.4717,  0.0196, -0.1316,  0.1604, -0.3708, -0.1256],
        [-0.2248, -0.1046,  0.5353,  0.0379, -0.1429, -0.4450]],
       device='cuda:0')
fc1.bias tensor([ 0.0269,  0.3400, -0.3544,  0.3181,  0.0734, -0.2188], device='cuda:0')
fc2.weight tensor([[-0.1412,  0.0296, -0.5414, -0.1227,  0.1369, -1.0179],
        [ 0.2476, -0.1059,  1.1096, -0.0697,  0.0423,  0.1180],
        [-0.1072, -0.2815, -0.0197, -0.1402, -0.1205, -0.3572],
        [ 0.1389, -0.2253, -0.4513, -0.3101,  0.2413, -0.3526],
        [ 0.2228, -0.1143, -0.5621, -0.3354, -0.3076, -0.3919],
        [ 0.2517, -0.1053, -0.2468, -0.0490, -0.0547, -0.0905]],
       device='cuda:0')
fc2.bias tensor([ 0.4461,  0.4104, -0.0471,  0.2917,  0.3641, -0.2492],