In [1]:
from collections import defaultdict
import csv
import scipy
import scipy.optimize
import random
import numpy as np
import time

import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable

import pandas as pd 
from IPython.display import display

### Experiment 1 - Alpha-only

In [2]:
class Alpha_Only(nn.Module):
    
    def __init__(self, mean=0):
        super(Alpha_Only, self).__init__()
        
        # alpha only
        self.bias = nn.Parameter(torch.ones(1))
    
    def forward(self, train_x):
        item_id = train_x[:, 0]
        user_id = train_x[:, 1]
        
        prediction = (self.bias)
        
        return prediction
    
    def loss(self, prediction, target):
        return F.mse_loss(prediction, target.squeeze())

### Experiment 2 - Alpha_Theta

In [22]:
class Alpha_Theta(nn.Module):
    
    def __init__(self, mean=0):
        super(Alpha_Theta, self).__init__()
        
        # alpha + theta only
        self.theta = nn.Parameter(torch.ones(1))
        self.bias = nn.Parameter(torch.ones(1))
    
    def forward(self, train_x):
        freq = train_x[:, 2].float()
        
        prediction = (self.bias)+self.theta*freq
        
        return prediction
    
    def loss(self, prediction, target):
        return F.mse_loss(prediction, target.squeeze())

### Experiment 3 - Alpha_Theta_MF

In [28]:
class MF_theta(nn.Module):
    
    def __init__(self, n_user, n_item, k=1):
        super(MF_theta, self).__init__()
        self.k = k
        self.n_user = n_user
        self.n_item = n_item
        
        # gammas (users and items)
        self.user = nn.Embedding(n_user, k)
        self.item = nn.Embedding(n_item, k)
        
        # alpha and betas (users and items)
        self.bias_user = nn.Embedding(n_user, 1)
        self.bias_item = nn.Embedding(n_item, 1)
        
        self.theta = nn.Parameter(torch.ones(1))
        self.bias = nn.Parameter(torch.ones(1))
    
    def forward(self, train_x):
        item_id = train_x[:, 0]
        user_id = train_x[:, 1]
        freq = train_x[:, 2].float()
        vector_user = self.user(user_id)
        vector_item = self.item(item_id)
        
        # Pull out biases
        bias_user = self.bias_user(user_id).squeeze()
        bias_item = self.bias_item(item_id).squeeze()
        biases = (self.bias + self.theta*freq + bias_user + bias_item)
        
        ui_interaction = torch.sum(vector_user * vector_item, dim=1)
        
        # Add bias prediction to the interaction prediction
        prediction = ui_interaction + biases
        return prediction
    
    def loss(self, prediction, target):
        return F.mse_loss(prediction, target.squeeze())

In [4]:
data = pd.read_csv("../datasets/google_local/reviews_freq.csv")
display(data.info())
display(data.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11453845 entries, 0 to 11453844
Data columns (total 5 columns):
gPlusPlaceId      int64
gPlusUserId       int64
rating            float64
unixReviewTime    object
num_reviews       int64
dtypes: float64(1), int64(3), object(1)
memory usage: 436.9+ MB


None

Unnamed: 0,gPlusPlaceId,gPlusUserId,rating,unixReviewTime,num_reviews
0,1368311,0,3.0,1372686659,3
1,370282,1,5.0,1342870724,3
2,237940,2,5.0,1390653513,1
3,249417,2,5.0,1389187706,2
4,1181533,2,4.0,1390486279,1


In [10]:
n_user = len(data['gPlusUserId'].unique())
n_place = len(data['gPlusPlaceId'].unique())

print(n_user,n_place)

5054567 3116785


In [6]:
# Shuffle the data
shuffled_data = data.sample(frac=1).reset_index(drop=True)
shuffled_data.head()

N = shuffled_data.index.size

train_split = int(N * 0.70)
valid_split =  int(N * 0.85)

train_x = shuffled_data.loc[:train_split, ['gPlusPlaceId','gPlusUserId','num_reviews']]
train_y = shuffled_data.loc[:train_split, 'rating':'rating']
valid_x = shuffled_data.loc[train_split+1:valid_split, ['gPlusPlaceId','gPlusUserId','num_reviews']]
valid_y = shuffled_data.loc[train_split+1:valid_split, 'rating':'rating']
test_x = shuffled_data.loc[valid_split+1:, ['gPlusPlaceId','gPlusUserId','num_reviews']]
test_y = shuffled_data.loc[valid_split+1:, 'rating':'rating']

print(N, train_x.index.size, valid_x.index.size,test_x.index.size)

11453845 8017692 1718077 1718076


In [7]:
train_x[:10]

Unnamed: 0,gPlusPlaceId,gPlusUserId,num_reviews
0,1362493,1883179,44
1,1244850,3217020,45
2,771251,1608666,12
3,1830167,276218,22
4,1222900,848399,4
5,1098674,660751,12
6,1769168,2721991,28
7,1175141,4882577,17
8,249338,540961,21
9,1860408,1892466,10


In [29]:
# Hyperparameters
lr = 1e-2
lamb = 1e-6
k=1
batch_size = 1024

cuda = torch.cuda.is_available()
print (cuda)

True


In [31]:
from random import shuffle


# This code utilizes ignite engine's create_supervised_trainer()
# But we need something more basic

# model = MF(n_user, n_item, k=k)
# model = Bias_Only(n_user, n_item)

# Experiment 1 - model = Alpha_Only()
# Experiment 2 - model = Alpha_Theta()

model = MF_theta(n_user, n_place, k=k)

if cuda:
    model.cuda()
    
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=lamb)

def chunks(X, Y, size):
    """Yield successive n-sized chunks from l."""
    starts = list(range(0, len(X), size))
    shuffle(starts)
    for i in starts:
        yield (X[i:i + size], Y[i:i + size])
        
# To keep track to best hyperparameters and results
best_loss = 0
best = []

losses = []
valid_losses = []

for epoch in range(10+1):
    
    i = 0
    for feature, target in chunks(np.array(train_x), np.array(train_y), batch_size):
        # This zeros the gradients on every parameter. 
        # This is easy to miss and hard to troubleshoot.
        optimizer.zero_grad()
        # Convert 
        feature = Variable(torch.from_numpy(feature))
        target = Variable(torch.from_numpy(target).type(torch.FloatTensor))
        
        if cuda:
            feature = feature.cuda()
            target = target.cuda()
            
        # model in training mode    
        model.train()
            
        # Compute a prediction for these features
        prediction = model.forward(feature)
        # Compute a loss given what the true target outcome was
        loss = model.loss(prediction, target)
        # break
        # Backpropagate: compute the direction / gradient every model parameter
        # defined in your __init__ should move in in order to minimize this loss
        # However, we're not actually changing these parameters, we're just storing
        # how they should change.

        loss.backward()
        # Now take a step & update the model parameters. The optimizer uses the gradient at 
        # defined on every parameter in our model and nudges it in that direction.
        optimizer.step()
        
        if i%1000 == 0 and epoch%1 == 0:
            print("Epoch[{}] Iteration[{}] Training Loss: {:.2f}".format(epoch, i, loss.data))

        # Record the loss per example
        losses.append(loss.cpu().data.numpy() / len(feature))
        
        if i%1000 == 0 and epoch%1 == 0:
            
            val_feature = torch.from_numpy(np.array(valid_x))
            val_target = torch.from_numpy(np.array(valid_y)).type(torch.FloatTensor)
            
            if cuda:
                val_feature = val_feature.cuda()
                val_target = val_target.cuda()
                
            # model in test mode    
            model.eval()

            val_pred = model.forward(val_feature)
            vloss = model.loss(val_pred, val_target)
            print("Epoch[{}] Validation Loss: {:.3f} ".format(epoch, vloss.data))
            
            # Record the validation loss per example
            valid_losses.append(vloss.cpu().data.numpy()/len(val_feature))
            
            if best_loss is 0:
                best_loss = vloss
                best = [vloss,lr,lamb]
                print("Save best theta...")
            else:
                if vloss < best_loss:
                    best_loss = vloss
                    best = [vloss,lr,lamb]
                    print("Save best theta...")
        i += 1

Epoch[0] Iteration[0] Training Loss: 2038.10
Epoch[0] Validation Loss: 2207.594 
Save best theta...
Epoch[0] Iteration[1000] Training Loss: 1.77
Epoch[0] Validation Loss: 1.703 
Save best theta...
Epoch[0] Iteration[2000] Training Loss: 1.42
Epoch[0] Validation Loss: 1.435 
Save best theta...
Epoch[0] Iteration[3000] Training Loss: 1.39
Epoch[0] Validation Loss: 1.364 
Save best theta...
Epoch[0] Iteration[4000] Training Loss: 1.25
Epoch[0] Validation Loss: 1.316 
Save best theta...
Epoch[0] Iteration[5000] Training Loss: 1.28
Epoch[0] Validation Loss: 1.282 
Save best theta...
Epoch[0] Iteration[6000] Training Loss: 1.22
Epoch[0] Validation Loss: 1.261 
Save best theta...
Epoch[0] Iteration[7000] Training Loss: 1.23
Epoch[0] Validation Loss: 1.250 
Save best theta...
Epoch[1] Iteration[0] Training Loss: 0.94
Epoch[1] Validation Loss: 1.254 
Epoch[1] Iteration[1000] Training Loss: 0.93
Epoch[1] Validation Loss: 1.254 
Epoch[1] Iteration[2000] Training Loss: 1.18
Epoch[1] Validation Los

In [14]:
# Alpha-only model --> alpha = mean

best

[tensor(1.4290, device='cuda:0', grad_fn=<MseLossBackward>), 0.01, 1e-06]

In [15]:
model.bias.data

tensor([4.0432], device='cuda:0')

In [27]:
# Alpha-Theta model --> alpha ~ mean, popularity has slight uplift to rating

best

[tensor(1.4249, device='cuda:0', grad_fn=<MseLossBackward>), 0.01, 1e-06]

In [26]:
print(model.bias.data)
print(model.theta.data)

tensor([4.0180], device='cuda:0')
tensor([0.0014], device='cuda:0')


In [32]:
# MF-Theta model --> Does not outperform MF(k=1) model

best

[tensor(1.2371, device='cuda:0', grad_fn=<MseLossBackward>), 0.01, 1e-06]

In [33]:
print(model.bias.data)
print(model.theta.data)

tensor([3.9391], device='cuda:0')
tensor([0.0001], device='cuda:0')
