In [1]:
from collections import defaultdict
import csv
import scipy
import scipy.optimize
import random
import numpy as np
import time

import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable

import pandas as pd 
from IPython.display import display

In [2]:
class Bias_Only(nn.Module):
    # itr = 0
    
    def __init__(self, n_user, n_item, mean=0):
        super(Bias_Only, self).__init__()

        self.n_user = n_user
        self.n_item = n_item
        
        # alpha and betas (users and items)
        self.bias_user = nn.Embedding(n_user, 1)
        self.bias_item = nn.Embedding(n_item, 1)
        self.bias = nn.Parameter(torch.ones(1))
        
    
    def forward(self, train_x):
        item_id = train_x[:, 0]
        user_id = train_x[:, 1]
        
        # Pull out biases
        bias_user = self.bias_user(user_id).squeeze()
        bias_item = self.bias_item(item_id).squeeze()
        prediction = (self.bias + bias_user + bias_item)
        
        return prediction
    
    def loss(self, prediction, target):
        loss_mse = F.mse_loss(prediction, target.squeeze())
        
        return loss_mse

In [11]:
class MF(nn.Module):
    itr = 0
    
    def __init__(self, n_user, n_item, k=1, c_vector=1.0, c_bias=1.0, writer=None):
        super(MF, self).__init__()
        self.writer = writer
        self.k = k
        self.n_user = n_user
        self.n_item = n_item
        self.c_bias = c_bias
        self.c_vector = c_vector
        
        # gammas (users and items)
        self.user = nn.Embedding(n_user, k)
        self.item = nn.Embedding(n_item, k)
        
        # alpha and betas (users and items)
        self.bias_user = nn.Embedding(n_user, 1)
        self.bias_item = nn.Embedding(n_item, 1)
        self.bias = nn.Parameter(torch.ones(1))
    
    def forward(self, train_x):
        item_id = train_x[:, 0]
        user_id = train_x[:, 1]
        vector_user = self.user(user_id)
        vector_item = self.item(item_id)
        
        # Pull out biases
        bias_user = self.bias_user(user_id).squeeze()
        bias_item = self.bias_item(item_id).squeeze()
        biases = (self.bias + bias_user + bias_item)
        
        ui_interaction = torch.sum(vector_user * vector_item, dim=1)
        
        # Add bias prediction to the interaction prediction
        prediction = ui_interaction + biases
        return prediction
    
    def loss(self, prediction, target):
        loss_mse = F.mse_loss(prediction, target.squeeze())
        
        # BUG - PyTorch optimizer already takes care of regularization!!!
        
        # prior_bias_user =  l2_regularize(self.bias_user.weight) * self.c_bias
        # prior_bias_item = l2_regularize(self.bias_item.weight) * self.c_bias
        # prior_user =  l2_regularize(self.user.weight) * self.c_vector
        # prior_item = l2_regularize(self.item.weight) * self.c_vector
        # total = loss_mse + prior_user + prior_item + prior_bias_user + prior_bias_item
        
        total = loss_mse
        
        for name, var in locals().items():
            if type(var) is torch.Tensor and var.nelement() == 1 and self.writer is not None:
                self.writer.add_scalar(name, var, self.itr)
        return total

## user_id and place_id --> user_idx, place_idx

To perform matrix factorization, we need to convert user_id and place_id into their index in the interaction matrix. This has already been done in the Notebook GoogleLocal_reformat.ipynb

### Reviews based on (user_id, place_id, rating, time)

In [3]:
data = pd.read_csv("../datasets/google_local/reviews.csv")
display(data.info())
display(data.head())

n_user = len(data['gPlusUserId'].unique())
n_place = len(data['gPlusPlaceId'].unique())

print(n_user,n_place)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11453845 entries, 0 to 11453844
Data columns (total 4 columns):
gPlusPlaceId      object
gPlusUserId       object
rating            float64
unixReviewTime    object
dtypes: float64(1), object(3)
memory usage: 349.5+ MB


None

Unnamed: 0,gPlusPlaceId,gPlusUserId,rating,unixReviewTime
0,108103314380004200232,100000010817154263736,3.0,1372686659
1,102194128241608748649,100000013500285534661,5.0,1342870724
2,101409858828175402384,100000021336848867366,5.0,1390653513
3,101477177500158511502,100000021336848867366,5.0,1389187706
4,106994170641063333085,100000021336848867366,4.0,1390486279


5054567 3116785


### Reviews based on (user_idx, place_idx, rating, time)

In [3]:
data = pd.read_csv("../datasets/google_local/reviews_reformatted.csv")
display(data.info())
display(data.head())

n_user = len(data['gPlusUserId'].unique())
n_item = len(data['gPlusPlaceId'].unique())

print(n_user,n_item)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11453845 entries, 0 to 11453844
Data columns (total 4 columns):
gPlusPlaceId      int64
gPlusUserId       int64
rating            float64
unixReviewTime    object
dtypes: float64(1), int64(2), object(1)
memory usage: 349.5+ MB


None

Unnamed: 0,gPlusPlaceId,gPlusUserId,rating,unixReviewTime
0,1368311,0,3.0,1372686659
1,370282,1,5.0,1342870724
2,237940,2,5.0,1390653513
3,249417,2,5.0,1389187706
4,1181533,2,4.0,1390486279


5054567 3116785


In [4]:
def l2_regularize(array):
    loss = torch.sum(array ** 2.0)
    return loss

In [6]:
# Shuffle the data
shuffled_data = data.sample(frac=1).reset_index(drop=True)
shuffled_data.head()

Unnamed: 0,gPlusPlaceId,gPlusUserId,rating,unixReviewTime
0,1790517,2755302,4.0,1360610950
1,2125285,74522,5.0,1389741387
2,1323596,3431767,1.0,1327291970
3,2019579,3822788,5.0,1364904582
4,912466,1995657,4.0,1319107160


### Split into Training, Validation and Test Datasets

In [7]:
N = shuffled_data.index.size

train_split = int(N * 0.70)
valid_split =  int(N * 0.85)

train_x = shuffled_data.loc[:train_split, 'gPlusPlaceId':'gPlusUserId']
train_y = shuffled_data.loc[:train_split, 'rating':'rating']
valid_x = shuffled_data.loc[train_split+1:valid_split, 'gPlusPlaceId':'gPlusUserId']
valid_y = shuffled_data.loc[train_split+1:valid_split, 'rating':'rating']
test_x = shuffled_data.loc[valid_split+1:, 'gPlusPlaceId':'gPlusUserId']
test_y = shuffled_data.loc[valid_split+1:, 'rating':'rating']

display(train_x.describe())
display(train_y.describe())
display(valid_x.describe())
display(valid_y.describe())
display(test_x.describe())
display(test_y.describe())

print(N, train_x.index.size, valid_x.index.size,test_x.index.size)


Unnamed: 0,gPlusPlaceId,gPlusUserId
count,8017692.0,8017692.0
mean,1558347.0,2514258.0
std,899662.6,1457303.0
min,0.0,0.0
25%,779578.8,1250433.0
50%,1558165.0,2509377.0
75%,2336931.0,3773621.0
max,3116784.0,5054566.0


Unnamed: 0,rating
count,8017692.0
mean,4.047441
std,1.195633
min,0.0
25%,3.0
50%,4.0
75%,5.0
max,5.0


Unnamed: 0,gPlusPlaceId,gPlusUserId
count,1718077.0,1718077.0
mean,1558657.0,2512333.0
std,900278.1,1456864.0
min,0.0,2.0
25%,779553.0,1249378.0
50%,1558435.0,2506407.0
75%,2338080.0,3772192.0
max,3116779.0,5054565.0


Unnamed: 0,rating
count,1718077.0
mean,4.046968
std,1.195618
min,0.0
25%,3.0
50%,4.0
75%,5.0
max,5.0


Unnamed: 0,gPlusPlaceId,gPlusUserId
count,1718076.0,1718076.0
mean,1558454.0,2513663.0
std,899895.6,1457995.0
min,1.0,1.0
25%,779764.2,1248296.0
50%,1557790.0,2507972.0
75%,2338689.0,3774438.0
max,3116784.0,5054566.0


Unnamed: 0,rating
count,1718076.0
mean,4.047148
std,1.196265
min,0.0
25%,3.0
50%,4.0
75%,5.0
max,5.0


11453845 8017692 1718077 1718076


In [13]:
# Hyperparameters
lr = 1e-2
lamb = 1e-6
batch_size = 1024

cuda = torch.cuda.is_available()
print (cuda)

True


In [15]:
from random import shuffle


# This code utilizes ignite engine's create_supervised_trainer()
# But we need something more basic

# model = MF(n_user, n_item, k=k)

model = Bias_Only(n_user, n_item)

if cuda:
    model.cuda()
    
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=lamb)

def chunks(X, Y, size):
    """Yield successive n-sized chunks from l."""
    starts = list(range(0, len(X), size))
    shuffle(starts)
    for i in starts:
        yield (X[i:i + size], Y[i:i + size])
        
# To keep track to best hyperparameters and results
best_loss = 0
best = []

losses = []
valid_losses = []

for epoch in range(10+1):
    
    i = 0
    for feature, target in chunks(np.array(train_x), np.array(train_y), batch_size):
        # This zeros the gradients on every parameter. 
        # This is easy to miss and hard to troubleshoot.
        optimizer.zero_grad()
        # Convert 
        feature = Variable(torch.from_numpy(feature))
        target = Variable(torch.from_numpy(target).type(torch.FloatTensor))
        
        if cuda:
            feature = feature.cuda()
            target = target.cuda()
            
        # model in training mode    
        model.train()
            
        # Compute a prediction for these features
        prediction = model.forward(feature)
        # Compute a loss given what the true target outcome was
        loss = model.loss(prediction, target)
        # break
        # Backpropagate: compute the direction / gradient every model parameter
        # defined in your __init__ should move in in order to minimize this loss
        # However, we're not actually changing these parameters, we're just storing
        # how they should change.

        loss.backward()
        # Now take a step & update the model parameters. The optimizer uses the gradient at 
        # defined on every parameter in our model and nudges it in that direction.
        optimizer.step()
        
        if i%1000 == 0 and epoch%1 == 0:
            print("Epoch[{}] Iteration[{}] Training Loss: {:.2f}".format(epoch, i, loss.data))

        # Record the loss per example
        losses.append(loss.cpu().data.numpy() / len(feature))
        
        if i%1000 == 0 and epoch%1 == 0:
            
            val_feature = torch.from_numpy(np.array(valid_x))
            val_target = torch.from_numpy(np.array(valid_y)).type(torch.FloatTensor)
            
            if cuda:
                val_feature = val_feature.cuda()
                val_target = val_target.cuda()
                
            # model in test mode    
            model.eval()

            val_pred = model.forward(val_feature)
            vloss = model.loss(val_pred, val_target)
            print("Epoch[{}] Validation Loss: {:.3f} ".format(epoch, vloss.data))
            
            # Record the validation loss per example
            valid_losses.append(val_loss.cpu().data.numpy()/len(val_feature))
            
            if best_loss is 0:
                best_loss = vloss
                best = [vloss,lr,lamb]
                print("Save best theta...")
            else:
                if vloss < best_loss:
                    best_loss = vloss
                    best = [vloss,lr,lamb]
                    print("Save best theta...")
        i += 1

Epoch[0] Iteration[0] Training Loss: 12.80
Epoch[0] Validation Loss: 12.643 
Save best theta...
Epoch[0] Iteration[1000] Training Loss: 1.60
Epoch[0] Validation Loss: 1.449 
Save best theta...
Epoch[0] Iteration[2000] Training Loss: 1.35
Epoch[0] Validation Loss: 1.360 
Save best theta...
Epoch[0] Iteration[3000] Training Loss: 1.26
Epoch[0] Validation Loss: 1.310 
Save best theta...
Epoch[0] Iteration[4000] Training Loss: 1.24
Epoch[0] Validation Loss: 1.280 
Save best theta...
Epoch[0] Iteration[5000] Training Loss: 1.23
Epoch[0] Validation Loss: 1.262 
Save best theta...
Epoch[0] Iteration[6000] Training Loss: 1.19
Epoch[0] Validation Loss: 1.252 
Save best theta...
Epoch[0] Iteration[7000] Training Loss: 1.35
Epoch[0] Validation Loss: 1.246 
Save best theta...
Epoch[1] Iteration[0] Training Loss: 0.54
Epoch[1] Validation Loss: 1.243 
Save best theta...
Epoch[1] Iteration[1000] Training Loss: 1.07
Epoch[1] Validation Loss: 1.252 
Epoch[1] Iteration[2000] Training Loss: 1.26
Epoch[1]

In [16]:
# Bias-Only works quite well.

best

[tensor(1.2374, device='cuda:0', grad_fn=<MseLossBackward>), 0.01, 1e-06]

In [12]:
from random import shuffle

cuda = torch.cuda.is_available()

# Hyperparameters
lr = 1e-2
lamb = 1e-6
batch_size = 1024

# To keep track to best hyperparameters and results
best_loss = 0
best = []

k_values = [1]

for k in k_values:
    
    print("k = {}".format(k))

    model = MF(n_user, n_item, k=k)
    if cuda:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=lamb)

    def chunks(X, Y, size):
        """Yield successive n-sized chunks from l."""
        starts = list(range(0, len(X), size))
        shuffle(starts)
        for i in starts:
            yield (X[i:i + size], Y[i:i + size])


    losses = []
    valid_losses = []

    for epoch in range(5+1):

        i = 0
        for feature, target in chunks(np.array(train_x), np.array(train_y), batch_size):
            # This zeros the gradients on every parameter. 
            # This is easy to miss and hard to troubleshoot.
            optimizer.zero_grad()
            # Convert 
            feature = Variable(torch.from_numpy(feature))
            target = Variable(torch.from_numpy(target).type(torch.FloatTensor))

            if cuda:
                feature = feature.cuda()
                target = target.cuda()

            # model in training mode    
            model.train()

            # Compute a prediction for these features
            prediction = model.forward(feature)
            # Compute a loss given what the true target outcome was
            loss = model.loss(prediction, target)
            # break
            # Backpropagate: compute the direction / gradient every model parameter
            # defined in your __init__ should move in in order to minimize this loss
            # However, we're not actually changing these parameters, we're just storing
            # how they should change.

            loss.backward()
            # Now take a step & update the model parameters. The optimizer uses the gradient at 
            # defined on every parameter in our model and nudges it in that direction.
            optimizer.step()

            if i%1000 == 0 and epoch%1 == 0:
                print("Epoch[{}] Iteration[{}] Training Loss: {:.2f}".format(epoch, i, loss.data))

            # Record the loss per example
            losses.append(loss.cpu().data.numpy() / len(feature))

            if i%1000 == 0 and epoch%1 == 0:

                val_feature = torch.from_numpy(np.array(valid_x))
                val_target = torch.from_numpy(np.array(valid_y)).type(torch.FloatTensor)

                if cuda:
                    val_feature = val_feature.cuda()
                    val_target = val_target.cuda()

                # model in test mode    
                model.eval()

                val_pred = model.forward(val_feature)
                val_loss = model.loss(val_pred, val_target)
                print("Epoch[{}] Validation Loss: {:.3f} ".format(epoch, val_loss.data))
                
                vloss = float(val_loss.cpu().data.numpy())
                
                # Record the validation loss per example
                valid_losses.append(vloss/len(val_feature))
                
                if best_loss is 0:
                    best_loss = vloss
                    best = [vloss,lr,lamb,k]
                    print("Save best theta...")
                else:
                    if vloss < best_loss:
                        best_loss = vloss
                        best = [vloss,lr,lamb,k]
                        print("Save best theta...")

            i += 1

k = 1
Epoch[0] Iteration[0] Training Loss: 14.70
Epoch[0] Validation Loss: 13.599 
Save best theta...
Epoch[0] Iteration[1000] Training Loss: 1.41
Epoch[0] Validation Loss: 1.451 
Save best theta...
Epoch[0] Iteration[2000] Training Loss: 1.34
Epoch[0] Validation Loss: 1.361 
Save best theta...
Epoch[0] Iteration[3000] Training Loss: 1.34
Epoch[0] Validation Loss: 1.310 
Save best theta...
Epoch[0] Iteration[4000] Training Loss: 1.28
Epoch[0] Validation Loss: 1.281 
Save best theta...
Epoch[0] Iteration[5000] Training Loss: 1.18
Epoch[0] Validation Loss: 1.264 
Save best theta...
Epoch[0] Iteration[6000] Training Loss: 1.35
Epoch[0] Validation Loss: 1.252 
Save best theta...
Epoch[0] Iteration[7000] Training Loss: 1.23
Epoch[0] Validation Loss: 1.246 
Save best theta...
Epoch[1] Iteration[0] Training Loss: 0.55
Epoch[1] Validation Loss: 1.242 
Save best theta...
Epoch[1] Iteration[1000] Training Loss: 1.07
Epoch[1] Validation Loss: 1.252 
Epoch[1] Iteration[2000] Training Loss: 1.26
Ep

### Hyperparameters

learning rate = 0.01  
Lambda (regularaization) = 1e-6  
K = 1  

In [33]:
# Optimizing over lr - 0.01 is good enough; k=3

best

[1.331364393234253, 0.01, 1e-05]

In [37]:
# Optimizing over lambda - 1e-6 is good enough; k=3

best

[1.2373532056808472, 0.01, 1e-06]

In [41]:
# Optimizing over K - 1 is good enough

best

[1.23684823513031, 0.01, 1e-06, 1]