In [1]:
from collections import defaultdict
import csv
import scipy
import scipy.optimize
import random
import numpy as np
import time

import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable

import pandas as pd 
from IPython.display import display

In [2]:
class MF(nn.Module):
    
    def __init__(self, n_user, n_item, k=1):
        super(MF, self).__init__()
        self.k = k
        self.n_user = n_user
        self.n_item = n_item
        
        # gammas (users and items)
        self.user = nn.Embedding(n_user, k)
        self.item = nn.Embedding(n_item, k)
        
        # alpha and betas (users and items)
        self.bias_user = nn.Embedding(n_user, 1)
        self.bias_item = nn.Embedding(n_item, 1)
        
        # self.theta = nn.Parameter(torch.ones(1))
        self.bias = nn.Parameter(torch.ones(1))
    
    def forward(self, train_x):
        item_id = train_x[:, 0]
        user_id = train_x[:, 1]
        # freq = train_x[:, 2].float()
        vector_user = self.user(user_id)
        vector_item = self.item(item_id)
        
        # Pull out biases
        bias_user = self.bias_user(user_id).squeeze()
        bias_item = self.bias_item(item_id).squeeze()
        
        # biases = (self.bias + self.theta*freq + bias_user + bias_item)
        biases = (self.bias + bias_user + bias_item)
        
        ui_interaction = torch.sum(vector_user * vector_item, dim=1)
        
        # Add bias prediction to the interaction prediction
        prediction = ui_interaction + biases
        return prediction
    
    def loss(self, prediction, target):
        return F.mse_loss(prediction, target.squeeze())

In [3]:
data = pd.read_csv("../datasets/google_local/reviews_timesorted.csv")
display(data.info())
display(data.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10601852 entries, 0 to 10601851
Data columns (total 8 columns):
gPlusPlaceId      int64
gPlusUserId       int64
rating            int64
unixReviewTime    int64
num_reviews       int64
year              int64
month             int64
day               int64
dtypes: int64(8)
memory usage: 647.1 MB


None

Unnamed: 0,gPlusPlaceId,gPlusUserId,rating,unixReviewTime,num_reviews,year,month,day
0,804813,2021440,4,662601600,9,1990,12,31
1,1918972,389663,4,662601600,20,1990,12,31
2,449452,2709545,4,662601600,15,1990,12,31
3,942354,4936600,4,662601600,293,1990,12,31
4,3063673,828378,5,662601600,11,1990,12,31


In [4]:
original_data = pd.read_csv("../datasets/google_local/reviews_freq.csv")

n_user = len(original_data['gPlusUserId'].unique())
n_place = len(original_data['gPlusPlaceId'].unique())

print(n_user,n_place)

5054567 3116785


In [5]:
N = data.index.size

# Note that the reviews are sorted by time
train_split = int(N * 0.70)   
valid_split =  int(N * 0.85)

train_x = data.loc[:train_split, ['gPlusPlaceId','gPlusUserId','num_reviews', \
                                  'unixReviewTime','year','month','day']]
train_y = data.loc[:train_split, 'rating':'rating']
valid_x = data.loc[train_split+1:valid_split, ['gPlusPlaceId','gPlusUserId','num_reviews', \
                                  'unixReviewTime','year','month','day']]
valid_y = data.loc[train_split+1:valid_split, 'rating':'rating']
test_x = data.loc[valid_split+1:, ['gPlusPlaceId','gPlusUserId','num_reviews', \
                                  'unixReviewTime','year','month','day']]
test_y = data.loc[valid_split+1:, 'rating':'rating']

print(N, train_x.index.size, valid_x.index.size,test_x.index.size)

10601852 7421297 1590278 1590277


In [6]:
display(train_x[:5])
display(valid_x[:5])
display(test_x[:5])

Unnamed: 0,gPlusPlaceId,gPlusUserId,num_reviews,unixReviewTime,year,month,day
0,804813,2021440,9,662601600,1990,12,31
1,1918972,389663,20,662601600,1990,12,31
2,449452,2709545,15,662601600,1990,12,31
3,942354,4936600,293,662601600,1990,12,31
4,3063673,828378,11,662601600,1990,12,31


Unnamed: 0,gPlusPlaceId,gPlusUserId,num_reviews,unixReviewTime,year,month,day
7421297,2725776,4737472,26,1377798285,2013,8,29
7421298,2604227,43504,50,1377798286,2013,8,29
7421299,1298806,1549363,29,1377798286,2013,8,29
7421300,730982,3720790,6,1377798289,2013,8,29
7421301,2919327,3774840,2,1377798290,2013,8,29


Unnamed: 0,gPlusPlaceId,gPlusUserId,num_reviews,unixReviewTime,year,month,day
9011575,1959671,2938557,1,1387118864,2013,12,15
9011576,2039338,4638537,12,1387118869,2013,12,15
9011577,727670,840634,43,1387118873,2013,12,15
9011578,678877,3725103,6,1387118876,2013,12,15
9011579,2776187,2754988,2,1387118877,2013,12,15


In [7]:
# Hyperparameters
lr = 1e-2
lamb = 1e-6
k=1
batch_size = 1024

cuda = torch.cuda.is_available()
print (cuda)

True


In [25]:
from random import shuffle

# Hyperparameters
lr = 5e-3
k = 1
lamb = 5e-7
batch_size = 1024

# To keep track to best hyperparameters and results
results = {}

"""
split_values = [
    # 0.2 fixed window
    [0.6,0.8], 
    [0.5,0.7], 
    [0.4,0.6], 
    [0.3,0.5], 
    [0.2,0.4], 
    [0.1,0.3], 
    [0,0.2],
    # 0.3 fixed window
    [0.5,0.8], 
    [0.4,0.7], 
    [0.3,0.6], 
    [0.2,0.5], 
    [0.1,0.4], 
    [0,0.3], 
    # 0.4 fixed window
    [0.4,0.8], 
    [0.3,0.7], 
    [0.2,0.6], 
    [0.1,0.5], 
    [0,0.4], 
    # 0.5 fixed window
    [0.3,0.8], 
    [0.2,0.7], 
    [0.1,0.6], 
    [0,0.5], 
    # 0.6 fixed window
    [0.2,0.8], 
    [0.1,0.7], 
    [0,0.6]
]
"""

split_values = [
    # 0.2 fixed window
    [0.6,0.85], 
    [0.5,0.7], 
    [0.4,0.6], 
    [0.3,0.5], 
    [0.2,0.4], 
    [0.1,0.3], 
    [0,0.2],
    # 0.3 fixed window
    [0.5,0.8], 
    [0.4,0.7], 
    [0.3,0.6], 
    [0.2,0.5], 
    [0.1,0.4], 
    [0,0.3], 
    # 0.4 fixed window
    [0.4,0.8], 
    [0.3,0.7], 
    [0.2,0.6], 
    [0.1,0.5], 
    [0,0.4], 
    # 0.5 fixed window
    [0.3,0.8], 
    [0.2,0.7], 
    [0.1,0.6], 
    [0,0.5], 
    # 0.6 fixed window
    [0.2,0.8], 
    [0.1,0.7], 
    [0,0.6]
]

for values in split_values:
    
    # Implement a moving window - test and valid datasets are 10% each
    start = values[0]
    split = values[1]
    
    train_start = int(N * start)
    train_split = int(N * split)   
    valid_split =  int(N * (split+0.1))
    test_split =  int(N * (split+0.2))

    train_x = data.loc[train_start:train_split, ['gPlusPlaceId','gPlusUserId','num_reviews', \
                                      'unixReviewTime','year','month','day']]
    train_y = data.loc[train_start:train_split, 'rating':'rating']
    valid_x = data.loc[train_split+1:valid_split, ['gPlusPlaceId','gPlusUserId','num_reviews', \
                                      'unixReviewTime','year','month','day']]
    valid_y = data.loc[train_split+1:valid_split, 'rating':'rating']
    test_x = data.loc[valid_split+1:test_split, ['gPlusPlaceId','gPlusUserId','num_reviews', \
                                      'unixReviewTime','year','month','day']]
    test_y = data.loc[valid_split+1:test_split, 'rating':'rating']

    print(N, train_x.index.size, valid_x.index.size,test_x.index.size)

    model = MF(n_user, n_place, k=k)

    if cuda:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=lamb)

    def chunks(X, Y, size):
        """Yield successive n-sized chunks from l."""
        starts = list(range(0, len(X), size))
        shuffle(starts)
        for i in starts:
            yield (X[i:i + size], Y[i:i + size])

    # To keep track to best hyperparameters and results
    best_loss = 0

    losses = []
    valid_losses = []

    for epoch in range(10+1):

        i = 0
        for feature, target in chunks(np.array(train_x), np.array(train_y), batch_size):
            # This zeros the gradients on every parameter. 
            # This is easy to miss and hard to troubleshoot.
            optimizer.zero_grad()
            # Convert 
            feature = Variable(torch.from_numpy(feature))
            target = Variable(torch.from_numpy(target).type(torch.FloatTensor))

            if cuda:
                feature = feature.cuda()
                target = target.cuda()

            # model in training mode    
            model.train()

            # Compute a prediction for these features
            prediction = model.forward(feature)
            # Compute a loss given what the true target outcome was
            loss = model.loss(prediction, target)
            # break
            # Backpropagate: compute the direction / gradient every model parameter
            # defined in your __init__ should move in in order to minimize this loss
            # However, we're not actually changing these parameters, we're just storing
            # how they should change.

            loss.backward()
            # Now take a step & update the model parameters. The optimizer uses the gradient at 
            # defined on every parameter in our model and nudges it in that direction.
            optimizer.step()

            if i%1000 == 0 and epoch%1 == 0:
                print("Epoch[{}] Iteration[{}] Training Loss: {:.2f}".format(epoch, i, loss.data))

            # Record the loss per example
            losses.append(loss.cpu().data.numpy() / len(feature))

            if i%1000 == 0 and epoch%1 == 0:

                val_feature = torch.from_numpy(np.array(valid_x))
                val_target = torch.from_numpy(np.array(valid_y)).type(torch.FloatTensor)

                if cuda:
                    val_feature = val_feature.cuda()
                    val_target = val_target.cuda()

                # model in test mode    
                model.eval()

                val_pred = model.forward(val_feature)
                vloss = model.loss(val_pred, val_target)
                print("Epoch[{}] Validation Loss: {:.3f} ".format(epoch, vloss.data))

                # Record the validation loss per example
                valid_losses.append(vloss.cpu().data.numpy()/len(val_feature))

                if best_loss is 0:
                    best_loss = vloss
                    results[(lr,lamb,k,start,split)] = vloss
                    print("Save best theta...")
                else:
                    if vloss < best_loss:
                        best_loss = vloss
                        results[(lr,lamb,k,start,split)] = vloss
                        print("Save best theta...")
            i += 1

10601852 2120371 1060185 1060185
Epoch[0] Iteration[0] Training Loss: 14.38
Epoch[0] Validation Loss: 14.182 
Save best theta...
Epoch[0] Iteration[1000] Training Loss: 2.03
Epoch[0] Validation Loss: 1.926 
Save best theta...
Epoch[0] Iteration[2000] Training Loss: 1.89
Epoch[0] Validation Loss: 1.799 
Save best theta...
Epoch[1] Iteration[0] Training Loss: 1.21
Epoch[1] Validation Loss: 1.795 
Save best theta...
Epoch[1] Iteration[1000] Training Loss: 1.62
Epoch[1] Validation Loss: 1.759 
Save best theta...
Epoch[1] Iteration[2000] Training Loss: 1.23
Epoch[1] Validation Loss: 1.716 
Save best theta...
Epoch[2] Iteration[0] Training Loss: 1.14
Epoch[2] Validation Loss: 1.715 
Save best theta...
Epoch[2] Iteration[1000] Training Loss: 1.09
Epoch[2] Validation Loss: 1.706 
Save best theta...
Epoch[2] Iteration[2000] Training Loss: 0.52
Epoch[2] Validation Loss: 1.680 
Save best theta...
Epoch[3] Iteration[0] Training Loss: 0.68
Epoch[3] Validation Loss: 1.679 
Save best theta...
Epoch[3

Epoch[8] Iteration[2000] Training Loss: 0.40
Epoch[8] Validation Loss: 1.440 
Epoch[9] Iteration[0] Training Loss: 0.11
Epoch[9] Validation Loss: 1.440 
Save best theta...
Epoch[9] Iteration[1000] Training Loss: 0.30
Epoch[9] Validation Loss: 1.442 
Epoch[9] Iteration[2000] Training Loss: 0.37
Epoch[9] Validation Loss: 1.440 
Epoch[10] Iteration[0] Training Loss: 0.14
Epoch[10] Validation Loss: 1.440 
Epoch[10] Iteration[1000] Training Loss: 0.19
Epoch[10] Validation Loss: 1.441 
Epoch[10] Iteration[2000] Training Loss: 0.30
Epoch[10] Validation Loss: 1.440 
10601852 2120372 1060185 1060185
Epoch[0] Iteration[0] Training Loss: 13.24
Epoch[0] Validation Loss: 13.225 
Save best theta...
Epoch[0] Iteration[1000] Training Loss: 1.14
Epoch[0] Validation Loss: 1.247 
Save best theta...
Epoch[0] Iteration[2000] Training Loss: 1.21
Epoch[0] Validation Loss: 1.146 
Save best theta...
Epoch[1] Iteration[0] Training Loss: 0.89
Epoch[1] Validation Loss: 1.142 
Save best theta...
Epoch[1] Iteration

Epoch[7] Iteration[0] Training Loss: 0.14
Epoch[7] Validation Loss: 1.073 
Epoch[7] Iteration[1000] Training Loss: 0.24
Epoch[7] Validation Loss: 1.074 
Epoch[7] Iteration[2000] Training Loss: 0.40
Epoch[7] Validation Loss: 1.075 
Epoch[8] Iteration[0] Training Loss: 0.16
Epoch[8] Validation Loss: 1.075 
Epoch[8] Iteration[1000] Training Loss: 0.14
Epoch[8] Validation Loss: 1.077 
Epoch[8] Iteration[2000] Training Loss: 0.46
Epoch[8] Validation Loss: 1.078 
Epoch[9] Iteration[0] Training Loss: 0.10
Epoch[9] Validation Loss: 1.079 
Epoch[9] Iteration[1000] Training Loss: 0.26
Epoch[9] Validation Loss: 1.079 
Epoch[9] Iteration[2000] Training Loss: 0.25
Epoch[9] Validation Loss: 1.081 
Epoch[10] Iteration[0] Training Loss: 0.10
Epoch[10] Validation Loss: 1.077 
Epoch[10] Iteration[1000] Training Loss: 0.15
Epoch[10] Validation Loss: 1.078 
Epoch[10] Iteration[2000] Training Loss: 0.33
Epoch[10] Validation Loss: 1.084 
10601852 2120371 1060185 1060185
Epoch[0] Iteration[0] Training Loss: 

Epoch[1] Iteration[3000] Training Loss: 1.15
Epoch[1] Validation Loss: 1.606 
Save best theta...
Epoch[2] Iteration[0] Training Loss: 0.51
Epoch[2] Validation Loss: 1.604 
Save best theta...
Epoch[2] Iteration[1000] Training Loss: 0.53
Epoch[2] Validation Loss: 1.602 
Save best theta...
Epoch[2] Iteration[2000] Training Loss: 1.01
Epoch[2] Validation Loss: 1.589 
Save best theta...
Epoch[2] Iteration[3000] Training Loss: 0.79
Epoch[2] Validation Loss: 1.578 
Save best theta...
Epoch[3] Iteration[0] Training Loss: 0.35
Epoch[3] Validation Loss: 1.578 
Save best theta...
Epoch[3] Iteration[1000] Training Loss: 0.46
Epoch[3] Validation Loss: 1.582 
Epoch[3] Iteration[2000] Training Loss: 0.55
Epoch[3] Validation Loss: 1.578 
Epoch[3] Iteration[3000] Training Loss: 0.87
Epoch[3] Validation Loss: 1.568 
Save best theta...
Epoch[4] Iteration[0] Training Loss: 0.30
Epoch[4] Validation Loss: 1.568 
Save best theta...
Epoch[4] Iteration[1000] Training Loss: 0.40
Epoch[4] Validation Loss: 1.576 

Epoch[3] Iteration[1000] Training Loss: 0.47
Epoch[3] Validation Loss: 1.010 
Epoch[3] Iteration[2000] Training Loss: 0.47
Epoch[3] Validation Loss: 1.009 
Epoch[3] Iteration[3000] Training Loss: 0.61
Epoch[3] Validation Loss: 1.006 
Save best theta...
Epoch[4] Iteration[0] Training Loss: 0.40
Epoch[4] Validation Loss: 1.004 
Save best theta...
Epoch[4] Iteration[1000] Training Loss: 0.55
Epoch[4] Validation Loss: 1.007 
Epoch[4] Iteration[2000] Training Loss: 0.48
Epoch[4] Validation Loss: 1.008 
Epoch[4] Iteration[3000] Training Loss: 0.60
Epoch[4] Validation Loss: 1.004 
Epoch[5] Iteration[0] Training Loss: 0.28
Epoch[5] Validation Loss: 1.003 
Save best theta...
Epoch[5] Iteration[1000] Training Loss: 0.37
Epoch[5] Validation Loss: 1.007 
Epoch[5] Iteration[2000] Training Loss: 0.52
Epoch[5] Validation Loss: 1.010 
Epoch[5] Iteration[3000] Training Loss: 0.64
Epoch[5] Validation Loss: 1.004 
Epoch[6] Iteration[0] Training Loss: 0.32
Epoch[6] Validation Loss: 1.006 
Epoch[6] Iterati

Epoch[5] Iteration[2000] Training Loss: 0.62
Epoch[5] Validation Loss: 1.052 
Epoch[5] Iteration[3000] Training Loss: 0.85
Epoch[5] Validation Loss: 1.048 
Epoch[6] Iteration[0] Training Loss: 0.26
Epoch[6] Validation Loss: 1.047 
Epoch[6] Iteration[1000] Training Loss: 0.55
Epoch[6] Validation Loss: 1.052 
Epoch[6] Iteration[2000] Training Loss: 0.83
Epoch[6] Validation Loss: 1.053 
Epoch[6] Iteration[3000] Training Loss: 0.55
Epoch[6] Validation Loss: 1.050 
Epoch[7] Iteration[0] Training Loss: 0.39
Epoch[7] Validation Loss: 1.050 
Epoch[7] Iteration[1000] Training Loss: 0.72
Epoch[7] Validation Loss: 1.055 
Epoch[7] Iteration[2000] Training Loss: 0.70
Epoch[7] Validation Loss: 1.054 
Epoch[7] Iteration[3000] Training Loss: 1.19
Epoch[7] Validation Loss: 1.053 
Epoch[8] Iteration[0] Training Loss: 0.43
Epoch[8] Validation Loss: 1.051 
Epoch[8] Iteration[1000] Training Loss: 0.53
Epoch[8] Validation Loss: 1.055 
Epoch[8] Iteration[2000] Training Loss: 0.57
Epoch[8] Validation Loss: 1.

Epoch[4] Iteration[0] Training Loss: 0.52
Epoch[4] Validation Loss: 1.549 
Save best theta...
Epoch[4] Iteration[1000] Training Loss: 0.52
Epoch[4] Validation Loss: 1.555 
Epoch[4] Iteration[2000] Training Loss: 0.62
Epoch[4] Validation Loss: 1.556 
Epoch[4] Iteration[3000] Training Loss: 0.65
Epoch[4] Validation Loss: 1.553 
Epoch[4] Iteration[4000] Training Loss: 0.76
Epoch[4] Validation Loss: 1.548 
Save best theta...
Epoch[5] Iteration[0] Training Loss: 0.47
Epoch[5] Validation Loss: 1.547 
Save best theta...
Epoch[5] Iteration[1000] Training Loss: 0.62
Epoch[5] Validation Loss: 1.552 
Epoch[5] Iteration[2000] Training Loss: 0.51
Epoch[5] Validation Loss: 1.555 
Epoch[5] Iteration[3000] Training Loss: 0.88
Epoch[5] Validation Loss: 1.553 
Epoch[5] Iteration[4000] Training Loss: 0.88
Epoch[5] Validation Loss: 1.550 
Epoch[6] Iteration[0] Training Loss: 0.27
Epoch[6] Validation Loss: 1.547 
Save best theta...
Epoch[6] Iteration[1000] Training Loss: 0.39
Epoch[6] Validation Loss: 1.55

Epoch[1] Iteration[2000] Training Loss: 1.30
Epoch[1] Validation Loss: 1.034 
Save best theta...
Epoch[1] Iteration[3000] Training Loss: 0.73
Epoch[1] Validation Loss: 1.021 
Save best theta...
Epoch[1] Iteration[4000] Training Loss: 0.83
Epoch[1] Validation Loss: 1.010 
Save best theta...
Epoch[2] Iteration[0] Training Loss: 1.15
Epoch[2] Validation Loss: 1.008 
Save best theta...
Epoch[2] Iteration[1000] Training Loss: 0.60
Epoch[2] Validation Loss: 1.008 
Epoch[2] Iteration[2000] Training Loss: 0.60
Epoch[2] Validation Loss: 1.007 
Save best theta...
Epoch[2] Iteration[3000] Training Loss: 0.74
Epoch[2] Validation Loss: 1.003 
Save best theta...
Epoch[2] Iteration[4000] Training Loss: 1.14
Epoch[2] Validation Loss: 0.997 
Save best theta...
Epoch[3] Iteration[0] Training Loss: 1.02
Epoch[3] Validation Loss: 0.996 
Save best theta...
Epoch[3] Iteration[1000] Training Loss: 0.49
Epoch[3] Validation Loss: 1.000 
Epoch[3] Iteration[2000] Training Loss: 0.60
Epoch[3] Validation Loss: 1.0

Epoch[10] Iteration[1000] Training Loss: 0.58
Epoch[10] Validation Loss: 1.033 
Epoch[10] Iteration[2000] Training Loss: 0.52
Epoch[10] Validation Loss: 1.035 
Epoch[10] Iteration[3000] Training Loss: 0.89
Epoch[10] Validation Loss: 1.033 
Epoch[10] Iteration[4000] Training Loss: 0.96
Epoch[10] Validation Loss: 1.029 
10601852 5300927 1060185 1060185
Epoch[0] Iteration[0] Training Loss: 13.44
Epoch[0] Validation Loss: 14.184 
Save best theta...
Epoch[0] Iteration[1000] Training Loss: 1.16
Epoch[0] Validation Loss: 1.924 
Save best theta...
Epoch[0] Iteration[2000] Training Loss: 1.50
Epoch[0] Validation Loss: 1.804 
Save best theta...
Epoch[0] Iteration[3000] Training Loss: 1.12
Epoch[0] Validation Loss: 1.752 
Save best theta...
Epoch[0] Iteration[4000] Training Loss: 1.08
Epoch[0] Validation Loss: 1.711 
Save best theta...
Epoch[0] Iteration[5000] Training Loss: 1.29
Epoch[0] Validation Loss: 1.679 
Save best theta...
Epoch[1] Iteration[0] Training Loss: 0.67
Epoch[1] Validation Loss

Epoch[4] Iteration[1000] Training Loss: 0.46
Epoch[4] Validation Loss: 1.546 
Epoch[4] Iteration[2000] Training Loss: 0.75
Epoch[4] Validation Loss: 1.549 
Epoch[4] Iteration[3000] Training Loss: 0.84
Epoch[4] Validation Loss: 1.548 
Epoch[4] Iteration[4000] Training Loss: 0.75
Epoch[4] Validation Loss: 1.545 
Epoch[4] Iteration[5000] Training Loss: 0.75
Epoch[4] Validation Loss: 1.541 
Epoch[5] Iteration[0] Training Loss: 0.35
Epoch[5] Validation Loss: 1.540 
Save best theta...
Epoch[5] Iteration[1000] Training Loss: 0.57
Epoch[5] Validation Loss: 1.545 
Epoch[5] Iteration[2000] Training Loss: 0.67
Epoch[5] Validation Loss: 1.548 
Epoch[5] Iteration[3000] Training Loss: 0.79
Epoch[5] Validation Loss: 1.547 
Epoch[5] Iteration[4000] Training Loss: 1.03
Epoch[5] Validation Loss: 1.545 
Epoch[5] Iteration[5000] Training Loss: 0.82
Epoch[5] Validation Loss: 1.540 
Epoch[6] Iteration[0] Training Loss: 0.41
Epoch[6] Validation Loss: 1.540 
Save best theta...
Epoch[6] Iteration[1000] Trainin

Epoch[9] Iteration[5000] Training Loss: 0.77
Epoch[9] Validation Loss: 1.393 
Epoch[10] Iteration[0] Training Loss: 0.80
Epoch[10] Validation Loss: 1.391 
Epoch[10] Iteration[1000] Training Loss: 0.80
Epoch[10] Validation Loss: 1.396 
Epoch[10] Iteration[2000] Training Loss: 0.58
Epoch[10] Validation Loss: 1.398 
Epoch[10] Iteration[3000] Training Loss: 0.81
Epoch[10] Validation Loss: 1.398 
Epoch[10] Iteration[4000] Training Loss: 0.67
Epoch[10] Validation Loss: 1.397 
Epoch[10] Iteration[5000] Training Loss: 0.82
Epoch[10] Validation Loss: 1.391 
10601852 5300927 1060185 1060185
Epoch[0] Iteration[0] Training Loss: 14.59
Epoch[0] Validation Loss: 13.210 
Save best theta...
Epoch[0] Iteration[1000] Training Loss: 1.96
Epoch[0] Validation Loss: 1.251 
Save best theta...
Epoch[0] Iteration[2000] Training Loss: 1.14
Epoch[0] Validation Loss: 1.156 
Save best theta...
Epoch[0] Iteration[3000] Training Loss: 1.10
Epoch[0] Validation Loss: 1.110 
Save best theta...
Epoch[0] Iteration[4000] 

RuntimeError: CUDA out of memory. Tried to allocate 58.00 MiB (GPU 0; 10.91 GiB total capacity; 9.19 GiB already allocated; 50.12 MiB free; 313.49 MiB cached)

In [9]:
# First run - k=1, lr = 0.01, lamb=1e-6, Full history 70%train-15%valid-15%test
best

[tensor(1.5738, device='cuda:0', grad_fn=<MseLossBackward>), 0.01, 1e-06]

In [15]:
# Iterate on lr - Full history 70%train-15%valid-15%test
# lr = 5e-3 is acceptable
results

{(0.001, 1e-06, 1): tensor(1.5802, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005, 1e-06, 1): tensor(1.5661, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.01, 1e-06, 1): tensor(1.5746, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.05, 1e-06, 1): tensor(1.7742, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.1, 1e-06, 1): tensor(2.1349, device='cuda:0', grad_fn=<MseLossBackward>)}

In [17]:
# Iterate on lamb - Full history 70%train-15%valid-15%test; lr = 5e-3
# lamb = 5e-07 is acceptable
results

{(0.005, 1e-08, 1): tensor(1.6073, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005, 1e-07, 1): tensor(1.5646, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005, 5e-07, 1): tensor(1.5533, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005, 1e-06, 1): tensor(1.5664, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005, 5e-06, 1): tensor(1.6181, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005, 1e-05, 1): tensor(1.6410, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  0.0001,
  1): tensor(1.6890, device='cuda:0', grad_fn=<MseLossBackward>)}

In [21]:
# Iterate on moving window (full history): X%train-10%valid-10%test; lr=5e-3, lamb=5e-7
# Variance --> 90% variance of validation set as X > 50%
results

{(0.005,
  5e-07,
  1,
  0.2): tensor(1.1732, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.3): tensor(1.0440, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.4): tensor(1.0229, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.5): tensor(0.9913, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.6): tensor(1.3874, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.7): tensor(1.5389, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.8): tensor(1.5898, device='cuda:0', grad_fn=<MseLossBackward>)}

In [26]:
# Iterate on moving fixed window: X%train-10%valid-10%test; lr=5e-3, lamb=5e-7
# lamb = 5e-07 is acceptable
results

{(0.005,
  5e-07,
  1,
  0,
  0.2): tensor(1.1760, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0,
  0.3): tensor(1.0439, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0,
  0.4): tensor(1.0225, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0,
  0.5): tensor(0.9975, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.1,
  0.3): tensor(1.0645, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.1,
  0.4): tensor(1.0357, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.1,
  0.5): tensor(0.9942, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.1,
  0.6): tensor(1.3896, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.2,
  0.4): tensor(1.0504, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.2,
  0.5): tensor(1.0032, device='cuda:0', grad_fn=<MseLossBackward>),
 (0.005,
  5e-07,
  1,
  0.2

In [22]:
split_values = [0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2]

for split in split_values:
    
    # Implement a moving window - test and valid datasets are 10% each
    train_split = int(N * split)   
    valid_split =  int(N * (split+0.1))
    test_split =  int(N * (split+0.2))

    train_x = data.loc[:train_split, ['gPlusPlaceId','gPlusUserId','num_reviews', \
                                      'unixReviewTime','year','month','day']]
    train_y = data.loc[:train_split, 'rating':'rating']
    valid_x = data.loc[train_split+1:valid_split, ['gPlusPlaceId','gPlusUserId','num_reviews', \
                                      'unixReviewTime','year','month','day']]
    valid_y = data.loc[train_split+1:valid_split, 'rating':'rating']
    test_x = data.loc[valid_split+1:test_split, ['gPlusPlaceId','gPlusUserId','num_reviews', \
                                      'unixReviewTime','year','month','day']]
    test_y = data.loc[valid_split+1:test_split, 'rating':'rating']
    
    print("Train split = {}%".format(split*100))
    print(np.var(train_y["rating"]))
    print(np.var(valid_y["rating"]))
    print(np.var(test_y["rating"]))


Train split = 80.0%
1.3461472153508236
1.7674499788103724
1.8243738761503898
Train split = 70.0%
1.2960605307672615
1.6961873735369226
1.7674499788103724
Train split = 60.0%
1.2580491692030635
1.524080754233291
1.6961873735369226
Train split = 50.0%
1.2909144334309721
1.093538923045579
1.524080754233291
Train split = 40.0%
1.3368149521792445
1.1073010945009147
1.093538923045579
Train split = 30.0%
1.418168469775325
1.092115782188113
1.1073010945009147
Train split = 20.0%
1.5763985212026381
1.062104694635307
1.092115782188113
