In [1]:
from SVDModel import SVDModel
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import time
from joblib import Parallel, delayed
import pickle
import os.path
from pathlib import Path

# Seed NumPy's legacy global RNG once, up front, via a named constant.
SEED = 3362
np.random.seed(SEED)

In [2]:
# Notebook-wide parameters. 'path' maps a dataset key to the directory that
# holds its raw files (relative to the notebook's working directory).
PARAM = {
    'path': {
        'ml': './data/MovieLens25M/',
        'netflix': './data/NetflixPrize/',
    },
}

# Last expression of the cell: display the parameter dict.
PARAM

{'path': {'ml': './data/MovieLens25M/', 'netflix': './data/NetflixPrize/'}}

In [5]:
# Column names applied to the header-less rating files.
names = ['userId','rating','timestamp','movieId']

# Read the four rating shards (data_1.txt .. data_4.txt) with identical options
# and stack them. `pd.concat` gets no `names=` argument: that parameter only
# labels the levels of a hierarchical index built with `keys=`, which is not
# used here. The index is intentionally NOT reset, so row labels are unchanged.
netflix_parts = [
    pd.read_csv(PARAM['path']['netflix'] + f'data_{part}.txt', names=names, low_memory=False)
    for part in range(1, 5)
]
df_netflix = pd.concat(netflix_parts)
del netflix_parts  # the shards are no longer needed once concatenated

# Propagate the last seen movieId down the rows, then drop every row that still
# has a missing value in any column.
# (`Series.ffill()` replaces the deprecated `fillna(method='ffill')`.)
df_netflix['movieId'] = df_netflix['movieId'].ffill()
df_netflix = df_netflix.dropna()

# Downcast the id / rating columns to compact integer dtypes.
df_netflix = df_netflix.astype({
    'rating': np.int8,
    'movieId': np.int16,
    'userId': np.int32,
})

# Convert the rating date to whole seconds since the Unix epoch. Subtracting the
# epoch and floor-dividing by one second does not depend on the datetime
# resolution pandas picks, unlike reinterpreting the values as int64 nanoseconds
# through the deprecated `Series.view`.
rating_time = pd.to_datetime(df_netflix['timestamp'])
df_netflix['timestamp'] = (rating_time - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)

# Movie metadata: only the first three columns of the file are read.
df_movies = pd.read_csv(PARAM['path']['netflix']+'movie_titles.csv',
                        encoding='latin-1',
                        names=['movieId','release_year','title'],
                        usecols=range(3))

# Bundle used by the following cells. 'ratings' is a random sample of 2**16 rows
# (drawn from NumPy's global RNG because no random_state is passed), while the
# three counts are computed on the full, un-sampled frame.
NETFLIX = {
  'ratings': df_netflix[['userId','movieId','rating','timestamp']].sample(2**16),
  'movies': df_movies,
  'm_movies': df_netflix['movieId'].nunique(),
  'n_users': df_netflix['userId'].nunique(),
  'n_ratings': len(df_netflix)
}


In [6]:
# Sanity check: (rows, columns) of the sampled ratings and of the movie table.
for table_key in ('ratings', 'movies'):
    print(NETFLIX[table_key].shape)

(65536, 4)
(17770, 3)


In [16]:
# Hyper-parameter grid explored by the training loop, plus the stopping threshold.
PARAM.update({
    'features': [3,30,150,300],
    'lr': [1e-2,1e-3,1e-4],
    'epochs': [11,101,201,301,501],
    'weight_decay': [0.02,0.2,2],
    'stopping': 0.001,
})

# Dataset selector: 'ML_25M' picks MovieLens, anything else picks Netflix.
data = 'NETFLIX'
use_movielens = data=='ML_25M'

# Root directory under which results for the selected dataset are written.
resultDir = 'model_movielens' if use_movielens else 'model_netflix'

print('Creating SVD++ Model')
svd = SVDModel()
print("Created\nLoading Data")

# Conditional expression: only the selected dataset name is evaluated, so the
# other one does not have to exist in the kernel.
dataset = ML_25M if use_movielens else NETFLIX
svd.data_loader(data=dataset['ratings'],
                n_items=dataset['m_movies'],
                n_users=dataset['n_users'])
print("Loaded Data\nSpliting Data")

svd.split(0.8,0.8)
print("Splitted Data\nTraining...")

def train_model(features, lr, epochs, data, weight_decay, stopping=0.001):
    """Train the shared `svd` model for one hyper-parameter combination and pickle the result.

    The run is skipped when its result file already exists, so the surrounding
    grid search can be interrupted and re-run without repeating finished work.

    Relies on two notebook globals: `svd` (the model object that is configured
    and trained in place) and `resultDir` (root directory for the output files).

    Parameters
    ----------
    features, lr, epochs, weight_decay
        Values assigned to the attributes of the same name on `svd` before
        training; they also form the result file name.
    data
        Unused; kept so existing positional calls keep working.
    stopping
        Value assigned to `svd.stopping`; also part of the result file name.
        Defaults to 0.001, the value that was previously hard-coded.

    Returns
    -------
    None
        If the result file for this combination already exists (nothing is run).
    tuple
        `(svd, result)` otherwise, where `result` is whatever `svd.training()`
        returned. Note that `svd` is the shared global object, not a copy.
    """
    out_root = Path(f'./{resultDir}/svdpp')
    # Single source of truth for the file name used by both the cache check and
    # the final write.
    result_file = out_root / 'result' / f'{features}_{lr}_{epochs}_{weight_decay}_{stopping}.pkl'

    if result_file.is_file():
        return

    print("==============================")
    print(f'features: {features}\nlearning rate: {lr}\nEpochs: {epochs}\nweight decay: {weight_decay}\nstopping: {stopping}\n')
    svd.mode = 'svd++'
    svd.features = features
    svd.lr = lr
    svd.epochs = epochs
    svd.weight_decay = weight_decay
    svd.stopping = stopping

    start = time.perf_counter()
    result = svd.training()
    print('Time used =', time.perf_counter()-start)

    print('Saving Model')
    # parents=True creates the intermediate directories as well. The 'model'
    # directory is still created although only the training result is saved.
    for subdir in ('model', 'result'):
        (out_root / subdir).mkdir(parents=True, exist_ok=True)

    # Write to a temporary sibling and rename it into place, so an interrupted
    # run never leaves a truncated pickle that the cache check above would later
    # mistake for a finished result. The `with` block closes the file itself.
    tmp_file = result_file.with_name(result_file.name + '.tmp')
    with tmp_file.open('wb') as out:
        pickle.dump(result, out, pickle.HIGHEST_PROTOCOL)
    tmp_file.replace(result_file)

    print("==============================")
    return svd, result

Creating SVD++ Model
Created
Loading Data
Loaded Data
Spliting Data
Splitted Data
Training...


In [18]:
# Exhaustive grid search. The generator yields combinations in the same order as
# nested loops would: epochs outermost, then features, learning rate, weight
# decay and finally the dataset name.
grid = (
    (n_epochs, n_features, rate, decay, dataset_name)
    for n_epochs in PARAM['epochs']
    for n_features in PARAM['features']
    for rate in PARAM['lr']
    for decay in PARAM['weight_decay']
    for dataset_name in ['NETFLIX']
)
for epochs, features, lr, weight_decay, data in grid:
    train_model(features, lr, epochs, data, weight_decay)

features: 3
learning rate: 0.01
Epochs: 11
weight decay: 0.02
stopping: 0.001

Epoch :    1  | Train : 1.064  | Valid : 1.105  | Time : 1.10499s
Epoch :   11  | Train : 0.797  | Valid : 1.063  | Time : 8.52542s
Training stopped:
Epoch :   11  | Train Loss : 0.797  | Valid Loss: 1.063  | Test Loss: 1.076
Time used = 10.035988458000247
Saving Model
features: 3
learning rate: 0.01
Epochs: 11
weight decay: 0.2
stopping: 0.001

Epoch :    1  | Train : 1.067  | Valid : 1.108  | Time : 0.88552s
Epoch :   11  | Train : 0.807  | Valid : 1.064  | Time : 8.37541s
Training stopped:
Epoch :   11  | Train Loss : 0.807  | Valid Loss: 1.064  | Test Loss: 1.079
Time used = 9.671038999999837
Saving Model
features: 3
learning rate: 0.01
Epochs: 11
weight decay: 2
stopping: 0.001

Epoch :    1  | Train : 1.095  | Valid : 1.133  | Time : 0.86341s
Training stopped:
Epoch :    9  | Train Loss : 0.914  | Valid Loss: 1.109  | Test Loss: 1.127
Time used = 8.003226333999919
Saving Model
features: 3
learning rate

Epoch :   11  | Train : 0.871  | Valid : 1.113  | Time : 8.66315s
Training stopped:
Epoch :   11  | Train Loss : 0.871  | Valid Loss: 1.113  | Test Loss: 1.127
Time used = 10.228711416999886
Saving Model
features: 150
learning rate: 0.001
Epochs: 11
weight decay: 0.02
stopping: 0.001

Epoch :    1  | Train : 1.277  | Valid : 1.285  | Time : 0.94892s
Epoch :   11  | Train : 1.099  | Valid : 1.165  | Time : 8.65603s
Training stopped:
Epoch :   11  | Train Loss : 1.099  | Valid Loss: 1.165  | Test Loss: 1.172
Time used = 10.174939041000016
Saving Model
features: 150
learning rate: 0.001
Epochs: 11
weight decay: 0.2
stopping: 0.001

Epoch :    1  | Train : 1.275  | Valid : 1.284  | Time : 0.87068s
Epoch :   11  | Train : 1.101  | Valid : 1.164  | Time : 8.61623s
Training stopped:
Epoch :   11  | Train Loss : 1.101  | Valid Loss: 1.164  | Test Loss: 1.171
Time used = 10.049095542000032
Saving Model
features: 150
learning rate: 0.001
Epochs: 11
weight decay: 2
stopping: 0.001

Epoch :    1  

Training stopped:
Epoch :   10  | Train Loss : 0.896  | Valid Loss: 1.109  | Test Loss: 1.127
Time used = 9.109244833000048
Saving Model
features: 30
learning rate: 0.001
Epochs: 101
weight decay: 0.02
stopping: 0.001

Epoch :    1  | Train : 1.163  | Valid : 1.171  | Time : 0.89709s
Epoch :   11  | Train : 1.061  | Valid : 1.109  | Time : 8.31305s
Epoch :   21  | Train : 1.013  | Valid : 1.093  | Time : 8.42942s
Training stopped:
Epoch :   23  | Train Loss : 1.005  | Valid Loss: 1.091  | Test Loss: 1.102
Time used = 19.756233875000362
Saving Model
features: 30
learning rate: 0.001
Epochs: 101
weight decay: 0.2
stopping: 0.001

Epoch :    1  | Train : 1.163  | Valid : 1.172  | Time : 0.85016s
Epoch :   11  | Train : 1.064  | Valid : 1.111  | Time : 8.28351s
Epoch :   21  | Train : 1.018  | Valid : 1.095  | Time : 8.32054s
Training stopped:
Epoch :   23  | Train Loss : 1.010  | Valid Loss: 1.093  | Test Loss: 1.105
Time used = 19.68493279200038
Saving Model
features: 30
learning rate: 0

Epoch :   71  | Train : 1.273  | Valid : 1.339  | Time : 9.19351s
Epoch :   81  | Train : 1.249  | Valid : 1.322  | Time : 10.08673s
Epoch :   91  | Train : 1.228  | Valid : 1.307  | Time : 9.22021s
Epoch :  101  | Train : 1.209  | Valid : 1.295  | Time : 9.07202s
Training stopped:
Epoch :  101  | Train Loss : 1.209  | Valid Loss: 1.295  | Test Loss: 1.297
Time used = 93.5205822909993
Saving Model
features: 300
learning rate: 0.0001
Epochs: 101
weight decay: 0.2
stopping: 0.001

Epoch :    1  | Train : 1.731  | Valid : 1.728  | Time : 0.94285s
Epoch :   11  | Train : 1.578  | Valid : 1.589  | Time : 11.09126s
Epoch :   21  | Train : 1.481  | Valid : 1.504  | Time : 9.12144s
Epoch :   31  | Train : 1.413  | Valid : 1.447  | Time : 8.99294s
Epoch :   41  | Train : 1.363  | Valid : 1.406  | Time : 9.01228s
Epoch :   51  | Train : 1.324  | Valid : 1.375  | Time : 8.89042s
Epoch :   61  | Train : 1.292  | Valid : 1.351  | Time : 9.04131s
Epoch :   71  | Train : 1.266  | Valid : 1.331  | Tim