In [1]:
import sys

# Directory that is put on the import path so the project imports below resolve.
# NOTE(review): machine-specific absolute path; presumably the folder holding the
# `recsys` package — adjust to your own checkout.
project_root = "/Users/leowan/Desktop/git/"

# Append in place instead of round-tripping sys.path through a set: a set has
# no defined order, and the order of sys.path decides which module wins when
# several candidates exist. The membership check keeps the cell idempotent.
if project_root not in sys.path:
    sys.path.append(project_root)
print(sys.path)

['', '/anaconda3/lib/python3.6', '/anaconda3/lib/python36.zip', '/anaconda3/lib/python3.6/site-packages', '/anaconda3/lib/python3.6/site-packages/IPython/extensions', '/Users/leowan/Desktop/git/', '/anaconda3/lib/python3.6/lib-dynload', '/anaconda3/lib/python3.6/site-packages/aeosa', '/Users/leowan/.ipython']


In [None]:
# Download the lastfm test and train arrays into the working directory.
import urllib.request

dataset_prefix = 'http://s3.amazonaws.com/cornell-tech-sdl-openrec'

# (remote URL, local file name) pairs, fetched in this order.
downloads = (
    ('%s/lastfm/lastfm_test.npy' % dataset_prefix, 'lastfm_test.npy'),
    ('%s/lastfm/lastfm_train.npy' % dataset_prefix, 'lastfm_train.npy'),
)
for remote_url, local_path in downloads:
    urllib.request.urlretrieve(remote_url, local_path)

In [9]:
import numpy as np

In [3]:
import tensorflow as tf
import recsys.recommenders.recommender_base as recommender_base
import recsys.modules.extractions.embedding_layer as embedding_layer
import recsys.modules.interactions.mlp_softmax as mlp_softmax

In [10]:
# Re-import the project modules so that on-disk edits are picked up without
# restarting the kernel.
import importlib

# The `imp` module is deprecated (and removed in Python 3.12); importlib.reload
# is the supported replacement. The name `imp` is kept bound to importlib so
# that any cell still calling `imp.reload(...)` keeps working.
imp = importlib

recommender_base = importlib.reload(recommender_base)
embedding_layer = importlib.reload(embedding_layer)
mlp_softmax = importlib.reload(mlp_softmax)

In [4]:
def VanlillaMlpRec(batch_size, dim_item_embed, max_seq_len, total_items,
        l2_reg_embed=None, l2_reg_mlp=None, dropout=None, init_model_dir=None,
        save_model_dir='VanlillaMlpRec/', train=True, serve=False):
    """Assemble a sequence-based recommender with an MLP-softmax interaction.

    Creates a ``recommender_base.Recommender`` and registers, on its training
    and serving graphs, int32 input placeholders, an item-embedding lookup,
    an ``mlp_softmax`` interaction module and (training graph only) an Adam
    optimizer over the globally registered losses.

    Args:
        batch_size: Static first dimension of the training placeholders.
        dim_item_embed: Second dimension of the item embedding table and
            first entry of the MLP ``dims``.
        max_seq_len: Second dimension of the ``seq_item_id`` placeholders;
            also forwarded to the MLP-softmax module.
        total_items: First dimension of the item embedding table and last
            entry of the MLP ``dims``.
        l2_reg_embed: Forwarded as ``l2_reg`` to the embedding layer.
        l2_reg_mlp: Forwarded as ``l2_reg`` to the MLP-softmax module.
        dropout: Forwarded to the MLP-softmax module in the training graph only.
        init_model_dir: Forwarded to ``Recommender``.
        save_model_dir: Forwarded to ``Recommender``.
        train: Forwarded to ``Recommender``.
        serve: Forwarded to ``Recommender``.

    Returns:
        The ``Recommender`` instance with all subgraphs registered.
    """
    recommender = recommender_base.Recommender(
        init_model_dir=init_model_dir,
        save_model_dir=save_model_dir,
        train=train,
        serve=serve,
    )

    def int_placeholder(name, shape):
        """Create an int32 placeholder whose tensor name is ``name``."""
        return tf.placeholder(tf.int32, shape=shape, name=name)

    def mlp_softmax_kwargs(subgraph):
        """Build the arguments common to the train and serve MLP-softmax calls."""
        return dict(
            user=None,
            item=subgraph['seq_vec'],
            seq_len=subgraph['seq_len'],
            max_seq_len=max_seq_len,
            dims=[dim_item_embed, total_items],
            l2_reg=l2_reg_mlp,
            subgraph=subgraph,
            scope='MLPSoftmax',
        )

    @recommender.traingraph.inputgraph(outs=['seq_item_id', 'seq_len', 'label'])
    def train_input_graph(subgraph):
        # Training placeholders use a static batch dimension.
        subgraph['seq_item_id'] = int_placeholder('seq_item_id',
                                                  [batch_size, max_seq_len])
        subgraph['seq_len'] = int_placeholder('seq_len', [batch_size])
        subgraph['label'] = int_placeholder('label', [batch_size])

        subgraph.register_global_input_mapping(
            {key: subgraph[key] for key in ('seq_item_id', 'seq_len', 'label')})

    @recommender.servegraph.inputgraph(outs=['seq_item_id', 'seq_len'])
    def serve_input_graph(subgraph):
        # Serving placeholders leave the batch dimension unspecified.
        subgraph['seq_item_id'] = int_placeholder('seq_item_id',
                                                  [None, max_seq_len])
        subgraph['seq_len'] = int_placeholder('seq_len', [None])

        subgraph.register_global_input_mapping(
            {key: subgraph[key] for key in ('seq_item_id', 'seq_len')})

    @recommender.traingraph.itemgraph(ins=['seq_item_id'], outs=['seq_vec'])
    @recommender.servegraph.itemgraph(ins=['seq_item_id'], outs=['seq_vec'])
    def item_graph(subgraph):
        # Only the second value returned by the embedding layer is kept.
        _, subgraph['seq_vec'] = embedding_layer.apply(
            l2_reg=l2_reg_embed,
            init='normal',
            id_=subgraph['seq_item_id'],
            shape=[total_items, dim_item_embed],
            subgraph=subgraph,
            scope='item',
        )

    @recommender.traingraph.interactiongraph(ins=['seq_vec', 'seq_len', 'label'])
    def train_interaction_graph(subgraph):
        # Training additionally supplies the labels and the dropout setting.
        mlp_softmax.apply(labels=subgraph['label'],
                          dropout=dropout,
                          train=True,
                          **mlp_softmax_kwargs(subgraph))

    @recommender.servegraph.interactiongraph(ins=['seq_vec', 'seq_len'])
    def serve_interaction_graph(subgraph):
        mlp_softmax.apply(train=False, **mlp_softmax_kwargs(subgraph))

    @recommender.traingraph.optimizergraph
    def optimizer_graph(subgraph):
        # Sum every registered loss and minimise the total with Adam.
        total_loss = tf.add_n(subgraph.get_global_losses())
        adam = tf.train.AdamOptimizer(learning_rate=0.001)
        subgraph.register_global_operation(adam.minimize(total_loss))

    @recommender.traingraph.connector
    @recommender.servegraph.connector
    def connect(graph):
        # Wiring used by both graphs: inputs -> item graph -> interaction graph.
        graph.itemgraph['seq_item_id'] = graph.inputgraph['seq_item_id']
        graph.interactiongraph['seq_len'] = graph.inputgraph['seq_len']
        graph.interactiongraph['seq_vec'] = graph.itemgraph['seq_vec']

    @recommender.traingraph.connector.extend
    def train_connect(graph):
        # The label input exists on the training graph only.
        graph.interactiongraph['label'] = graph.inputgraph['label']

    return recommender

In [41]:
# dataset: user/item counts and the raw train/test record arrays
import recsys.dataset as dataset

total_users = 992
total_items = 14598
train_data, test_data = (np.load(npy_path)
                         for npy_path in ('lastfm_train.npy', 'lastfm_test.npy'))
# Show the first two records of each array.
train_data[:2], test_data[:2]

(array([(0, 1304, 1241478537), (0, 1036, 1241445250)],
       dtype=[('user_id', '<i4'), ('item_id', '<i4'), ('ts', '<i4')]),
 array([(1, 282, 1240944095), (1, 282, 1240943945)],
       dtype=[('user_id', '<i4'), ('item_id', '<i4'), ('ts', '<i4')]))

In [42]:
# hyperparameters
dim_item_embed = 50      # dimension of the item embedding
max_seq_len = 100        # maximum length of a user's listening history
batch_size = 100         # training batch size
total_iter = 1000        # number of training iterations
eval_iter = 100          # evaluate every `eval_iter` iterations
save_iter = eval_iter    # save the model every `save_iter` iterations

In [43]:
# model
# `total_items` is taken from the constant defined with the dataset arrays.
# `train_dataset` is only created in a later cell, so referring to it here
# fails with NameError when the notebook is run top to bottom on a fresh kernel.
# NOTE(review): assumes Dataset.total_items() reports the same `total_items`
# value the Dataset is constructed with — confirm.
model = VanlillaMlpRec(batch_size=batch_size,
    total_items=total_items,
    max_seq_len=max_seq_len,
    dim_item_embed=dim_item_embed,
    save_model_dir='VanlillaMlpRec/',
    train=True, 
    serve=True)

In [44]:
# evaluators: AUC and recall at several cut-offs
import recsys.metrics.auc as auc
import recsys.metrics.recall as recall

recall_cutoffs = [100, 200, 300, 400, 500]

auc_evaluator = auc.AUC()
recall_evaluator = recall.Recall(recall_at=recall_cutoffs)

In [45]:
# datasets
import importlib

import recsys.dataset as dataset

# Reload so that on-disk edits to recsys.dataset are picked up without a kernel
# restart; importlib.reload is the supported replacement for the deprecated
# imp.reload.
dataset = importlib.reload(dataset)

# Both datasets are built from the record arrays with sortby='ts'.
train_dataset = dataset.Dataset(train_data, total_users, total_items,
                        sortby='ts', name='Train')
test_dataset = dataset.Dataset(test_data, total_users, total_items,
                       sortby='ts', name='Test')

In [46]:
# samplers: one for training batches, one for evaluation
import recsys.samplers.temporal_sampler as temporal_sampler

train_sampler = temporal_sampler.create_training_sampler(
    dataset=train_dataset,
    batch_size=batch_size,
    max_seq_len=max_seq_len,
    num_process=1,
)
test_sampler = temporal_sampler.create_evaluation_sampler(
    max_seq_len=max_seq_len,
    dataset=test_dataset,
)

In [47]:
# trainer
import importlib

import recsys.model_trainer as model_trainer

# Reload so that on-disk edits to recsys.model_trainer are picked up without a
# kernel restart; importlib.reload is the supported replacement for the
# deprecated imp.reload.
model_trainer = importlib.reload(model_trainer)
trainer = model_trainer.ModelTrainer(model=model)

In [48]:
# train/test: run training with the test sampler and both evaluators attached
trainer.train(
    train_sampler=train_sampler,
    eval_samplers=[test_sampler],
    evaluators=[auc_evaluator, recall_evaluator],
    total_iter=total_iter,
    eval_iter=eval_iter,
    save_iter=save_iter,
)

[34m[Training starts, total_iter: 1000, eval_iter: 100, save_iter: 100][0m
[31m[iter 100][0m Model saved.
[31m[iter 100][0m loss: 9.495581
[32m..(dataset: Test) evaluation[0m
INFO:tensorflow:Restoring parameters from VanlillaMlpRec/model.ckpt
[32m..(dataset: Test)[0m AUC 0.6901089264917449
[32m..(dataset: Test)[0m Recall 0.06333333333333334 0.09333333333333334 0.13333333333333333 0.16 0.18
[31m[iter 200][0m Model saved.
[31m[iter 200][0m loss: 9.008132
[32m..(dataset: Test) evaluation[0m
INFO:tensorflow:Restoring parameters from VanlillaMlpRec/model.ckpt
[32m..(dataset: Test)[0m AUC 0.7058904797789499
[32m..(dataset: Test)[0m Recall 0.06666666666666667 0.08666666666666667 0.14 0.17 0.19
[31m[iter 300][0m Model saved.
[31m[iter 300][0m loss: 8.906515
[32m..(dataset: Test) evaluation[0m
INFO:tensorflow:Restoring parameters from VanlillaMlpRec/model.ckpt
[32m..(dataset: Test)[0m AUC 0.7177821926879953
[32m..(dataset: Test)[0m Recall 0.07666666666666666 0.10