In [2]:
import os
import numpy as np
import shutil
from scipy import sparse

import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow.contrib.layers import apply_regularization, l2_regularizer
import bottleneck as bn

os.environ["CUDA_VISIBLE_DEVICES"]="6"

In [5]:
import vaecf_util as util
import vaecf as model
import vaecf_metric as metric
import vaecf_preprocess as preprocess
import vaecf_train as trainer
import vaecf_evaluate as evaluator

In [49]:
import imp
util = imp.reload(util)
preprocess = imp.reload(preprocess)
trainer = imp.reload(trainer)
evaluator = imp.reload(evaluator)
metric = imp.reload(metric)

In [50]:
### Load meta dict
ITEM_POOL_DIR = '../item_metadata/'
def load_item_title_dict(fn):
    import json
    """
        load sid title
    """
    sid2title = {}
    with open(fn, 'r') as fd:
        for line in fd:
            line = line.rstrip()
            elems = line.split('\t')
            sid = elems[0]
            meta = json.loads(elems[1])
            sid2title[sid] = meta['candidateItem']['[rus.cell.PbHcCandidateItem.homecardData.homecardData]']['title']
    return sid2title
sid2title = load_item_title_dict(ITEM_POOL_DIR + 'item_meta_json.txt')

def get_sid_title(sid):
    if sid not in sid2title:
        return ''
    return sid2title[sid]

In [51]:
### Pre-process data

In [52]:
MODEL_DIR = '../model/'
DATA_DIR = '../data/'
fn_uid_sids = 'uid_sids.txt'

In [53]:
## Refine data, prune no-meta-recorded sids
def prune_data(fn, sid2title):
    """
        Refine data, prune no-meta-recorded sids
    """
    refined_data = []
    with open(fn, 'r') as fd:
        for line in fd:
            line = line.rstrip()
            elems = line.split(',')
            if len(elems) != 2:
                continue
            uid = elems[0]
            sids_raw = elems[1].split('|')
            sids = []
            for sid in sids_raw:
                if sid in sid2title:
                    sids.append(sid)
            refined_data.append((uid, sids))
    with open(fn + '.refine', 'w') as fd:
        for uid, sids in refined_data:
            fd.write('{},{}\n'.format(uid, '|'.join(sids)))
    return

prune_data(DATA_DIR + 'uid_sids.txt', sid2title)
fn_uid_sids = 'uid_sids.txt.refine'

In [54]:
idx2uid, uid2idx, idx2sid, sid2idx, uid_sids_dict, n_users, n_items, train_data, vad_data_tr, vad_data_te, test_data_tr, test_data_te =\
    preprocess.preprocess_data(data_dir=DATA_DIR, fn_uid_sids=fn_uid_sids)

filter out items watched by and users watches less than:5
../data/data_train.txt ../data/data_dev.txt ../data/data_test.txt
n_users:61437
n_items:10000


In [55]:
### train
trainer = imp.reload(trainer)
metric = imp.reload(metric)

In [56]:
vae = trainer.train(n_users, n_items, train_data, vad_data_tr, vad_data_te, test_data_tr, test_data_te)

INFO:tensorflow:Scale of 0 disables regularizer.
log directory: ./log/VAE_anneal200.0K_cap2.0E-01/I-600-200-600-I
chkpt directory: ./chkpt/VAE_anneal200.0K_cap2.0E-01/I-600-200-600-I
epoch:0
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.35297295585805105 best_ndcg:-inf
epoch:1
  batch_num:0 start_index:0 end_index:500 N_vad:6143


  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6770113857334643 best_ndcg:0.674453504949259
epoch:11
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_

  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6875463074780416 best_ndcg:0.6868020765197738
epoch:22
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N

  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6905995324006916 best_ndcg:0.6911681561462192
epoch:32
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6907019761365397 best_ndcg:0.6911681561462192
epoch:33
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_va

  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6923342928777261 best_ndcg:0.6932705998855634
epoch:43
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:61

  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6946063801270245 best_ndcg:0.693489412668432
epoch:54
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_

  cur_ndcg:0.6936273723695429 best_ndcg:0.6955467308681201
epoch:64
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6922523097860063 best_ndcg:0.6955467308681201
epoch:65
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_

  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6934703642754791 best_ndcg:0.6955467308681201
epoch:75
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143

  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6946994754181118 best_ndcg:0.6955467308681201
epoch:86
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N

  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6945585251986395 best_ndcg:0.6956252205168736
epoch:97
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_v

  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6937666824874661 best_ndcg:0.6960606289418534
epoch:107
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6931977085145197 best_ndcg:0.6960606289418534
epoch:108
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_

  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6961317464013566 best_ndcg:0.6960606289418534
epoch:118
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6

  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6944919221517097 best_ndcg:0.6962222673385885
epoch:129
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 

  cur_ndcg:0.6942570012844853 best_ndcg:0.6962222673385885
epoch:139
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6917608201035172 best_ndcg:0.6962222673385885
epoch:140
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 

  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6929032444127483 best_ndcg:0.6962222673385885
epoch:150
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:614

  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6925800511107528 best_ndcg:0.6962222673385885
epoch:161
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 

  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6925251817787059 best_ndcg:0.6962222673385885
epoch:172
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_

  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6928305256954953 best_ndcg:0.6962222673385885
epoch:182
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6928912317737742 best_ndcg:0.6962222673385885
epoch:183
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_

  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  cur_ndcg:0.6932124162501824 best_ndcg:0.6962222673385885
epoch:193
  batch_num:0 start_index:0 end_index:500 N_vad:6143
  batch_num:10 start_index:5000 end_index:5500 N_vad:6143
  batch_num:20 start_index:10000 end_index:10500 N_vad:6143
  batch_num:30 start_index:15000 end_index:15500 N_vad:6143
  batch_num:40 start_index:20000 end_index:20500 N_vad:6143
  batch_num:50 start_index:25000 end_index:25500 N_vad:6143
  batch_num:60 start_index:30000 end_index:30500 N_vad:6143
  batch_num:70 start_index:35000 end_index:35500 N_vad:6143
  batch_num:80 start_index:40000 end_index:40500 N_vad:6143
  batch_num:90 start_index:45000 end_index:45500 N_vad:6143
  batch_num:0 start_index:0 end_index:500 N_vad:6

In [57]:
### evaluate

In [58]:
evaluator.evaluate(n_users, n_items, train_data, vad_data_tr, vad_data_te, test_data_tr, test_data_te)

INFO:tensorflow:Scale of 0 disables regularizer.
chkpt directory: ./chkpt/VAE_anneal200.0K_cap2.0E-01/I-600-200-600-I
INFO:tensorflow:Restoring parameters from ./chkpt/VAE_anneal200.0K_cap2.0E-01/I-600-200-600-I/model
Test NDCG@100=0.69703 (0.00380)
Test Recall@20=0.73074 (0.00404)
Test Recall@50=0.78431 (0.00372)


In [59]:
### predict

In [60]:
print(list(uid2idx.keys())[:20])

['507875849', '318854800', '58475092', '460432350', '510901096', '513748570', '398925575', '18283091', '106969945', '420861843', '452994916', '453563583', '5816970', '116414475', '509114115', '525729164', '136263578', '479411376', '508466581', '525297601']


In [61]:
base_sididx_arr = None
uid = '318854800'
if uid not in uid2idx:
    print('uid not in uid2idx')
    print('len(uid2idx):{}'.format(len(uid2idx)))
else:
    raw_sids = uid_sids_dict[uid]
    sids = []
    for sid in raw_sids:
        if sid in sid2idx:
            sids.append(sid)
    base_sid_arr = sids
    base_sididx_arr = [sid2idx[sid] for sid in base_sid_arr]
print(base_sid_arr)

['U57734695115', 'U57737534783', 'U57740717962', 'U57740717968', 'U57737058230', 'U57734720348', 'U57740717953', 'U57760589370', 'U57737535620', 'U57738400282', 'U57737535626', 'U57761030928', 'U57760589376', 'U57741196702', 'U57760589373', 'U57737058236', 'U57738400369', 'U57740717974', 'U57761030094', 'U57737058209', 'U57737058215', 'U57734720354', 'U57761030082', 'U57734695109', 'U57740717950', 'U57760589382', 'U57734719631']


In [62]:
count = 30
input_sididx_arr = base_sididx_arr[:count]
print([idx2sid[idx] for idx in input_sididx_arr])

['U57734695115', 'U57737534783', 'U57740717962', 'U57740717968', 'U57737058230', 'U57734720348', 'U57740717953', 'U57760589370', 'U57737535620', 'U57738400282', 'U57737535626', 'U57761030928', 'U57760589376', 'U57741196702', 'U57760589373', 'U57737058236', 'U57738400369', 'U57740717974', 'U57761030094', 'U57737058209', 'U57737058215', 'U57734720354', 'U57761030082', 'U57734695109', 'U57740717950', 'U57760589382', 'U57734719631']


In [63]:
scores = evaluator.predict(input_sididx_arr, n_items)

INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Restoring parameters from ./chkpt/VAE_anneal200.0K_cap2.0E-01/I-600-200-600-I/model


In [64]:
scores = scores.ravel()
topk = 30
indices = np.argpartition(-scores, topk)[:topk]
print([idx2sid[sididx] for sididx in indices])
#print(scores[indices][:topk])

['U57737134337', 'U57737098835', 'U57758264247', 'U57760589397', 'U57738400366', 'U57758242242', 'U57740717983', 'U57740762053', 'U57758244162', 'U57736403426', 'U66287601713', 'U57740741686', 'U57737058248', 'U57734695106', 'U57760589394', 'U57738377365', 'U57737058242', 'U57738377362', 'U57734695082', 'U57737058239', 'U57737535614', 'U57758242257', 'U57758244171', 'U57734695091', 'U57760589385', 'U57760589403', 'U57758264241', 'U57738400360', 'U57758242251', 'U57734695085']


In [65]:
## output tag
get_sid_title("U57737098835")

'好饿的小蛇'

In [66]:
# input
arr = [idx2sid[idx] for idx in input_sididx_arr]
[(sid, get_sid_title(sid)) for sid in arr]

[('U57734695115', '狼与鹭鸶'),
 ('U57737534783', '爷爷变成了幽灵'),
 ('U57740717962', '种西瓜'),
 ('U57740717968', '小狗斑斑去郊游'),
 ('U57737058230', '妮妮的糖果'),
 ('U57734720348', '谁吃了蜂蜜'),
 ('U57740717953', '南瓜小七'),
 ('U57760589370', '小木屋的秘密'),
 ('U57737535620', '青蛙只能吸一口'),
 ('U57738400282', '春天卷'),
 ('U57737535626', '玛蒂娜学音乐'),
 ('U57761030928', '巫婆的孩子'),
 ('U57760589376', '最特别的礼物'),
 ('U57741196702', '你们都是我的最爱'),
 ('U57760589373', '冬天里的精灵'),
 ('U57737058236', '爱心小桥'),
 ('U57738400369', '神镜'),
 ('U57740717974', '蚕宝宝去哪儿了'),
 ('U57761030094', '小章鱼遇到大麻烦'),
 ('U57737058209', '绿色的大伞'),
 ('U57737058215', '森林夜校'),
 ('U57734720354', '兔子吓跑了老虎'),
 ('U57761030082', '我不是故意的'),
 ('U57734695109', '小男孩与蝎子'),
 ('U57740717950', '小豚鼠的礼物'),
 ('U57760589382', '小香肠历险记'),
 ('U57734719631', '春天卷')]

In [67]:
# predicted
arr = [idx2sid[sididx] for sididx in indices]
[(sid, get_sid_title(sid)) for sid in arr]

[('U57737134337', '爱米拉的猫'),
 ('U57737098835', '好饿的小蛇'),
 ('U57758264247', '月亮和姑娘'),
 ('U57760589397', '最漂亮的房子'),
 ('U57738400366', '灰毛小羊羔'),
 ('U57758242242', '山鹰与狐狸'),
 ('U57740717983', '小青蛙比肚皮'),
 ('U57740762053', '遇见你，真好'),
 ('U57758244162', '病鹿'),
 ('U57736403426', '冰上的挑战'),
 ('U66287601713', '森林中的三个小矮人'),
 ('U57740741686', '菲菲的温馨圣诞'),
 ('U57737058248', '送星星回家'),
 ('U57734695106', '山震'),
 ('U57760589394', '小狐狸交朋友'),
 ('U57738377365', '掉进井里的狐狸和公山羊'),
 ('U57737058242', '彩色小风车'),
 ('U57738377362', '寡妇与母鸡'),
 ('U57734695082', '小鹿和他的父亲'),
 ('U57737058239', '小鸡学飞'),
 ('U57737535614', '小怪兽'),
 ('U57758242257', '徒劳的寒鸦'),
 ('U57758244171', '狐狸和葡萄'),
 ('U57734695091', '善与恶'),
 ('U57760589385', '贪心的狮子'),
 ('U57760589403', '一个大南瓜'),
 ('U57758264241', '贪心的皇帝和公主'),
 ('U57738400360', '魔术师和老鼠'),
 ('U57758242251', '站在屋顶上的山羊与狼'),
 ('U57734695085', '蚂蚁与屎壳郎')]

In [68]:
# save model
!rm -r '../model/'
SIGNATURE_NAME = "serving_default"

rm: cannot remove ‘../model/’: No such file or directory


In [69]:
model = imp.reload(model)
total_anneal_steps = 200000
anneal_cap = 0.2
p_dims = [200, 600, n_items]
tf.reset_default_graph()
vae = model.MultiVAE(p_dims, lam=0.0)
saver, logits_var, _, _, _ = vae.build_graph()
arch_str = "I-%s-I" % ('-'.join([str(d) for d in vae.dims[1:-1]]))
chkpt_dir = './chkpt/VAE_anneal{}K_cap{:1.1E}/{}'.format(
    total_anneal_steps/1000, anneal_cap, arch_str)

X = sparse.csr_matrix((np.ones_like(base_sididx_arr), (np.zeros_like(base_sididx_arr), base_sididx_arr)), shape=(1, n_items), dtype=np.int16)
if sparse.isspmatrix(X):
    X = X.toarray()
X = X.astype('float32')

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    saver.restore(sess, '{}/model'.format(chkpt_dir))
    pred_val = sess.run(vae.logits, feed_dict={vae.input_ph: X, vae.is_training_ph: 0})
    pred_val[X.nonzero()] = -np.inf
    scores = pred_val
    scores = scores.ravel()
    topk = 30
    indices = np.argpartition(-scores, topk)[:topk]
    print([idx2sid[sididx] for sididx in indices])
    builder = tf.saved_model.builder.SavedModelBuilder(MODEL_DIR)
    builder.add_meta_graph_and_variables(sess, [tf.saved_model.tag_constants.SERVING], signature_def_map= {
            SIGNATURE_NAME: tf.saved_model.signature_def_utils.build_signature_def(
                inputs= {"X": tf.saved_model.utils.build_tensor_info(vae.input_ph)},
                outputs= {"y_values": tf.saved_model.utils.build_tensor_info(vae.logits_top_values),
                          "y_indices": tf.saved_model.utils.build_tensor_info(vae.logits_top_indices)},
                method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)
            })
    #builder.save(as_text=True)
    builder.save()

INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Restoring parameters from ./chkpt/VAE_anneal200.0K_cap2.0E-01/I-600-200-600-I/model
['U57737134337', 'U57737098835', 'U57758264247', 'U57760589397', 'U57738400366', 'U57758242242', 'U57740717983', 'U57740762053', 'U57758244162', 'U57736403426', 'U66287601713', 'U57740741686', 'U57737058248', 'U57734695106', 'U57760589394', 'U57738377365', 'U57737058242', 'U57738377362', 'U57734695082', 'U57737058239', 'U57737535614', 'U57758242257', 'U57758244171', 'U57734695091', 'U57760589385', 'U57760589403', 'U57758264241', 'U57738400360', 'U57758242251', 'U57734695085']
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: b'../model/saved_model.pb'


In [70]:
# load model
np.shape(X)

(1, 10000)

In [75]:
pred_val = None
with tf.Session(graph=tf.Graph()) as sess:
    meta_graph_def = tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], MODEL_DIR)
    signature = meta_graph_def.signature_def
    
    X_tensor_name = signature[SIGNATURE_NAME].inputs["X"].name
    y_values_tensor_name = signature[SIGNATURE_NAME].outputs["y_values"].name
    y_indices_tensor_name = signature[SIGNATURE_NAME].outputs["y_indices"].name
    
    X_t = sess.graph.get_tensor_by_name(X_tensor_name)
    y_val_t = sess.graph.get_tensor_by_name(y_values_tensor_name)
    y_ind_t = sess.graph.get_tensor_by_name(y_indices_tensor_name)

    y_val, y_ind = sess.run([y_val_t, y_ind_t], feed_dict={X_t: X})
    print(y_val_t, y_ind_t)

INFO:tensorflow:Restoring parameters from b'../model/variables/variables'
Tensor("TopKV2:0", shape=(?, 500), dtype=float32) Tensor("TopKV2:1", shape=(?, 500), dtype=int32)


In [82]:
topk = 20
arr = [idx2sid[sididx] for sididx in y_ind.tolist()[0][:topk]]
[(sid, get_sid_title(sid)) for sid in arr]

[('U57741196702', '你们都是我的最爱'),
 ('U57760589376', '最特别的礼物'),
 ('U57734719631', '春天卷'),
 ('U57737058230', '妮妮的糖果'),
 ('U57740717968', '小狗斑斑去郊游'),
 ('U57760589382', '小香肠历险记'),
 ('U57737058209', '绿色的大伞'),
 ('U57738400282', '春天卷'),
 ('U57737535626', '玛蒂娜学音乐'),
 ('U57737535620', '青蛙只能吸一口'),
 ('U57761030094', '小章鱼遇到大麻烦'),
 ('U57760589373', '冬天里的精灵'),
 ('U57734695109', '小男孩与蝎子'),
 ('U57740717950', '小豚鼠的礼物'),
 ('U57761030082', '我不是故意的'),
 ('U57740717983', '小青蛙比肚皮'),
 ('U57737058215', '森林夜校'),
 ('U57737534783', '爷爷变成了幽灵'),
 ('U57740717953', '南瓜小七'),
 ('U57734720354', '兔子吓跑了老虎')]