# \__ init \__.py

In [1]:
from __future__ import print_function
from __future__ import division

# data_utils.py

In [2]:
from __future__ import division
from __future__ import print_function

import numpy as np
import pandas as pd

import scipy.sparse as sp

import random

# For automatic dataset downloading
from urllib.request import urlopen
from zipfile import ZipFile
from io import StringIO, BytesIO
import shutil
import os.path

In [3]:
# def data_iterator(data, batch_size):
#     """
#     A simple data iterator from https://indico.io/blog/tensorflow-data-inputs-part1-placeholders-protobufs-queues/
#     :param data: list of numpy tensors that need to be randomly batched across their first dimension.
#     :param batch_size: int, batch_size of data_iterator.
#     Assumes same first dimension size of all numpy tensors.
#     :return: iterator over batches of numpy tensors
#     """
#     # shuffle labels and features
#     max_idx = len(data[0])
#     idxs = np.arange(0, max_idx)
#     np.random.shuffle(idxs)       
#     shuf_data = [dat[idxs] for dat in data]

#     # Does not yield last remainder of size less than batch_size
#     for i in range(max_idx//batch_size):
#         data_batch = [dat[i*batch_size:(i+1)*batch_size] for dat in shuf_data]
#         yield data_batch

**np.random.shuffle( )** will change `idxs` forever.

In [4]:
def map_data(data):
    """
    Map data to proper indices in case they are not in a continues [0, N) range

    Parameters
    ----------
    data : np.int32 arrays

    Returns
    -------
    mapped_data : np.int32 arrays
    n : length of mapped_data

    """
    uniq = list(set(data))

    id_dict = {old: new for new, old in enumerate(sorted(uniq))}
    data = np.array(list(map(lambda x: id_dict[x], data)))
    n = len(uniq)

    return data, id_dict, n

In [5]:
# def download_dataset(dataset, files, data_dir):
#     """ Downloads dataset if files are not present. """

#     if not np.all([os.path.isfile(data_dir + f) for f in files]):
#         url = "http://files.grouplens.org/datasets/movielens/" + dataset.replace('_', '-') + '.zip'
#         request = urlopen(url)

#         print('Downloading %s dataset' % dataset)
#         if dataset in ['ml_100k', 'ml_1m']:
#             target_dir = 'data/' + dataset.replace('_', '-')
#         elif dataset == 'ml_10m':
#             target_dir = 'data/' + 'ml-10M100K'
#         else:
#             raise ValueError('Invalid dataset option %s' % dataset)

#         with ZipFile(BytesIO(request.read())) as zip_ref:
#             zip_ref.extractall('data/')

#         source = [target_dir + '/' + s for s in os.listdir(target_dir)]
#         destination = data_dir+'/'
#         for f in source:
#             shutil.copy(f, destination)

#         shutil.rmtree(target_dir)

In [6]:
def load_data(fname, seed=1234, verbose=True):
    """ Loads dataset and creates adjacency matrix
    and feature matrix

    Parameters
    ----------
    fname: str, dataset
    seed: int, dataset shuffling seed
    verbose: to print out statements or not

    Returns
    -------
    num_users : int
        Number of users and items respectively

    num_items : int

    u_nodes : np.int32 arrays
        User indices

    v_nodes : np.int32 array
        item (movie) indices

    ratings : np.float32 array
        User/item ratings s.t. ratings[k] is the rating given by user u_nodes[k] to
        item v_nodes[k]. Note that that the all pairs u_nodes[k]/v_nodes[k] are unique, but
        not necessarily all u_nodes[k] or all v_nodes[k] separately.

    u_features: np.float32 array, or None
        If present in dataset, contains the features of the users.

    v_features: np.float32 array, or None
        If present in dataset, contains the features of the users.

    seed: int,
        For datashuffling seed with pythons own random.shuffle, as in CF-NADE.

    """

    u_features = None
    v_features = None

    print('Loading dataset', fname)

    data_dir = 'data/' + fname

    if fname == 'ml_100k':

        # Check if files exist and download otherwise
        files = ['/u.data', '/u.item', '/u.user']

        download_dataset(fname, files, data_dir)

        sep = '\t'
        filename = data_dir + files[0]

        dtypes = {
            'u_nodes': np.int32, 'v_nodes': np.int32,
            'ratings': np.float32, 'timestamp': np.float64}
        
        # ----------------------------------------------------------------------------------------------------- # 
        # read
        data = pd.read_csv(
            filename, sep=sep, header=None,
            names=['u_nodes', 'v_nodes', 'ratings', 'timestamp'], dtype=dtypes)
        
        # ----------------------------------------------------------------------------------------------------- # 
        # shuffle here like cf-nade paper with python's own random class
        # make sure to convert to list, otherwise random.shuffle acts weird on it without a warning
        # df.as_matrix() was depriciated after the version 0.23.0
        # so I use df.to_numpy() instead
        data_array = data.to_numpy().tolist()  
        random.seed(seed)
        random.shuffle(data_array)
        data_array = np.array(data_array)

        u_nodes_ratings = data_array[:, 0].astype(dtypes['u_nodes'])
        v_nodes_ratings = data_array[:, 1].astype(dtypes['v_nodes'])
        ratings = data_array[:, 2].astype(dtypes['ratings'])
        
        # map_data will give each entry new index
        # which will be in in a continues [0, N) range
        u_nodes_ratings, u_dict, num_users = map_data(u_nodes_ratings)
        v_nodes_ratings, v_dict, num_items = map_data(v_nodes_ratings)

        # adjust the datatype of u_nodes_ratings, v_nodes_ratings as well as ratings
        u_nodes_ratings, v_nodes_ratings = u_nodes_ratings.astype(np.int64), v_nodes_ratings.astype(np.int32)
        ratings = ratings.astype(np.float64)
        
        # ----------------------------------------------------------------------------------------------------- # 
        # Movie features (genres)
        sep = r'|'
        movie_file = data_dir + files[1]
        movie_headers = ['movie id', 'movie title', 'release date', 'video release date',
                         'IMDb URL', 'unknown', 'Action', 'Adventure', 'Animation',
                         'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
                         'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
                         'Thriller', 'War', 'Western']
        movie_df = pd.read_csv(movie_file, sep=sep, header=None,
                               names=movie_headers, engine='python')

        # genre_headers is still a numpy.array and starts from "Action"
        genre_headers = movie_df.columns.values[6:]    
        num_genres = genre_headers.shape[0]    # the number of features for items(movies)

        v_features = np.zeros((num_items, num_genres), dtype=np.float32)
        for movie_id, g_vec in zip(movie_df['movie id'].values.tolist(), movie_df[genre_headers].values.tolist()):
            # Check if movie_id was listed in ratings file and therefore in mapping dictionary
            # movie_id is obtained from movie_df(movie_file), which corresponds to the old id in mapping
            # we need to compare it with the key of v_dict and v_dict[movie_id] is the new id for item(movie)
            if movie_id in v_dict.keys():
                v_features[v_dict[movie_id], :] = g_vec
        
        # ----------------------------------------------------------------------------------------------------- # 
        # User features
        sep = r'|'
        users_file = data_dir + files[2]
        users_headers = ['user id', 'age', 'gender', 'occupation', 'zip code']
        users_df = pd.read_csv(users_file, sep=sep, header=None,
                               names=users_headers, engine='python')

        occupation = set(users_df['occupation'].values.tolist())

        gender_dict = {'M': 0., 'F': 1.}
        # i will start from 2 rather than 0 since we set `start` to 2
        occupation_dict = {f: i for i, f in enumerate(occupation, start=2)}

        num_feats = 2 + len(occupation_dict)

        u_features = np.zeros((num_users, num_feats), dtype=np.float32)
        for _, row in users_df.iterrows():
            # the datatype of row is Series
            u_id = row['user id']
            if u_id in u_dict.keys():
                # age
                u_features[u_dict[u_id], 0] = row['age']
                # gender
                u_features[u_dict[u_id], 1] = gender_dict[row['gender']]
                # occupation
                # we use one-hot to encode `occupation`
                u_features[u_dict[u_id], occupation_dict[row['occupation']]] = 1.

        # ----------------------------------------------------------------------------------------------------- # 
        # use sparse matrix to store users' and items' features
        u_features = sp.csr_matrix(u_features)
        v_features = sp.csr_matrix(v_features)

    elif fname == 'ml_1m':

        # Check if files exist and download otherwise
        files = ['/ratings.dat', '/movies.dat', '/users.dat']
        download_dataset(fname, files, data_dir)

        sep = r'\:\:'
        filename = data_dir + files[0]

        dtypes = {
            'u_nodes': np.int64, 'v_nodes': np.int64,
            'ratings': np.float32, 'timestamp': np.float64}

        # use engine='python' to ignore warning about switching to python backend when using regexp for sep
        data = pd.read_csv(filename, sep=sep, header=None,
                           names=['u_nodes', 'v_nodes', 'ratings', 'timestamp'], converters=dtypes, engine='python')

        # shuffle here like cf-nade paper with python's own random class
        # make sure to convert to list, otherwise random.shuffle acts weird on it without a warning
        data_array = data.as_matrix().tolist()
        random.seed(seed)
        random.shuffle(data_array)
        data_array = np.array(data_array)

        u_nodes_ratings = data_array[:, 0].astype(dtypes['u_nodes'])
        v_nodes_ratings = data_array[:, 1].astype(dtypes['v_nodes'])
        ratings = data_array[:, 2].astype(dtypes['ratings'])

        u_nodes_ratings, u_dict, num_users = map_data(u_nodes_ratings)
        v_nodes_ratings, v_dict, num_items = map_data(v_nodes_ratings)

        u_nodes_ratings, v_nodes_ratings = u_nodes_ratings.astype(np.int64), v_nodes_ratings.astype(np.int64)
        ratings = ratings.astype(np.float32)

        # Load movie features
        movies_file = data_dir + files[1]

        movies_headers = ['movie_id', 'title', 'genre']
        movies_df = pd.read_csv(movies_file, sep=sep, header=None,
                                names=movies_headers, engine='python')

        # Extracting all genres
        genres = []
        for s in movies_df['genre'].values:
            genres.extend(s.split('|'))

        genres = list(set(genres))
        num_genres = len(genres)

        genres_dict = {g: idx for idx, g in enumerate(genres)}

        # Creating 0 or 1 valued features for all genres
        v_features = np.zeros((num_items, num_genres), dtype=np.float32)
        for movie_id, s in zip(movies_df['movie_id'].values.tolist(), movies_df['genre'].values.tolist()):
            # Check if movie_id was listed in ratings file and therefore in mapping dictionary
            if movie_id in v_dict.keys():
                gen = s.split('|')
                for g in gen:
                    v_features[v_dict[movie_id], genres_dict[g]] = 1.

        # Load user features
        users_file = data_dir + files[2]
        users_headers = ['user_id', 'gender', 'age', 'occupation', 'zip-code']
        users_df = pd.read_csv(users_file, sep=sep, header=None,
                               names=users_headers, engine='python')

        # Extracting all features
        cols = users_df.columns.values[1:]

        cntr = 0
        feat_dicts = []
        for header in cols:
            d = dict()
            feats = np.unique(users_df[header].values).tolist()
            d.update({f: i for i, f in enumerate(feats, start=cntr)})
            feat_dicts.append(d)
            cntr += len(d)

        num_feats = sum(len(d) for d in feat_dicts)

        u_features = np.zeros((num_users, num_feats), dtype=np.float32)
        for _, row in users_df.iterrows():
            u_id = row['user_id']
            if u_id in u_dict.keys():
                for k, header in enumerate(cols):
                    u_features[u_dict[u_id], feat_dicts[k][row[header]]] = 1.

        u_features = sp.csr_matrix(u_features)
        v_features = sp.csr_matrix(v_features)

    elif fname == 'ml_10m':

        # Check if files exist and download otherwise
        files = ['/ratings.dat']
        download_dataset(fname, files, data_dir)

        sep = r'\:\:'

        filename = data_dir + files[0]

        dtypes = {
            'u_nodes': np.int64, 'v_nodes': np.int64,
            'ratings': np.float32, 'timestamp': np.float64}

        # use engine='python' to ignore warning about switching to python backend when using regexp for sep
        data = pd.read_csv(filename, sep=sep, header=None,
                           names=['u_nodes', 'v_nodes', 'ratings', 'timestamp'], converters=dtypes, engine='python')

        # shuffle here like cf-nade paper with python's own random class
        # make sure to convert to list, otherwise random.shuffle acts weird on it without a warning
        data_array = data.as_matrix().tolist()
        random.seed(seed)
        random.shuffle(data_array)
        data_array = np.array(data_array)

        u_nodes_ratings = data_array[:, 0].astype(dtypes['u_nodes'])
        v_nodes_ratings = data_array[:, 1].astype(dtypes['v_nodes'])
        ratings = data_array[:, 2].astype(dtypes['ratings'])

        u_nodes_ratings, u_dict, num_users = map_data(u_nodes_ratings)
        v_nodes_ratings, v_dict, num_items = map_data(v_nodes_ratings)

        u_nodes_ratings, v_nodes_ratings = u_nodes_ratings.astype(np.int64), v_nodes_ratings.astype(np.int64)
        ratings = ratings.astype(np.float32)

    else:
        raise ValueError('Dataset name not recognized: ' + fname)

    if verbose:
        print('Number of users = %d' % num_users)
        print('Number of items = %d' % num_items)
        print('Number of links = %d' % ratings.shape[0])
        print('Fraction of positive links = %.4f' % (float(ratings.shape[0]) / (num_users * num_items),))

    return num_users, num_items, u_nodes_ratings, v_nodes_ratings, ratings, u_features, v_features

# initializations.py

In [7]:
import tensorflow as tf
import numpy as np

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## weight

In [8]:
def weight_variable_truncated_normal(input_dim, output_dim, name=""):
    """Create a weight variable with truncated normal distribution, values
    that are more than 2 stddev away from the mean are redrawn."""

    initial = tf.truncated_normal([input_dim, output_dim], stddev=0.5)
    return tf.Variable(initial, name=name)


def weight_variable_random_uniform(input_dim, output_dim=None, name=""):
    """Create a weight variable with variables drawn from a
    random uniform distribution. Parameters used are taken from paper by
    Xavier Glorot and Yoshua Bengio:
    http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf"""
    if output_dim is not None:
        init_range = np.sqrt(6.0 / (input_dim + output_dim))
        initial = tf.random_uniform([input_dim, output_dim], minval=-init_range, maxval=init_range, dtype=tf.float32)
    else:
        init_range = np.sqrt(6.0 / input_dim)
        initial = tf.random_uniform([input_dim], minval=-init_range, maxval=init_range, dtype=tf.float32)
    return tf.Variable(initial, name=name)


def weight_variable_random_uniform_relu(input_dim, output_dim, name=""):
    """Create a weight variable with variables drawn from a
    random uniform distribution. Parameters used are taken from paper by
    Xavier Glorot and Yoshua Bengio:
    http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
    and are optimized for ReLU activation function."""

    init_range = np.sqrt(2.0 / (input_dim + output_dim))
    initial = tf.random_uniform([input_dim, output_dim], minval=-init_range, maxval=init_range, dtype=tf.float32)
    return tf.Variable(initial, name=name)

## bias

In [9]:
def bias_variable_truncated_normal(shape, name=""):
    """Create a bias variable with appropriate initialization."""
    initial = tf.truncated_normal(shape, stddev=0.5)
    return tf.Variable(initial, name=name)


def bias_variable_zero(shape, name=""):
    """Create a bias variable initialized as zero."""
    initial = tf.zeros(shape, dtype=tf.float32)
    return tf.Variable(initial, name=name)


def bias_variable_one(shape, name=""):
    """Create a bias variable initialized as ones."""
    initial = tf.ones(shape, dtype=tf.float32)
    return tf.Variable(initial, name=name)


def orthogonal(shape, scale=1.1, name=None):
    """
    From Lasagne. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
    """
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)

    # pick the one with the correct shape
    q = u if u.shape == flat_shape else v
    q = q.reshape(shape)
    return tf.Variable(scale * q[:shape[0], :shape[1]], name=name, dtype=tf.float32)


def bias_variable_const(shape, val, name=""):
    """Create a bias variable initialized as zero."""
    value = tf.to_float(val)
    initial = tf.fill(shape, value, name=name)
    return tf.Variable(initial, name=name)

# layers.py

In [10]:
from __future__ import print_function

from gcmc.initializations import *
import tensorflow as tf

In [11]:
# global unique layer ID dictionary for layer name assignment
_LAYER_UIDS = {}

In [12]:
def dot(x, y, sparse=False):
    """Wrapper for tf.matmul (sparse vs dense)."""
    if sparse:
        res = tf.sparse_tensor_dense_matmul(x, y)
    else:
        res = tf.matmul(x, y)
    return res

In [13]:
def get_layer_uid(layer_name=''):
    """Helper function, assigns unique layer IDs
    """
    if layer_name not in _LAYER_UIDS:
        _LAYER_UIDS[layer_name] = 1
        return 1
    else:
        _LAYER_UIDS[layer_name] += 1
        return _LAYER_UIDS[layer_name]

In [14]:
def dropout_sparse(x, keep_prob, num_nonzero_elems):
    """Dropout for sparse tensors. Currently fails for very large sparse tensors (>1M elements)
    """
    noise_shape = [num_nonzero_elems]
    random_tensor = keep_prob
    random_tensor += tf.random_uniform(noise_shape)
    dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool)
    pre_out = tf.sparse_retain(x, dropout_mask)

    return pre_out * tf.div(1., keep_prob)

**Note:**
 
+ In Python, the single-asterisk form of ***args** can be used as a parameter to send **a non-keyworded variable-length argument list** to functions.
+ The double asterisk form of ****kwargs** is used to pass **a keyworded, variable-length argument dictionary** to a function.
+ When ordering arguments within a function or function call, arguments need to occur in a particular order:
    + Formal positional arguments
    + *args
    + Keyword arguments
    + **kwargs
    
    def example2(arg_1, arg_2, \*args, kw_1="shark", kw_2="blobfish", **kwargs):

In [15]:
class Layer(object):
    """Base layer class. Defines basic API for all layer objects.
    # Properties
        name: String, defines the variable scope of the layer.
            Layers with common name share variables. (TODO)
        logging: Boolean, switches Tensorflow histogram logging on/off
    # Methods
        _call(inputs): Defines computation graph of layer
            (i.e. takes input, returns output)
        __call__(inputs): Wrapper for _call()
        _log_vars(): Log all variables
    """

    def __init__(self, **kwargs):
        # **kwargs will pass a keyword, variable-length argument dictionary
        allowed_kwargs = {'name', 'logging'}
        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
        name = kwargs.get('name')
        if not name:
            layer = self.__class__.__name__.lower()
            name = layer + '_' + str(get_layer_uid(layer))
        self.name = name
        self.vars = {}
        # if "logging" doesn't exist in kwargs, logging will be False
        logging = kwargs.get('logging', False)
        self.logging = logging
        self.sparse_inputs = False

    def _call(self, inputs):
        return inputs

    def __call__(self, inputs):
        with tf.name_scope(self.name):
            if self.logging and not self.sparse_inputs:
                tf.summary.histogram(self.name + '/inputs', inputs)
            outputs = self._call(inputs)
            if self.logging:
                tf.summary.histogram(self.name + '/outputs', outputs)
            return outputs

    def _log_vars(self):
        for var in self.vars:
            tf.summary.histogram(self.name + '/vars/' + var, self.vars[var])

## Graph Convolution Layer

In [16]:
class StackGCN(Layer):
    """Graph convolution layer for bipartite graphs and sparse inputs."""

    def __init__(self, 
                 input_dim, output_dim, 
                 support, support_t, num_support, 
                 u_features_nonzero=None, v_features_nonzero=None, 
                 sparse_inputs=False, dropout=0.,
                 act=tf.nn.relu, 
                 share_user_item_weights=True, 
                 **kwargs):
        
        # Python 2.7
        # super(StackGCN, self).__init__(**kwargs)
        # Python 3.6
        super().__init__(**kwargs)
        
        assert output_dim % num_support == 0, 'output_dim must be multiple of num_support for stackGC layer'

        with tf.variable_scope(self.name + '_vars'):
            self.vars['weights_u'] = weight_variable_random_uniform(input_dim, output_dim, name='weights_u')

            if not share_user_item_weights:
                self.vars['weights_v'] = weight_variable_random_uniform(input_dim, output_dim, name='weights_v')

            else:
                self.vars['weights_v'] = self.vars['weights_u']

        self.weights_u = tf.split(value=self.vars['weights_u'], axis=1, num_or_size_splits=num_support)
        self.weights_v = tf.split(value=self.vars['weights_v'], axis=1, num_or_size_splits=num_support)

        self.dropout = dropout

        self.sparse_inputs = sparse_inputs
        self.u_features_nonzero = u_features_nonzero
        self.v_features_nonzero = v_features_nonzero
        if sparse_inputs:
            assert u_features_nonzero is not None and v_features_nonzero is not None, \
                'u_features_nonzero and v_features_nonzero can not be None when sparse_inputs is True'

        # support should be the adjacency matrix, which is stored in the form of sparse matrix
        self.support = tf.sparse_split(axis=1, num_split=num_support, sp_input=support)
        self.support_transpose = tf.sparse_split(axis=1, num_split=num_support, sp_input=support_t)

        # activation function (relu, etc.)
        self.act = act

        if self.logging:
            self._log_vars()

    def _call(self, inputs):
        x_u = inputs[0]
        x_v = inputs[1]

        if self.sparse_inputs:
            # dropout_sparse is a function defined for sparse tensors
            x_u = dropout_sparse(x_u, 1 - self.dropout, self.u_features_nonzero)
            x_v = dropout_sparse(x_v, 1 - self.dropout, self.v_features_nonzero)
        else:
            x_u = tf.nn.dropout(x_u, 1 - self.dropout)
            x_v = tf.nn.dropout(x_v, 1 - self.dropout)

        supports_u = []
        supports_v = []

        # self.support has been splited along the axis 1
        for i in range(len(self.support)):
            tmp_u = dot(x_u, self.weights_u[i], sparse=self.sparse_inputs)
            tmp_v = dot(x_v, self.weights_v[i], sparse=self.sparse_inputs)

            support = self.support[i]
            support_transpose = self.support_transpose[i]

            supports_u.append(tf.sparse_tensor_dense_matmul(support, tmp_v))
            supports_v.append(tf.sparse_tensor_dense_matmul(support_transpose, tmp_u))

        z_u = tf.concat(axis=1, values=supports_u)
        z_v = tf.concat(axis=1, values=supports_v)

        u_outputs = self.act(z_u)
        v_outputs = self.act(z_v)

        return u_outputs, v_outputs

    def __call__(self, inputs):
        with tf.name_scope(self.name):
            if self.logging and not self.sparse_inputs:
                tf.summary.histogram(self.name + '/inputs_u', inputs[0])
                tf.summary.histogram(self.name + '/inputs_v', inputs[1])
            outputs_u, outputs_v = self._call(inputs)
            if self.logging:
                tf.summary.histogram(self.name + '/outputs_u', outputs_u)
                tf.summary.histogram(self.name + '/outputs_v', outputs_v)
            return outputs_u, outputs_v

In [17]:
class OrdinalMixtureGCN(Layer):

    """Graph convolution layer for bipartite graphs and sparse inputs."""

    def __init__(self, input_dim, output_dim, 
                 support, support_t, num_support, 
                 u_features_nonzero=None, v_features_nonzero=None, 
                 sparse_inputs=False, dropout=0.,
                 act=tf.nn.relu, bias=False, 
                 share_user_item_weights=False, 
                 self_connections=False, **kwargs):
        
        # usage in Python 2
        # super(OrdinalMixtureGCN, self).__init__(**kwargs)
        # Python 3
        super().__init__(**kwargs)

        with tf.variable_scope(self.name + '_vars'):

            self.vars['weights_u'] = tf.stack([weight_variable_random_uniform(input_dim, output_dim,
                                                                             name='weights_u_%d' % i)
                                              for i in range(num_support)], axis=0)

            if bias:
                self.vars['bias_u'] = bias_variable_const([output_dim], 0.01, name="bias_u")

            if not share_user_item_weights:
                self.vars['weights_v'] = tf.stack([weight_variable_random_uniform(input_dim, output_dim,
                                                                                 name='weights_v_%d' % i)
                                                  for i in range(num_support)], axis=0)

                if bias:
                    self.vars['bias_v'] = bias_variable_const([output_dim], 0.01, name="bias_v")

            else:
                self.vars['weights_v'] = self.vars['weights_u']
                if bias:
                    self.vars['bias_v'] = self.vars['bias_u']

        self.weights_u = self.vars['weights_u']
        self.weights_v = self.vars['weights_v']

        self.dropout = dropout

        self.sparse_inputs = sparse_inputs
        self.u_features_nonzero = u_features_nonzero
        self.v_features_nonzero = v_features_nonzero
        if sparse_inputs:
            assert u_features_nonzero is not None and v_features_nonzero is not None, \
                'u_features_nonzero and v_features_nonzero can not be None when sparse_inputs is True'

        self.self_connections = self_connections

        self.bias = bias
        support = tf.sparse_split(axis=1, num_split=num_support, sp_input=support)

        support_t = tf.sparse_split(axis=1, num_split=num_support, sp_input=support_t)

        if self_connections:
            self.support = support[:-1]
            self.support_transpose = support_t[:-1]
            self.u_self_connections = support[-1]
            self.v_self_connections = support_t[-1]
            self.weights_u = self.weights_u[:-1]
            self.weights_v = self.weights_v[:-1]
            self.weights_u_self_conn = self.weights_u[-1]
            self.weights_v_self_conn = self.weights_v[-1]

        else:
            self.support = support
            self.support_transpose = support_t
            self.u_self_connections = None
            self.v_self_connections = None
            self.weights_u_self_conn = None
            self.weights_v_self_conn = None

        self.support_nnz = []
        self.support_transpose_nnz = []
        for i in range(len(self.support)):
            nnz = tf.reduce_sum(tf.shape(self.support[i].values))
            self.support_nnz.append(nnz)
            self.support_transpose_nnz.append(nnz)

        self.act = act

        if self.logging:
            self._log_vars()

    def _call(self, inputs):

        if self.sparse_inputs:
            x_u = dropout_sparse(inputs[0], 1 - self.dropout, self.u_features_nonzero)
            x_v = dropout_sparse(inputs[1], 1 - self.dropout, self.v_features_nonzero)
        else:
            x_u = tf.nn.dropout(inputs[0], 1 - self.dropout)
            x_v = tf.nn.dropout(inputs[1], 1 - self.dropout)

        supports_u = []
        supports_v = []

        # self-connections with identity matrix as support
        if self.self_connections:
            uw = dot(x_u, self.weights_u_self_conn, sparse=self.sparse_inputs)
            supports_u.append(tf.sparse_tensor_dense_matmul(self.u_self_connections, uw))

            vw = dot(x_v, self.weights_v_self_conn, sparse=self.sparse_inputs)
            supports_v.append(tf.sparse_tensor_dense_matmul(self.v_self_connections, vw))

        wu = 0.
        wv = 0.
        for i in range(len(self.support)):
            wu += self.weights_u[i]
            wv += self.weights_v[i]

            # multiply feature matrices with weights
            tmp_u = dot(x_u, wu, sparse=self.sparse_inputs)

            tmp_v = dot(x_v, wv, sparse=self.sparse_inputs)

            support = self.support[i]
            support_transpose = self.support_transpose[i]

            # then multiply with rating matrices
            supports_u.append(tf.sparse_tensor_dense_matmul(support, tmp_v))
            supports_v.append(tf.sparse_tensor_dense_matmul(support_transpose, tmp_u))

        z_u = tf.add_n(supports_u)
        z_v = tf.add_n(supports_v)

        if self.bias:
            z_u = tf.nn.bias_add(z_u, self.vars['bias_u'])
            z_v = tf.nn.bias_add(z_v, self.vars['bias_v'])

        u_outputs = self.act(z_u)
        v_outputs = self.act(z_v)

        return u_outputs, v_outputs

    def __call__(self, inputs):
        with tf.name_scope(self.name):
            if self.logging and not self.sparse_inputs:
                tf.summary.histogram(self.name + '/inputs_u', inputs[0])
                tf.summary.histogram(self.name + '/inputs_v', inputs[1])
            outputs_u, outputs_v = self._call(inputs)
            if self.logging:
                tf.summary.histogram(self.name + '/outputs_u', outputs_u)
                tf.summary.histogram(self.name + '/outputs_v', outputs_v)
            return outputs_u, outputs_v

## Dense Layer

In [18]:
class Dense(Layer):
    """Dense layer for two types of nodes in a bipartite graph. """

    def __init__(self, 
                 input_dim, output_dim, 
                 dropout=0., 
                 act=tf.nn.relu, 
                 share_user_item_weights=False,
                 bias=False, 
                 **kwargs):

        # super(Dense, self).__init__(**kwargs)    Python 2.7
        # in Python 3.7
        super().__init__(**kwargs)

        with tf.variable_scope(self.name + '_vars'):
            if not share_user_item_weights:
                # without weight sharing 
                self.vars['weights_u'] = weight_variable_random_uniform(input_dim, output_dim, name="weights_u")
                self.vars['weights_v'] = weight_variable_random_uniform(input_dim, output_dim, name="weights_v")

                if bias:
                    self.vars['user_bias'] = bias_variable_truncated_normal([output_dim], name="bias_u")
                    self.vars['item_bias'] = bias_variable_truncated_normal([output_dim], name="bias_v")


            else:
                self.vars['weights_u'] = weight_variable_random_uniform(input_dim, output_dim, name="weights")
                self.vars['weights_v'] = self.vars['weights_u']

                if bias:
                    self.vars['user_bias'] = bias_variable_truncated_normal([output_dim], name="bias_u")
                    self.vars['item_bias'] = self.vars['user_bias']

        self.bias = bias

        self.dropout = dropout
        self.act = act    # activation function 
        if self.logging:
            self._log_vars()

    def _call(self, inputs):
        x_u = inputs[0]
        x_u = tf.nn.dropout(x_u, 1 - self.dropout)
        x_u = tf.matmul(x_u, self.vars['weights_u'])

        x_v = inputs[1]
        x_v = tf.nn.dropout(x_v, 1 - self.dropout)
        x_v = tf.matmul(x_v, self.vars['weights_v'])

        u_outputs = self.act(x_u)
        v_outputs = self.act(x_v)

        if self.bias:
            u_outputs += self.vars['user_bias']
            v_outputs += self.vars['item_bias']

        return u_outputs, v_outputs

    def __call__(self, inputs):
        with tf.name_scope(self.name):
            if self.logging:
                tf.summary.histogram(self.name + '/inputs_u', inputs[0])
                tf.summary.histogram(self.name + '/inputs_v', inputs[1])
                
            outputs_u, outputs_v = self._call(inputs)
            
            if self.logging:
                tf.summary.histogram(self.name + '/outputs_u', outputs_u)
                tf.summary.histogram(self.name + '/outputs_v', outputs_v)
            return outputs_u, outputs_v

## BilinearMixture (Decode)

In [19]:
class BilinearMixture(Layer):
    """
    Decoder model layer for link-prediction with ratings
    To use in combination with bipartite layers.
    """

    def __init__(self, num_classes, 
                 u_indices, v_indices, 
                 input_dim, 
                 num_users, num_items, 
                 user_item_bias=False,
                 dropout=0., act=tf.nn.softmax, num_weights=3,
                 diagonal=True, **kwargs):
        
        # usage in Python 2
        # super(BilinearMixture, self).__init__(**kwargs)
        # Python 3.6
        super().__init__(**kwargs)
        
        with tf.variable_scope(self.name + '_vars'):

            for i in range(num_weights):
                if diagonal:
                    #  Diagonal weight matrices for each class stored as vectors
                    self.vars['weights_%d' % i] = weight_variable_random_uniform(1, input_dim, name='weights_%d' % i)

                else:
                    self.vars['weights_%d' % i] = orthogonal([input_dim, input_dim], name='weights_%d' % i)

            # self.var["weights_scalars"] is the matrix composed of a_{rs} noted in (8)
            self.vars['weights_scalars'] = weight_variable_random_uniform(num_weights, num_classes,
                                                                          name='weights_u_scalars')

            if user_item_bias:
                self.vars['user_bias'] = bias_variable_zero([num_users, num_classes], name='user_bias')
                self.vars['item_bias'] = bias_variable_zero([num_items, num_classes], name='item_bias')

        self.user_item_bias = user_item_bias

        if diagonal:
            self._multiply_inputs_weights = tf.multiply
        else:
            self._multiply_inputs_weights = tf.matmul

        self.num_classes = num_classes
        self.num_weights = num_weights
        self.u_indices = u_indices
        self.v_indices = v_indices

        self.dropout = dropout
        self.act = act
        if self.logging:
            self._log_vars()

    def _call(self, inputs):

        u_inputs = tf.nn.dropout(inputs[0], 1 - self.dropout)
        v_inputs = tf.nn.dropout(inputs[1], 1 - self.dropout)

        u_inputs = tf.gather(u_inputs, self.u_indices)
        v_inputs = tf.gather(v_inputs, self.v_indices)

        if self.user_item_bias:
            u_bias = tf.gather(self.vars['user_bias'], self.u_indices)
            v_bias = tf.gather(self.vars['item_bias'], self.v_indices)
        else:
            u_bias = None
            v_bias = None

        basis_outputs = []
        # calculate for each rating level
        for i in range(self.num_weights):

            u_w = self._multiply_inputs_weights(u_inputs, self.vars['weights_%d' % i])
            x = tf.reduce_sum(tf.multiply(u_w, v_inputs), axis=1)

            basis_outputs.append(x)

        # Store outputs in (Nu x Nv) x num_classes tensor and apply activation function
        basis_outputs = tf.stack(basis_outputs, axis=1)

        outputs = tf.matmul(basis_outputs,  self.vars['weights_scalars'], transpose_b=False)

        if self.user_item_bias:
            outputs += u_bias
            outputs += v_bias

        outputs = self.act(outputs)

        return outputs

    def __call__(self, inputs):
        with tf.name_scope(self.name):
            if self.logging and not self.sparse_inputs:
                tf.summary.histogram(self.name + '/inputs_u', inputs[0])
                tf.summary.histogram(self.name + '/inputs_v', inputs[1])

            outputs = self._call(inputs)
            if self.logging:
                tf.summary.histogram(self.name + '/outputs', outputs)
            return outputs

# metrics.py

In [20]:
import tensorflow as tf

In [21]:
def softmax_accuracy(preds, labels):
    """
    Accuracy for multiclass model.
    :param preds: predictions
    :param labels: ground truth labelt
    :return: average accuracy
    """
    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.to_int64(labels))
    accuracy_all = tf.cast(correct_prediction, tf.float32)
    return tf.reduce_mean(accuracy_all)

In [22]:
def expected_rmse(logits, labels, class_values=None):
    """
    Computes the root mean square error with the predictions
    computed as average predictions. Note that without the average
    this cannot be used as a loss function as it would not be differentiable.
    :param logits: predicted logits
    :param labels: ground truth label
    :param class_values: rating values corresponding to each class.
    :return: rmse
    """

    probs = tf.nn.softmax(logits)
    if class_values is None:
        scores = tf.to_float(tf.range(start=0, limit=logits.get_shape()[1]) + 1)
        y = tf.to_float(labels) + 1.  # assumes class values are 1, ..., num_classes
    else:
        scores = class_values
        y = tf.gather(class_values, labels)

    pred_y = tf.reduce_sum(probs * scores, 1)

    diff = tf.subtract(y, pred_y)
    exp_rmse = tf.square(diff)
    exp_rmse = tf.cast(exp_rmse, dtype=tf.float32)

    return tf.sqrt(tf.reduce_mean(exp_rmse))

In [23]:
def rmse(logits, labels, class_values=None):
    """
    Computes the mean square error with the predictions
    computed as average predictions. Note that without the average
    this cannot be used as a loss function as it would not be differentiable.
    :param logits: predicted logits
    :param labels: ground truth labels for the ratings, 1-D array containing 0-num_classes-1 ratings
    :param class_values: rating values corresponding to each class.
    :return: mse
    """

    if class_values is None:
        y = tf.to_float(labels) + 1.  # assumes class values are 1, ..., num_classes
    else:
        y = tf.gather(class_values, labels)

    pred_y = logits

    diff = tf.subtract(y, pred_y)
    mse = tf.square(diff)
    mse = tf.cast(mse, dtype=tf.float32)

    return tf.sqrt(tf.reduce_mean(mse))

In [24]:
def softmax_cross_entropy(outputs, labels):
    """ computes average softmax cross entropy """

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=outputs, labels=labels)
    return tf.reduce_mean(loss)

# model.py

In [25]:
from __future__ import print_function
from gcmc.layers import *

from gcmc.metrics import softmax_accuracy, expected_rmse, softmax_cross_entropy

In [26]:
flags = tf.app.flags
FLAGS = flags.FLAGS

## Model

In [27]:
class Model(object):
    def __init__(self, **kwargs):
        allowed_kwargs = {'name', 'logging'}
        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg

        for kwarg in kwargs.keys():
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
        name = kwargs.get('name')
        if not name:
            name = self.__class__.__name__.lower()
        self.name = name

        logging = kwargs.get('logging', False)
        self.logging = logging

        self.vars = {}
        self.placeholders = {}

        self.layers = []
        self.activations = []

        self.inputs = None
        self.outputs = None

        self.loss = 0
        self.accuracy = 0
        self.optimizer = None
        self.opt_op = None
        self.global_step = tf.Variable(0, trainable=False)

    def _build(self):
        raise NotImplementedError

    def build(self):
        """ Wrapper for _build() """
        with tf.variable_scope(self.name):
            self._build()

        # Build sequential layer model
        self.activations.append(self.inputs)
        for layer in self.layers:
            hidden = layer(self.activations[-1])
            self.activations.append(hidden)
        self.outputs = self.activations[-1]

        # Store model variables for easy access
        variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
        self.vars = {var.name: var for var in variables}

        # Build metrics
        self._loss()
        self._accuracy()

        self.opt_op = self.optimizer.minimize(self.loss, global_step=self.global_step)

    def predict(self):
        pass

    def _loss(self):
        raise NotImplementedError

    def _accuracy(self):
        raise NotImplementedError

    def save(self, sess=None):
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        saver = tf.train.Saver(self.vars)
        save_path = saver.save(sess, "tmp/%s.ckpt" % self.name)
        print("Model saved in file: %s" % save_path)

    def load(self, sess=None):
        if not sess:
            raise AttributeError("TensorFlow session not provided.")
        saver = tf.train.Saver(self.vars)
        save_path = "tmp/%s.ckpt" % self.name
        saver.restore(sess, save_path)
        print("Model restored from file: %s" % save_path)

## RecommenderGAE

In [28]:
class RecommenderGAE(Model):
    def __init__(self, 
                 placeholders, 
                 input_dim, 
                 num_classes, 
                 num_support,
                 learning_rate, 
                 num_basis_functions, 
                 hidden, 
                 num_users, num_items, 
                 accum,
                 self_connections=False, 
                 **kwargs):
        
        super().__init__(**kwargs)

        self.inputs = (placeholders['u_features'], placeholders['v_features'])
        self.u_features_nonzero = placeholders['u_features_nonzero']
        self.v_features_nonzero = placeholders['v_features_nonzero']
        self.support = placeholders['support']
        self.support_t = placeholders['support_t']
        self.dropout = placeholders['dropout']
        self.labels = placeholders['labels']
        self.u_indices = placeholders['user_indices']
        self.v_indices = placeholders['item_indices']
        self.class_values = placeholders['class_values']

        self.hidden = hidden
        self.num_basis_functions = num_basis_functions
        self.num_classes = num_classes
        self.num_support = num_support
        self.input_dim = input_dim
        self.self_connections = self_connections
        self.num_users = num_users
        self.num_items = num_items
        self.accum = accum
        self.learning_rate = learning_rate

        # standard settings: beta1=0.9, beta2=0.999, epsilon=1.e-8
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999, epsilon=1.e-8)

        self.build()

        moving_average_decay = 0.995
        self.variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, self.global_step)
        self.variables_averages_op = self.variable_averages.apply(tf.trainable_variables())

        with tf.control_dependencies([self.opt_op]):
            self.training_op = tf.group(self.variables_averages_op)

        self.embeddings = self.activations[2]

        self._rmse()

    def _loss(self):
        self.loss += softmax_cross_entropy(self.outputs, self.labels)

        tf.summary.scalar('loss', self.loss)

    def _accuracy(self):
        self.accuracy = softmax_accuracy(self.outputs, self.labels)

    def _rmse(self):
        self.rmse = expected_rmse(self.outputs, self.labels, self.class_values)

        tf.summary.scalar('rmse_score', self.rmse)

    def _build(self):
        if self.accum == 'sum':
            self.layers.append(OrdinalMixtureGCN(input_dim=self.input_dim,
                                                 output_dim=self.hidden[0],
                                                 support=self.support,
                                                 support_t=self.support_t,
                                                 num_support=self.num_support,
                                                 u_features_nonzero=self.u_features_nonzero,
                                                 v_features_nonzero=self.v_features_nonzero,
                                                 sparse_inputs=True,
                                                 act=tf.nn.relu,
                                                 bias=False,
                                                 dropout=self.dropout,
                                                 logging=self.logging,
                                                 share_user_item_weights=True,
                                                 self_connections=False))

        elif self.accum == 'stack':
            self.layers.append(StackGCN(input_dim=self.input_dim,
                                        output_dim=self.hidden[0],
                                        support=self.support,
                                        support_t=self.support_t,
                                        num_support=self.num_support,
                                        u_features_nonzero=self.u_features_nonzero,
                                        v_features_nonzero=self.v_features_nonzero,
                                        sparse_inputs=True,
                                        act=tf.nn.relu,
                                        dropout=self.dropout,
                                        logging=self.logging,
                                        share_user_item_weights=True))
        else:
            raise ValueError('accumulation function option invalid, can only be stack or sum.')

        self.layers.append(Dense(input_dim=self.hidden[0],
                                 output_dim=self.hidden[1],
                                 act=lambda x: x,
                                 dropout=self.dropout,
                                 logging=self.logging,
                                 share_user_item_weights=True))

        self.layers.append(BilinearMixture(num_classes=self.num_classes,
                                           u_indices=self.u_indices,
                                           v_indices=self.v_indices,
                                           input_dim=self.hidden[1],
                                           num_users=self.num_users,
                                           num_items=self.num_items,
                                           user_item_bias=False,
                                           dropout=0.,
                                           act=lambda x: x,
                                           num_weights=self.num_basis_functions,
                                           logging=self.logging,
                                           diagonal=False))

## RecommenderSideInfoGAE

In [29]:
class RecommenderSideInfoGAE(Model):
    def __init__(self,  placeholders, input_dim, feat_hidden_dim, num_classes, num_support,
                 learning_rate, num_basis_functions, hidden, num_users, num_items, accum,
                 num_side_features, self_connections=False, **kwargs):
        # super(RecommenderSideInfoGAE, self).__init__(**kwargs)
        super().__init__(**kwargs)

        self.inputs = (placeholders['u_features'], placeholders['v_features'])
        self.u_features_side = placeholders['u_features_side']
        self.v_features_side = placeholders['v_features_side']

        self.u_features_nonzero = placeholders['u_features_nonzero']
        self.v_features_nonzero = placeholders['v_features_nonzero']
        self.support = placeholders['support']
        self.support_t = placeholders['support_t']
        self.dropout = placeholders['dropout']
        self.labels = placeholders['labels']
        self.u_indices = placeholders['user_indices']
        self.v_indices = placeholders['item_indices']
        self.class_values = placeholders['class_values']

        self.num_side_features = num_side_features
        self.feat_hidden_dim = feat_hidden_dim
        if num_side_features > 0:
            self.u_features_side = placeholders['u_features_side']
            self.v_features_side = placeholders['v_features_side']

        else:
            self.u_features_side = None
            self.v_features_side = None

        self.hidden = hidden
        self.num_basis_functions = num_basis_functions
        self.num_classes = num_classes
        self.num_support = num_support
        self.input_dim = input_dim
        self.self_connections = self_connections
        self.num_users = num_users
        self.num_items = num_items
        self.accum = accum
        self.learning_rate = learning_rate

        # standard settings: beta1=0.9, beta2=0.999, epsilon=1.e-8
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999, epsilon=1.e-8)

        self.build()

        moving_average_decay = 0.995
        self.variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, self.global_step)
        self.variables_averages_op = self.variable_averages.apply(tf.trainable_variables())

        with tf.control_dependencies([self.opt_op]):
            self.training_op = tf.group(self.variables_averages_op)

        self.embeddings = self.activations[0]

        self._rmse()

    def _loss(self):
        self.loss += softmax_cross_entropy(self.outputs, self.labels)

        tf.summary.scalar('loss', self.loss)

    def _accuracy(self):
        self.accuracy = softmax_accuracy(self.outputs, self.labels)

    def _rmse(self):
        self.rmse = expected_rmse(self.outputs, self.labels, self.class_values)

        tf.summary.scalar('rmse_score', self.rmse)

    def _build(self):
        if self.accum == 'sum':
            self.layers.append(OrdinalMixtureGCN(input_dim=self.input_dim,
                                                 output_dim=self.hidden[0],
                                                 support=self.support,
                                                 support_t=self.support_t,
                                                 num_support=self.num_support,
                                                 u_features_nonzero=self.u_features_nonzero,
                                                 v_features_nonzero=self.v_features_nonzero,
                                                 sparse_inputs=True,
                                                 act=tf.nn.relu,
                                                 bias=False,
                                                 dropout=self.dropout,
                                                 logging=self.logging,
                                                 share_user_item_weights=True,
                                                 self_connections=self.self_connections))

        elif self.accum == 'stack':
            self.layers.append(StackGCN(input_dim=self.input_dim,
                                        output_dim=self.hidden[0],
                                        support=self.support,
                                        support_t=self.support_t,
                                        num_support=self.num_support,
                                        u_features_nonzero=self.u_features_nonzero,
                                        v_features_nonzero=self.v_features_nonzero,
                                        sparse_inputs=True,
                                        act=tf.nn.relu,
                                        dropout=self.dropout,
                                        logging=self.logging,
                                        share_user_item_weights=True))

        else:
            raise ValueError('accumulation function option invalid, can only be stack or sum.')

        self.layers.append(Dense(input_dim=self.num_side_features,
                                 output_dim=self.feat_hidden_dim,
                                 act=tf.nn.relu,
                                 dropout=0.,
                                 logging=self.logging,
                                 bias=True,
                                 share_user_item_weights=False))

        self.layers.append(Dense(input_dim=self.hidden[0]+self.feat_hidden_dim,
                                 output_dim=self.hidden[1],
                                 act=lambda x: x,
                                 dropout=self.dropout,
                                 logging=self.logging,
                                 share_user_item_weights=False))

        self.layers.append(BilinearMixture(num_classes=self.num_classes,
                                           u_indices=self.u_indices,
                                           v_indices=self.v_indices,
                                           input_dim=self.hidden[1],
                                           num_users=self.num_users,
                                           num_items=self.num_items,
                                           user_item_bias=False,
                                           dropout=0.,
                                           act=lambda x: x,
                                           num_weights=self.num_basis_functions,
                                           logging=self.logging,
                                           diagonal=False))

    def build(self):
        """ Wrapper for _build() """
        with tf.variable_scope(self.name):
            self._build()

        # Build split sequential layer model

        # gcn layer
        layer = self.layers[0]
        gcn_hidden = layer(self.inputs)

        # dense layer for features
        layer = self.layers[1]
        feat_hidden = layer([self.u_features_side, self.v_features_side])

        # concat dense layer
        layer = self.layers[2]

        gcn_u = gcn_hidden[0]
        gcn_v = gcn_hidden[1]
        feat_u = feat_hidden[0]
        feat_v = feat_hidden[1]

        input_u = tf.concat(values=[gcn_u, feat_u], axis=1)
        input_v = tf.concat(values=[gcn_v, feat_v], axis=1)

        concat_hidden = layer([input_u, input_v])

        self.activations.append(concat_hidden)

        # Build sequential layer model
        for layer in self.layers[3::]:
            hidden = layer(self.activations[-1])
            self.activations.append(hidden)
        self.outputs = self.activations[-1]

        self.outputs = self.activations[-1]

        # Store model variables for easy access
        variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
        self.vars = {var.name: var for var in variables}

        # Build metrics
        self._loss()
        self._accuracy()

        self.opt_op = self.optimizer.minimize(self.loss, global_step=self.global_step)

# preprocessing.py

In [30]:
from __future__ import division
from __future__ import print_function

import numpy as np
import scipy.sparse as sp
import _pickle as pkl
import os
import h5py
import pandas as pd


from gcmc.data_utils import load_data, map_data, download_dataset

In [31]:
def normalize_features(feat):

    degree = np.asarray(feat.sum(1)).flatten()

    # set zeros to inf to avoid dividing by zero
    degree[degree == 0.] = np.inf

    degree_inv = 1. / degree
    degree_inv_mat = sp.diags([degree_inv], [0])
    feat_norm = degree_inv_mat.dot(feat)

    if feat_norm.nnz == 0:
        print('ERROR: normalized adjacency matrix has only zero entries!!!!!')
        exit

    return feat_norm


def load_matlab_file(path_file, name_field):
    """
    load '.mat' files
    inputs:
        path_file, string containing the file path
        name_field, string containig the field name (default='shape')
    warning:
        '.mat' files should be saved in the '-v7.3' format
    """
    db = h5py.File(path_file, 'r')
    ds = db[name_field]
    try:
        if 'ir' in ds.keys():
            data = np.asarray(ds['data'])
            ir = np.asarray(ds['ir'])
            jc = np.asarray(ds['jc'])
            out = sp.csc_matrix((data, ir, jc)).astype(np.float32)
    except AttributeError:
        # Transpose in case is a dense matrix because of the row- vs column- major ordering between python and matlab
        out = np.asarray(ds).astype(np.float32).T

    db.close()

    return out


def preprocess_user_item_features(u_features, v_features):
    """
    Creates one big feature matrix out of user features and item features.
    Stacks item features under the user features.
    """

    zero_csr_u = sp.csr_matrix((u_features.shape[0], v_features.shape[1]), dtype=u_features.dtype)
    zero_csr_v = sp.csr_matrix((v_features.shape[0], u_features.shape[1]), dtype=v_features.dtype)

    u_features = sp.hstack([u_features, zero_csr_u], format='csr')
    v_features = sp.hstack([zero_csr_v, v_features], format='csr')

    return u_features, v_features


def globally_normalize_bipartite_adjacency(adjacencies, verbose=False, symmetric=True):
    """ Globally Normalizes set of bipartite adjacency matrices """

    if verbose:
        print('Symmetrically normalizing bipartite adj')
    # degree_u and degree_v are row and column sums of adj+I

    adj_tot = np.sum(adj for adj in adjacencies)
    degree_u = np.asarray(adj_tot.sum(1)).flatten()
    degree_v = np.asarray(adj_tot.sum(0)).flatten()

    # set zeros to inf to avoid dividing by zero
    degree_u[degree_u == 0.] = np.inf
    degree_v[degree_v == 0.] = np.inf

    degree_u_inv_sqrt = 1. / np.sqrt(degree_u)
    degree_v_inv_sqrt = 1. / np.sqrt(degree_v)
    degree_u_inv_sqrt_mat = sp.diags([degree_u_inv_sqrt], [0])
    degree_v_inv_sqrt_mat = sp.diags([degree_v_inv_sqrt], [0])

    degree_u_inv = degree_u_inv_sqrt_mat.dot(degree_u_inv_sqrt_mat)

    if symmetric:
        adj_norm = [degree_u_inv_sqrt_mat.dot(adj).dot(degree_v_inv_sqrt_mat) for adj in adjacencies]

    else:
        adj_norm = [degree_u_inv.dot(adj) for adj in adjacencies]

    return adj_norm


def sparse_to_tuple(sparse_mx):
    """ change of format for sparse matrix. This format is used
    for the feed_dict where sparse matrices need to be linked to placeholders
    representing sparse matrices. """

    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    return coords, values, shape


def create_trainvaltest_split(dataset, seed=1234, testing=False, datasplit_path=None, datasplit_from_file=False,
                              verbose=True):
    """
    Splits data set into train/val/test sets from full bipartite adjacency matrix. Shuffling of dataset is done in
    load_data function.
    For each split computes 1-of-num_classes labels. Also computes training
    adjacency matrix.
    """

    if datasplit_from_file and os.path.isfile(datasplit_path):
        print('Reading dataset splits from file...')
        with open(datasplit_path) as f:
            num_users, num_items, u_nodes, v_nodes, ratings, u_features, v_features = pkl.load(f)

        if verbose:
            print('Number of users = %d' % num_users)
            print('Number of items = %d' % num_items)
            print('Number of links = %d' % ratings.shape[0])
            print('Fraction of positive links = %.4f' % (float(ratings.shape[0]) / (num_users * num_items),))

    else:
        num_users, num_items, u_nodes, v_nodes, ratings, u_features, v_features = load_data(dataset, seed=seed,
                                                                                            verbose=verbose)

        with open(datasplit_path, 'w') as f:
            pkl.dump([num_users, num_items, u_nodes, v_nodes, ratings, u_features, v_features], f)

    neutral_rating = -1

    rating_dict = {r: i for i, r in enumerate(np.sort(np.unique(ratings)).tolist())}

    labels = np.full((num_users, num_items), neutral_rating, dtype=np.int32)
    labels[u_nodes, v_nodes] = np.array([rating_dict[r] for r in ratings])
    labels = labels.reshape([-1])

    # number of test and validation edges
    num_test = int(np.ceil(ratings.shape[0] * 0.1))
    if dataset == 'ml_100k':
        num_val = int(np.ceil(ratings.shape[0] * 0.9 * 0.05))
    else:
        num_val = int(np.ceil(ratings.shape[0] * 0.9 * 0.05))

    num_train = ratings.shape[0] - num_val - num_test

    pairs_nonzero = np.array([[u, v] for u, v in zip(u_nodes, v_nodes)])

    idx_nonzero = np.array([u * num_items + v for u, v in pairs_nonzero])

    train_idx = idx_nonzero[0:num_train]
    val_idx = idx_nonzero[num_train:num_train + num_val]
    test_idx = idx_nonzero[num_train + num_val:]

    train_pairs_idx = pairs_nonzero[0:num_train]
    val_pairs_idx = pairs_nonzero[num_train:num_train + num_val]
    test_pairs_idx = pairs_nonzero[num_train + num_val:]

    u_test_idx, v_test_idx = test_pairs_idx.transpose()
    u_val_idx, v_val_idx = val_pairs_idx.transpose()
    u_train_idx, v_train_idx = train_pairs_idx.transpose()

    # create labels
    train_labels = labels[train_idx]
    val_labels = labels[val_idx]
    test_labels = labels[test_idx]

    if testing:
        u_train_idx = np.hstack([u_train_idx, u_val_idx])
        v_train_idx = np.hstack([v_train_idx, v_val_idx])
        train_labels = np.hstack([train_labels, val_labels])
        # for adjacency matrix construction
        train_idx = np.hstack([train_idx, val_idx])

    # make training adjacency matrix
    rating_mx_train = np.zeros(num_users * num_items, dtype=np.float32)
    rating_mx_train[train_idx] = labels[train_idx].astype(np.float32) + 1.
    rating_mx_train = sp.csr_matrix(rating_mx_train.reshape(num_users, num_items))

    class_values = np.sort(np.unique(ratings))

    return u_features, v_features, rating_mx_train, train_labels, u_train_idx, v_train_idx, \
        val_labels, u_val_idx, v_val_idx, test_labels, u_test_idx, v_test_idx, class_values


def load_data_monti(dataset, testing=False):
    """
    Loads data from Monti et al. paper.
    """

    path_dataset = 'data/' + dataset + '/training_test_dataset.mat'

    M = load_matlab_file(path_dataset, 'M')
    Otraining = load_matlab_file(path_dataset, 'Otraining')
    Otest = load_matlab_file(path_dataset, 'Otest')

    num_users = M.shape[0]
    num_items = M.shape[1]

    if dataset == 'flixster':
        Wrow = load_matlab_file(path_dataset, 'W_users')
        Wcol = load_matlab_file(path_dataset, 'W_movies')
        u_features = Wrow
        v_features = Wcol
        # print(num_items, v_features.shape)
        # v_features = np.eye(num_items)

    elif dataset == 'douban':
        Wrow = load_matlab_file(path_dataset, 'W_users')
        u_features = Wrow
        v_features = np.eye(num_items)
    elif dataset == 'yahoo_music':
        Wcol = load_matlab_file(path_dataset, 'W_tracks')
        u_features = np.eye(num_users)
        v_features = Wcol

    u_nodes_ratings = np.where(M)[0]
    v_nodes_ratings = np.where(M)[1]
    ratings = M[np.where(M)]

    u_nodes_ratings, v_nodes_ratings = u_nodes_ratings.astype(np.int64), v_nodes_ratings.astype(np.int32)
    ratings = ratings.astype(np.float64)

    u_nodes = u_nodes_ratings
    v_nodes = v_nodes_ratings

    print('number of users = ', len(set(u_nodes)))
    print('number of item = ', len(set(v_nodes)))

    neutral_rating = -1  # int(np.ceil(np.float(num_classes)/2.)) - 1

    # assumes that ratings_train contains at least one example of every rating type
    rating_dict = {r: i for i, r in enumerate(np.sort(np.unique(ratings)).tolist())}

    labels = np.full((num_users, num_items), neutral_rating, dtype=np.int32)
    labels[u_nodes, v_nodes] = np.array([rating_dict[r] for r in ratings])

    for i in range(len(u_nodes)):
        assert(labels[u_nodes[i], v_nodes[i]] == rating_dict[ratings[i]])

    labels = labels.reshape([-1])

    # number of test and validation edges

    num_train = np.where(Otraining)[0].shape[0]
    num_test = np.where(Otest)[0].shape[0]
    num_val = int(np.ceil(num_train * 0.2))
    num_train = num_train - num_val

    pairs_nonzero_train = np.array([[u, v] for u, v in zip(np.where(Otraining)[0], np.where(Otraining)[1])])
    idx_nonzero_train = np.array([u * num_items + v for u, v in pairs_nonzero_train])

    pairs_nonzero_test = np.array([[u, v] for u, v in zip(np.where(Otest)[0], np.where(Otest)[1])])
    idx_nonzero_test = np.array([u * num_items + v for u, v in pairs_nonzero_test])

    # Internally shuffle training set (before splitting off validation set)
    rand_idx = range(len(idx_nonzero_train))
    np.random.seed(42)
    np.random.shuffle(rand_idx)
    idx_nonzero_train = idx_nonzero_train[rand_idx]
    pairs_nonzero_train = pairs_nonzero_train[rand_idx]

    idx_nonzero = np.concatenate([idx_nonzero_train, idx_nonzero_test], axis=0)
    pairs_nonzero = np.concatenate([pairs_nonzero_train, pairs_nonzero_test], axis=0)

    val_idx = idx_nonzero[0:num_val]
    train_idx = idx_nonzero[num_val:num_train + num_val]
    test_idx = idx_nonzero[num_train + num_val:]

    assert(len(test_idx) == num_test)

    val_pairs_idx = pairs_nonzero[0:num_val]
    train_pairs_idx = pairs_nonzero[num_val:num_train + num_val]
    test_pairs_idx = pairs_nonzero[num_train + num_val:]

    u_test_idx, v_test_idx = test_pairs_idx.transpose()
    u_val_idx, v_val_idx = val_pairs_idx.transpose()
    u_train_idx, v_train_idx = train_pairs_idx.transpose()

    # create labels
    train_labels = labels[train_idx]
    val_labels = labels[val_idx]
    test_labels = labels[test_idx]

    if testing:
        u_train_idx = np.hstack([u_train_idx, u_val_idx])
        v_train_idx = np.hstack([v_train_idx, v_val_idx])
        train_labels = np.hstack([train_labels, val_labels])
        # for adjacency matrix construction
        train_idx = np.hstack([train_idx, val_idx])

    # make training adjacency matrix
    rating_mx_train = np.zeros(num_users * num_items, dtype=np.float32)
    rating_mx_train[train_idx] = labels[train_idx].astype(np.float32) + 1.
    rating_mx_train = sp.csr_matrix(rating_mx_train.reshape(num_users, num_items))

    class_values = np.sort(np.unique(ratings))

    if u_features is not None:
        u_features = sp.csr_matrix(u_features)
        print("User features shape: " + str(u_features.shape))

    if v_features is not None:
        v_features = sp.csr_matrix(v_features)
        print("Item features shape: " + str(v_features.shape))

    return u_features, v_features, rating_mx_train, train_labels, u_train_idx, v_train_idx, \
        val_labels, u_val_idx, v_val_idx, test_labels, u_test_idx, v_test_idx, class_values


def load_official_trainvaltest_split(dataset, testing=False):
    """
    Loads official train/test split and uses 10% of training samples for validaiton
    For each split computes 1-of-num_classes labels. Also computes training
    adjacency matrix. Assumes flattening happens everywhere in row-major fashion.
    """

    sep = '\t'

    # Check if files exist and download otherwise
    files = ['/u1.base', '/u1.test', '/u.item', '/u.user']
    fname = dataset
    data_dir = 'data/' + fname

    download_dataset(fname, files, data_dir)

    dtypes = {
        'u_nodes': np.int32, 'v_nodes': np.int32,
        'ratings': np.float32, 'timestamp': np.float64}

    filename_train = 'data/' + dataset + '/u1.base'
    filename_test = 'data/' + dataset + '/u1.test'

    data_train = pd.read_csv(
        filename_train, sep=sep, header=None,
        names=['u_nodes', 'v_nodes', 'ratings', 'timestamp'], dtype=dtypes)

    data_test = pd.read_csv(
        filename_test, sep=sep, header=None,
        names=['u_nodes', 'v_nodes', 'ratings', 'timestamp'], dtype=dtypes)

    data_array_train = data_train.as_matrix().tolist()
    data_array_train = np.array(data_array_train)
    data_array_test = data_test.as_matrix().tolist()
    data_array_test = np.array(data_array_test)

    data_array = np.concatenate([data_array_train, data_array_test], axis=0)

    u_nodes_ratings = data_array[:, 0].astype(dtypes['u_nodes'])
    v_nodes_ratings = data_array[:, 1].astype(dtypes['v_nodes'])
    ratings = data_array[:, 2].astype(dtypes['ratings'])

    u_nodes_ratings, u_dict, num_users = map_data(u_nodes_ratings)
    v_nodes_ratings, v_dict, num_items = map_data(v_nodes_ratings)

    u_nodes_ratings, v_nodes_ratings = u_nodes_ratings.astype(np.int64), v_nodes_ratings.astype(np.int32)
    ratings = ratings.astype(np.float64)

    u_nodes = u_nodes_ratings
    v_nodes = v_nodes_ratings

    neutral_rating = -1  # int(np.ceil(np.float(num_classes)/2.)) - 1

    # assumes that ratings_train contains at least one example of every rating type
    rating_dict = {r: i for i, r in enumerate(np.sort(np.unique(ratings)).tolist())}

    labels = np.full((num_users, num_items), neutral_rating, dtype=np.int32)
    labels[u_nodes, v_nodes] = np.array([rating_dict[r] for r in ratings])

    for i in range(len(u_nodes)):
        assert(labels[u_nodes[i], v_nodes[i]] == rating_dict[ratings[i]])

    labels = labels.reshape([-1])

    # number of test and validation edges, see cf-nade code

    num_train = data_array_train.shape[0]
    num_test = data_array_test.shape[0]
    num_val = int(np.ceil(num_train * 0.2))
    num_train = num_train - num_val

    pairs_nonzero = np.array([[u, v] for u, v in zip(u_nodes, v_nodes)])
    idx_nonzero = np.array([u * num_items + v for u, v in pairs_nonzero])

    for i in range(len(ratings)):
        assert(labels[idx_nonzero[i]] == rating_dict[ratings[i]])

    idx_nonzero_train = idx_nonzero[0:num_train+num_val]
    idx_nonzero_test = idx_nonzero[num_train+num_val:]

    pairs_nonzero_train = pairs_nonzero[0:num_train+num_val]
    pairs_nonzero_test = pairs_nonzero[num_train+num_val:]

    # Internally shuffle training set (before splitting off validation set)
    rand_idx = range(len(idx_nonzero_train))
    np.random.seed(42)
    np.random.shuffle(rand_idx)
    idx_nonzero_train = idx_nonzero_train[rand_idx]
    pairs_nonzero_train = pairs_nonzero_train[rand_idx]

    idx_nonzero = np.concatenate([idx_nonzero_train, idx_nonzero_test], axis=0)
    pairs_nonzero = np.concatenate([pairs_nonzero_train, pairs_nonzero_test], axis=0)

    val_idx = idx_nonzero[0:num_val]
    train_idx = idx_nonzero[num_val:num_train + num_val]
    test_idx = idx_nonzero[num_train + num_val:]

    assert(len(test_idx) == num_test)

    val_pairs_idx = pairs_nonzero[0:num_val]
    train_pairs_idx = pairs_nonzero[num_val:num_train + num_val]
    test_pairs_idx = pairs_nonzero[num_train + num_val:]

    u_test_idx, v_test_idx = test_pairs_idx.transpose()
    u_val_idx, v_val_idx = val_pairs_idx.transpose()
    u_train_idx, v_train_idx = train_pairs_idx.transpose()

    # create labels
    train_labels = labels[train_idx]
    val_labels = labels[val_idx]
    test_labels = labels[test_idx]

    if testing:
        u_train_idx = np.hstack([u_train_idx, u_val_idx])
        v_train_idx = np.hstack([v_train_idx, v_val_idx])
        train_labels = np.hstack([train_labels, val_labels])
        # for adjacency matrix construction
        train_idx = np.hstack([train_idx, val_idx])

    # make training adjacency matrix
    rating_mx_train = np.zeros(num_users * num_items, dtype=np.float32)
    rating_mx_train[train_idx] = labels[train_idx].astype(np.float32) + 1.
    rating_mx_train = sp.csr_matrix(rating_mx_train.reshape(num_users, num_items))

    class_values = np.sort(np.unique(ratings))

    if dataset =='ml_100k':

        # movie features (genres)
        sep = r'|'
        movie_file = 'data/' + dataset + '/u.item'
        movie_headers = ['movie id', 'movie title', 'release date', 'video release date',
                         'IMDb URL', 'unknown', 'Action', 'Adventure', 'Animation',
                         'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
                         'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
                         'Thriller', 'War', 'Western']
        movie_df = pd.read_csv(movie_file, sep=sep, header=None,
                               names=movie_headers, engine='python')

        genre_headers = movie_df.columns.values[6:]
        num_genres = genre_headers.shape[0]

        v_features = np.zeros((num_items, num_genres), dtype=np.float32)
        for movie_id, g_vec in zip(movie_df['movie id'].values.tolist(), movie_df[genre_headers].values.tolist()):
            # check if movie_id was listed in ratings file and therefore in mapping dictionary
            if movie_id in v_dict.keys():
                v_features[v_dict[movie_id], :] = g_vec

        # user features

        sep = r'|'
        users_file = 'data/' + dataset + '/u.user'
        users_headers = ['user id', 'age', 'gender', 'occupation', 'zip code']
        users_df = pd.read_csv(users_file, sep=sep, header=None,
                               names=users_headers, engine='python')

        occupation = set(users_df['occupation'].values.tolist())

        age = users_df['age'].values
        age_max = age.max()

        gender_dict = {'M': 0., 'F': 1.}
        occupation_dict = {f: i for i, f in enumerate(occupation, start=2)}

        num_feats = 2 + len(occupation_dict)

        u_features = np.zeros((num_users, num_feats), dtype=np.float32)
        for _, row in users_df.iterrows():
            u_id = row['user id']
            if u_id in u_dict.keys():
                # age
                u_features[u_dict[u_id], 0] = row['age'] / np.float(age_max)
                # gender
                u_features[u_dict[u_id], 1] = gender_dict[row['gender']]
                # occupation
                u_features[u_dict[u_id], occupation_dict[row['occupation']]] = 1.

    elif dataset == 'ml_1m':

        # load movie features
        movies_file = 'data/' + dataset + '/movies.dat'

        movies_headers = ['movie_id', 'title', 'genre']
        movies_df = pd.read_csv(movies_file, sep=sep, header=None,
                                names=movies_headers, engine='python')

        # extracting all genres
        genres = []
        for s in movies_df['genre'].values:
            genres.extend(s.split('|'))

        genres = list(set(genres))
        num_genres = len(genres)

        genres_dict = {g: idx for idx, g in enumerate(genres)}

        # creating 0 or 1 valued features for all genres
        v_features = np.zeros((num_items, num_genres), dtype=np.float32)
        for movie_id, s in zip(movies_df['movie_id'].values.tolist(), movies_df['genre'].values.tolist()):
            # check if movie_id was listed in ratings file and therefore in mapping dictionary
            if movie_id in v_dict.keys():
                gen = s.split('|')
                for g in gen:
                    v_features[v_dict[movie_id], genres_dict[g]] = 1.

        # load user features
        users_file = 'data/' + dataset + '/users.dat'
        users_headers = ['user_id', 'gender', 'age', 'occupation', 'zip-code']
        users_df = pd.read_csv(users_file, sep=sep, header=None,
                               names=users_headers, engine='python')

        # extracting all features
        cols = users_df.columns.values[1:]

        cntr = 0
        feat_dicts = []
        for header in cols:
            d = dict()
            feats = np.unique(users_df[header].values).tolist()
            d.update({f: i for i, f in enumerate(feats, start=cntr)})
            feat_dicts.append(d)
            cntr += len(d)

        num_feats = sum(len(d) for d in feat_dicts)

        u_features = np.zeros((num_users, num_feats), dtype=np.float32)
        for _, row in users_df.iterrows():
            u_id = row['user_id']
            if u_id in u_dict.keys():
                for k, header in enumerate(cols):
                    u_features[u_dict[u_id], feat_dicts[k][row[header]]] = 1.
    else:
        raise ValueError('Invalid dataset option %s' % dataset)

    u_features = sp.csr_matrix(u_features)
    v_features = sp.csr_matrix(v_features)

    print("User features shape: "+str(u_features.shape))
    print("Item features shape: "+str(v_features.shape))

    return u_features, v_features, rating_mx_train, train_labels, u_train_idx, v_train_idx, \
        val_labels, u_val_idx, v_val_idx, test_labels, u_test_idx, v_test_idx, class_values

# train.py

In [32]:
from __future__ import division
from __future__ import print_function

import argparse
import datetime
import time

import tensorflow as tf
import numpy as np
import scipy.sparse as sp
import sys
import json

from gcmc.preprocessing import create_trainvaltest_split, \
    sparse_to_tuple, preprocess_user_item_features, globally_normalize_bipartite_adjacency, \
    load_data_monti, load_official_trainvaltest_split, normalize_features
from gcmc.model import RecommenderGAE, RecommenderSideInfoGAE
from gcmc.utils import construct_feed_dict

In [33]:
# Set random seed
# seed = 123 # use only for unit testing
seed = int(time.time())
np.random.seed(seed)
tf.set_random_seed(seed)

In [34]:
# Settings
ap = argparse.ArgumentParser(description="main")
ap.add_argument("-d", "--dataset", type=str, default="ml_100k",
                choices=['ml_100k', 'ml_1m', 'ml_10m', 'douban', 'yahoo_music', 'flixster'],
                help="Dataset string.")

ap.add_argument("-lr", "--learning_rate", type=float, default=0.01,
                help="Learning rate")

ap.add_argument("-e", "--epochs", type=int, default=2500,
                help="Number training epochs")

ap.add_argument("-hi", "--hidden", type=int, nargs=2, default=[500, 75],
                help="Number hidden units in 1st and 2nd layer")

ap.add_argument("-fhi", "--feat_hidden", type=int, default=64,
                help="Number hidden units in the dense layer for features")

ap.add_argument("-ac", "--accumulation", type=str, default="sum", choices=['sum', 'stack'],
                help="Accumulation function: sum or stack.")

ap.add_argument("-do", "--dropout", type=float, default=0.7,
                help="Dropout fraction")

ap.add_argument("-nb", "--num_basis_functions", type=int, default=2,
                help="Number of basis functions for Mixture Model GCN.")

ap.add_argument("-ds", "--data_seed", type=int, default=1234,
                help="""Seed used to shuffle data in data_utils, taken from cf-nade (1234, 2341, 3412, 4123, 1324).
                     Only used for ml_1m and ml_10m datasets. """)

ap.add_argument("-sdir", "--summaries_dir", type=str, default='logs/' + str(datetime.datetime.now()).replace(' ', '_'),
                help="Directory for saving tensorflow summaries.")


_StoreAction(option_strings=['-sdir', '--summaries_dir'], dest='summaries_dir', nargs=None, const=None, default='logs/2021-06-28_16:49:07.673056', type=<class 'str'>, choices=None, help='Directory for saving tensorflow summaries.', metavar=None)

In [35]:
# Boolean flags
fp = ap.add_mutually_exclusive_group(required=False)
fp.add_argument('-nsym', '--norm_symmetric', dest='norm_symmetric',
                help="Option to turn on symmetric global normalization", action='store_true')
fp.add_argument('-nleft', '--norm_left', dest='norm_symmetric',
                help="Option to turn on left global normalization", action='store_false')
ap.set_defaults(norm_symmetric=True)

fp = ap.add_mutually_exclusive_group(required=False)
fp.add_argument('-f', '--features', dest='features',
                help="Whether to use features (1) or not (0)", action='store_true')
fp.add_argument('-no_f', '--no_features', dest='features',
                help="Whether to use features (1) or not (0)", action='store_false')
ap.set_defaults(features=False)

fp = ap.add_mutually_exclusive_group(required=False)
fp.add_argument('-ws', '--write_summary', dest='write_summary',
                help="Option to turn on summary writing", action='store_true')
fp.add_argument('-no_ws', '--no_write_summary', dest='write_summary',
                help="Option to turn off summary writing", action='store_false')
ap.set_defaults(write_summary=False)

fp = ap.add_mutually_exclusive_group(required=False)
fp.add_argument('-t', '--testing', dest='testing',
                help="Option to turn on test set evaluation", action='store_true')
fp.add_argument('-v', '--validation', dest='testing',
                help="Option to only use validation set evaluation", action='store_false')
ap.set_defaults(testing=False)

args = vars(ap.parse_known_args()[0])

print('Settings:')
print(args, '\n')

Settings:
{'dataset': 'ml_100k', 'learning_rate': 0.01, 'epochs': 2500, 'hidden': [500, 75], 'feat_hidden': 64, 'accumulation': 'sum', 'dropout': 0.7, 'num_basis_functions': 2, 'data_seed': 1234, 'summaries_dir': 'logs/2021-06-28_16:49:07.673056', 'norm_symmetric': True, 'features': True, 'write_summary': False, 'testing': False} 



In [36]:
# Define parameters
DATASET = args['dataset']
DATASEED = args['data_seed']
NB_EPOCH = args['epochs']
DO = args['dropout']
HIDDEN = args['hidden']
FEATHIDDEN = args['feat_hidden']
BASES = args['num_basis_functions']
LR = args['learning_rate']
WRITESUMMARY = args['write_summary']
SUMMARIESDIR = args['summaries_dir']
FEATURES = args['features']
SYM = args['norm_symmetric']
TESTING = args['testing']
ACCUM = args['accumulation']

SELFCONNECTIONS = False
SPLITFROMFILE = True
VERBOSE = True

if DATASET == 'ml_1m' or DATASET == 'ml_100k' or DATASET == 'douban':
    NUMCLASSES = 5
elif DATASET == 'ml_10m':
    NUMCLASSES = 10
    print('\n WARNING: this might run out of RAM, consider using train_minibatch.py for dataset %s' % DATASET)
    print('If you want to proceed with this option anyway, uncomment this.\n')
    sys.exit(1)
elif DATASET == 'flixster':
    NUMCLASSES = 10
elif DATASET == 'yahoo_music':
    NUMCLASSES = 71
    if ACCUM == 'sum':
        print('\n WARNING: combining DATASET=%s with ACCUM=%s can cause memory issues due to large number of classes.')
        print('Consider using "--accum stack" as an option for this dataset.')
        print('If you want to proceed with this option anyway, uncomment this.\n')
        sys.exit(1)

In [37]:
# Splitting dataset in training, validation and test set

if DATASET == 'ml_1m' or DATASET == 'ml_10m':
    if FEATURES:
        datasplit_path = 'data/' + DATASET + '/withfeatures_split_seed' + str(DATASEED) + '.pickle'
    else:
        datasplit_path = 'data/' + DATASET + '/split_seed' + str(DATASEED) + '.pickle'
elif FEATURES:
    datasplit_path = 'data/' + DATASET + '/withfeatures.pickle'
else:
    datasplit_path = 'data/' + DATASET + '/nofeatures.pickle'


if DATASET == 'flixster' or DATASET == 'douban' or DATASET == 'yahoo_music':
    u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices, \
        val_labels, val_u_indices, val_v_indices, test_labels, \
        test_u_indices, test_v_indices, class_values = load_data_monti(DATASET, TESTING)

elif DATASET == 'ml_100k':
    print("Using official MovieLens dataset split u1.base/u1.test with 20% validation set size...")
    u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices, \
        val_labels, val_u_indices, val_v_indices, test_labels, \
        test_u_indices, test_v_indices, class_values = load_official_trainvaltest_split(DATASET, TESTING)
else:
    print("Using random dataset split ...")
    u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices, \
        val_labels, val_u_indices, val_v_indices, test_labels, \
        test_u_indices, test_v_indices, class_values = create_trainvaltest_split(DATASET, DATASEED, TESTING,
                                                                                 datasplit_path, SPLITFROMFILE,
                                                                                 VERBOSE)

num_users, num_items = adj_train.shape

num_side_features = 0

Using official MovieLens dataset split u1.base/u1.test with 20% validation set size...
User features shape: (943, 23)
Item features shape: (1682, 18)


In [38]:
# feature loading
if not FEATURES:
    u_features = sp.identity(num_users, format='csr')
    v_features = sp.identity(num_items, format='csr')

    u_features, v_features = preprocess_user_item_features(u_features, v_features)

elif FEATURES and u_features is not None and v_features is not None:
    # use features as side information and node_id's as node input features

    print("Normalizing feature vectors...")
    u_features_side = normalize_features(u_features)
    v_features_side = normalize_features(v_features)

    u_features_side, v_features_side = preprocess_user_item_features(u_features_side, v_features_side)

    u_features_side = np.array(u_features_side.todense(), dtype=np.float32)
    v_features_side = np.array(v_features_side.todense(), dtype=np.float32)

    num_side_features = u_features_side.shape[1]

    # node id's for node input features
    id_csr_v = sp.identity(num_items, format='csr')
    id_csr_u = sp.identity(num_users, format='csr')

    u_features, v_features = preprocess_user_item_features(id_csr_u, id_csr_v)

else:
    raise ValueError('Features flag is set to true but no features are loaded from dataset ' + DATASET)

Normalizing feature vectors...


In [40]:
# global normalization
support = []
support_t = []
adj_train_int = sp.csr_matrix(adj_train, dtype=np.int32)

for i in range(NUMCLASSES):
    # build individual binary rating matrices (supports) for each rating
    support_unnormalized = sp.csr_matrix(adj_train_int == i + 1, dtype=np.float32)

    if support_unnormalized.nnz == 0 and DATASET != 'yahoo_music':
        # yahoo music has dataset split with not all ratings types present in training set.
        # this produces empty adjacency matrices for these ratings.
        sys.exit('ERROR: normalized bipartite adjacency matrix has only zero entries!!!!!')

    support_unnormalized_transpose = support_unnormalized.T
    support.append(support_unnormalized)
    support_t.append(support_unnormalized_transpose)


support = globally_normalize_bipartite_adjacency(support, symmetric=SYM)
support_t = globally_normalize_bipartite_adjacency(support_t, symmetric=SYM)

if SELFCONNECTIONS:
    support.append(sp.identity(u_features.shape[0], format='csr'))
    support_t.append(sp.identity(v_features.shape[0], format='csr'))

num_support = len(support)
support = sp.hstack(support, format='csr')
support_t = sp.hstack(support_t, format='csr')

if ACCUM == 'stack':
    div = HIDDEN[0] // num_support
    if HIDDEN[0] % num_support != 0:
        print("""\nWARNING: HIDDEN[0] (=%d) of stack layer is adjusted to %d such that
                  it can be evenly split in %d splits.\n""" % (HIDDEN[0], num_support * div, num_support))
    HIDDEN[0] = num_support * div

In [41]:
# Collect all user and item nodes for test set
test_u = list(set(test_u_indices))
test_v = list(set(test_v_indices))
test_u_dict = {n: i for i, n in enumerate(test_u)}
test_v_dict = {n: i for i, n in enumerate(test_v)}

test_u_indices = np.array([test_u_dict[o] for o in test_u_indices])
test_v_indices = np.array([test_v_dict[o] for o in test_v_indices])

test_support = support[np.array(test_u)]
test_support_t = support_t[np.array(test_v)]

# Collect all user and item nodes for validation set
val_u = list(set(val_u_indices))
val_v = list(set(val_v_indices))
val_u_dict = {n: i for i, n in enumerate(val_u)}
val_v_dict = {n: i for i, n in enumerate(val_v)}

val_u_indices = np.array([val_u_dict[o] for o in val_u_indices])
val_v_indices = np.array([val_v_dict[o] for o in val_v_indices])

val_support = support[np.array(val_u)]
val_support_t = support_t[np.array(val_v)]

# Collect all user and item nodes for train set
train_u = list(set(train_u_indices))
train_v = list(set(train_v_indices))
train_u_dict = {n: i for i, n in enumerate(train_u)}
train_v_dict = {n: i for i, n in enumerate(train_v)}

train_u_indices = np.array([train_u_dict[o] for o in train_u_indices])
train_v_indices = np.array([train_v_dict[o] for o in train_v_indices])

train_support = support[np.array(train_u)]
train_support_t = support_t[np.array(train_v)]

In [42]:
# features as side info
if FEATURES:
    test_u_features_side = u_features_side[np.array(test_u)]
    test_v_features_side = v_features_side[np.array(test_v)]

    val_u_features_side = u_features_side[np.array(val_u)]
    val_v_features_side = v_features_side[np.array(val_v)]

    train_u_features_side = u_features_side[np.array(train_u)]
    train_v_features_side = v_features_side[np.array(train_v)]

else:
    test_u_features_side = None
    test_v_features_side = None

    val_u_features_side = None
    val_v_features_side = None

    train_u_features_side = None
    train_v_features_side = None

placeholders = {
    'u_features': tf.sparse_placeholder(tf.float32, shape=np.array(u_features.shape, dtype=np.int64)),
    'v_features': tf.sparse_placeholder(tf.float32, shape=np.array(v_features.shape, dtype=np.int64)),
    'u_features_nonzero': tf.placeholder(tf.int32, shape=()),
    'v_features_nonzero': tf.placeholder(tf.int32, shape=()),
    'labels': tf.placeholder(tf.int32, shape=(None,)),

    'u_features_side': tf.placeholder(tf.float32, shape=(None, num_side_features)),
    'v_features_side': tf.placeholder(tf.float32, shape=(None, num_side_features)),

    'user_indices': tf.placeholder(tf.int32, shape=(None,)),
    'item_indices': tf.placeholder(tf.int32, shape=(None,)),

    'class_values': tf.placeholder(tf.float32, shape=class_values.shape),

    'dropout': tf.placeholder_with_default(0., shape=()),
    'weight_decay': tf.placeholder_with_default(0., shape=()),

    'support': tf.sparse_placeholder(tf.float32, shape=(None, None)),
    'support_t': tf.sparse_placeholder(tf.float32, shape=(None, None)),
}

In [43]:
# create model
if FEATURES:
    model = RecommenderSideInfoGAE(placeholders,
                                   input_dim=u_features.shape[1],
                                   feat_hidden_dim=FEATHIDDEN,
                                   num_classes=NUMCLASSES,
                                   num_support=num_support,
                                   self_connections=SELFCONNECTIONS,
                                   num_basis_functions=BASES,
                                   hidden=HIDDEN,
                                   num_users=num_users,
                                   num_items=num_items,
                                   accum=ACCUM,
                                   learning_rate=LR,
                                   num_side_features=num_side_features,
                                   logging=True)
else:
    model = RecommenderGAE(placeholders,
                           input_dim=u_features.shape[1],
                           num_classes=NUMCLASSES,
                           num_support=num_support,
                           self_connections=SELFCONNECTIONS,
                           num_basis_functions=BASES,
                           hidden=HIDDEN,
                           num_users=num_users,
                           num_items=num_items,
                           accum=ACCUM,
                           learning_rate=LR,
                           logging=True)







Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Instructions for updating:
Use `tf.cast` instead.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


In [44]:
# Convert sparse placeholders to tuples to construct feed_dict
test_support = sparse_to_tuple(test_support)
test_support_t = sparse_to_tuple(test_support_t)

val_support = sparse_to_tuple(val_support)
val_support_t = sparse_to_tuple(val_support_t)

train_support = sparse_to_tuple(train_support)
train_support_t = sparse_to_tuple(train_support_t)

u_features = sparse_to_tuple(u_features)
v_features = sparse_to_tuple(v_features)
assert u_features[2][1] == v_features[2][1], 'Number of features of users and items must be the same!'

num_features = u_features[2][1]
u_features_nonzero = u_features[1].shape[0]
v_features_nonzero = v_features[1].shape[0]

In [45]:
# Feed_dicts for validation and test set stay constant over different update steps
train_feed_dict = construct_feed_dict(placeholders, u_features, v_features, u_features_nonzero,
                                      v_features_nonzero, train_support, train_support_t,
                                      train_labels, train_u_indices, train_v_indices, class_values, DO,
                                      train_u_features_side, train_v_features_side)
# No dropout for validation and test runs
val_feed_dict = construct_feed_dict(placeholders, u_features, v_features, u_features_nonzero,
                                    v_features_nonzero, val_support, val_support_t,
                                    val_labels, val_u_indices, val_v_indices, class_values, 0.,
                                    val_u_features_side, val_v_features_side)

test_feed_dict = construct_feed_dict(placeholders, u_features, v_features, u_features_nonzero,
                                     v_features_nonzero, test_support, test_support_t,
                                     test_labels, test_u_indices, test_v_indices, class_values, 0.,
                                     test_u_features_side, test_v_features_side)

In [46]:
# Collect all variables to be logged into summary
merged_summary = tf.summary.merge_all()

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if WRITESUMMARY:
    train_summary_writer = tf.summary.FileWriter(SUMMARIESDIR + '/train', sess.graph)
    val_summary_writer = tf.summary.FileWriter(SUMMARIESDIR + '/val')
else:
    train_summary_writer = None
    val_summary_writer = None

best_val_score = np.inf
best_val_loss = np.inf
best_epoch = 0
wait = 0

In [47]:
print('Training...')

for epoch in range(NB_EPOCH):

    t = time.time()

    # Run single weight update
    # outs = sess.run([model.opt_op, model.loss, model.rmse], feed_dict=train_feed_dict)
    # with exponential moving averages
    outs = sess.run([model.training_op, model.loss, model.rmse], feed_dict=train_feed_dict)

    train_avg_loss = outs[1]
    train_rmse = outs[2]

    val_avg_loss, val_rmse = sess.run([model.loss, model.rmse], feed_dict=val_feed_dict)

    if VERBOSE:
        print("[*] Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(train_avg_loss),
              "train_rmse=", "{:.5f}".format(train_rmse),
              "val_loss=", "{:.5f}".format(val_avg_loss),
              "val_rmse=", "{:.5f}".format(val_rmse),
              "\t\ttime=", "{:.5f}".format(time.time() - t))

    if val_rmse < best_val_score:
        best_val_score = val_rmse
        best_epoch = epoch

    if epoch % 20 == 0 and WRITESUMMARY:
        # Train set summary
        summary = sess.run(merged_summary, feed_dict=train_feed_dict)
        train_summary_writer.add_summary(summary, epoch)
        train_summary_writer.flush()

        # Validation set summary
        summary = sess.run(merged_summary, feed_dict=val_feed_dict)
        val_summary_writer.add_summary(summary, epoch)
        val_summary_writer.flush()

    if epoch % 100 == 0 and epoch > 1000 and not TESTING and False:
        saver = tf.train.Saver()
        save_path = saver.save(sess, "tmp/%s_seed%d.ckpt" % (model.name, DATASEED), global_step=model.global_step)

        # load polyak averages
        variables_to_restore = model.variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        saver.restore(sess, save_path)

        val_avg_loss, val_rmse = sess.run([model.loss, model.rmse], feed_dict=val_feed_dict)

        print('polyak val loss = ', val_avg_loss)
        print('polyak val rmse = ', val_rmse)

        # Load back normal variables
        saver = tf.train.Saver()
        saver.restore(sess, save_path)

Training...
[*] Epoch: 0001 train_loss= 2.13964 train_rmse= 1.20293 val_loss= 2.58343 val_rmse= 1.25460 		time= 1.25951
[*] Epoch: 0002 train_loss= 2.80828 train_rmse= 1.23295 val_loss= 8.13137 val_rmse= 1.19438 		time= 0.69214
[*] Epoch: 0003 train_loss= 10.50304 train_rmse= 1.16949 val_loss= 1.66683 val_rmse= 1.30030 		time= 0.63130
[*] Epoch: 0004 train_loss= 2.09541 train_rmse= 1.20628 val_loss= 3.12389 val_rmse= 1.26504 		time= 0.62932
[*] Epoch: 0005 train_loss= 3.99205 train_rmse= 1.24871 val_loss= 1.65855 val_rmse= 1.26392 		time= 0.63131
[*] Epoch: 0006 train_loss= 2.03082 train_rmse= 1.20085 val_loss= 2.86684 val_rmse= 1.21077 		time= 0.59640
[*] Epoch: 0007 train_loss= 3.04376 train_rmse= 1.17191 val_loss= 1.95373 val_rmse= 1.29805 		time= 0.63729
[*] Epoch: 0008 train_loss= 2.26731 train_rmse= 1.21053 val_loss= 2.29387 val_rmse= 1.24576 		time= 0.67220
[*] Epoch: 0009 train_loss= 2.49592 train_rmse= 1.20982 val_loss= 2.88226 val_rmse= 1.25773 		time= 0.60039
[*] Epoch: 0010

[*] Epoch: 0077 train_loss= 1.50574 train_rmse= 1.14724 val_loss= 1.51548 val_rmse= 1.19475 		time= 0.62933
[*] Epoch: 0078 train_loss= 1.49902 train_rmse= 1.14347 val_loss= 1.51286 val_rmse= 1.19218 		time= 0.74800
[*] Epoch: 0079 train_loss= 1.50097 train_rmse= 1.14519 val_loss= 1.51030 val_rmse= 1.18986 		time= 0.69913
[*] Epoch: 0080 train_loss= 1.49195 train_rmse= 1.13464 val_loss= 1.50779 val_rmse= 1.18754 		time= 0.57047
[*] Epoch: 0081 train_loss= 1.49493 train_rmse= 1.13809 val_loss= 1.50551 val_rmse= 1.18548 		time= 0.59541
[*] Epoch: 0082 train_loss= 1.48325 train_rmse= 1.13245 val_loss= 1.50318 val_rmse= 1.18314 		time= 0.65924
[*] Epoch: 0083 train_loss= 1.48758 train_rmse= 1.13134 val_loss= 1.50077 val_rmse= 1.18058 		time= 0.61835
[*] Epoch: 0084 train_loss= 1.47885 train_rmse= 1.12616 val_loss= 1.49817 val_rmse= 1.17761 		time= 0.59840
[*] Epoch: 0085 train_loss= 1.47663 train_rmse= 1.12258 val_loss= 1.49561 val_rmse= 1.17453 		time= 0.60140
[*] Epoch: 0086 train_loss= 

[*] Epoch: 0153 train_loss= 1.39677 train_rmse= 1.01989 val_loss= 1.45737 val_rmse= 1.10069 		time= 0.58443
[*] Epoch: 0154 train_loss= 1.40259 train_rmse= 1.02241 val_loss= 1.46120 val_rmse= 1.10382 		time= 0.62932
[*] Epoch: 0155 train_loss= 1.39586 train_rmse= 1.00875 val_loss= 1.45987 val_rmse= 1.10268 		time= 0.64627
[*] Epoch: 0156 train_loss= 1.39393 train_rmse= 1.01419 val_loss= 1.45598 val_rmse= 1.09921 		time= 0.54355
[*] Epoch: 0157 train_loss= 1.39105 train_rmse= 1.01300 val_loss= 1.45403 val_rmse= 1.09709 		time= 0.62233
[*] Epoch: 0158 train_loss= 1.39429 train_rmse= 1.01875 val_loss= 1.44907 val_rmse= 1.09250 		time= 0.62533
[*] Epoch: 0159 train_loss= 1.39910 train_rmse= 1.02253 val_loss= 1.44643 val_rmse= 1.09010 		time= 0.63131
[*] Epoch: 0160 train_loss= 1.38768 train_rmse= 1.00973 val_loss= 1.44440 val_rmse= 1.08816 		time= 0.59641
[*] Epoch: 0161 train_loss= 1.38974 train_rmse= 1.02157 val_loss= 1.44486 val_rmse= 1.08804 		time= 0.62034
[*] Epoch: 0162 train_loss= 

[*] Epoch: 0229 train_loss= 1.36928 train_rmse= 0.98469 val_loss= 1.45727 val_rmse= 1.09105 		time= 0.60139
[*] Epoch: 0230 train_loss= 1.37148 train_rmse= 0.99093 val_loss= 1.45939 val_rmse= 1.09226 		time= 0.60438
[*] Epoch: 0231 train_loss= 1.36799 train_rmse= 0.98381 val_loss= 1.46129 val_rmse= 1.09340 		time= 0.67420
[*] Epoch: 0232 train_loss= 1.37062 train_rmse= 0.97874 val_loss= 1.45896 val_rmse= 1.09173 		time= 0.56250
[*] Epoch: 0233 train_loss= 1.35987 train_rmse= 0.97185 val_loss= 1.45697 val_rmse= 1.09057 		time= 0.58743
[*] Epoch: 0234 train_loss= 1.36201 train_rmse= 0.97627 val_loss= 1.45672 val_rmse= 1.09081 		time= 0.59641
[*] Epoch: 0235 train_loss= 1.35436 train_rmse= 0.97010 val_loss= 1.45833 val_rmse= 1.09251 		time= 0.55452
[*] Epoch: 0236 train_loss= 1.36813 train_rmse= 0.98860 val_loss= 1.45997 val_rmse= 1.09384 		time= 0.59740
[*] Epoch: 0237 train_loss= 1.36294 train_rmse= 0.98317 val_loss= 1.45690 val_rmse= 1.09147 		time= 0.65825
[*] Epoch: 0238 train_loss= 

[*] Epoch: 0305 train_loss= 1.35005 train_rmse= 0.96041 val_loss= 1.45939 val_rmse= 1.09244 		time= 0.59020
[*] Epoch: 0306 train_loss= 1.33723 train_rmse= 0.96202 val_loss= 1.46741 val_rmse= 1.09949 		time= 0.61236
[*] Epoch: 0307 train_loss= 1.34527 train_rmse= 0.96396 val_loss= 1.46732 val_rmse= 1.10051 		time= 0.56349
[*] Epoch: 0308 train_loss= 1.34164 train_rmse= 0.95939 val_loss= 1.46156 val_rmse= 1.09667 		time= 0.62932
[*] Epoch: 0309 train_loss= 1.34797 train_rmse= 0.97026 val_loss= 1.45480 val_rmse= 1.09028 		time= 0.65824
[*] Epoch: 0310 train_loss= 1.35078 train_rmse= 0.96871 val_loss= 1.45583 val_rmse= 1.09159 		time= 0.57945
[*] Epoch: 0311 train_loss= 1.35100 train_rmse= 0.97659 val_loss= 1.46291 val_rmse= 1.09764 		time= 0.61535
[*] Epoch: 0312 train_loss= 1.34812 train_rmse= 0.96149 val_loss= 1.46409 val_rmse= 1.09749 		time= 0.62134
[*] Epoch: 0313 train_loss= 1.34383 train_rmse= 0.96457 val_loss= 1.45931 val_rmse= 1.09277 		time= 0.57147
[*] Epoch: 0314 train_loss= 

[*] Epoch: 0381 train_loss= 1.33059 train_rmse= 0.94719 val_loss= 1.46922 val_rmse= 1.10356 		time= 0.64128
[*] Epoch: 0382 train_loss= 1.32238 train_rmse= 0.94794 val_loss= 1.46696 val_rmse= 1.10190 		time= 0.60139
[*] Epoch: 0383 train_loss= 1.32299 train_rmse= 0.94434 val_loss= 1.46103 val_rmse= 1.09637 		time= 0.58244
[*] Epoch: 0384 train_loss= 1.31685 train_rmse= 0.93988 val_loss= 1.45942 val_rmse= 1.09522 		time= 0.58344
[*] Epoch: 0385 train_loss= 1.31565 train_rmse= 0.94925 val_loss= 1.46172 val_rmse= 1.09722 		time= 0.65128
[*] Epoch: 0386 train_loss= 1.31813 train_rmse= 0.94268 val_loss= 1.46136 val_rmse= 1.09659 		time= 0.65524
[*] Epoch: 0387 train_loss= 1.31994 train_rmse= 0.94589 val_loss= 1.45898 val_rmse= 1.09453 		time= 0.55651
[*] Epoch: 0388 train_loss= 1.32124 train_rmse= 0.95486 val_loss= 1.45993 val_rmse= 1.09563 		time= 0.60139
[*] Epoch: 0389 train_loss= 1.30659 train_rmse= 0.94307 val_loss= 1.46110 val_rmse= 1.09657 		time= 0.62832
[*] Epoch: 0390 train_loss= 

[*] Epoch: 0457 train_loss= 1.30334 train_rmse= 0.94018 val_loss= 1.46126 val_rmse= 1.09730 		time= 0.70910
[*] Epoch: 0458 train_loss= 1.30265 train_rmse= 0.93608 val_loss= 1.46430 val_rmse= 1.10110 		time= 0.63131
[*] Epoch: 0459 train_loss= 1.29574 train_rmse= 0.94034 val_loss= 1.47428 val_rmse= 1.11127 		time= 0.59142
[*] Epoch: 0460 train_loss= 1.30231 train_rmse= 0.93126 val_loss= 1.47642 val_rmse= 1.11373 		time= 0.61436
[*] Epoch: 0461 train_loss= 1.30500 train_rmse= 0.92952 val_loss= 1.47096 val_rmse= 1.10863 		time= 0.54355
[*] Epoch: 0462 train_loss= 1.29393 train_rmse= 0.92382 val_loss= 1.46306 val_rmse= 1.10053 		time= 0.61336
[*] Epoch: 0463 train_loss= 1.29975 train_rmse= 0.93627 val_loss= 1.45777 val_rmse= 1.09481 		time= 0.64029
[*] Epoch: 0464 train_loss= 1.31246 train_rmse= 0.94323 val_loss= 1.46136 val_rmse= 1.09783 		time= 0.59242
[*] Epoch: 0465 train_loss= 1.29272 train_rmse= 0.93077 val_loss= 1.46551 val_rmse= 1.10156 		time= 0.60538
[*] Epoch: 0466 train_loss= 

[*] Epoch: 0533 train_loss= 1.28157 train_rmse= 0.93163 val_loss= 1.46238 val_rmse= 1.10092 		time= 0.60937
[*] Epoch: 0534 train_loss= 1.28083 train_rmse= 0.92549 val_loss= 1.47156 val_rmse= 1.11046 		time= 0.54853
[*] Epoch: 0535 train_loss= 1.28232 train_rmse= 0.92046 val_loss= 1.47823 val_rmse= 1.11789 		time= 0.59142
[*] Epoch: 0536 train_loss= 1.27981 train_rmse= 0.91572 val_loss= 1.47838 val_rmse= 1.11872 		time= 0.64827
[*] Epoch: 0537 train_loss= 1.28310 train_rmse= 0.91488 val_loss= 1.47153 val_rmse= 1.11205 		time= 0.59142
[*] Epoch: 0538 train_loss= 1.27641 train_rmse= 0.91659 val_loss= 1.46524 val_rmse= 1.10590 		time= 0.61037
[*] Epoch: 0539 train_loss= 1.27669 train_rmse= 0.92496 val_loss= 1.46334 val_rmse= 1.10419 		time= 0.60438
[*] Epoch: 0540 train_loss= 1.27344 train_rmse= 0.92094 val_loss= 1.46649 val_rmse= 1.10748 		time= 0.58543
[*] Epoch: 0541 train_loss= 1.27531 train_rmse= 0.92425 val_loss= 1.47043 val_rmse= 1.11125 		time= 0.58246
[*] Epoch: 0542 train_loss= 

[*] Epoch: 0609 train_loss= 1.25326 train_rmse= 0.91244 val_loss= 1.47210 val_rmse= 1.11693 		time= 0.65226
[*] Epoch: 0610 train_loss= 1.24464 train_rmse= 0.89689 val_loss= 1.47255 val_rmse= 1.11749 		time= 0.59441
[*] Epoch: 0611 train_loss= 1.25756 train_rmse= 0.90777 val_loss= 1.46702 val_rmse= 1.11182 		time= 0.57745
[*] Epoch: 0612 train_loss= 1.24826 train_rmse= 0.89821 val_loss= 1.46339 val_rmse= 1.10844 		time= 0.62732
[*] Epoch: 0613 train_loss= 1.25383 train_rmse= 0.90919 val_loss= 1.46083 val_rmse= 1.10638 		time= 0.54354
[*] Epoch: 0614 train_loss= 1.25531 train_rmse= 0.91413 val_loss= 1.45899 val_rmse= 1.10528 		time= 0.59541
[*] Epoch: 0615 train_loss= 1.25028 train_rmse= 0.90863 val_loss= 1.46057 val_rmse= 1.10741 		time= 0.62633
[*] Epoch: 0616 train_loss= 1.25827 train_rmse= 0.91826 val_loss= 1.46342 val_rmse= 1.11049 		time= 0.60638
[*] Epoch: 0617 train_loss= 1.25212 train_rmse= 0.90871 val_loss= 1.46395 val_rmse= 1.11051 		time= 0.62333
[*] Epoch: 0618 train_loss= 

[*] Epoch: 0685 train_loss= 1.21440 train_rmse= 0.89421 val_loss= 1.45030 val_rmse= 1.11371 		time= 0.59441
[*] Epoch: 0686 train_loss= 1.21510 train_rmse= 0.89701 val_loss= 1.44951 val_rmse= 1.11254 		time= 0.56748
[*] Epoch: 0687 train_loss= 1.22018 train_rmse= 0.90571 val_loss= 1.44613 val_rmse= 1.10875 		time= 0.58743
[*] Epoch: 0688 train_loss= 1.20903 train_rmse= 0.88827 val_loss= 1.44810 val_rmse= 1.11102 		time= 0.67021
[*] Epoch: 0689 train_loss= 1.21886 train_rmse= 0.89741 val_loss= 1.44567 val_rmse= 1.10819 		time= 0.56846
[*] Epoch: 0690 train_loss= 1.20149 train_rmse= 0.88618 val_loss= 1.44613 val_rmse= 1.10872 		time= 0.59740
[*] Epoch: 0691 train_loss= 1.21830 train_rmse= 0.89788 val_loss= 1.45045 val_rmse= 1.11447 		time= 0.60438
[*] Epoch: 0692 train_loss= 1.20742 train_rmse= 0.89176 val_loss= 1.45221 val_rmse= 1.11674 		time= 0.55751
[*] Epoch: 0693 train_loss= 1.21031 train_rmse= 0.89341 val_loss= 1.44833 val_rmse= 1.11241 		time= 0.61037
[*] Epoch: 0694 train_loss= 

[*] Epoch: 0761 train_loss= 1.18980 train_rmse= 0.88039 val_loss= 1.44468 val_rmse= 1.10934 		time= 0.63031
[*] Epoch: 0762 train_loss= 1.18113 train_rmse= 0.87595 val_loss= 1.44615 val_rmse= 1.11180 		time= 0.58045
[*] Epoch: 0763 train_loss= 1.18363 train_rmse= 0.87201 val_loss= 1.44660 val_rmse= 1.11332 		time= 0.58045
[*] Epoch: 0764 train_loss= 1.18798 train_rmse= 0.87793 val_loss= 1.44499 val_rmse= 1.11044 		time= 0.59740
[*] Epoch: 0765 train_loss= 1.19057 train_rmse= 0.87964 val_loss= 1.44447 val_rmse= 1.10809 		time= 0.57047
[*] Epoch: 0766 train_loss= 1.19732 train_rmse= 0.88996 val_loss= 1.44766 val_rmse= 1.11087 		time= 0.59042
[*] Epoch: 0767 train_loss= 1.18237 train_rmse= 0.87823 val_loss= 1.44944 val_rmse= 1.11343 		time= 0.66821
[*] Epoch: 0768 train_loss= 1.18123 train_rmse= 0.87788 val_loss= 1.45005 val_rmse= 1.11467 		time= 0.59241
[*] Epoch: 0769 train_loss= 1.17657 train_rmse= 0.87013 val_loss= 1.45120 val_rmse= 1.11618 		time= 0.66323
[*] Epoch: 0770 train_loss= 

[*] Epoch: 0837 train_loss= 1.16522 train_rmse= 0.86518 val_loss= 1.45491 val_rmse= 1.12212 		time= 0.58145
[*] Epoch: 0838 train_loss= 1.17347 train_rmse= 0.87018 val_loss= 1.45788 val_rmse= 1.12410 		time= 0.60039
[*] Epoch: 0839 train_loss= 1.16100 train_rmse= 0.86119 val_loss= 1.45854 val_rmse= 1.12372 		time= 0.58245
[*] Epoch: 0840 train_loss= 1.16339 train_rmse= 0.86423 val_loss= 1.45675 val_rmse= 1.11989 		time= 0.66921
[*] Epoch: 0841 train_loss= 1.17273 train_rmse= 0.87016 val_loss= 1.45333 val_rmse= 1.11505 		time= 0.56549
[*] Epoch: 0842 train_loss= 1.17976 train_rmse= 0.87371 val_loss= 1.45264 val_rmse= 1.11605 		time= 0.62931
[*] Epoch: 0843 train_loss= 1.16103 train_rmse= 0.86405 val_loss= 1.45261 val_rmse= 1.11877 		time= 0.56948
[*] Epoch: 0844 train_loss= 1.16336 train_rmse= 0.86230 val_loss= 1.45175 val_rmse= 1.12013 		time= 0.52260
[*] Epoch: 0845 train_loss= 1.16207 train_rmse= 0.86239 val_loss= 1.44880 val_rmse= 1.11667 		time= 0.60239
[*] Epoch: 0846 train_loss= 

[*] Epoch: 0913 train_loss= 1.15142 train_rmse= 0.85908 val_loss= 1.46042 val_rmse= 1.12344 		time= 0.58942
[*] Epoch: 0914 train_loss= 1.15493 train_rmse= 0.86822 val_loss= 1.45717 val_rmse= 1.11910 		time= 0.61435
[*] Epoch: 0915 train_loss= 1.14688 train_rmse= 0.85222 val_loss= 1.45167 val_rmse= 1.11263 		time= 0.56150
[*] Epoch: 0916 train_loss= 1.15672 train_rmse= 0.86026 val_loss= 1.44855 val_rmse= 1.10927 		time= 0.59341
[*] Epoch: 0917 train_loss= 1.15414 train_rmse= 0.85726 val_loss= 1.44591 val_rmse= 1.10693 		time= 0.65225
[*] Epoch: 0918 train_loss= 1.14842 train_rmse= 0.85355 val_loss= 1.44599 val_rmse= 1.10816 		time= 0.55452
[*] Epoch: 0919 train_loss= 1.15138 train_rmse= 0.84854 val_loss= 1.45617 val_rmse= 1.12119 		time= 0.61136
[*] Epoch: 0920 train_loss= 1.15097 train_rmse= 0.85824 val_loss= 1.46431 val_rmse= 1.13042 		time= 0.58743
[*] Epoch: 0921 train_loss= 1.14966 train_rmse= 0.85810 val_loss= 1.46521 val_rmse= 1.12997 		time= 0.54554
[*] Epoch: 0922 train_loss= 

[*] Epoch: 0989 train_loss= 1.12619 train_rmse= 0.84009 val_loss= 1.45899 val_rmse= 1.12435 		time= 0.58942
[*] Epoch: 0990 train_loss= 1.13873 train_rmse= 0.84787 val_loss= 1.45950 val_rmse= 1.12286 		time= 0.63632
[*] Epoch: 0991 train_loss= 1.13294 train_rmse= 0.84119 val_loss= 1.45988 val_rmse= 1.12121 		time= 0.58144
[*] Epoch: 0992 train_loss= 1.13766 train_rmse= 0.84418 val_loss= 1.46054 val_rmse= 1.12226 		time= 0.59242
[*] Epoch: 0993 train_loss= 1.13023 train_rmse= 0.84058 val_loss= 1.45955 val_rmse= 1.12276 		time= 0.58743
[*] Epoch: 0994 train_loss= 1.12431 train_rmse= 0.83708 val_loss= 1.45865 val_rmse= 1.12367 		time= 0.58842
[*] Epoch: 0995 train_loss= 1.13064 train_rmse= 0.84384 val_loss= 1.45646 val_rmse= 1.12276 		time= 0.55551
[*] Epoch: 0996 train_loss= 1.14199 train_rmse= 0.85287 val_loss= 1.45160 val_rmse= 1.11731 		time= 0.58145
[*] Epoch: 0997 train_loss= 1.12432 train_rmse= 0.84069 val_loss= 1.45025 val_rmse= 1.11508 		time= 0.62832
[*] Epoch: 0998 train_loss= 

[*] Epoch: 1065 train_loss= 1.11362 train_rmse= 0.83249 val_loss= 1.46071 val_rmse= 1.12263 		time= 0.62832
[*] Epoch: 1066 train_loss= 1.11365 train_rmse= 0.83105 val_loss= 1.46597 val_rmse= 1.12807 		time= 0.61037
[*] Epoch: 1067 train_loss= 1.12014 train_rmse= 0.83712 val_loss= 1.46814 val_rmse= 1.13022 		time= 0.59341
[*] Epoch: 1068 train_loss= 1.11311 train_rmse= 0.83106 val_loss= 1.46939 val_rmse= 1.13090 		time= 0.57945
[*] Epoch: 1069 train_loss= 1.11008 train_rmse= 0.83209 val_loss= 1.46773 val_rmse= 1.12927 		time= 0.54854
[*] Epoch: 1070 train_loss= 1.13135 train_rmse= 0.84317 val_loss= 1.46264 val_rmse= 1.12456 		time= 0.60139
[*] Epoch: 1071 train_loss= 1.12044 train_rmse= 0.83813 val_loss= 1.45599 val_rmse= 1.11799 		time= 0.70512
[*] Epoch: 1072 train_loss= 1.12230 train_rmse= 0.83146 val_loss= 1.45367 val_rmse= 1.11585 		time= 0.53058
[*] Epoch: 1073 train_loss= 1.11457 train_rmse= 0.83124 val_loss= 1.45640 val_rmse= 1.11834 		time= 0.60438
[*] Epoch: 1074 train_loss= 

[*] Epoch: 1141 train_loss= 1.10582 train_rmse= 0.82737 val_loss= 1.46422 val_rmse= 1.12386 		time= 0.62133
[*] Epoch: 1142 train_loss= 1.10512 train_rmse= 0.82589 val_loss= 1.46176 val_rmse= 1.12131 		time= 0.55152
[*] Epoch: 1143 train_loss= 1.11363 train_rmse= 0.83101 val_loss= 1.45986 val_rmse= 1.12030 		time= 0.59042
[*] Epoch: 1144 train_loss= 1.10100 train_rmse= 0.82319 val_loss= 1.45994 val_rmse= 1.12170 		time= 0.63929
[*] Epoch: 1145 train_loss= 1.09673 train_rmse= 0.82287 val_loss= 1.46175 val_rmse= 1.12465 		time= 0.57147
[*] Epoch: 1146 train_loss= 1.10563 train_rmse= 0.82946 val_loss= 1.46149 val_rmse= 1.12409 		time= 0.62831
[*] Epoch: 1147 train_loss= 1.08676 train_rmse= 0.80949 val_loss= 1.46432 val_rmse= 1.12660 		time= 0.62134
[*] Epoch: 1148 train_loss= 1.10622 train_rmse= 0.82797 val_loss= 1.46484 val_rmse= 1.12568 		time= 0.55651
[*] Epoch: 1149 train_loss= 1.10025 train_rmse= 0.82466 val_loss= 1.46347 val_rmse= 1.12297 		time= 0.56748
[*] Epoch: 1150 train_loss= 

[*] Epoch: 1217 train_loss= 1.10345 train_rmse= 0.82281 val_loss= 1.46533 val_rmse= 1.12439 		time= 0.64228
[*] Epoch: 1218 train_loss= 1.08987 train_rmse= 0.81920 val_loss= 1.46593 val_rmse= 1.12581 		time= 0.57446
[*] Epoch: 1219 train_loss= 1.10085 train_rmse= 0.82541 val_loss= 1.46936 val_rmse= 1.13123 		time= 0.58543
[*] Epoch: 1220 train_loss= 1.10324 train_rmse= 0.82778 val_loss= 1.47091 val_rmse= 1.13384 		time= 0.60738
[*] Epoch: 1221 train_loss= 1.09201 train_rmse= 0.81627 val_loss= 1.47045 val_rmse= 1.13295 		time= 0.55950
[*] Epoch: 1222 train_loss= 1.09979 train_rmse= 0.82473 val_loss= 1.46793 val_rmse= 1.12926 		time= 0.59042
[*] Epoch: 1223 train_loss= 1.09182 train_rmse= 0.82202 val_loss= 1.46667 val_rmse= 1.12693 		time= 0.63131
[*] Epoch: 1224 train_loss= 1.08808 train_rmse= 0.81951 val_loss= 1.46837 val_rmse= 1.12885 		time= 0.58842
[*] Epoch: 1225 train_loss= 1.10134 train_rmse= 0.82478 val_loss= 1.47103 val_rmse= 1.13173 		time= 0.59242
[*] Epoch: 1226 train_loss= 

[*] Epoch: 1293 train_loss= 1.07985 train_rmse= 0.81432 val_loss= 1.47371 val_rmse= 1.13544 		time= 0.63829
[*] Epoch: 1294 train_loss= 1.10712 train_rmse= 0.83850 val_loss= 1.47363 val_rmse= 1.13509 		time= 0.54155
[*] Epoch: 1295 train_loss= 1.07755 train_rmse= 0.80806 val_loss= 1.47294 val_rmse= 1.13385 		time= 0.60937
[*] Epoch: 1296 train_loss= 1.08226 train_rmse= 0.81076 val_loss= 1.47182 val_rmse= 1.13220 		time= 0.66422
[*] Epoch: 1297 train_loss= 1.08624 train_rmse= 0.81388 val_loss= 1.46836 val_rmse= 1.12809 		time= 0.60538
[*] Epoch: 1298 train_loss= 1.07595 train_rmse= 0.80848 val_loss= 1.46516 val_rmse= 1.12547 		time= 0.61236
[*] Epoch: 1299 train_loss= 1.09075 train_rmse= 0.81688 val_loss= 1.46386 val_rmse= 1.12458 		time= 0.59640
[*] Epoch: 1300 train_loss= 1.08429 train_rmse= 0.81324 val_loss= 1.46803 val_rmse= 1.12907 		time= 0.54654
[*] Epoch: 1301 train_loss= 1.08406 train_rmse= 0.81273 val_loss= 1.47267 val_rmse= 1.13438 		time= 0.59840
[*] Epoch: 1302 train_loss= 

[*] Epoch: 1369 train_loss= 1.06870 train_rmse= 0.80677 val_loss= 1.46841 val_rmse= 1.12786 		time= 0.64328
[*] Epoch: 1370 train_loss= 1.06843 train_rmse= 0.79932 val_loss= 1.46671 val_rmse= 1.12548 		time= 0.65924
[*] Epoch: 1371 train_loss= 1.06662 train_rmse= 0.80443 val_loss= 1.46752 val_rmse= 1.12624 		time= 0.59242
[*] Epoch: 1372 train_loss= 1.06591 train_rmse= 0.80318 val_loss= 1.47125 val_rmse= 1.13053 		time= 0.66223
[*] Epoch: 1373 train_loss= 1.06748 train_rmse= 0.80044 val_loss= 1.47411 val_rmse= 1.13322 		time= 0.66921
[*] Epoch: 1374 train_loss= 1.06357 train_rmse= 0.79876 val_loss= 1.47703 val_rmse= 1.13536 		time= 0.60837
[*] Epoch: 1375 train_loss= 1.07000 train_rmse= 0.81230 val_loss= 1.47855 val_rmse= 1.13610 		time= 0.59740
[*] Epoch: 1376 train_loss= 1.07008 train_rmse= 0.81010 val_loss= 1.48260 val_rmse= 1.14058 		time= 0.59242
[*] Epoch: 1377 train_loss= 1.07760 train_rmse= 0.81009 val_loss= 1.48239 val_rmse= 1.14113 		time= 0.60737
[*] Epoch: 1378 train_loss= 

[*] Epoch: 1445 train_loss= 1.06102 train_rmse= 0.79835 val_loss= 1.47561 val_rmse= 1.13449 		time= 0.65525
[*] Epoch: 1446 train_loss= 1.06893 train_rmse= 0.80487 val_loss= 1.47666 val_rmse= 1.13611 		time= 0.62632
[*] Epoch: 1447 train_loss= 1.07432 train_rmse= 0.80644 val_loss= 1.47918 val_rmse= 1.14011 		time= 0.62533
[*] Epoch: 1448 train_loss= 1.07978 train_rmse= 0.81542 val_loss= 1.47920 val_rmse= 1.14034 		time= 0.61835
[*] Epoch: 1449 train_loss= 1.07637 train_rmse= 0.81034 val_loss= 1.47632 val_rmse= 1.13666 		time= 0.60538
[*] Epoch: 1450 train_loss= 1.05219 train_rmse= 0.79509 val_loss= 1.47415 val_rmse= 1.13342 		time= 0.59940
[*] Epoch: 1451 train_loss= 1.05037 train_rmse= 0.79373 val_loss= 1.47374 val_rmse= 1.13208 		time= 0.70511
[*] Epoch: 1452 train_loss= 1.05995 train_rmse= 0.79863 val_loss= 1.47244 val_rmse= 1.13051 		time= 0.61336
[*] Epoch: 1453 train_loss= 1.06661 train_rmse= 0.80225 val_loss= 1.47358 val_rmse= 1.13231 		time= 0.63530
[*] Epoch: 1454 train_loss= 

[*] Epoch: 1521 train_loss= 1.05575 train_rmse= 0.79530 val_loss= 1.47947 val_rmse= 1.13971 		time= 0.61735
[*] Epoch: 1522 train_loss= 1.05225 train_rmse= 0.79586 val_loss= 1.47987 val_rmse= 1.14052 		time= 0.67520
[*] Epoch: 1523 train_loss= 1.05619 train_rmse= 0.79866 val_loss= 1.48066 val_rmse= 1.14063 		time= 0.57347
[*] Epoch: 1524 train_loss= 1.04747 train_rmse= 0.79275 val_loss= 1.48142 val_rmse= 1.14036 		time= 0.60837
[*] Epoch: 1525 train_loss= 1.05179 train_rmse= 0.79448 val_loss= 1.48048 val_rmse= 1.13841 		time= 0.62732
[*] Epoch: 1526 train_loss= 1.05588 train_rmse= 0.79972 val_loss= 1.47870 val_rmse= 1.13594 		time= 0.57945
[*] Epoch: 1527 train_loss= 1.05707 train_rmse= 0.79746 val_loss= 1.47532 val_rmse= 1.13313 		time= 0.58843
[*] Epoch: 1528 train_loss= 1.06458 train_rmse= 0.80494 val_loss= 1.47284 val_rmse= 1.13150 		time= 0.59940
[*] Epoch: 1529 train_loss= 1.04912 train_rmse= 0.79182 val_loss= 1.47305 val_rmse= 1.13178 		time= 0.67220
[*] Epoch: 1530 train_loss= 

[*] Epoch: 1597 train_loss= 1.06234 train_rmse= 0.80424 val_loss= 1.48892 val_rmse= 1.15021 		time= 0.61137
[*] Epoch: 1598 train_loss= 1.05600 train_rmse= 0.80015 val_loss= 1.48859 val_rmse= 1.14927 		time= 0.69314
[*] Epoch: 1599 train_loss= 1.04244 train_rmse= 0.78780 val_loss= 1.48711 val_rmse= 1.14756 		time= 0.68417
[*] Epoch: 1600 train_loss= 1.05282 train_rmse= 0.79567 val_loss= 1.48305 val_rmse= 1.14364 		time= 0.66622
[*] Epoch: 1601 train_loss= 1.04181 train_rmse= 0.79162 val_loss= 1.48228 val_rmse= 1.14329 		time= 0.62134
[*] Epoch: 1602 train_loss= 1.04590 train_rmse= 0.79114 val_loss= 1.48203 val_rmse= 1.14280 		time= 0.63730
[*] Epoch: 1603 train_loss= 1.05240 train_rmse= 0.79635 val_loss= 1.48380 val_rmse= 1.14444 		time= 0.62033
[*] Epoch: 1604 train_loss= 1.04465 train_rmse= 0.78950 val_loss= 1.48548 val_rmse= 1.14586 		time= 0.65425
[*] Epoch: 1605 train_loss= 1.05448 train_rmse= 0.79518 val_loss= 1.48709 val_rmse= 1.14719 		time= 0.62931
[*] Epoch: 1606 train_loss= 

[*] Epoch: 1673 train_loss= 1.04938 train_rmse= 0.79325 val_loss= 1.48231 val_rmse= 1.14025 		time= 0.63031
[*] Epoch: 1674 train_loss= 1.02380 train_rmse= 0.77811 val_loss= 1.48483 val_rmse= 1.14296 		time= 0.60339
[*] Epoch: 1675 train_loss= 1.04406 train_rmse= 0.78612 val_loss= 1.48595 val_rmse= 1.14572 		time= 0.62433
[*] Epoch: 1676 train_loss= 1.04952 train_rmse= 0.79600 val_loss= 1.48258 val_rmse= 1.14363 		time= 0.68916
[*] Epoch: 1677 train_loss= 1.04490 train_rmse= 0.79151 val_loss= 1.48011 val_rmse= 1.14141 		time= 0.59142
[*] Epoch: 1678 train_loss= 1.04281 train_rmse= 0.78738 val_loss= 1.47906 val_rmse= 1.13928 		time= 0.62632
[*] Epoch: 1679 train_loss= 1.03820 train_rmse= 0.78525 val_loss= 1.48104 val_rmse= 1.13974 		time= 0.65424
[*] Epoch: 1680 train_loss= 1.04740 train_rmse= 0.79136 val_loss= 1.48404 val_rmse= 1.14149 		time= 0.63929
[*] Epoch: 1681 train_loss= 1.03610 train_rmse= 0.78563 val_loss= 1.48414 val_rmse= 1.14128 		time= 0.64328
[*] Epoch: 1682 train_loss= 

[*] Epoch: 1749 train_loss= 1.01656 train_rmse= 0.77225 val_loss= 1.48021 val_rmse= 1.13920 		time= 0.61635
[*] Epoch: 1750 train_loss= 1.01843 train_rmse= 0.77908 val_loss= 1.48051 val_rmse= 1.14021 		time= 0.60139
[*] Epoch: 1751 train_loss= 1.01969 train_rmse= 0.77896 val_loss= 1.48128 val_rmse= 1.14116 		time= 0.60239
[*] Epoch: 1752 train_loss= 1.02826 train_rmse= 0.78006 val_loss= 1.48121 val_rmse= 1.14039 		time= 0.60738
[*] Epoch: 1753 train_loss= 1.03396 train_rmse= 0.78491 val_loss= 1.48224 val_rmse= 1.13993 		time= 0.68617
[*] Epoch: 1754 train_loss= 1.03599 train_rmse= 0.78446 val_loss= 1.48227 val_rmse= 1.13940 		time= 0.60738
[*] Epoch: 1755 train_loss= 1.03746 train_rmse= 0.78433 val_loss= 1.48295 val_rmse= 1.14048 		time= 0.64727
[*] Epoch: 1756 train_loss= 1.03019 train_rmse= 0.77969 val_loss= 1.48489 val_rmse= 1.14368 		time= 0.63730
[*] Epoch: 1757 train_loss= 1.02961 train_rmse= 0.78308 val_loss= 1.48632 val_rmse= 1.14654 		time= 0.54853
[*] Epoch: 1758 train_loss= 

[*] Epoch: 1825 train_loss= 1.03182 train_rmse= 0.78167 val_loss= 1.48407 val_rmse= 1.14351 		time= 0.68217
[*] Epoch: 1826 train_loss= 1.02900 train_rmse= 0.77913 val_loss= 1.48311 val_rmse= 1.14118 		time= 0.60139
[*] Epoch: 1827 train_loss= 1.03250 train_rmse= 0.78815 val_loss= 1.48139 val_rmse= 1.13809 		time= 0.63929
[*] Epoch: 1828 train_loss= 1.03039 train_rmse= 0.77993 val_loss= 1.48397 val_rmse= 1.14011 		time= 0.58443
[*] Epoch: 1829 train_loss= 1.03971 train_rmse= 0.79050 val_loss= 1.48723 val_rmse= 1.14416 		time= 0.57945
[*] Epoch: 1830 train_loss= 1.05525 train_rmse= 0.79759 val_loss= 1.48905 val_rmse= 1.14753 		time= 0.58344
[*] Epoch: 1831 train_loss= 1.02748 train_rmse= 0.77939 val_loss= 1.48933 val_rmse= 1.14900 		time= 0.66821
[*] Epoch: 1832 train_loss= 1.03403 train_rmse= 0.78699 val_loss= 1.48965 val_rmse= 1.14975 		time= 0.56748
[*] Epoch: 1833 train_loss= 1.02767 train_rmse= 0.77762 val_loss= 1.49271 val_rmse= 1.15191 		time= 0.61436
[*] Epoch: 1834 train_loss= 

[*] Epoch: 1901 train_loss= 1.03385 train_rmse= 0.78219 val_loss= 1.48590 val_rmse= 1.14448 		time= 0.61436
[*] Epoch: 1902 train_loss= 1.01874 train_rmse= 0.77401 val_loss= 1.48606 val_rmse= 1.14338 		time= 0.66722
[*] Epoch: 1903 train_loss= 1.02431 train_rmse= 0.77766 val_loss= 1.48509 val_rmse= 1.14163 		time= 0.56449
[*] Epoch: 1904 train_loss= 1.01200 train_rmse= 0.77062 val_loss= 1.48552 val_rmse= 1.14321 		time= 0.60339
[*] Epoch: 1905 train_loss= 1.01258 train_rmse= 0.77490 val_loss= 1.48591 val_rmse= 1.14609 		time= 0.60737
[*] Epoch: 1906 train_loss= 1.03388 train_rmse= 0.78869 val_loss= 1.48619 val_rmse= 1.14778 		time= 0.58244
[*] Epoch: 1907 train_loss= 1.02854 train_rmse= 0.78783 val_loss= 1.48333 val_rmse= 1.14422 		time= 0.62433
[*] Epoch: 1908 train_loss= 1.00919 train_rmse= 0.77021 val_loss= 1.48026 val_rmse= 1.13931 		time= 0.61635
[*] Epoch: 1909 train_loss= 1.00951 train_rmse= 0.76546 val_loss= 1.48150 val_rmse= 1.13855 		time= 0.63630
[*] Epoch: 1910 train_loss= 

[*] Epoch: 1977 train_loss= 1.00980 train_rmse= 0.76740 val_loss= 1.48508 val_rmse= 1.14635 		time= 0.58144
[*] Epoch: 1978 train_loss= 1.00389 train_rmse= 0.76627 val_loss= 1.48789 val_rmse= 1.14824 		time= 0.61934
[*] Epoch: 1979 train_loss= 1.01281 train_rmse= 0.77091 val_loss= 1.48880 val_rmse= 1.14879 		time= 0.55651
[*] Epoch: 1980 train_loss= 1.00151 train_rmse= 0.76283 val_loss= 1.48823 val_rmse= 1.14787 		time= 0.60239
[*] Epoch: 1981 train_loss= 1.00655 train_rmse= 0.76969 val_loss= 1.48649 val_rmse= 1.14667 		time= 0.64670
[*] Epoch: 1982 train_loss= 0.99925 train_rmse= 0.76004 val_loss= 1.48774 val_rmse= 1.14821 		time= 0.60039
[*] Epoch: 1983 train_loss= 1.01441 train_rmse= 0.77296 val_loss= 1.49039 val_rmse= 1.15082 		time= 0.62232
[*] Epoch: 1984 train_loss= 1.02561 train_rmse= 0.78020 val_loss= 1.49219 val_rmse= 1.15234 		time= 0.60139
[*] Epoch: 1985 train_loss= 1.01156 train_rmse= 0.77128 val_loss= 1.49325 val_rmse= 1.15247 		time= 0.57047
[*] Epoch: 1986 train_loss= 

[*] Epoch: 2053 train_loss= 1.01227 train_rmse= 0.77016 val_loss= 1.49246 val_rmse= 1.14976 		time= 0.66622
[*] Epoch: 2054 train_loss= 1.02298 train_rmse= 0.77552 val_loss= 1.49022 val_rmse= 1.14891 		time= 0.54056
[*] Epoch: 2055 train_loss= 1.01299 train_rmse= 0.77215 val_loss= 1.48813 val_rmse= 1.14776 		time= 0.57047
[*] Epoch: 2056 train_loss= 1.00842 train_rmse= 0.77434 val_loss= 1.48682 val_rmse= 1.14606 		time= 0.59940
[*] Epoch: 2057 train_loss= 0.99634 train_rmse= 0.75654 val_loss= 1.48857 val_rmse= 1.14641 		time= 0.54554
[*] Epoch: 2058 train_loss= 1.01241 train_rmse= 0.77231 val_loss= 1.48799 val_rmse= 1.14423 		time= 0.59840
[*] Epoch: 2059 train_loss= 1.01775 train_rmse= 0.77366 val_loss= 1.48743 val_rmse= 1.14335 		time= 0.64129
[*] Epoch: 2060 train_loss= 1.01738 train_rmse= 0.77070 val_loss= 1.48716 val_rmse= 1.14504 		time= 0.64427
[*] Epoch: 2061 train_loss= 1.01381 train_rmse= 0.77399 val_loss= 1.48953 val_rmse= 1.14942 		time= 0.59142
[*] Epoch: 2062 train_loss= 

[*] Epoch: 2129 train_loss= 0.99220 train_rmse= 0.75733 val_loss= 1.48420 val_rmse= 1.14331 		time= 0.60339
[*] Epoch: 2130 train_loss= 1.01024 train_rmse= 0.77019 val_loss= 1.48523 val_rmse= 1.14484 		time= 0.56449
[*] Epoch: 2131 train_loss= 0.99783 train_rmse= 0.76103 val_loss= 1.48663 val_rmse= 1.14514 		time= 0.60538
[*] Epoch: 2132 train_loss= 1.00076 train_rmse= 0.76542 val_loss= 1.48733 val_rmse= 1.14420 		time= 0.65224
[*] Epoch: 2133 train_loss= 1.00455 train_rmse= 0.76651 val_loss= 1.48804 val_rmse= 1.14456 		time= 0.56250
[*] Epoch: 2134 train_loss= 0.99710 train_rmse= 0.76459 val_loss= 1.48813 val_rmse= 1.14514 		time= 0.59243
[*] Epoch: 2135 train_loss= 1.02201 train_rmse= 0.77722 val_loss= 1.48830 val_rmse= 1.14522 		time= 0.59940
[*] Epoch: 2136 train_loss= 1.00318 train_rmse= 0.76458 val_loss= 1.48801 val_rmse= 1.14451 		time= 0.58045
[*] Epoch: 2137 train_loss= 1.00196 train_rmse= 0.76913 val_loss= 1.48763 val_rmse= 1.14347 		time= 0.57746
[*] Epoch: 2138 train_loss= 

[*] Epoch: 2205 train_loss= 0.99822 train_rmse= 0.76384 val_loss= 1.48801 val_rmse= 1.14327 		time= 0.60738
[*] Epoch: 2206 train_loss= 0.99641 train_rmse= 0.76308 val_loss= 1.48708 val_rmse= 1.14359 		time= 0.59541
[*] Epoch: 2207 train_loss= 0.99553 train_rmse= 0.76079 val_loss= 1.48473 val_rmse= 1.14322 		time= 0.61037
[*] Epoch: 2208 train_loss= 1.00207 train_rmse= 0.76712 val_loss= 1.48313 val_rmse= 1.14254 		time= 0.59840
[*] Epoch: 2209 train_loss= 0.99778 train_rmse= 0.76020 val_loss= 1.48577 val_rmse= 1.14437 		time= 0.56948
[*] Epoch: 2210 train_loss= 1.00294 train_rmse= 0.76223 val_loss= 1.48897 val_rmse= 1.14607 		time= 0.59840
[*] Epoch: 2211 train_loss= 0.99171 train_rmse= 0.75966 val_loss= 1.49064 val_rmse= 1.14782 		time= 0.67121
[*] Epoch: 2212 train_loss= 1.00648 train_rmse= 0.76669 val_loss= 1.49120 val_rmse= 1.15010 		time= 0.58144
[*] Epoch: 2213 train_loss= 0.99794 train_rmse= 0.75874 val_loss= 1.49010 val_rmse= 1.15162 		time= 0.61037
[*] Epoch: 2214 train_loss= 

[*] Epoch: 2281 train_loss= 0.99772 train_rmse= 0.76635 val_loss= 1.48050 val_rmse= 1.13804 		time= 0.54654
[*] Epoch: 2282 train_loss= 0.98311 train_rmse= 0.75223 val_loss= 1.48273 val_rmse= 1.13977 		time= 0.63530
[*] Epoch: 2283 train_loss= 1.00923 train_rmse= 0.77074 val_loss= 1.48242 val_rmse= 1.13971 		time= 0.67819
[*] Epoch: 2284 train_loss= 0.98769 train_rmse= 0.75429 val_loss= 1.48300 val_rmse= 1.14193 		time= 0.59142
[*] Epoch: 2285 train_loss= 0.99913 train_rmse= 0.76369 val_loss= 1.48501 val_rmse= 1.14566 		time= 0.59940
[*] Epoch: 2286 train_loss= 0.99525 train_rmse= 0.76348 val_loss= 1.48663 val_rmse= 1.14731 		time= 0.67121
[*] Epoch: 2287 train_loss= 1.00333 train_rmse= 0.76975 val_loss= 1.48786 val_rmse= 1.14777 		time= 0.56848
[*] Epoch: 2288 train_loss= 1.00286 train_rmse= 0.77201 val_loss= 1.48792 val_rmse= 1.14651 		time= 0.62732
[*] Epoch: 2289 train_loss= 1.00058 train_rmse= 0.76467 val_loss= 1.48848 val_rmse= 1.14677 		time= 0.65325
[*] Epoch: 2290 train_loss= 

[*] Epoch: 2357 train_loss= 0.99022 train_rmse= 0.75541 val_loss= 1.48730 val_rmse= 1.14538 		time= 0.60338
[*] Epoch: 2358 train_loss= 1.01049 train_rmse= 0.76795 val_loss= 1.48717 val_rmse= 1.14452 		time= 0.62733
[*] Epoch: 2359 train_loss= 0.99263 train_rmse= 0.76234 val_loss= 1.48741 val_rmse= 1.14439 		time= 0.52660
[*] Epoch: 2360 train_loss= 0.99065 train_rmse= 0.75920 val_loss= 1.48779 val_rmse= 1.14432 		time= 0.60738
[*] Epoch: 2361 train_loss= 0.99993 train_rmse= 0.76180 val_loss= 1.48903 val_rmse= 1.14677 		time= 0.65127
[*] Epoch: 2362 train_loss= 0.98464 train_rmse= 0.75909 val_loss= 1.48948 val_rmse= 1.14865 		time= 0.63031
[*] Epoch: 2363 train_loss= 0.98136 train_rmse= 0.75621 val_loss= 1.49067 val_rmse= 1.15062 		time= 0.66821
[*] Epoch: 2364 train_loss= 0.99578 train_rmse= 0.76382 val_loss= 1.49180 val_rmse= 1.15125 		time= 0.64128
[*] Epoch: 2365 train_loss= 0.98827 train_rmse= 0.75893 val_loss= 1.49428 val_rmse= 1.15203 		time= 0.61436
[*] Epoch: 2366 train_loss= 

[*] Epoch: 2433 train_loss= 0.98983 train_rmse= 0.76095 val_loss= 1.49105 val_rmse= 1.15036 		time= 0.84914
[*] Epoch: 2434 train_loss= 0.99364 train_rmse= 0.75950 val_loss= 1.49072 val_rmse= 1.14873 		time= 0.74975
[*] Epoch: 2435 train_loss= 0.99182 train_rmse= 0.76452 val_loss= 1.49234 val_rmse= 1.14818 		time= 0.86148
[*] Epoch: 2436 train_loss= 0.98585 train_rmse= 0.75549 val_loss= 1.49358 val_rmse= 1.14918 		time= 0.87762
[*] Epoch: 2437 train_loss= 0.98502 train_rmse= 0.75266 val_loss= 1.49467 val_rmse= 1.15173 		time= 0.73830
[*] Epoch: 2438 train_loss= 1.00325 train_rmse= 0.76607 val_loss= 1.49532 val_rmse= 1.15407 		time= 0.78244
[*] Epoch: 2439 train_loss= 0.97970 train_rmse= 0.75513 val_loss= 1.49571 val_rmse= 1.15416 		time= 0.76511
[*] Epoch: 2440 train_loss= 0.99737 train_rmse= 0.76521 val_loss= 1.49581 val_rmse= 1.15210 		time= 0.81356
[*] Epoch: 2441 train_loss= 0.99244 train_rmse= 0.76453 val_loss= 1.49614 val_rmse= 1.15064 		time= 0.81682
[*] Epoch: 2442 train_loss= 

In [48]:
# store model including exponential moving averages
saver = tf.train.Saver()
save_path = saver.save(sess, "tmp/%s.ckpt" % model.name, global_step=model.global_step)


if VERBOSE:
    print("\nOptimization Finished!")
    print('best validation score =', best_val_score, 'at iteration', best_epoch)


if TESTING:
    test_avg_loss, test_rmse = sess.run([model.loss, model.rmse], feed_dict=test_feed_dict)
    print('test loss = ', test_avg_loss)
    print('test rmse = ', test_rmse)

    # restore with polyak averages of parameters
    variables_to_restore = model.variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)
    saver.restore(sess, save_path)

    test_avg_loss, test_rmse = sess.run([model.loss, model.rmse], feed_dict=test_feed_dict)
    print('polyak test loss = ', test_avg_loss)
    print('polyak test rmse = ', test_rmse)

else:
    # restore with polyak averages of parameters
    variables_to_restore = model.variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)
    saver.restore(sess, save_path)

    val_avg_loss, val_rmse = sess.run([model.loss, model.rmse], feed_dict=val_feed_dict)
    print('polyak val loss = ', val_avg_loss)
    print('polyak val rmse = ', val_rmse)

print('\nSETTINGS:\n')
for key, val in sorted(vars(ap.parse_args()).iteritems()):
    print(key, val)

print('global seed = ', seed)

# For parsing results from file
results = vars(ap.parse_args()).copy()
results.update({'best_val_score': float(best_val_score), 'best_epoch': best_epoch})
print(json.dumps(results))

sess.close()


Optimization Finished!
best validation score = 1.0786374 at iteration 315
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from tmp/recommendersideinfogae.ckpt-2500
polyak val loss =  1.4909396
polyak val rmse =  1.1483742

SETTINGS:



usage: ipykernel_launcher.py [-h]
                             [-d {ml_100k,ml_1m,ml_10m,douban,yahoo_music,flixster}]
                             [-lr LEARNING_RATE] [-e EPOCHS]
                             [-hi HIDDEN HIDDEN] [-fhi FEAT_HIDDEN]
                             [-ac {sum,stack}] [-do DROPOUT]
                             [-nb NUM_BASIS_FUNCTIONS] [-ds DATA_SEED]
                             [-sdir SUMMARIES_DIR] [-nsym | -nleft]
                             [-f | -no_f] [-ws | -no_ws] [-t | -v]
ipykernel_launcher.py: error: unrecognized arguments: C:\Users\maoru\AppData\Roaming\jupyter\runtime\kernel-200b2127-e931-4a62-ac6f-ecdd7d7e7bdb.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
