In [1]:
# Enable IPython's autoreload extension.
# Mode 1: before running a cell, reload only the modules registered with %aimport.
%load_ext autoreload
%autoreload 1

In [2]:
# Register the local `utils` module for autoreload, so edits to it are picked up
# without restarting the kernel.
%aimport utils

In [3]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from os import path
from utils import load_embeddings_and_ids, User

In [4]:
# Expose only one GPU (device id "1") to this process so the remaining
# devices stay available for other people :)
os.environ.update(CUDA_VISIBLE_DEVICES="1")

### Load pre-trained ResNet50 image embeddings

In [5]:
# Load the pre-computed ResNet50 embeddings together with the artwork ids
# and the artwork-id -> index lookup returned by the project helper.
resnet50_embeddings, artwork_ids, artwork_id2index = load_embeddings_and_ids(
    '/mnt/workspace/Ugallery/ResNet50/',
    'flatten_1.npy',
    'ids',
)

In [6]:
# Number of artwork ids returned by load_embeddings_and_ids; this is later used
# as the number of items when the network graph is built.
n_artworks = len(artwork_ids)
n_artworks

13297

### Load user list from transactions

In [7]:
# Read the sales transactions and collect the distinct customers,
# kept in order of first appearance in the file.
sales_df = pd.read_csv('./valid_sales.csv')
user_ids = sales_df['customer_id'].unique()
n_users = len(user_ids)

In [8]:
# Number of distinct customer ids found in the sales file.
n_users

2919

### TensorFlow Network Graph

In [9]:
class Network:
    """TensorFlow 1.x graph exposing per-item vectors, per-item biases and user factors.

    The constructor only builds graph nodes (placeholders, variables and the
    tensors derived from them) in the current default graph. No training ops
    are defined here; the variables are expected to be filled in from outside
    (e.g. by restoring a checkpoint) before the getter methods are called.

    NOTE: the variable names ('user_latent_factors', 'item_latent_factors',
    'item_latent_biases', 'visual_bias') and the 'trainable_image_embedding/fc1'
    scope are what a default ``tf.train.Saver`` uses to match checkpoint
    entries, so they must not be renamed if an existing checkpoint is restored.
    """

    def __init__(self, n_users, n_items, user_latent_dim, item_latent_dim, item_visual_dim,
                 pretrained_dim=2048):
        """Create the placeholders, the trainable variables and the item tensors.

        Args:
            n_users: number of rows of the user latent factor matrix.
            n_items: number of rows of the item latent factor matrix / bias vector.
            user_latent_dim: number of columns of the user latent factor matrix.
            item_latent_dim: number of columns of the item latent factor matrix.
            item_visual_dim: output size of the dense layer applied to the
                pre-trained image embeddings.
            pretrained_dim: size of each pre-trained image embedding (default 2048).
        """
        
        # --- placeholders
        # full matrix of pre-trained image embeddings: (any number of rows, pretrained_dim)
        self._pretrained_image_embeddings = tf.placeholder(shape=[None, pretrained_dim], dtype=tf.float32)
        # 1-D batch of item indexes; used to gather rows from the embeddings
        # placeholder and from the item latent variables
        self._item_index = tf.placeholder(shape=[None], dtype=tf.int32)
            
        # ------------------------------------
        # ---- Global trainable variables
        
        # -- user latent factor matrix
        # (n_users x user_latent_dim)
        self._user_latent_factors = tf.Variable(
            tf.random_uniform([n_users, user_latent_dim], -1.0, 1.0),
            name='user_latent_factors'
        )
        
        # -- item latent factor matrix
        # (n_items x item_latent_dim)
        self._item_latent_factors = tf.Variable(
            tf.random_uniform([n_items, item_latent_dim], -1.0, 1.0),
            name='item_latent_factors'
        )
        
        # -- item latent biases
        # (n_items)
        self._item_latent_biases = tf.Variable(
            tf.random_uniform([n_items], -1.0, 1.0),
            name='item_latent_biases'
        )
        
        # -- global visual bias
        # (pretrained_dim); a single vector shared by all items
        self._visual_bias = tf.Variable(
            tf.random_uniform([pretrained_dim], -1.0, 1.0),
            name='visual_bias'
        )
        
        # -------------------------------
        # ---- minibatch tensors
        
        # rows of the fed embedding matrix selected by the fed item indexes
        # (batch x pretrained_dim)
        item_pre_vector = tf.gather(self._pretrained_image_embeddings, self._item_index)
        
        # 1) item vector
        #    1.1) visual vector (batch x item_visual_dim)
        item_visual_vector = self.trainable_image_embedding(item_pre_vector, item_visual_dim)
        #    1.2) latent vector (batch x item_latent_dim)
        item_latent_vector = tf.gather(self._item_latent_factors, self._item_index)
        #    1.3) concatenation along axis 1: (batch x (item_visual_dim + item_latent_dim))
        self._item_final_vector = tf.concat([item_visual_vector, item_latent_vector], 1)
        
        # 2) item bias
        #    2.1) visual bias: dot product of each pre-trained embedding with the
        #         global visual bias vector (batch)
        item_visual_bias = tf.reduce_sum(item_pre_vector * self._visual_bias, 1)
        #    2.2) latent bias (batch)
        item_latent_bias = tf.gather(self._item_latent_biases, self._item_index)
        #    2.3) final bias
        self._item_final_bias = item_visual_bias + item_latent_bias
        
    @staticmethod
    def trainable_image_embedding(X, output_dim):
        """Project ``X`` to ``output_dim`` features with a single dense layer.

        The layer is created as 'fc1' inside the 'trainable_image_embedding'
        variable scope; the scope is opened with ``reuse=tf.AUTO_REUSE`` so the
        layer variables are created on first use and reused on later calls.
        No activation is passed to the dense layer.

        Args:
            X: 2-D float tensor whose last dimension is the input feature size.
            output_dim: number of units of the dense layer.

        Returns:
            The output tensor of the dense layer.
        """
        with tf.variable_scope("trainable_image_embedding", reuse=tf.AUTO_REUSE):
            fc1 = tf.layers.dense( # None -> output_dim
                inputs=X,
                units=output_dim,
                name='fc1'
            )
            return fc1
    
    def get_item_final_vector_bias(self, sess, pretrained_image_embeddings, item_index):
        """Evaluate the final item vectors and final item biases for the given items.

        Args:
            sess: session in which the graph variables hold their values.
            pretrained_image_embeddings: value fed to the embeddings placeholder,
                i.e. a matrix with ``pretrained_dim`` columns.
            item_index: 1-D sequence of int indexes; each one selects a row of
                ``pretrained_image_embeddings`` and of the item latent variables.

        Returns:
            A two-element list ``[item_vectors, item_biases]`` as returned by
            ``sess.run``.
        """
        return sess.run([
            self._item_final_vector,
            self._item_final_bias,
        ], feed_dict={
            self._pretrained_image_embeddings: pretrained_image_embeddings,
            self._item_index: item_index,
        })
    
    def get_user_latent_vectors(self, sess):
        """Return the current value of the (n_users x user_latent_dim) user factor matrix."""
        return sess.run(self._user_latent_factors)

In [10]:
# Directory passed to tf.train.latest_checkpoint when the trained weights are restored.
MODEL_PATH = '/mnt/workspace/pamessina_models/ugallery/VBPR/v2_hidinglast/'

In [11]:
# Every item index, 0 .. n_artworks - 1, so that all items are evaluated at once.
all_indexes = [item_idx for item_idx in range(n_artworks)]

In [12]:
# Rebuild the graph in a fresh tf.Graph and restore the trained weights from the
# most recent checkpoint found in MODEL_PATH. The dimensions passed to Network
# must match the shapes of the variables stored in that checkpoint, otherwise
# the restore fails.
with tf.Graph().as_default():
    network = Network(
        n_users=n_users,
        n_items=n_artworks,
        user_latent_dim=128,
        item_latent_dim=64,
        item_visual_dim=64,
        pretrained_dim=2048,
    )

    # latest_checkpoint returns None when no checkpoint is found; fail early with
    # a clear message instead of the opaque error raised by saver.restore(sess, None).
    checkpoint_path = tf.train.latest_checkpoint(MODEL_PATH)
    if checkpoint_path is None:
        raise FileNotFoundError('No TensorFlow checkpoint found in %r' % MODEL_PATH)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint_path)
        # (n_users x user_latent_dim) matrix of user factors
        user_latent_vectors = network.get_user_latent_vectors(sess)
        # final vector and final bias of every item, in all_indexes order
        item_vectors, item_biases = network.get_item_final_vector_bias(sess, resnet50_embeddings, all_indexes)

INFO:tensorflow:Restoring parameters from /mnt/workspace/pamessina_models/ugallery/VBPR/v2_hidinglast/


In [13]:
# Expected: (n_artworks, item_visual_dim + item_latent_dim) and (n_artworks,)
item_vectors.shape, item_biases.shape

((13297, 128), (13297,))

In [14]:
# Expected: (n_users, user_latent_dim)
user_latent_vectors.shape

(2919, 128)

In [15]:
# Export the restored user/item representations plus the matching id lists.
# Row i of each saved array corresponds to line i of its id file.
dir_path = "/mnt/workspace/Ugallery/VBPR/v2_hidinglast/"
os.makedirs(dir_path, exist_ok=True)

# NOTE: np.save is used instead of ndarray.dump. dump() writes a pickle, which
# does not match the ".npy" extension and which np.load refuses to read unless
# allow_pickle=True is passed. np.save writes the real NPY format, which np.load
# reads with or without allow_pickle.

# users
np.save(path.join(dir_path, "user_vectors.npy"), user_latent_vectors)
with open(path.join(dir_path, 'user_ids'), 'w') as f:
    for _id in user_ids:
        f.write('%d\n' % _id)

# items
np.save(path.join(dir_path, "item_vectors.npy"), item_vectors)
np.save(path.join(dir_path, "item_biases.npy"), item_biases)
with open(path.join(dir_path, 'items_ids'), 'w') as f:
    for _id in artwork_ids:
        f.write('%d\n' % _id)