In [1]:
# Enable IPython's autoreload extension so edits to local modules are picked
# up without restarting the kernel.
%load_ext autoreload
# Mode 1: per the IPython docs, only modules registered with %aimport are
# reloaded before each cell execution.
%autoreload 1

In [2]:
# Register the two project modules imported by this notebook (utils, Networks)
# for automatic reloading.
%aimport utils
%aimport Networks

In [3]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from os import path
from sklearn.preprocessing import StandardScaler
from utils import load_embeddings_and_ids, User, concatenate_featmats
from Networks import VBPR_Network_Evaluation

In [4]:
# Restrict this process to a single GPU (device 0) so the remaining devices
# stay available to other users of the machine.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

###  Load pre-trained image embeddings

In [5]:
# Load the pre-computed ResNet50 embeddings and their ids. The result is used
# below as a mapping with 'featmat', 'index2id' and 'id2index' entries.
resnet50 = load_embeddings_and_ids(
    '/mnt/workspace/Ugallery/ResNet50/',
    'flatten_1.npy',
    'ids',
)

In [6]:
# resnext101 = load_embeddings_and_ids('/mnt/workspace/Ugallery/resnext101_32x8d_wsl/', 'features.npy', 'ids.npy')

###  Concatenate embeddings + z-score normalization

In [6]:
# Embedding sources whose feature matrices are concatenated below.
# Only ResNet50 is active; resnext101 is currently left out.
embedding_list = [resnet50]

In [7]:
# Keep only the artwork ids that are present in every embedding source.
artwork_ids_set = set()
for position, embedding in enumerate(embedding_list):
    if position == 0:
        # Seed with the first source. The decision is keyed on the loop
        # position rather than on the set being empty: otherwise an
        # intersection that legitimately became empty would be silently
        # re-seeded with the ids of the next source.
        artwork_ids_set.update(embedding['index2id'])
    else:
        artwork_ids_set.intersection_update(embedding['index2id'])

# The position of an id in this list is the item index used from here on
# (feature-matrix rows, `all_indexes`, and the saved item vectors).
# NOTE(review): the order is deliberately left as the set's iteration order;
# it presumably has to match the item indexing used when the checkpoint was
# trained -- confirm before sorting or otherwise reordering.
artwork_ids = list(artwork_ids_set)
artwork_id2index = {_id: i for i, _id in enumerate(artwork_ids)}
n_artworks = len(artwork_ids)
n_artworks

13297

In [8]:
# Gather, per embedding source, its feature matrix and its id -> row lookup,
# then build one feature matrix for the ids in `artwork_ids`.
featmat_list = []
id2index_list = []
for source in embedding_list:
    featmat_list.append(source['featmat'])
    id2index_list.append(source['id2index'])
concat_featmat = concatenate_featmats(artwork_ids, featmat_list, id2index_list)

In [9]:
# z-score normalization of the concatenated features; the normalized matrix
# replaces the raw one under the same name.
feature_scaler = StandardScaler()
concat_featmat = feature_scaler.fit_transform(concat_featmat)

In [10]:
# Display the shape of the normalized feature matrix as a sanity check.
concat_featmat.shape

(13297, 2048)

### Load user list from transactions

In [11]:
# Read the sales transactions; every distinct customer_id is one user, in
# order of first appearance in the file.
sales_df = pd.read_csv('./data/valid_sales.csv')
user_ids = sales_df['customer_id'].unique()
n_users = len(user_ids)

In [12]:
# Display the number of distinct customers found in the sales file.
n_users

2919

### Load saved VBPR network and compute tensors

In [13]:
# Item indexes 0 .. n_artworks-1, i.e. every position of `artwork_ids`.
all_indexes = [*range(n_artworks)]

In [14]:
# Directory from which the latest VBPR checkpoint is restored below.
# Alternative checkpoint directory, kept commented out for reference:
# MODEL_PATH = '/mnt/workspace/pamessina_models/ugallery/VBPR/v14(10M-500k,rsnt50+rsnxt101,i(100l+100v),hyb(fa+dnn),ab.2,cm.18,fg.7,wd.0001)/'
MODEL_PATH = '/home/mcartagena/Image-Embedding-Learning-and-Feature-Extraction/vbpr_resnet_10m/'
# Echo the active path so it is recorded in the cell output.
MODEL_PATH

'/home/mcartagena/Image-Embedding-Learning-and-Feature-Extraction/vbpr_resnet_10m/'

In [16]:
# Rebuild the VBPR evaluation graph, restore the latest checkpoint found under
# MODEL_PATH and compute the user vectors plus the final item vectors/biases.
with tf.Graph().as_default():
    # NOTE(review): these dimensions presumably have to match the ones the
    # checkpoint was trained with (pretrained_dim being the width of
    # concat_featmat) -- confirm against the training notebook.
    network = VBPR_Network_Evaluation(
        n_users=n_users,
        n_items=n_artworks,
        user_latent_dim=200,
        item_latent_dim=100,
        item_visual_dim=100,
        pretrained_dim=2048,
    )

    # GPU memory is allocated on demand (allow_growth) up to the configured
    # per-process fraction.
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=0.99,
        allow_growth=True
    )
    config = tf.ConfigProto(gpu_options=gpu_options)

    # tf.train.latest_checkpoint returns None when no checkpoint is found;
    # fail here with an explicit message instead of letting saver.restore
    # raise an opaque error about a None save path.
    checkpoint_path = tf.train.latest_checkpoint(MODEL_PATH)
    if checkpoint_path is None:
        raise ValueError('No checkpoint found in MODEL_PATH: %r' % MODEL_PATH)

    with tf.Session(config = config) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_path)
        user_latent_vectors = network.get_user_latent_vectors(sess)
        # Item vectors and biases are computed for every item index, in the
        # order of `all_indexes`.
        item_vectors, item_biases = network.get_item_final_vector_bias(sess, concat_featmat, all_indexes)

INFO:tensorflow:Restoring parameters from /home/mcartagena/Image-Embedding-Learning-and-Feature-Extraction/vbpr_resnet_10m/


In [17]:
# Display the shapes of the computed item vectors and item biases.
item_vectors.shape, item_biases.shape

((13297, 200), (13297,))

In [18]:
# Display the shape of the user latent vectors.
user_latent_vectors.shape

(2919, 200)

In [19]:
# Persist the computed vectors together with the id lists that label their
# rows: line k of an ids file is the id of row k of the matching array.
dir_path = '/mnt/workspace/Ugallery/VBPR/vbpr_resnet_10m/'
os.makedirs(dir_path, exist_ok=True)

# The id files are only meaningful if they line up row-for-row with the
# arrays, so refuse to write mismatched outputs.
if len(user_ids) != user_latent_vectors.shape[0]:
    raise ValueError('user_ids has %d entries but user_latent_vectors has %d rows'
                     % (len(user_ids), user_latent_vectors.shape[0]))
if not (len(artwork_ids) == item_vectors.shape[0] == item_biases.shape[0]):
    raise ValueError('artwork_ids has %d entries but item_vectors has %d rows and item_biases has %d'
                     % (len(artwork_ids), item_vectors.shape[0], item_biases.shape[0]))

# users
# np.save writes the real .npy format. The previous ndarray.dump call wrote a
# pickle into a file named *.npy, which can only be read back with pickle or
# np.load(..., allow_pickle=True).
np.save(path.join(dir_path, "user_vectors.npy"), user_latent_vectors)
with open(path.join(dir_path, 'user_ids'), 'w') as f:
    for _id in user_ids:
        f.write('%d\n' % _id)

# items
np.save(path.join(dir_path, "item_vectors.npy"), item_vectors)
np.save(path.join(dir_path, "item_biases.npy"), item_biases)
# The file name 'items_ids' (plural "items") is kept as-is, since whatever
# reads these outputs may depend on it.
with open(path.join(dir_path, 'items_ids'), 'w') as f:
    for _id in artwork_ids:
        f.write('%d\n' % _id)