In [1]:
%load_ext autoreload
%autoreload 1

In [2]:
# hack based on https://stackoverflow.com/a/33532002
from inspect import getsourcefile
import os.path as path, sys
# Directory of the source path that `inspect` reports for a throwaway lambda defined
# in this cell (made absolute first).
current_dir = path.dirname(path.abspath(getsourcefile(lambda: 0)))
# Put the parent of that directory at the front of the import path so the sibling
# project modules can be imported. `path.dirname` is used instead of slicing at the
# last separator: slicing yields '' (i.e. the cwd) for a directory directly under the
# filesystem root and a drive-relative 'C:' on Windows.
sys.path.insert(0, path.dirname(current_dir))

In [3]:
%aimport FeatureUtils
%aimport ExperimentUtils
%aimport Networks

In [4]:
import os
import pandas as pd
import FeatureUtils as featils
from Classes import Customer, ProfileBase
from ExperimentUtils import sanity_check_purchase_upload_events, recommendations_to_csv,\
        run_personalized_recommendation_experiment
from TransactionsUtils import TransactionsHandler

In [5]:
# Expose only GPU 0 to CUDA; this is set before the cell that imports TensorFlow runs.
os.environ.update({'CUDA_VISIBLE_DEVICES': '0'})

In [6]:
# Load the precomputed item vectors together with the id <-> row-index lookups
# returned by the project's feature reader.
tmp = featils.read_numpy_features_matrix(
    '/mnt/workspace/Ugallery/Youtube-like/curatornet_resnet_10m/',
    'item_vectors.npy',
    'ids',
)
item_embeddings, id2index, index2id = tmp['features'], tmp['id2index'], tmp['index2id']

In [7]:
# Display the dimensions of the loaded embedding matrix as a quick load check.
item_embeddings.shape

(13297, 200)

In [8]:
# Distinct ids listed in index2id (presumably the artwork ids that have an embedding
# row — confirm against read_numpy_features_matrix), followed by how many there are.
ids_with_features = set(index2id)
len(ids_with_features)

13297

In [9]:
# Notebook-level alias for the artworks lookup exposed by TransactionsHandler;
# passed to the sanity check and to the experiment runner in later cells.
artworks_dict = TransactionsHandler.artworks_dict

In [10]:
# One fresh Customer object per distinct customer_id found in valid_sales_df.
customers_dict = dict(
    (customer_id, Customer())
    for customer_id in TransactionsHandler.valid_sales_df.customer_id.unique()
)

In [11]:
# ---- upload events -----
upload_events = TransactionsHandler.upload_events

# ---- purchase events -----
purchase_session_events = TransactionsHandler.purchase_session_events

# Hand every purchase session to the customer that made it.
# NOTE(review): this mutates the Customer objects built in the previous cell, so
# re-running this cell alone presumably registers each session twice — re-create
# customers_dict first.
for purchase_session in purchase_session_events:
    buyer = customers_dict[purchase_session.customer_id]
    buyer.append_purchase_session(purchase_session)

# --- single timeline: uploads followed by purchases, ordered by timestamp ----
time_events = sorted(
    upload_events + purchase_session_events,
    key=lambda event: event.timestamp,
)

In [12]:
# Report the size of each event list; the merged timeline should equal the sum of
# the other two.
for _message, _events in (
    ("len(upload_events) = ", upload_events),
    ("len(purchase_session_events) = ", purchase_session_events),
    ("len(time_events) = ", time_events),
):
    print(_message, len(_events))

len(upload_events) =  7742
len(purchase_session_events) =  4897
len(time_events) =  12639


In [13]:
# Run the project's consistency checks on the merged timeline against the artworks
# lookup before any experiment is started.
sanity_check_purchase_upload_events(time_events, artworks_dict)

CHECK: event types are correct
CHECK: events ordered by timestamp
CHECK: products are only uploaded once
CHECK: products can only be purchased if present in inventory


In [14]:
# Recommendation size handed to the experiment runner, and the profile-size limits
# to evaluate.
REC_SIZE = 20
MAX_PROFILE_SIZES = [None]
# Filename tag for each limit; a limit of None is tagged "oo".
MAX_PROFILE_TAGS = [
    ('_(maxprofsize=%d)' % size) if size is not None else '_(maxprofsize=oo)'
    for size in MAX_PROFILE_SIZES
]

In [15]:
# Display the configured profile-size limits next to their filename tags.
MAX_PROFILE_SIZES, MAX_PROFILE_TAGS

([None], ['_(maxprofsize=oo)'])

In [18]:
class CuratorNetProfile(ProfileBase):
    """Profile that orders inventory artworks by CuratorNet match score.

    Relies on the notebook-level `item_embeddings`, `id2index` and `index2id`
    objects to translate between artwork ids and embedding-matrix rows.
    """

    # --- global -----
    @classmethod
    def global_purchase_session_event_handler(cls, purch_sess):
        """Intentionally does nothing with the purchase session."""
        return None

    # --- instance ----
    def __init__(self, maxprofsize, artworks_dict, network, sess):
        """Initialise the base profile and keep the network and session to score with.

        Args:
            maxprofsize: forwarded unchanged to ProfileBase.
            artworks_dict: forwarded unchanged to ProfileBase.
            network: object providing `get_match_scores`.
            sess: session passed to `network.get_match_scores` on every ranking call.
        """
        ProfileBase.__init__(self, maxprofsize, artworks_dict)
        self._network = network
        self._sess = sess

    def ready(self):
        """Return True once the profile holds at least one consumed artwork."""
        consumed_count = len(self.consumed_artworks)
        return consumed_count > 0

    def handle_artwork_added(self, artwork):
        """Intentionally does nothing when an artwork is added."""
        return None

    def handle_artwork_removed(self, artwork):
        """Intentionally does nothing when an artwork is removed."""
        return None

    def rank_inventory_ids(self, inventory_artworks):
        """Return the ids of `inventory_artworks`, highest match score first.

        Ties on score are broken by embedding row index, larger index first,
        because (score, row) pairs are sorted in descending order.
        """
        profile_rows = [id2index[art.id] for art in self.consumed_artworks]
        candidate_rows = [id2index[art.id] for art in inventory_artworks]
        scores = self._network.get_match_scores(
            self._sess, item_embeddings, profile_rows, candidate_rows)
        ranked = sorted(zip(scores, candidate_rows), reverse=True)
        return [index2id[row] for _, row in ranked]

In [19]:
import tensorflow as tf
from Networks import CuratorNet_Evaluation

In [23]:
def run_experiments(artworks_dict, customers_dict, time_events, version, version_kwargs, 
                    weights_path_template=None, version_alias=None,
                    results_dir='/mnt/workspace/ugallery_experiment_results/'):
    """Restore a trained CuratorNet and write one recommendations CSV per profile size.

    For every entry of the notebook-level MAX_PROFILE_SIZES / MAX_PROFILE_TAGS the
    personalized recommendation experiment is run with CuratorNetProfile profiles
    and the result is saved as `@<REC_SIZE><tag>_curatornet-<alias>.csv`.

    Args:
        artworks_dict: forwarded to the experiment runner and to every profile.
        customers_dict: forwarded to the experiment runner.
        time_events: forwarded to the experiment runner.
        version: model version name; substituted into `weights_path_template`.
        version_kwargs: keyword arguments for `CuratorNet_Evaluation`.
        weights_path_template: '%s'-style template of the checkpoint directory.
            Defaults to the shared models directory.
        version_alias: name used in the output filename; defaults to `version`.
        results_dir: directory the CSV files are written to.

    Raises:
        ValueError: if no checkpoint is found in the resolved weights directory.
    """
    if version_alias is None:
        version_alias = version
    if weights_path_template is None:
        weights_path_template = '/mnt/workspace/pamessina_models/ugallery/youtube_like/%s/'

    # Resolve the checkpoint before building the graph or opening a session, so a
    # wrong path fails immediately with a readable message instead of inside
    # Saver.restore (latest_checkpoint returns None when nothing is found).
    weights_dir = weights_path_template % version
    checkpoint_path = tf.train.latest_checkpoint(weights_dir)
    if checkpoint_path is None:
        raise ValueError('No TensorFlow checkpoint found in %r' % weights_dir)

    with tf.Graph().as_default():
        network = CuratorNet_Evaluation(**version_kwargs)
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=0.99,
            allow_growth=True
        )
        config = tf.ConfigProto(gpu_options=gpu_options)
        with tf.Session(config=config) as sess:
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_path)
            for maxprofsize, maxproftag in zip(MAX_PROFILE_SIZES, MAX_PROFILE_TAGS):
                # Bind the current size as a default argument so the factory keeps
                # this iteration's value even if it is called after the loop moves on.
                create_profile_func = lambda _, size=maxprofsize: CuratorNetProfile(
                    size, artworks_dict, network, sess)
                recommendations = run_personalized_recommendation_experiment(
                    artworks_dict, customers_dict, time_events, create_profile_func, rec_size=REC_SIZE)
                output_name = "@{}{}_curatornet-{}.csv".format(
                    REC_SIZE, maxproftag, version_alias)
                recommendations_to_csv(recommendations, os.path.join(results_dir, output_name))

In [24]:
# Model variant to evaluate: run_experiments substitutes this name into the weights
# path template and, when no alias is given, into the results filename.
version = 'curatornet_resnet_10m'
version

'curatornet_resnet_10m'

In [26]:
# Evaluate the selected model version, restoring its weights from the directory
# given by the template below; the keyword arguments configure CuratorNet_Evaluation.
run_experiments(
    artworks_dict,
    customers_dict,
    time_events,
    version=version,
    version_kwargs={
        'user_layer_units': [300, 300, 200],
        'latent_space_dim': 200,
        'profile_pooling_mode': 'AVG+MAX',
    },
    weights_path_template='/home/mcartagena/Image-Embedding-Learning-and-Feature-Extraction/%s/',
)

INFO:tensorflow:Restoring parameters from /home/mcartagena/Image-Embedding-Learning-and-Feature-Extraction/curatornet_resnet_10m/
---------- starting experiment ------------
500 tests done! elapsed time: 4.32 seconds
1000 tests done! elapsed time: 8.56 seconds
1500 tests done! elapsed time: 13.28 seconds
1978 tests done! elapsed time: 17.77 seconds
** recommendations successfully saved to /mnt/workspace/ugallery_experiment_results/@20_(maxprofsize=oo)_curatornet-curatornet_resnet_10m.csv
