In [1]:
%load_ext autoreload
%autoreload 1

In [36]:
%aimport FeatureUtils
%aimport ExperimentUtils
%aimport Classes

In [37]:
import numpy as np
import FeatureUtils as featils
from Classes import Customer, ProfileBase
from ExperimentUtils import sanity_check_purchase_upload_events, recommendations_to_csv,\
        run_personalized_recommendation_experiment, append_simfunc_and_tags
from TransactionsUtils import TransactionsHandler
from scipy.spatial.distance import pdist, squareform

In [6]:
# Location of the precomputed ResNet50 features; the id <-> index lookups are read
# from the same directory through featils.read_ids_file.
dirpath = '/mnt/workspace/Ugallery/ResNet50/'
index2id, id2index = featils.read_ids_file(dirpath, 'ids')
# NOTE(review): presumably row i of this array holds the features of artwork index2id[i] — confirm.
resnet50 = np.load(dirpath + 'flatten_1.npy')

In [7]:
# Display the dimensions of the loaded feature matrix.
resnet50.shape

(13297, 2048)

In [8]:
# Collect the entries of index2id into a set and display how many distinct ones there are.
ids_with_features = set(index2id)
len(ids_with_features)

13297

In [9]:
# Notebook-level alias for the artworks mapping exposed by TransactionsHandler.
artworks_dict = TransactionsHandler.artworks_dict

In [14]:
# Freeze the artworks into a list and stamp each one with its own position in
# that list, so it can be located later through its `.index` attribute.
artwork_list = list(artworks_dict.values())
for i, artwork in enumerate(artwork_list):
    artwork.index = i

In [19]:
# Build the feature matrix for artwork_list from the ResNet50 features, then expand
# scipy's condensed pairwise cosine distances into a full square matrix.
# NOTE(review): presumably row k of tmp_matrix corresponds to artwork_list[k], so the
# square matrix can be addressed with the artworks' `.index` values — confirm against
# featils.numpy_array_feature_matrix.
tmp_matrix = featils.numpy_array_feature_matrix(resnet50, id2index, artwork_list)
distmat_resnet50_cosine = squareform(pdist(tmp_matrix, 'cosine'))

In [10]:
# One fresh Customer object per distinct customer_id found in the valid-sales dataframe.
customers_dict = { cid : Customer() for cid in TransactionsHandler.valid_sales_df.customer_id.unique() }

In [11]:
# ---- raw event streams exposed by TransactionsHandler -----
upload_events = TransactionsHandler.upload_events
purchase_session_events = TransactionsHandler.purchase_session_events

# ---- hand every purchase session to the customer it belongs to -----
for pe in purchase_session_events:
    customers_dict[pe.customer_id].append_purchase_session(pe)

# ---- single timeline: uploads followed by purchases, ordered by timestamp ----
time_events = sorted(upload_events + purchase_session_events,
                     key=lambda event: event.timestamp)

In [12]:
# Report the size of each event list; time_events is the concatenation of the other
# two, so its length should equal the sum of theirs.
print("len(upload_events) = ", len(upload_events))
print("len(purchase_session_events) = ", len(purchase_session_events))
print("len(time_events) = ", len(time_events))

len(upload_events) =  7742
len(purchase_session_events) =  4897
len(time_events) =  12639


In [13]:
# Validate the merged event stream against the artworks mapping; the checks that
# passed are listed in the cell output.
sanity_check_purchase_upload_events(time_events, artworks_dict)

CHECK: event types are correct
CHECK: events ordered by timestamp
CHECK: products are only uploaded once
CHECK: products can only be purchased if present in inventory


In [20]:
def get_pairwise_simfunc(distmat):
    """Build a pairwise similarity function backed by a precomputed distance table.

    Parameters
    ----------
    distmat
        Doubly indexable table where ``distmat[i][j]`` is the distance between
        the items at positions ``i`` and ``j``.

    Returns
    -------
    callable
        ``simfunc(a1, a2)`` that looks up the distance through ``a1.index`` and
        ``a2.index`` and returns it negated, so a smaller distance yields a
        larger similarity.
    """
    def pairwise_similarity(a1, a2):
        row = distmat[a1.index]
        return -row[a2.index]

    return pairwise_similarity

In [39]:
# Value handed to the experiment runner as `rec_size` and embedded in result filenames.
REC_SIZE = 20

# Profile-size caps to sweep, with one filename tag per cap (None is tagged 'oo').
MAX_PROFILE_SIZES = [None]
MAX_PROFILE_TAGS = [
    ('_(maxprofsize=%d)' % size) if size is not None else '_(maxprofsize=oo)'
    for size in MAX_PROFILE_SIZES
]

# ------ similarity functions ------------
# Parallel lists (function, tag); they start empty and are filled by a later cell.
PROFILE_SIMFUNCS = []
SIMFUNC_TAGS = []

# Distance matrices to evaluate and the label of each, paired by position.
distmats = (distmat_resnet50_cosine,)
labels = ('resnet50-cosine',)

# Values forwarded to append_simfunc_and_tags.
# NOTE(review): presumably top-k settings, with None meaning "no top-k cut" — confirm.
ks = [2, None]

In [40]:
# For every (distance matrix, label) pair, wrap the matrix in a pairwise similarity
# function and hand it to append_simfunc_and_tags together with the two registries.
# NOTE(review): presumably this appends to PROFILE_SIMFUNCS / SIMFUNC_TAGS, so re-running
# this cell without first re-running the cell that resets them would duplicate
# entries — confirm.
for distmat, label in zip(distmats, labels):
    pwsimfunc = get_pairwise_simfunc(distmat)
    append_simfunc_and_tags(PROFILE_SIMFUNCS, SIMFUNC_TAGS, pwsimfunc, label, ks)

In [41]:
# Display the profile-size sweep and its filename tags.
MAX_PROFILE_SIZES, MAX_PROFILE_TAGS

([None], ['_(maxprofsize=oo)'])

In [42]:
# Display the registered similarity functions and their tags.
PROFILE_SIMFUNCS, SIMFUNC_TAGS

([<function ExperimentUtils.append_avgsimtopk.<locals>.<lambda>>,
  <function ExperimentUtils.append_simfunc_and_tags.<locals>.<lambda>>],
 ['resnet50-cosine-avgsmtp2', 'resnet50-cosine-avgsim'])

In [26]:
class Profile(ProfileBase):
    """Customer profile that scores artworks with a pluggable scoring function.

    The scoring function is called as ``profile_scorefunc(artwork,
    consumed_artworks)``; a higher score ranks the artwork earlier.
    """

    # --- global -----
    @classmethod
    def global_purchase_session_event_handler(cls, purch_sess):
        """Class-level purchase-session hook; this profile does nothing here."""

    # --- instance ----
    def __init__(self, maxprofsize, artworks_dict, profile_scorefunc):
        """Initialise the base profile and remember the scoring function."""
        super().__init__(maxprofsize, artworks_dict)
        self.profile_scorefunc = profile_scorefunc

    def ready(self):
        """Return True once at least one consumed artwork is recorded."""
        consumed_count = len(self.consumed_artworks)
        return consumed_count > 0

    def handle_artwork_added(self, artwork):
        """Hook for an artwork being added; intentionally a no-op."""

    def handle_artwork_removed(self, artwork):
        """Hook for an artwork being removed; intentionally a no-op."""

    def get_score(self, artwork):
        """Score ``artwork`` against the consumed artworks of this profile."""
        consumed = self.consumed_artworks
        return self.profile_scorefunc(artwork, consumed)

    def rank_inventory_ids(self, inventory_artworks):
        """Return the ids of ``inventory_artworks`` ordered by descending score.

        Sorting is done on ``(score, id)`` tuples in reverse order, so equal
        scores are ordered by descending id.
        """
        scored = sorted(
            ((self.get_score(candidate), candidate.id) for candidate in inventory_artworks),
            reverse=True,
        )
        return [artwork_id for _score, artwork_id in scored]

In [29]:
def run_experiments(artworks_dict, customers_dict, time_events,
                    output_dir="/mnt/workspace/ugallery_experiment_results/"):
    """Run one recommendation experiment per (profile size, similarity function) pair.

    Iterates over MAX_PROFILE_SIZES / MAX_PROFILE_TAGS and, for each of them,
    over PROFILE_SIMFUNCS / SIMFUNC_TAGS. Each combination is passed to
    run_personalized_recommendation_experiment with ``rec_size=REC_SIZE`` and
    the result is written with recommendations_to_csv.

    Parameters
    ----------
    artworks_dict, customers_dict, time_events
        Forwarded unchanged to run_personalized_recommendation_experiment;
        ``artworks_dict`` is also given to every Profile that gets created.
    output_dir : str, optional
        Directory that receives the CSV files. Defaults to the directory that
        was previously hard-coded, so existing calls write to the same paths.

    Output files are named ``@<REC_SIZE><maxproftag>_visrank_<simfunctag>.csv``.
    """
    import os  # function-scope import: the notebook's import cell does not provide os

    for maxprofsize, maxproftag in zip(MAX_PROFILE_SIZES, MAX_PROFILE_TAGS):
        for profsimfunc, simfunctag in zip(PROFILE_SIMFUNCS, SIMFUNC_TAGS):

            # The factory ignores its single argument. It reads the loop
            # variables when called, which is fine because the experiment is
            # run and its result saved within this same iteration.
            def create_profile_func(_):
                return Profile(maxprofsize, artworks_dict, profsimfunc)

            recommendations = run_personalized_recommendation_experiment(
                artworks_dict, customers_dict, time_events, create_profile_func, rec_size=REC_SIZE)

            filename = "@{}{}_visrank_{}.csv".format(REC_SIZE, maxproftag, simfunctag)
            recommendations_to_csv(recommendations, os.path.join(output_dir, filename))

In [43]:
# Run every configured experiment; progress and the saved CSV paths appear in the cell output.
run_experiments(artworks_dict, customers_dict, time_events)

---------- starting experiment ------------
500 tests done! elapsed time: 6.48 seconds
1000 tests done! elapsed time: 13.63 seconds
1500 tests done! elapsed time: 31.98 seconds
1978 tests done! elapsed time: 108.00 seconds
** recommendations successfully saved to /mnt/workspace/ugallery_experiment_results/@20_(maxprofsize=oo)_visrank_resnet50-cosine-avgsmtp2.csv
---------- starting experiment ------------
500 tests done! elapsed time: 5.09 seconds
1000 tests done! elapsed time: 10.65 seconds
1500 tests done! elapsed time: 27.09 seconds
1978 tests done! elapsed time: 74.87 seconds
** recommendations successfully saved to /mnt/workspace/ugallery_experiment_results/@20_(maxprofsize=oo)_visrank_resnet50-cosine-avgsim.csv
