### Goal: Build functions that create the user and rating matrices needed to generate recommendations for new users.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Show full cell contents (long feature lists / URLs) without truncation.
# None is the supported "no limit" value; the older -1 sentinel was deprecated
# in pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)

## Get modeled item factors

In [3]:
# Load the modeled item (comic) factors: one row per comic id with a
# `features` vector plus `comic_title` and `img_url` (see the rows shown below).
# NOTE(review): unpickling can execute arbitrary code - only load this file
# from a trusted source.
comic_factors = pd.read_pickle('support_data/comics_factors_20190916.pkl')

## Get user's comics

Run through a few IDs to get the lay of the land.

In [4]:
comic_ids = [2171, 2637]

In [5]:
comic_factors.loc[2171]

features       [0.0, 0.0, 0.0, 0.0, 0.7724556922912598, 0.0, 0.0, 0.0, 0.0, 0.0, 0.08149779587984085, 0.0, 0.0, 0.2823905050754547, 0.0, 0.0, 0.0, 0.22249548137187958, 0.13321615755558014, 0.0, 0.0, 0.0, 0.0, 0.0, 0.16317979991436005, 0.047810930758714676, 0.0, 0.39404574036598206, 0.5858305096626282, 0.15400823950767517]
comic_title    Fables (Vertigo)                                                                                                                                                                                                                                                                                                    
img_url        https://comrx.s3-us-west-2.amazonaws.com/covers_large/fables.jpg                                                                                                                                                                                                                                                    
Name: 2171, dtype: object

In [6]:
comic_factors.loc[2637]

features       [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12191954255104065, 0.0, 0.8011226654052734, 0.0, 0.04013441130518913, 0.0, 0.028866415843367577, 0.0, 0.015712112188339233, 0.23378580808639526, 0.0, 0.1483168751001358, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.23967689275741577, 0.8150233626365662, 0.0025834161788225174, 0.0, 0.029867107048630714, 0.0012574304128065705]
comic_title    Gideon Falls (Image)                                                                                                                                                                                                                                                                                                                                      
img_url        https://comrx.s3-us-west-2.amazonaws.com/covers_large/gideon_falls.jpg                                                                                                                                                                                               

## Support Functions

In [7]:
def create_user_item_matrix(comic_ids, comic_factors):
    """
    Build the item matrix for one user.

    Parameters
    ----------
    comic_ids : list of comic ids (index labels of ``comic_factors``)
    comic_factors : pandas dataframe with a ``features`` column holding
        the factor vector of each comic

    Returns
    -------
    numpy array of shape (len(comic_ids), number of latent factors), where
    row i holds the factors of comic_ids[i]
    """
    # The rank is read off the first row of the factors table
    n_factors = len(comic_factors.features.iloc[0])
    n_comics = len(comic_ids)

    # Start from zeros, then fill one row per requested comic
    user_items = np.zeros(shape=(n_comics, n_factors))
    for row, comic_id in zip(user_items, comic_ids):
        # `row` is a view into user_items, so this writes in place
        row[:] = np.array(comic_factors.loc[comic_id, 'features'])

    return user_items

##### Let's test

In [8]:
item_mtx = create_user_item_matrix(comic_ids=comic_ids,
                                   comic_factors=comic_factors)

In [9]:
item_mtx.shape

(2, 30)

In [10]:
def create_user_impl_rate_matrix(comic_ids, ratings_list=None):
    """
    Build the user's ratings as a column matrix.

    Parameters
    ----------
    comic_ids : list of comic ids; only its length is used
    ratings_list : optional list of ratings; when omitted every comic
        gets an implicit rating of 1

    Returns
    -------
    numpy array with one column: the given ratings, or integer ones
    (one row per entry in comic_ids)
    """
    # Explicit ratings win: wrap them as a single row and transpose to a column
    if ratings_list is not None:
        return np.array((ratings_list, )).T

    # No ratings supplied -> implicit feedback, a 1 for every comic
    return np.ones((len(comic_ids), 1), 'int')

##### Let's test

In [11]:
rate_mtx = create_user_impl_rate_matrix(comic_ids=comic_ids, )

In [12]:
rate_mtx

array([[1],
       [1]])

In [13]:
def create_user_util_matrix(comics_matrix, user_rating_matrix):
    """
    Solve for the user's factor vector by least squares.

    Parameters
    ----------
    comics_matrix : numpy array, one row per comic, one column per factor
    user_rating_matrix : numpy array, the ratings for those comics

    Returns
    -------
    1-D numpy array with one entry per column of comics_matrix
    """
    # lstsq returns (solution, residuals, rank, singular values);
    # only the solution is of interest here
    solution, *_ = np.linalg.lstsq(comics_matrix, user_rating_matrix, rcond=None)

    # Flatten the solution column into a plain vector of factors
    n_factors = comics_matrix.shape[1]
    return solution.reshape((n_factors, ))

##### Let's test

In [14]:
util_mtx = create_user_util_matrix(comics_matrix=item_mtx,
                                   user_rating_matrix=rate_mtx)

In [15]:
util_mtx

array([0.        , 0.        , 0.        , 0.        , 0.54481941,
       0.        , 0.07580164, 0.        , 0.49808597, 0.        ,
       0.08243404, 0.        , 0.01794726, 0.19917237, 0.00976877,
       0.14535281, 0.        , 0.2491417 , 0.09395846, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.26410757,
       0.54044997, 0.0016062 , 0.27792373, 0.43176054, 0.10940508])

In [16]:
util_mtx.shape

(30,)

## Candidate Functions

In [17]:
def make_n_comic_recommendations(comics, comic_factors, top_n):
    """
    First candidate: return the top_n highest-scoring comics for a user.

    NOTE: a function with this same name is defined again further down in
    this notebook; on a top-to-bottom run that later definition replaces
    this one.

    Parameters
    ----------
    comics : list of comic ids (integers)
    comic_factors : pandas dataframe with comic factors
    top_n : integer, number of rows to return

    Returns
    -------
    Pandas dataframe with columns comic_id, comic_title and img_url,
    ordered by predicted score (highest first). The user's own comics
    are not filtered out.
    """
    # User-side inputs: factors of the user's comics and an implicit 1 rating each
    user_items = create_user_item_matrix(comic_ids=comics,
                                         comic_factors=comic_factors)
    user_ratings = create_user_impl_rate_matrix(comic_ids=comics)

    # Least-squares user vector in the item factor space
    user_vector = create_user_util_matrix(user_items, user_ratings)

    # Score every comic on a copy so the caller's dataframe is untouched
    scored = comic_factors.copy()
    scored['pred'] = scored['features'].apply(
        lambda factors: np.dot(factors, user_vector))

    # Rank, truncate, and expose the index (comic_id) as a regular column
    best = (scored.sort_values(by=['pred'], ascending=False)
                  .head(top_n)
                  .reset_index())

    return best.loc[:, ['comic_id', 'comic_title', 'img_url']].copy()

In [18]:
# This one, include score so can compare preds
def make_n_comic_recommendations_dev(comics, comic_factors, top_n):
    """
    Development variant of the recommender that keeps the predicted score.

    Parameters
    ----------
    comics : list of comic ids (integers)
    comic_factors : pandas dataframe with comic factors
    top_n : integer, number of rows to return

    Returns
    -------
    Pandas dataframe with columns comic_id, comic_title, img_url and pred,
    ordered by pred (highest first). The user's own comics are not
    filtered out, so their scores can be inspected too.
    """
    output_columns = ['comic_id', 'comic_title', 'img_url', 'pred']

    # Item matrix and implicit ratings feed straight into the solver
    user_vector = create_user_util_matrix(
        create_user_item_matrix(comic_ids=comics,
                                comic_factors=comic_factors),
        create_user_impl_rate_matrix(comic_ids=comics),
    )

    def _score(factors):
        # Predicted preference = comic factors . user vector
        return np.dot(factors, user_vector)

    # Work on a copy so the shared factors table is never modified
    scored = comic_factors.copy()
    scored['pred'] = scored['features'].apply(_score)

    # Highest scores first; reset_index turns comic_id into a column
    ranked = scored.sort_values(by=['pred'], ascending=False)
    top_rows = ranked.head(top_n).reset_index()

    return top_rows.loc[:, output_columns].copy()

In [51]:
def make_n_comic_recommendations(comics, comic_factors, top_n):
    """
    Make n comic recommendations, excluding the comics the user supplied.

    Parameters
    ----------
    comics : list of comic ids (integers); each must be present in the
        index of comic_factors
    comic_factors : pandas dataframe with comic factors, indexed by
        comic_id, with columns features, comic_title and img_url
    top_n : integer, # of comic recommendations desired by user

    Returns
    -------
    Pandas Dataframe of up to n comic recommendations (columns comic_id,
    comic_title, img_url, pred), sorted descending by utility. The index
    is each comic's position in the full ranking (user's own comics
    included), so it is not necessarily contiguous.
    """
    # Create item matrix
    comic_matrix = create_user_item_matrix(comic_ids=comics,
                                           comic_factors=comic_factors
                                           )
    # Create user matrix
    user_matrix = create_user_impl_rate_matrix(comic_ids=comics)
    # Create utility matrix
    utility_matrix = create_user_util_matrix(comic_matrix, user_matrix)

    # Score every comic on a copy so the caller's dataframe is not modified
    cf = comic_factors.copy()
    cf['pred'] = cf['features'].apply(lambda x: np.dot(x, utility_matrix))

    # Rank the whole catalog; reset_index exposes comic_id as a column
    ranked = cf.sort_values(by=['pred'], ascending=False).reset_index()
    ranked = ranked.loc[:, ['comic_id', 'comic_title', 'img_url', 'pred']]

    # Remove the user's own comics BEFORE truncating. Truncating first with a
    # fixed head(top_n + 3) returned fewer than top_n rows whenever more than
    # three of the user's comics landed at the top of the ranking.
    already_owned = ranked['comic_id'].isin(comics)
    top_n_df = ranked.loc[~already_owned].head(top_n).copy()

    return top_n_df

##### Let's test

In [33]:
comic_ids

[2171, 2637]

In [41]:
# NOTE(review): t_df is only assigned in cells further down (In [28] / In [29]),
# so on a fresh top-to-bottom run this cell raises NameError. The output shown
# here comes from out-of-order execution.
t_df

Unnamed: 0,comic_id,comic_title,img_url,pred
0,2171,Fables (Vertigo),https://comrx.s3-us-west-2.amazonaws.com/covers_large/fables.jpg,1.000000
1,2637,Gideon Falls (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/gideon_falls.jpg,1.000000
2,5177,Royal City (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/royal_city.jpg,0.939948
3,88,Ad After Death Book 02 (of 3 (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.885665
4,804,Black Hammer (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers_large/black_hammer.jpg,0.876073
...,...,...,...,...
785,1718,Deadpools Art of War (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.000995
786,3355,Inhumanity Spider-Man (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers_large/inhumanity_spider-man.jpg,0.000940
787,2977,Hawkeye Vs Deadpool (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers_large/hawkeye_vs_deadpool.jpg,0.000309
788,3479,Jem & the Holograms (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers_large/jem_and_the_holograms.jpg,0.000000


In [38]:
all_recs = t_df['comic_id'].to_list()

In [40]:
list(set(comic_ids).intersection(set(all_recs)))

[2171, 2637]

In [28]:
# top_n=10000 is presumably chosen to exceed the number of comics in
# comic_factors so that every comic comes back with its score - TODO confirm
# against the catalog size.
t_df = make_n_comic_recommendations_dev(comics=comic_ids,
                                        comic_factors=comic_factors,
                                        top_n=10000)

In [29]:
# NOTE(review): this reuses the name t_df and overwrites the result of the
# make_n_comic_recommendations_dev call in the previous cell.
t_df = make_n_comic_recommendations(comics=comic_ids,
                                        comic_factors=comic_factors,
                                        top_n=10000)

In [30]:
t_df.loc[t_df['comic_id'].isin(comic_ids)]

Unnamed: 0,comic_id,comic_title,img_url,pred
0,2171,Fables (Vertigo),https://comrx.s3-us-west-2.amazonaws.com/covers_large/fables.jpg,1.0
1,2637,Gideon Falls (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/gideon_falls.jpg,1.0


Well, the model definitely got the actuals correct!

What about the other highly ranked comics?

In [31]:
t_df.sort_values(by=['pred'],ascending=False).head(20)

Unnamed: 0,comic_id,comic_title,img_url,pred
0,2171,Fables (Vertigo),https://comrx.s3-us-west-2.amazonaws.com/covers_large/fables.jpg,1.0
1,2637,Gideon Falls (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/gideon_falls.jpg,1.0
2,5177,Royal City (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/royal_city.jpg,0.939948
3,88,Ad After Death Book 02 (of 3 (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.885665
4,804,Black Hammer (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers_large/black_hammer.jpg,0.876073
5,87,Ad After Death Book 01 (of 3 (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.842826
6,375,Ascender (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/ascender.jpg,0.822348
7,3700,Kill Or Be Killed (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/kill_or_be_killed.jpg,0.801815
8,542,Barrier (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/barrier.jpg,0.797027
9,3856,Leviathan (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/leviathan.jpg,0.779517


### Get recommendations!


In [42]:
top_n = 8

In [52]:
results = make_n_comic_recommendations(comic_ids, comic_factors, top_n)

results

Unnamed: 0,comic_id,comic_title,img_url,pred
2,5177,Royal City (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/royal_city.jpg,0.939948
3,88,Ad After Death Book 02 (of 3 (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.885665
4,804,Black Hammer (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers_large/black_hammer.jpg,0.876073
5,87,Ad After Death Book 01 (of 3 (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.842826
6,375,Ascender (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/ascender.jpg,0.822348
7,3700,Kill Or Be Killed (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/kill_or_be_killed.jpg,0.801815
8,542,Barrier (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/barrier.jpg,0.797027
9,3856,Leviathan (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/leviathan.jpg,0.779517
