In [1]:
# import implicit

# implicit.__version__

'0.6.2'

In [71]:
import dill
import typing as tp

import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix, csr_matrix
# from rectools import Columns

In [72]:
def inv_mapping_implicit(df: pd.DataFrame) -> tp.Tuple[tp.Dict[int, int]]:
    users_inv_mapping = dict(enumerate(df["user_id"].unique()))
    users_mapping = {v: k for k, v in users_inv_mapping.items()}
    items_inv_mapping = dict(enumerate(df["item_id"].unique()))
    items_mapping = {v: k for k, v in items_inv_mapping.items()}
    
    return (users_inv_mapping, users_mapping, 
            items_inv_mapping, items_mapping)


def create_data_implicit(
    df: pd.DataFrame,
    user_col: str = "user_id",
    item_col: str = "item_id",
    weight_col: str = None,
    users_mapping: tp.Dict[int, int] = None,
    items_mapping: tp.Dict[int, int] = None,
) -> csr_matrix:

    if weight_col:
        weights = df[weight_col].astype(np.float32)
    else:
        weights = np.ones(len(df), dtype=np.float32)

    interaction_matrix = coo_matrix((
        weights,
        (
            df[user_col].map(users_mapping.get),
            df[item_col].map(items_mapping.get)
        )
    ))

    return interaction_matrix.tocsr()

In [73]:
with open("../service/weights/als/als_model-implicit.dill", "rb") as file:
    als_model = dill.load(file)

In [74]:
als_model

<implicit.cpu.als.AlternatingLeastSquares at 0x7fd746f4d400>

In [75]:
interaction = pd.read_csv("../service/data/kion_train/interactions.csv")
users_inv_mapping, users_mapping, items_inv_mapping, items_mapping = inv_mapping_implicit(interaction)
interaction.head()

Unnamed: 0,user_id,item_id,last_watch_dt,total_dur,watched_pct
0,176549,9506,2021-05-11,4250,72.0
1,699317,1659,2021-05-29,8317,100.0
2,656683,7107,2021-05-09,10,0.0
3,864613,7638,2021-07-05,14483,100.0
4,964868,9506,2021-04-30,6725,100.0


In [76]:
interaction_csr = create_data_implicit(interaction, 
                                       users_mapping=users_mapping,
                                       items_mapping=items_mapping)

In [78]:
users_mapping[176549]

0

In [79]:
items_mapping[14961]

154

In [84]:
interaction_csr[users_mapping[176549], :]

<1x15706 sparse matrix of type '<class 'numpy.float32'>'
	with 82 stored elements in Compressed Sparse Row format>

In [85]:
total_score, top_contributions, user_weights = als_model.explain(
    userid=users_mapping[176549], 
    user_items=interaction_csr[users_mapping[176549], :], 
    itemid=items_mapping[14961], 
    N=1,    
)

In [86]:
total_score

0.2249994171324637

In [87]:
top_contributions

[(0, 0.039123139446846054)]

In [88]:
user_weights

(array([[ 3.36910139,  0.77692219, -0.44749499, ..., -0.07356536,
          0.10533668, -0.1945123 ],
        [ 2.61752963,  3.18335676, -0.12797315, ..., -0.03650882,
         -0.17577712,  0.15673421],
        [-1.50765598, -0.75505298,  3.28204727, ...,  0.74660746,
          0.66196007,  0.67710987],
        ...,
        [-0.24784915, -0.17337516,  2.48799324, ...,  2.82770224,
          0.28346185,  0.25478909],
        [ 0.35488996, -0.47772288,  2.14794135, ...,  1.97889721,
          2.8785997 ,  0.16512119],
        [-0.65533167,  0.34781998,  2.2892921 , ...,  1.84158289,
          1.3754648 ,  2.88617018]]),
 False)

In [89]:
reco = als_model.recommend(
    userid=users_mapping[176549],
    user_items=interaction_csr[users_mapping[176549], :],
    N=10,
    filter_already_liked_items=True,
    recalculate_user=True,
)

reco

(array([154,  34, 322,   7, 270, 811, 337,  46, 400, 103], dtype=int32),
 array([0.22499946, 0.19111016, 0.18348461, 0.17867999, 0.17425005,
        0.1571157 , 0.15565419, 0.15055306, 0.14768006, 0.14201121],
       dtype=float32))

In [90]:
interaction["user_id"]

0          176549
1          699317
2          656683
3          864613
4          964868
            ...  
5476246    648596
5476247    546862
5476248    697262
5476249    384202
5476250    319709
Name: user_id, Length: 5476251, dtype: int64

In [91]:
interaction_csr[users_mapping[213024], :]

<1x15706 sparse matrix of type '<class 'numpy.float32'>'
	with 7 stored elements in Compressed Sparse Row format>

In [92]:
users_mapping[546862]

224686

In [93]:
total_score, top_contributions, user_weights = als_model.explain(
    userid=users_mapping[546862], 
    user_items=interaction_csr[users_mapping[546862], :], 
    itemid=items_inv_mapping[154], 
    N=1,    
)

IndexError: index 224686 is out of bounds for axis 0 with size 2

In [80]:
total_score

0.22499938984518295

In [81]:
top_contributions

[(0, 0.03912313727917103)]

In [1]:
import numpy as np

In [31]:
np.random.randint(1, 3)

2