In [11]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
import pandas as pd
import numpy as np
import scipy.sparse as sp

from pipeliner.recommendations.transformer import (
    UserItemMatrixTransformer,
    SimilarityTransformer,
    UserItemMatrixTransformerNP,
    SimilarityTransformerNP,
)
from pipeliner.recommendations.recommender import ItemBasedRecommender, SimilarityRecommender


In [13]:
NUM_USERS = 6
NUM_ITEMS = 6
# Ratings every user assigns to their run of consecutive items; the length of
# this pattern is the number of items rated per user.
RATING_PATTERN = (1, 1, 0.5)

ratings_per_user = len(RATING_PATTERN)
assert ratings_per_user <= NUM_ITEMS, "a user cannot rate the same item twice"

# Create arrays for each column (ids are 1-based).
users = np.repeat(np.arange(NUM_USERS), ratings_per_user) + 1
# User u (0-based) rates items u, u+1, ..., wrapping around the catalogue.
# Modular indexing over the *users* keeps len(items) == len(users) even when
# NUM_USERS != NUM_ITEMS; the previous sliding-window construction produced
# NUM_ITEMS windows and made column_stack fail whenever the two differed.
item_offsets = np.arange(NUM_USERS)[:, None] + np.arange(ratings_per_user)
items = (item_offsets % NUM_ITEMS).reshape(-1) + 1
ratings = np.tile(RATING_PATTERN, NUM_USERS)

# Stack arrays to get (user, item, rating) combinations
user_item_ratings = np.column_stack((users, items, ratings))

# Long-format frame with zero-padded string ids ("U00001", "I00001", ...).
# .assign builds the final frame in one expression instead of mutating
# columns after construction.
user_item_ratings_df = pd.DataFrame(
    user_item_ratings, columns=["user_id", "item_id", "rating"]
).assign(
    user_id=lambda df: df["user_id"].astype(int).map(lambda i: f"U{i:05d}"),
    item_id=lambda df: df["item_id"].astype(int).map(lambda i: f"I{i:05d}"),
)
user_item_ratings_df


Unnamed: 0,user_id,item_id,rating
0,U00001,I00001,1.0
1,U00001,I00002,1.0
2,U00001,I00003,0.5
3,U00002,I00002,1.0
4,U00002,I00003,1.0
5,U00002,I00004,0.5
6,U00003,I00003,1.0
7,U00003,I00004,1.0
8,U00003,I00005,0.5
9,U00004,I00004,1.0


In [14]:
# Persist the toy ratings as a CSV fixture under the tests' data directory
# (path is relative to the notebook's working directory); the row index is
# not written.
user_item_ratings_df.to_csv(
    path_or_buf="../../tests/test_data/user_item_ratings_toy.csv",
    index=False,
)

In [15]:
# Turn the long (user_id, item_id, rating) frame into a user x item matrix;
# in the displayed result, user/item pairs without a rating show as 0.0.
user_item_matrix_transformer = UserItemMatrixTransformer()
user_item_matrix = user_item_matrix_transformer.transform(
    user_item_ratings_df,
)
user_item_matrix

item_id,I00001,I00002,I00003,I00004,I00005,I00006
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
U00001,1.0,1.0,0.5,0.0,0.0,0.0
U00002,0.0,1.0,1.0,0.5,0.0,0.0
U00003,0.0,0.0,1.0,1.0,0.5,0.0
U00004,0.0,0.0,0.0,1.0,1.0,0.5
U00005,0.5,0.0,0.0,0.0,1.0,1.0
U00006,1.0,0.5,0.0,0.0,0.0,1.0


In [16]:
# Build the same matrix with the NumPy-based transformer, fed the raw numeric
# (user, item, rating) array instead of the DataFrame.
user_item_matrix_transformer_np = UserItemMatrixTransformerNP()
user_item_matrix_np = user_item_matrix_transformer_np.transform(user_item_ratings)

# Parity check against the pandas implementation: same shape and identical
# entries. `.toarray()` densifies the result (presumably a scipy sparse
# matrix -- confirm against the transformer).
assert user_item_matrix_np.shape == user_item_matrix.shape
np.testing.assert_array_equal(user_item_matrix_np.toarray(), user_item_matrix.to_numpy())

In [17]:
# Item-to-item cosine similarity derived from the user x item matrix.
similarity_matrix_transformer = SimilarityTransformer(
    kind="item",
    metric="cosine",
    normalise=False,
)
similarity_matrix = similarity_matrix_transformer.transform(user_item_matrix)
similarity_matrix

item_id,I00001,I00002,I00003,I00004,I00005,I00006
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
I00001,1.0,0.666667,0.222222,0.0,0.222222,0.666667
I00002,0.666667,1.0,0.666667,0.222222,0.0,0.222222
I00003,0.222222,0.666667,1.0,0.666667,0.222222,0.0
I00004,0.0,0.222222,0.666667,1.0,0.666667,0.222222
I00005,0.222222,0.0,0.222222,0.666667,1.0,0.666667
I00006,0.666667,0.222222,0.0,0.222222,0.666667,1.0


In [18]:
# Compute the similarity with the NumPy-based transformer. It is given the
# transposed (item x user) matrix -- presumably so that rows are the entities
# being compared; confirm against SimilarityTransformerNP.
similarity_matrix_transformer_np = SimilarityTransformerNP()
similarity_matrix_np = similarity_matrix_transformer_np.transform(user_item_matrix_np.T)

# Parity check against the pandas implementation.
assert similarity_matrix_np.shape == similarity_matrix.shape
# Compare with a tolerance instead of exact equality on float32-cast values
# rounded to 6 decimals: two results that differ only by floating-point noise
# can land on opposite sides of a rounding boundary and fail an exact check.
np.testing.assert_allclose(
    similarity_matrix_np.toarray(),
    similarity_matrix.to_numpy(),
    rtol=1e-5,
    atol=1e-6,
)

In [19]:
# Fit an item-based recommender (constructed with 5) on the item-item
# similarity matrix.
rec = ItemBasedRecommender(5)
rec.fit(similarity_matrix)

# Recommendations for a single query item; predict takes a list of item ids,
# so the first (only) result is extracted.
item_id = "I00001"
item_recs_1 = rec.predict([item_id])[0]
item_recs_1

array(['I00002', 'I00006', 'I00003', 'I00005', 'I00004'], dtype=object)

In [21]:
# Same query through SimilarityRecommender, for comparison with the
# ItemBasedRecommender result. Relies on `item_id` and `similarity_matrix`
# defined in earlier cells.
rec2 = SimilarityRecommender(5)
rec2.fit(similarity_matrix)
# predict takes a list of item ids; keep the result for the single query item.
item_recs_2 = rec2.predict([item_id])[0]
# NOTE(review): the recorded output lists 4 items here, whereas the
# ItemBasedRecommender cell shows 5. The missing item (I00004) has similarity
# 0.0 to I00001 -- confirm that dropping zero-similarity items is intended.
print(item_recs_2)

['I00002' 'I00006' 'I00003' 'I00005']
