In [13]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
import pandas as pd
import numpy as np
import scipy.sparse as sp

from pipeliner.recommendations.transformer import (
    UserItemMatrixTransformer,
    SimilarityTransformer,
    UserItemMatrixTransformerNP,
    SimilarityTransformerNP,
)
from pipeliner.recommendations.recommender import ItemBasedRecommender, SimilarityRecommender


In [15]:
# Toy ratings fixture with user_id / item_id / rating columns.
# IDs are parsed as strings and ratings as float32.
user_item_ratings_df = pd.read_csv(
    "../../tests/test_data/user_item_ratings_toy.csv",
    header=0,
    dtype=dict(user_id=str, item_id=str, rating=np.float32),
)
user_item_ratings_df


Unnamed: 0,user_id,item_id,rating
0,U00001,I00001,1.0
1,U00001,I00002,0.5
2,U00001,I00003,0.5
3,U00002,I00002,1.0
4,U00002,I00003,0.5
5,U00002,I00004,0.5
6,U00003,I00003,1.0
7,U00003,I00004,0.5
8,U00003,I00005,0.5
9,U00004,I00004,1.0


In [16]:
# Expected user-item matrix fixture, indexed by user_id and cast to float32.
user_item_matrix_toy = (
    pd.read_csv(
        "../../tests/test_data/user_item_matrix_toy.csv",
        index_col=["user_id"],
        header=0,
    )
    .astype(np.float32)
)
user_item_matrix_toy

Unnamed: 0_level_0,I00001,I00002,I00003,I00004,I00005,I00006
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
U00001,1.0,0.5,0.5,0.0,0.0,0.0
U00002,0.0,1.0,0.5,0.5,0.0,0.0
U00003,0.0,0.0,1.0,0.5,0.5,0.0
U00004,0.0,0.0,0.0,1.0,0.5,0.5
U00005,0.5,0.0,0.0,0.0,1.0,0.5
U00006,0.5,0.5,0.0,0.0,0.0,1.0


In [17]:
# Expected item-item similarity fixture, indexed by item_id and cast to float32.
item_similarity_matrix_toy = (
    pd.read_csv(
        "../../tests/test_data/item_similarity_matrix_toy.csv",
        index_col=["item_id"],
        header=0,
    )
    .astype(np.float32)
)
item_similarity_matrix_toy

Unnamed: 0_level_0,I00001,I00002,I00003,I00004,I00005,I00006
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
I00001,1.0,0.5,0.333333,0.0,0.333333,0.5
I00002,0.5,1.0,0.5,0.333333,0.0,0.333333
I00003,0.333333,0.5,1.0,0.5,0.333333,0.0
I00004,0.0,0.333333,0.5,1.0,0.5,0.333333
I00005,0.333333,0.0,0.333333,0.5,1.0,0.5
I00006,0.5,0.333333,0.0,0.333333,0.5,1.0


In [18]:
# Expected user-user similarity fixture, indexed by user_id and cast to float32.
user_similarity_matrix_toy = (
    pd.read_csv(
        "../../tests/test_data/user_similarity_matrix_toy.csv",
        index_col=["user_id"],
        header=0,
    )
    .astype(np.float32)
)
user_similarity_matrix_toy

Unnamed: 0_level_0,U00001,U00002,U00003,U00004,U00005,U00006
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
U00001,1.0,0.5,0.333333,0.0,0.333333,0.5
U00002,0.5,1.0,0.5,0.333333,0.0,0.333333
U00003,0.333333,0.5,1.0,0.5,0.333333,0.0
U00004,0.0,0.333333,0.5,1.0,0.5,0.333333
U00005,0.333333,0.0,0.333333,0.5,1.0,0.5
U00006,0.5,0.333333,0.0,0.333333,0.5,1.0


In [19]:
# Transform the ratings frame into a user-item matrix.
user_item_matrix_transformer = UserItemMatrixTransformer()
user_item_matrix = user_item_matrix_transformer.transform(user_item_ratings_df)

# Check the result against the stored fixture: dimensions, row/column labels,
# then the cell values themselves (exact match).
assert user_item_matrix.shape == user_item_matrix_toy.shape
assert user_item_matrix.index.equals(user_item_matrix_toy.index)
assert user_item_matrix.columns.equals(user_item_matrix_toy.columns)
np.testing.assert_array_equal(user_item_matrix.to_numpy(), user_item_matrix_toy.to_numpy())


In [20]:
# Compute item-item cosine similarity (normalise=False) from the user-item matrix.
item_similarity_matrix_transformer = SimilarityTransformer(kind="item", metric="cosine", normalise=False)
item_similarity_matrix = item_similarity_matrix_transformer.transform(user_item_matrix)

# Check the result against the stored fixture: dimensions and labels must match exactly.
assert item_similarity_matrix.shape == item_similarity_matrix_toy.shape
assert item_similarity_matrix.index.equals(item_similarity_matrix_toy.index)
assert item_similarity_matrix.columns.equals(item_similarity_matrix_toy.columns)
# The similarities are computed floating-point values (e.g. 0.333333), so compare
# with a small tolerance instead of bit-for-bit equality, which can break on
# harmless rounding differences between platforms or library versions.
np.testing.assert_allclose(
    item_similarity_matrix.to_numpy(),
    item_similarity_matrix_toy.to_numpy(),
    rtol=1e-6,
    atol=1e-6,
)

In [21]:
# Compute user-user cosine similarity (normalise=False) from the user-item matrix.
user_similarity_matrix_transformer = SimilarityTransformer(kind="user", metric="cosine", normalise=False)
user_similarity_matrix = user_similarity_matrix_transformer.transform(user_item_matrix)

# Check the result against the stored fixture: dimensions and labels must match exactly.
assert user_similarity_matrix.shape == user_similarity_matrix_toy.shape
assert user_similarity_matrix.index.equals(user_similarity_matrix_toy.index)
assert user_similarity_matrix.columns.equals(user_similarity_matrix_toy.columns)
# The similarities are computed floating-point values (e.g. 0.333333), so compare
# with a small tolerance instead of bit-for-bit equality, which can break on
# harmless rounding differences between platforms or library versions.
np.testing.assert_allclose(
    user_similarity_matrix.to_numpy(),
    user_similarity_matrix_toy.to_numpy(),
    rtol=1e-6,
    atol=1e-6,
)

In [22]:
# NOTE(review): disabled check that would compare SimilarityTransformerNP output
# (rounded to 6 decimals) against the pandas-based item similarity matrix.
# It reads `user_item_matrix_np`, which is not defined in any cell shown here.
# TODO: define that input (presumably via UserItemMatrixTransformerNP — confirm)
# and re-enable the check, or delete this cell.
# item_similarity_matrix_transformer_np =  SimilarityTransformerNP()
# item_similarity_matrix_np = item_similarity_matrix_transformer_np.transform(user_item_matrix_np.T)
# assert item_similarity_matrix_np.shape == item_similarity_matrix.shape
# np.testing.assert_array_equal(
#     item_similarity_matrix_np.toarray().astype(np.float32).round(6), 
#     item_similarity_matrix.to_numpy().astype(np.float32).round(6)
# )

In [23]:
# Recommendations for a single seed item, using the item-item similarity matrix.
item_id = "I00001"

# NOTE(review): 5 is presumably the number of recommendations to return — confirm
# against ItemBasedRecommender's signature.
rec = ItemBasedRecommender(5)
rec.fit(item_similarity_matrix)

# predict() is called with a one-element list; keep the entry for that single ID.
item_recs_1 = rec.predict([item_id])[0]
item_recs_1

array(['I00002', 'I00006', 'I00003', 'I00005', 'I00004'], dtype=object)

In [24]:
# Recommendations for every item in the toy catalogue, using SimilarityRecommender.
item_ids = [
    "I00001",
    "I00002",
    "I00003",
    "I00004",
    "I00005",
    "I00006",
]

# NOTE(review): 5 is presumably the requested number of recommendations — confirm
# against SimilarityRecommender's signature.
rec2 = SimilarityRecommender(5)
rec2.fit(item_similarity_matrix)

item_recs_2 = rec2.predict(item_ids)
print(item_recs_2)

[array(['I00002', 'I00006', 'I00003', 'I00005'], dtype=object)
 array(['I00001', 'I00003', 'I00004', 'I00006'], dtype=object)
 array(['I00002', 'I00004', 'I00001', 'I00005'], dtype=object)
 array(['I00003', 'I00005', 'I00002', 'I00006'], dtype=object)
 array(['I00006', 'I00004', 'I00001', 'I00003'], dtype=object)
 array(['I00001', 'I00005', 'I00002', 'I00004'], dtype=object)]
