### Dataset: MovieLens
### Algorithm: ItemKNN Incremental
### Padding for unknown users/items: RecentPop
### Flow: Library Flow

In [1]:
from streamsight.datasets import MovieLens100K
from streamsight.settings import SlidingWindowSetting


# Time constants (in seconds) so the split arithmetic reads as days instead of raw products.
SECONDS_PER_DAY = 60 * 60 * 24
BACKGROUND_DAYS = 5  # days added on top of the base timestamp to form the background cut-off
WINDOW_DAYS = 30     # length of each sliding evaluation window, in days

# Base epoch timestamp that the background period is measured from.
# NOTE(review): presumably the earliest interaction timestamp in MovieLens100K - confirm.
BASE_TIMESTAMP = 874724710

k = 100  # cut-off passed as top_K to the setting; kept as `k` because later cells reference it
dataset = MovieLens100K()
data = dataset.load()

# Same values as before: background_t == 875156710, window_size == 2592000.
setting_window = SlidingWindowSetting(
    background_t=BASE_TIMESTAMP + BACKGROUND_DAYS * SECONDS_PER_DAY,
    window_size=WINDOW_DAYS * SECONDS_PER_DAY,
    top_K=k,
)

setting_window.split(data)

INFO - streamsight package loaded.
DEBUG - MovieLens100K being initialized with '/Users/ngtzekean/personal/streamsight/data/movielens' as the base path.
DEBUG - MovieLens100K is initialized.
INFO - MovieLens100K is loading dataset...
INFO - Loading from cache: /Users/ngtzekean/personal/streamsight/data/movielens/ml-100k_u.data.processed.parquet
DEBUG - MovieLens100K applying filters set.
DEBUG - 	interactions before preprocess: 100000
DEBUG - 	items before preprocess: 1682
DEBUG - 	users before preprocess: 943
DEBUG - 	interactions after preprocess: 100000
DEBUG - 	items after preprocess: 1682
DEBUG - 	users after preprocess: 943
INFO - MovieLens100K dataset loaded - Took 0.0413s
DEBUG - Splitting data...
DEBUG - Performing lt(t, 2147483647)
DEBUG - Performing lt(t, 875156710)
DEBUG - Performing ge(t, 875156710)
DEBUG - TimestampSplitter(t=875156710,t_lower=None,t_upper=None) has complete split


  0%|          | 0/6 [00:00<?, ?it/s]

DEBUG - NLastInteractionTimestampSplitter(t=875156710,t_lower=None,t_upper=2592000,n_seq_data=0,include_all_past_data=False) - Updating split point to t=875156710
DEBUG - Performing lt(t, 877748710)
DEBUG - Performing ge(t, 875156710)
DEBUG - Performing get_user_n_last_interaction comparison
DEBUG - NLastInteractionTimestampSplitter(t=875156710,t_lower=None,t_upper=2592000,n_seq_data=0,include_all_past_data=False) has complete split
INFO - Split at time 875156710 resulted in empty unlabelled testing samples.
DEBUG - NLastInteractionTimestampSplitter(t=875156710,t_lower=None,t_upper=2592000,n_seq_data=0,include_all_past_data=False) - Updating split point to t=877748710
DEBUG - Performing lt(t, 880340710)
DEBUG - Performing ge(t, 877748710)
DEBUG - Performing get_user_n_last_interaction comparison
DEBUG - NLastInteractionTimestampSplitter(t=877748710,t_lower=None,t_upper=2592000,n_seq_data=0,include_all_past_data=False) has complete split
INFO - Split at time 877748710 resulted in empty 

7it [00:00, 133.44it/s]              

INFO - Finished split with window size 2592000 seconds. Number of splits: 7 in total.
INFO - SlidingWindowSetting data split - Took 0.0642s
DEBUG - Checking split attribute and sizes.
DEBUG - Checking split attributes.
DEBUG - Split attributes are set.
DEBUG - Checking size of split sets.
DEBUG - Size of split sets are checked.
INFO - SlidingWindowSetting data split complete.





In [2]:
from streamsight.algorithms import ItemKNNIncremental
from streamsight.evaluators import EvaluatorPipelineBuilder


# Assemble the evaluation pipeline: one setting, two top-K metrics, one algorithm.
builder = EvaluatorPipelineBuilder()
builder.add_setting(setting_window)
builder.set_metric_K(k)  # kept ahead of the metric registration, as in the original order
for metric_name in ("PrecisionK", "RecallK"):
    builder.add_metric(metric_name)

knn_params = {"K": k}
builder.add_algorithm(ItemKNNIncremental, params=knn_params)
evaluator = builder.build()

In [3]:
# Execute the built pipeline; the captured log shows it preparing the evaluator and then
# iterating over the evaluation steps (evaluate, then release data) one window at a time.
evaluator.run()

INFO - Phase 1: Preparing the evaluator...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (100000, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (100000, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - Fitting ItemKNNIncremental complete - Took 0.107s
DEBUG - Algorithms trained with background data...
DEBUG - Metric accumulator instantiated...
DEBUG - Restoring setting to iteration 0
DEBUG - Setting data generators ready...


Evaluating steps:   0%|          | 0/7 [00:00<?, ?it/s]

INFO - Running step 0
INFO - Phase 2: Evaluating the algorithms...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Dropping unknown items from interaction matrix based on defined shape
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - Performing items_in comparison
DEBUG - Precision compute started - PrecisionK_100
DEBUG - Number of users: 943
DEBUG - Number of ground truth interactions: 61878
DEBUG - Precision compute complete - PrecisionK_100
DEBUG - Metric PrecisionK(timestamp_limit=875156710) created for algorithm ItemKNNIncremental(K=100,pad_with_popularity=False)
DEBU

Evaluating steps:  14%|█▍        | 1/7 [00:00<00:00,  9.57it/s]

INFO - Phase 3: Releasing the data...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (100000, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (100000, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - Fitting ItemKNNIncremental complete - Took 0.107s
INFO - Running step 1
INFO - Phase 2: Evaluating the algorithms...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Dropping unknown items from interaction matrix based on defined shape
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBU

Evaluating steps:  29%|██▊       | 2/7 [00:00<00:00,  5.99it/s]

INFO - Phase 3: Releasing the data...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (100000, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (100000, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - Fitting ItemKNNIncremental complete - Took 0.11s
INFO - Running step 2
INFO - Phase 2: Evaluating the algorithms...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Dropping unknown items from interaction matrix based on defined shape
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG

Evaluating steps:  43%|████▎     | 3/7 [00:00<00:00,  5.31it/s]

INFO - Phase 3: Releasing the data...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (100000, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (100000, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - Fitting ItemKNNIncremental complete - Took 0.109s
INFO - Running step 3
INFO - Phase 2: Evaluating the algorithms...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Dropping unknown items from interaction matrix based on defined shape
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBU

Evaluating steps:  57%|█████▋    | 4/7 [00:00<00:00,  4.93it/s]

INFO - Phase 3: Releasing the data...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (100000, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (100000, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - Fitting ItemKNNIncremental complete - Took 0.106s
INFO - Running step 4
INFO - Phase 2: Evaluating the algorithms...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Dropping unknown items from interaction matrix based on defined shape
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBU

Evaluating steps:  71%|███████▏  | 5/7 [00:00<00:00,  4.86it/s]

INFO - Phase 3: Releasing the data...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (100000, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (100000, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - Fitting ItemKNNIncremental complete - Took 0.105s
INFO - Running step 5
INFO - Phase 2: Evaluating the algorithms...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Dropping unknown items from interaction matrix based on defined shape
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBU

Evaluating steps:  86%|████████▌ | 6/7 [00:01<00:00,  4.78it/s]

INFO - Phase 3: Releasing the data...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (100000, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (100000, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - Fitting ItemKNNIncremental complete - Took 0.111s
INFO - Running step 6
INFO - Phase 2: Evaluating the algorithms...
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBUG - (user x item) shape defined is (943, 1682). Shape of dataframe stored in matrix was (61878, 5) before masking
DEBUG - Dropping unknown items from interaction matrix based on defined shape
DEBUG - Shape of dataframe stored in matrix is now (61878, 5) after masking
DEBUG - Final (user x item) shape defined is (943, 1682)
DEBU

Evaluating steps: 100%|██████████| 7/7 [00:01<00:00,  4.98it/s]


# Evaluate metrics

In [4]:
# "macro" level: one macro_score per (algorithm, metric), reported with the number of windows.
evaluator.metric_results("macro")

Unnamed: 0_level_0,Unnamed: 1_level_0,macro_score,num_window
Algorithm,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1
"ItemKNNIncremental(K=100,pad_with_popularity=False)",PrecisionK_100,0.297947,7
"ItemKNNIncremental(K=100,pad_with_popularity=False)",RecallK_100,0.518619,7


In [5]:
# "micro" level: one micro_score per (algorithm, metric), reported with the number of users.
evaluator.metric_results("micro")


Unnamed: 0_level_0,Unnamed: 1_level_0,micro_score,num_user
Algorithm,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1
"ItemKNNIncremental(K=100,pad_with_popularity=False)",PrecisionK_100,0.297947,6601
"ItemKNNIncremental(K=100,pad_with_popularity=False)",RecallK_100,0.518619,6601


In [6]:
# "user" level: one score per user_id for each (algorithm, timestamp, metric).
evaluator.metric_results("user")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,user_id,score
Algorithm,Timestamp,Metric,Unnamed: 3_level_1,Unnamed: 4_level_1
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=875156710,PrecisionK_100,0,0.21
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=875156710,PrecisionK_100,1,0.22
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=875156710,PrecisionK_100,2,0.43
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=875156710,PrecisionK_100,3,0.36
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=875156710,PrecisionK_100,4,0.4
"ItemKNNIncremental(K=100,pad_with_popularity=False)",...,...,...,...
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=890708710,RecallK_100,938,0.744898
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=890708710,RecallK_100,939,0.576923
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=890708710,RecallK_100,940,0.35
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=890708710,RecallK_100,941,0.64


In [7]:
# "window" level: one window_score per (algorithm, timestamp, metric), with the user count.
evaluator.metric_results("window")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,window_score,num_user
Algorithm,Timestamp,Metric,Unnamed: 3_level_1,Unnamed: 4_level_1
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=875156710,PrecisionK_100,0.297932,943
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=875156710,RecallK_100,0.518582,943
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=877748710,PrecisionK_100,0.297932,943
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=877748710,RecallK_100,0.518582,943
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=880340710,PrecisionK_100,0.297964,943
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=880340710,RecallK_100,0.518659,943
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=882932710,PrecisionK_100,0.297932,943
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=882932710,RecallK_100,0.518582,943
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=885524710,PrecisionK_100,0.297943,943
"ItemKNNIncremental(K=100,pad_with_popularity=False)",t=885524710,RecallK_100,0.518608,943
