# Demo of streamsight Stream

- Dataset: MovieLens
- Algorithm: ItemKNN Incremental

In [1]:
from streamsight.datasets import MovieLens100K
from streamsight.settings import SlidingWindowSetting


# Cut-off K, reused by the setting, the metrics and the model in later cells.
k = 100

# Load the MovieLens 100K interactions.
dataset = MovieLens100K()
data = dataset.load()

# Every duration handed to the setting is expressed in seconds.
SECONDS_PER_DAY = 60 * 60 * 24

# NOTE(review): 874724710 is presumably the earliest timestamp in the
# dataset, making the background window its first 5 days -- confirm.
setting_window = SlidingWindowSetting(
    background_t=874724710 + 5 * SECONDS_PER_DAY,
    window_size=30 * SECONDS_PER_DAY,  # 30-day sliding window
    top_K=k,
)

# Split the interactions into background data and evaluation windows.
setting_window.split(data)

INFO - streamsight package loaded.
DEBUG - MovieLens100K being initialized with '/Users/ngtzekean/personal/streamsight/data/movielens' as the base path.
DEBUG - MovieLens100K is initialized.
INFO - MovieLens100K is loading dataset...
INFO - Loading from cache: /Users/ngtzekean/personal/streamsight/data/movielens/ml-100k_u.data.processed.parquet
DEBUG - MovieLens100K applying filters set.
DEBUG - 	interactions before preprocess: 100000
DEBUG - 	items before preprocess: 1682
DEBUG - 	users before preprocess: 943
DEBUG - 	interactions after preprocess: 100000
DEBUG - 	items after preprocess: 1682
DEBUG - 	users after preprocess: 943
INFO - MovieLens100K dataset loaded - Took 0.0511s
DEBUG - Splitting data...
DEBUG - Performing lt(t, 2147483647)
DEBUG - Performing lt(t, 875156710)
DEBUG - Performing ge(t, 875156710)
DEBUG - TimestampSplitter(t=875156710,t_lower=None,t_upper=None) has complete split


  0%|          | 0/6 [00:00<?, ?it/s]

DEBUG - NLastInteractionTimestampSplitter(t=875156710,t_lower=None,t_upper=2592000,n_seq_data=0,include_all_past_data=False) - Updating split point to t=875156710
DEBUG - Performing lt(t, 877748710)
DEBUG - Performing ge(t, 875156710)
DEBUG - Performing get_user_n_last_interaction comparison
DEBUG - NLastInteractionTimestampSplitter(t=875156710,t_lower=None,t_upper=2592000,n_seq_data=0,include_all_past_data=False) has complete split
INFO - Split at time 875156710 resulted in empty unlabelled testing samples.
DEBUG - NLastInteractionTimestampSplitter(t=875156710,t_lower=None,t_upper=2592000,n_seq_data=0,include_all_past_data=False) - Updating split point to t=877748710
DEBUG - Performing lt(t, 880340710)
DEBUG - Performing ge(t, 877748710)
DEBUG - Performing get_user_n_last_interaction comparison
DEBUG - NLastInteractionTimestampSplitter(t=877748710,t_lower=None,t_upper=2592000,n_seq_data=0,include_all_past_data=False) has complete split
INFO - Split at time 877748710 resulted in empty 

7it [00:00, 180.79it/s]              

INFO - Finished split with window size 2592000 seconds. Number of splits: 7 in total.
INFO - SlidingWindowSetting data split - Took 0.0584s
DEBUG - Checking split attribute and sizes.
DEBUG - Checking split attributes.
DEBUG - Split attributes are set.
DEBUG - Checking size of split sets.

DEBUG - Size of split sets are checked.
INFO - SlidingWindowSetting data split complete.





In [2]:
from streamsight.evaluators import EvaluatorStreamerBuilder


# Assemble the streaming evaluator: attach the sliding-window setting built
# above, fix the metric cut-off to the same K, and request precision and
# recall at that cut-off.
builder = EvaluatorStreamerBuilder()
builder.add_setting(setting_window)
builder.set_metric_K(k)
builder.add_metric("PrecisionK")
builder.add_metric("RecallK")
# `evaluator` is the object every later cell talks to.
evaluator = builder.build()

In [3]:
from streamsight.algorithms import ItemKNNIncremental


# The model lives outside the evaluator: it is registered once, and the
# returned ID is passed to every subsequent evaluator call for this model.
external_model = ItemKNNIncremental(K=k)
external_model_id = evaluator.register_algorithm(algorithm=external_model)
print(external_model_id)

INFO - Registered algorithm 'ItemKNNIncremental(K=100,pad_with_popularity=False)' with ID c0a47f39-8e5c-5a86-98cc-b9cca8349d5d
DEBUG - Algorithm c0a47f39-8e5c-5a86-98cc-b9cca8349d5d registered
c0a47f39-8e5c-5a86-98cc-b9cca8349d5d


In [4]:
# Start streaming. Per the log output below, this resets the setting to
# iteration 0 and caches the data for the first evaluation step.
evaluator.start_stream()

DEBUG - Restoring setting to iteration 0
DEBUG - Preparing evaluator for streaming
DEBUG - (user x item) shape defined is (41, 872). Shape of dataframe stored in matrix was (3446, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (3446, 5) after masking
DEBUG - Final (user x item) shape defined is (41, 872)
DEBUG - Caching evaluation data for step 0
DEBUG - (user x item) shape defined is (174, 872). Shape of dataframe stored in matrix was (8818, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (8818, 5) after masking
DEBUG - Final (user x item) shape defined is (174, 872)
DEBUG - (user x item) shape defined is (174, 872). Shape of dataframe stored in matrix was (8818, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (8818, 5) after masking
DEBUG - Final (user x item) shape defined is (174, 1189)
DEBUG - Data cached for step 1 complete
DEBUG - Algorithm 'ItemKNNIncremental(K=100,pad_with_popularity=False)' transitioned NEW -> RE

# Iteration 1

In [5]:
# Fetch the training data released for this model in the current window.
# Note that this rebinds `data`, which previously held the full dataset.
data = evaluator.get_training_data(external_model_id)
# Fail with a clear message instead of passing None on to `fit`.
if data is None:
    raise ValueError("No data available for the external model.")
# Last expression of the cell, so the fitted model's repr is displayed.
external_model.fit(data)

DEBUG - Getting data for algorithm c0a47f39-8e5c-5a86-98cc-b9cca8349d5d
DEBUG - Algorithm 'ItemKNNIncremental(K=100,pad_with_popularity=False)' transitioned READY -> RUNNING
DEBUG - Fitting ItemKNNIncremental complete - Took 0.0385s


0,1,2
,K,100
,pad_with_popularity,False


In [6]:
# Fetch the unlabeled data the model must produce predictions for.
ul_data = evaluator.get_unlabeled_data(external_model_id)

DEBUG - Getting unlabeled data for algorithm c0a47f39-8e5c-5a86-98cc-b9cca8349d5d


In [7]:
# Generate predictions for the unlabeled data with the fitted model.
prediction = external_model.predict(ul_data)

DEBUG - Performing items_in comparison
DEBUG - Padding user ID in range(41, 174) with random items
DEBUG - Padding by ItemKNNIncremental completed


In [8]:
# Hand the predictions back to the evaluator. Per the log output below, this
# computes the registered metrics for the current window.
evaluator.submit_prediction(external_model_id, prediction)

DEBUG - Submitting prediction for algorithm c0a47f39-8e5c-5a86-98cc-b9cca8349d5d
DEBUG - Precision compute started - PrecisionK_100
DEBUG - Number of users: 8818
DEBUG - Number of ground truth interactions: 8818
DEBUG - Precision compute complete - PrecisionK_100
DEBUG - Metric PrecisionK(timestamp_limit=875156710) created for algorithm ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d
DEBUG - Recall compute started - RecallK_100
DEBUG - Number of users: 8818
DEBUG - Number of ground truth interactions: 8818
DEBUG - Recall compute complete - RecallK_100
DEBUG - Metric RecallK(timestamp_limit=875156710) created for algorithm ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d
DEBUG - Prediction evaluated for algorithm c0a47f39-8e5c-5a86-98cc-b9cca8349d5d complete
DEBUG - Algorithm 'ItemKNNIncremental(K=100,pad_with_popularity=False)' transitioned RUNNING -> PREDICTED


# Iteration 2

In [9]:
# Fetch the training data released for the second window.
data = evaluator.get_training_data(external_model_id)
# Same guard as in iteration 1: stop here with a clear message rather than
# letting a None value reach `external_model.fit` in the next cell.
if data is None:
    raise ValueError("No data available for the external model.")

DEBUG - Getting data for algorithm c0a47f39-8e5c-5a86-98cc-b9cca8349d5d
DEBUG - (user x item) shape defined is (174, 1192). Shape of dataframe stored in matrix was (11661, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (11661, 5) after masking
DEBUG - Final (user x item) shape defined is (174, 1192)
DEBUG - Caching evaluation data for step 1
DEBUG - (user x item) shape defined is (391, 1192). Shape of dataframe stored in matrix was (15037, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (15037, 5) after masking
DEBUG - Final (user x item) shape defined is (391, 1192)
DEBUG - (user x item) shape defined is (391, 1192). Shape of dataframe stored in matrix was (15037, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (15037, 5) after masking
DEBUG - Final (user x item) shape defined is (391, 1404)
DEBUG - Data cached for step 2 complete
DEBUG - Algorithm 'ItemKNNIncremental(K=100,pad_with_popularity=False)' transitioned PREDICT

In [10]:
# Fit the model again on the `data` fetched in the previous cell.
external_model.fit(data)

DEBUG - Fitting ItemKNNIncremental complete - Took 0.0567s


0,1,2
,K,100
,pad_with_popularity,False


In [11]:
# Same three steps as iteration 1, condensed into one cell:
# fetch the unlabeled data, predict on it, and submit the predictions.
ul_data = evaluator.get_unlabeled_data(external_model_id)
prediction = external_model.predict(ul_data)
evaluator.submit_prediction(external_model_id, prediction)

DEBUG - Getting unlabeled data for algorithm c0a47f39-8e5c-5a86-98cc-b9cca8349d5d
DEBUG - Performing items_in comparison
DEBUG - Padding user ID in range(174, 391) with random items
DEBUG - Padding by ItemKNNIncremental completed
DEBUG - Submitting prediction for algorithm c0a47f39-8e5c-5a86-98cc-b9cca8349d5d
DEBUG - Precision compute started - PrecisionK_100
DEBUG - Number of users: 15037
DEBUG - Number of ground truth interactions: 15037
DEBUG - Precision compute complete - PrecisionK_100
DEBUG - Metric PrecisionK(timestamp_limit=877748710) created for algorithm ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d
DEBUG - Recall compute started - RecallK_100
DEBUG - Number of users: 15037
DEBUG - Number of ground truth interactions: 15037
DEBUG - Recall compute complete - RecallK_100
DEBUG - Metric RecallK(timestamp_limit=877748710) created for algorithm ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d
DEBUG 

# Run till completion

In [12]:
# Iterations 1 and 2 were stepped through manually above, so only the
# remaining `num_split - 2` windows are left to process.
for i in range(setting_window.num_split - 2):
    data = evaluator.get_training_data(external_model_id)
    # Same guard as in iteration 1: fail with a clear message instead of
    # passing None on to `fit`.
    if data is None:
        raise ValueError("No data available for the external model.")
    external_model.fit(data)
    ul_data = evaluator.get_unlabeled_data(external_model_id)
    prediction = external_model.predict(ul_data)
    evaluator.submit_prediction(external_model_id, prediction)

DEBUG - Getting data for algorithm c0a47f39-8e5c-5a86-98cc-b9cca8349d5d
DEBUG - (user x item) shape defined is (391, 1411). Shape of dataframe stored in matrix was (21810, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (21810, 5) after masking
DEBUG - Final (user x item) shape defined is (391, 1411)
DEBUG - Caching evaluation data for step 2
DEBUG - (user x item) shape defined is (497, 1411). Shape of dataframe stored in matrix was (8932, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (8932, 5) after masking
DEBUG - Final (user x item) shape defined is (497, 1411)
DEBUG - (user x item) shape defined is (497, 1411). Shape of dataframe stored in matrix was (8932, 5) before masking
DEBUG - Shape of dataframe stored in matrix is now (8932, 5) after masking
DEBUG - Final (user x item) shape defined is (497, 1467)
DEBUG - Data cached for step 3 complete
DEBUG - Algorithm 'ItemKNNIncremental(K=100,pad_with_popularity=False)' transitioned PREDICTED -

# Evaluate metrics

In [13]:
# Show the current state of every registered algorithm.
evaluator.get_all_algorithm_status()

{'ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d': <AlgorithmStateEnum.PREDICTED: 'PREDICTED'>}

In [14]:
# Macro-level results: one row per algorithm and metric, reported as
# `macro_score` together with the number of windows (`num_window`).
evaluator.metric_results("macro")

Unnamed: 0_level_0,Unnamed: 1_level_0,macro_score,num_window
algorithm,metric,Unnamed: 2_level_1,Unnamed: 3_level_1
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",PrecisionK_100,0.000448,7
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",RecallK_100,0.044817,7


In [15]:
# Micro-level results: one row per algorithm and metric, reported as
# `micro_score` together with a `num_user` count.
evaluator.metric_results("micro")


Unnamed: 0_level_0,Unnamed: 1_level_0,micro_score,num_user
algorithm,metric,Unnamed: 2_level_1,Unnamed: 3_level_1
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",PrecisionK_100,0.000421,66261
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",RecallK_100,0.042136,66261


In [16]:
# User-level results: one `user_score` per algorithm, window timestamp,
# metric and `user_id`.
evaluator.metric_results("user")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,user_id,user_score
algorithm,timestamp,metric,Unnamed: 3_level_1,Unnamed: 4_level_1
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=875156710,PrecisionK_100,0,0.0
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=875156710,PrecisionK_100,1,0.0
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=875156710,PrecisionK_100,2,0.0
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=875156710,PrecisionK_100,3,0.0
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=875156710,PrecisionK_100,4,0.0
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",...,...,...,...
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=890708710,RecallK_100,11413,0.0
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=890708710,RecallK_100,11414,0.0
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=890708710,RecallK_100,11415,0.0
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=890708710,RecallK_100,11416,0.0


In [17]:
# Window-level results: one `window_score` per algorithm, window timestamp
# and metric, together with a `num_user` count.
evaluator.metric_results("window")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,window_score,num_user
algorithm,timestamp,metric,Unnamed: 3_level_1,Unnamed: 4_level_1
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=875156710,PrecisionK_100,0.000211,8818
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=875156710,RecallK_100,0.021093,8818
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=877748710,PrecisionK_100,0.000321,15037
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=877748710,RecallK_100,0.032121,15037
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=880340710,PrecisionK_100,0.00058,8932
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=880340710,RecallK_100,0.057994,8932
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=882932710,PrecisionK_100,0.000485,8574
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=882932710,RecallK_100,0.048519,8574
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=885524710,PrecisionK_100,0.000437,7041
"ItemKNNIncremental(K=100,pad_with_popularity=False)_c0a47f39-8e5c-5a86-98cc-b9cca8349d5d",t=885524710,RecallK_100,0.043744,7041
