### Dataset: Amazon Music
### Algorithm: ItemKNN Incremental
### Padding unknown user/item: Random
### Flow: Library Flow

In [1]:
from streamsight.datasets.amazon import AmazonMusicDataset
from streamsight.preprocessing import MinItemsPerUser
from streamsight.settings import SlidingWindowSetting

# Cut-off reused by the split setting, the metrics and the recommender.
k = 100

# Load Amazon Music through a MinItemsPerUser(3) filter; the log below
# reports 100952 users before the filter and 4464 after it.
dataset = AmazonMusicDataset()
min_items_filter = MinItemsPerUser(
    3, AmazonMusicDataset.ITEM_IX, AmazonMusicDataset.USER_IX
)
dataset.add_filter(min_items_filter)
data = dataset.load()

# One evaluation window spans 365 days, expressed in seconds (31536000).
seconds_per_window = 365 * 24 * 60 * 60
setting_window = SlidingWindowSetting(
    background_t=1546241746,
    window_size=seconds_per_window,
    top_K=k,
)
# Split the loaded data; the log below reports 5 splits in total.
setting_window.split(data)

INFO - streamsight package loaded.
  from .autonotebook import tqdm as notebook_tqdm

DEBUG - AmazonMusicDataset being initialized with 'data' as the base path.
DEBUG - AmazonMusicDataset is initialized.
INFO - AmazonMusicDataset is loading dataset...
DEBUG - Data file is in memory and in dir specified.
DEBUG - AmazonMusicDataset applying filters set.
DEBUG - 	interactions before preprocess: 130434
DEBUG - 	items before preprocess: 70511
DEBUG - 	users before preprocess: 100952
DEBUG - applying filter: MinItemsPerUser(min_iu=3, count_duplicates=True, item_ix=parent_asin, user_ix=user_id)
DEBUG - 	interactions after filter: 25542
DEBUG - 	items after filter: 20836
DEBUG - 	users after filter: 4464
DEBUG - 	interactions after preprocess: 25542
DEBUG - 	items after preprocess: 20836
DEBUG - 	users after preprocess: 4464
INFO - AmazonMusicDataset dataset loaded - Took 1.06s
DEBUG - Splitting data...
DEBUG - Performing lt(t, 2147483647)
DEBUG - Performing lt(t, 1546241746)
DEBUG - Performin

  0%|          | 0/4 [00:00<?, ?it/s]

DEBUG - NPastInteractionTimestampSplitter(t=1546241746,t_lower=None,t_upper=31536000,n_seq_data=0,include_all_past_data=False) - Updating split point to t=1546241746
DEBUG - Performing lt(t, 1577777746)
DEBUG - Performing ge(t, 1546241746)
DEBUG - Performing get_user_n_last_interaction comparison
DEBUG - NPastInteractionTimestampSplitter(t=1546241746,t_lower=None,t_upper=31536000,n_seq_data=0,include_all_past_data=False) has complete split
INFO - Split at time 1546241746 resulted in empty unlabelled testing samples.
DEBUG - NPastInteractionTimestampSplitter(t=1546241746,t_lower=None,t_upper=31536000,n_seq_data=0,include_all_past_data=False) - Updating split point to t=1577777746
DEBUG - Performing lt(t, 1609313746)
DEBUG - Performing ge(t, 1577777746)
DEBUG - Performing get_user_n_last_interaction comparison
DEBUG - NPastInteractionTimestampSplitter(t=1577777746,t_lower=None,t_upper=31536000,n_seq_data=0,include_all_past_data=False) has complete split
INFO - Split at time 1577777746 re

5it [00:00, 100.03it/s]              

INFO - Finished split with window size 31536000 seconds. Number of splits: 5 in total.
INFO - SlidingWindowSetting data split - Took 0.0753s
DEBUG - Checking split attribute and sizes.
DEBUG - Checking split attributes.
DEBUG - Split attributes are set.
DEBUG - Checking size of split sets.
DEBUG - Size of split sets are checked.
INFO - SlidingWindowSetting data split complete.





In [2]:
from streamsight.evaluators import EvaluatorStreamerBuilder

# Assemble the streaming evaluator on top of the sliding-window setting.
# The metric cut-off is set before the metrics are added, in the same
# order as originally written.
builder = EvaluatorStreamerBuilder()
builder.add_setting(setting_window)
builder.set_metric_K(k)
for metric_name in ("PrecisionK", "RecallK", "NDCGK"):
    builder.add_metric(metric_name)
evaluator = builder.build()

In [3]:
from streamsight.algorithms import ItemKNNIncremental
# The model lives outside the evaluator and is driven manually in the cells below.
# NOTE(review): pad_with_popularity=False presumably selects the "Random"
# padding named in the notebook header -- confirm against ItemKNNIncremental.
external_model = ItemKNNIncremental(K=k, pad_with_popularity=False)
# Register a name with the evaluator; the returned ID is passed to every
# later get_data / get_unlabeled_data / submit_prediction call.
external_model_id = evaluator.register_algorithm(algorithm_name="AmazonMusic_ItemKNN_Inc_RandomPad")
print(external_model_id)

INFO - Registering algorithm name AmazonMusic_ItemKNN_Inc_RandomPad with ID: bdd640fb-0667-4ad1-9c80-317fa3b1799d
DEBUG - Algorithm bdd640fb-0667-4ad1-9c80-317fa3b1799d registered
bdd640fb-0667-4ad1-9c80-317fa3b1799d


In [4]:
# Start the evaluation stream. Per the log below, this resets the data
# generators and caches the evaluation data for step 1.
evaluator.start_stream()

DEBUG - Resetting data generators.
DEBUG - Data generators are reset.
DEBUG - Preparing evaluator for streaming
DEBUG - (user x item) shape defined is (3733, 16060)
DEBUG - Shape of dataframe stored in matrix was (19253, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (19253, 6) after masking
DEBUG - Final (user x item) shape defined is (3733, 16060)
DEBUG - Caching evaluation data for step 1
DEBUG - (user x item) shape defined is (3733, 16060)
DEBUG - Shape of dataframe stored in matrix was (1719, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (1719, 6) after masking
DEBUG - Final (user x item) shape defined is (3733, 16060)
DEBUG - (user x item) shape defined is (3733, 16060)
DEBUG - Shape of dataframe stored in matrix was (1719, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (295, 6) after masking
DEBUG - Final (user x item) shape defined is (3957, 16060)
DEBUG - Data cached for step 1 complete


# Iteration 1

In [5]:
# Iteration 1: fetch the training interactions released for this step
# (19253 rows in the output below) and fit the recommender on them.
data = evaluator.get_data(external_model_id)
external_model.fit(data)
# Display the frame that was just used for fitting.
data

DEBUG - Getting data for algorithm bdd640fb-0667-4ad1-9c80-317fa3b1799d
Training data:  [[1 1 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 0 0]]
Item similarities:  [[0.         0.21320072 0.         ... 0.         0.         0.        ]
 [0.21320072 0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]
Item similarities after get_top_K_values:  [[0.         0.21320072 0.         ... 0.         0.         0.        ]
 [0.21320072 0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         

       interactionid   uid    iid          ts  rating  helpful_vote
0                  0     0      0   906574660     5.0             1
1                  1     0      1   908892059     3.0             0
2                  2     1      2   920935758     5.0             0
3                  3     2      3   941546039     4.0             0
4                  4     1      4   942757962     5.0            20
...              ...   ...    ...         ...     ...           ...
19248          19248  3730  15828  1546039809     5.0             1
19249          19249  3730  16057  1546039948     5.0             0
19250          19250  3731  16058  1546042248     4.0             0
19251          19251  1620  16059  1546057175     5.0             2
19252          19252  3732  14415  1546129819     3.0             1

[19253 rows x 6 columns]

In [6]:
# Fetch the frame to predict for; its item ids are shown as -1 in the
# output below. Then score it with the fitted model.
ul_data = evaluator.get_unlabeled_data(external_model_id)
prediction = external_model.predict(data, ul_data)
# Display the unlabeled frame for inspection.
ul_data

DEBUG - Getting unlabeled data for algorithm bdd640fb-0667-4ad1-9c80-317fa3b1799d
  warn(f"{self.name} missing similar items for {missing} items.")

DEBUG - Performing items_in comparison
In ItemKNNIncremental _predict:  [[1.01339832 0.42171513 0.         ... 0.         0.         0.        ]
 [0.         0.         2.12132034 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.57735027 0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]
Max user ID:  3957
Max item ID:  0
X.shape:  (3733, 16060)
Intended shape:  (3957, 16060)
Predict frame:         interactionid   uid  iid          ts  rating  helpful_vote
19253          19253  3732   -1  1546243901     3.0             0
19254          19254  3733   -1  1546295208     5.0             0
19255          19255  1561   -1

       interactionid   uid  iid          ts  rating  helpful_vote
19253          19253  3732   -1  1546243901     3.0             0
19254          19254  3733   -1  1546295208     5.0             0
19255          19255  1561   -1  1546307829     5.0             7
19256          19256  1787   -1  1546308024     1.0             0
19257          19257  1787   -1  1546308362     1.0             0
...              ...   ...  ...         ...     ...           ...
20967          20967   212   -1  1577729682     5.0             0
20968          20968   522   -1  1577753078     5.0             0
20969          20969  3076   -1  1577754015     5.0             0
20970          20970   753   -1  1577754537     5.0             0
20971          20971  3288   -1  1577755529     5.0             0

[1719 rows x 6 columns]

In [7]:
# Show the sparse summary (shape, stored elements and a sample of entries).
print(prediction)
# The former `print(prediction.toarray())` was removed: it materialised a
# dense float64 array of shape (3957, 16060), roughly 0.5 GB, only to print
# a truncated view of information the sparse print above already gives.
# Hand the predictions to the evaluator so this step can be scored.
evaluator.submit_prediction(external_model_id, prediction)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 81483 stored elements and shape (3957, 16060)>
  Coords	Values
  (0, 15052)	0.21320071635561041
  (0, 14872)	0.42171513041268516
  (0, 14330)	0.29819762844377895
  (0, 13473)	0.42171513041268516
  (0, 13472)	0.42171513041268516
  (0, 13471)	0.42171513041268516
  (0, 11139)	0.14909881422188948
  (0, 10564)	0.42171513041268516
  (0, 9331)	0.24347734406510196
  (0, 8790)	0.21320071635561041
  (0, 8789)	0.21320071635561041
  (0, 8788)	0.21320071635561041
  (0, 8635)	0.29819762844377895
  (0, 7690)	0.42171513041268516
  (0, 7307)	0.42171513041268516
  (0, 7292)	0.21085756520634258
  (0, 7025)	0.42171513041268516
  (0, 6723)	0.21085756520634258
  (0, 6345)	0.24347734406510196
  (0, 6168)	0.21320071635561041
  (0, 6154)	0.17216448105372378
  (0, 6153)	0.42171513041268516
  (0, 6152)	0.42171513041268516
  (0, 6151)	0.42171513041268516
  (0, 6111)	0.42171513041268516
  :	:
  (3943, 8258)	1.0
  (3944, 14320)	1.0
  (3944, 12022)	1.0
  

# Iteration 2

In [8]:
# Iteration 2: fetch the next batch of training interactions (1719 rows in
# the output below) and update the model with them. Per the log, this call
# also caches the evaluation data for step 2.
data = evaluator.get_data(external_model_id)
external_model.fit(data)
# Display the frame that was just used for fitting.
data

DEBUG - Getting data for algorithm bdd640fb-0667-4ad1-9c80-317fa3b1799d
DEBUG - (user x item) shape defined is (3957, 17393)
DEBUG - Shape of dataframe stored in matrix was (1719, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (1719, 6) after masking
DEBUG - Final (user x item) shape defined is (3957, 17393)
DEBUG - Caching evaluation data for step 2
DEBUG - (user x item) shape defined is (3957, 17393)
DEBUG - Shape of dataframe stored in matrix was (1267, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (1267, 6) after masking
DEBUG - Final (user x item) shape defined is (3957, 17393)
DEBUG - (user x item) shape defined is (3957, 17393)
DEBUG - Shape of dataframe stored in matrix was (1267, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (234, 6) after masking
DEBUG - Final (user x item) shape defined is (4109, 17393)
DEBUG - Data cached for step 2 complete
Training data:  [[1. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]


       interactionid   uid    iid          ts  rating  helpful_vote
19253          19253  3732  16060  1546243901     3.0             0
19254          19254  3733  16061  1546295208     5.0             0
19255          19255  1561  16062  1546307829     5.0             7
19256          19256  1787  16063  1546308024     1.0             0
19257          19257  1787  16064  1546308362     1.0             0
...              ...   ...    ...         ...     ...           ...
20967          20967   212   7561  1577729682     5.0             0
20968          20968   522  17389  1577753078     5.0             0
20969          20969  3076  17390  1577754015     5.0             0
20970          20970   753  17391  1577754537     5.0             0
20971          20971  3288  17392  1577755529     5.0             0

[1719 rows x 6 columns]

In [9]:
# Fetch the frame to predict for; its item ids are shown as -1 in the
# output below. Then score it with the fitted model.
ul_data = evaluator.get_unlabeled_data(external_model_id)
prediction = external_model.predict(data, ul_data)
# Display the unlabeled frame for inspection.
ul_data

DEBUG - Getting unlabeled data for algorithm bdd640fb-0667-4ad1-9c80-317fa3b1799d
  warn(f"{self.name} missing similar items for {missing} items.")

DEBUG - Performing items_in comparison
In ItemKNNIncremental _predict:  [[1.01339832 0.42171513 0.         ... 0.         0.         0.        ]
 [0.         0.         2.12132034 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]
Max user ID:  4111
Max item ID:  0
X.shape:  (3957, 17393)
Intended shape:  (4111, 17393)
Predict frame:         interactionid   uid  iid          ts  rating  helpful_vote
20972          20972   753   -1  1577895301     5.0             5
20973          20973  3551   -1  1577900463     5.0             0
20974          20974  3551   -1

       interactionid   uid  iid          ts  rating  helpful_vote
20972          20972   753   -1  1577895301     5.0             5
20973          20973  3551   -1  1577900463     5.0             0
20974          20974  3551   -1  1577900694     4.0             0
20975          20975  3957   -1  1577912436     5.0             0
20976          20976  3957   -1  1577912562     5.0             2
...              ...   ...  ...         ...     ...           ...
22234          22234  4077   -1  1609103063     5.0             0
22235          22235  4058   -1  1609183165     5.0             1
22236          22236  1265   -1  1609209509     5.0             2
22237          22237  2296   -1  1609217665     5.0             0
22238          22238  4102   -1  1609263547     5.0             0

[1267 rows x 6 columns]

In [10]:
# Show the sparse summary (shape, stored elements and a sample of entries).
print(prediction)
# The former `print(prediction.toarray())` was removed: it materialised a
# dense float64 array of shape (4111, 17393), roughly 0.57 GB, only to print
# a truncated view of information the sparse print above already gives.
# Hand the predictions to the evaluator so this step can be scored.
evaluator.submit_prediction(external_model_id, prediction)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 90739 stored elements and shape (4111, 17393)>
  Coords	Values
  (0, 15052)	0.21320071635561041
  (0, 14872)	0.42171513041268516
  (0, 14330)	0.29819762844377895
  (0, 13473)	0.42171513041268516
  (0, 13472)	0.42171513041268516
  (0, 13471)	0.42171513041268516
  (0, 11139)	0.14057171013756173
  (0, 10564)	0.42171513041268516
  (0, 9331)	0.24347734406510196
  (0, 8790)	0.21320071635561041
  (0, 8789)	0.21320071635561041
  (0, 8788)	0.21320071635561041
  (0, 8635)	0.29819762844377895
  (0, 7690)	0.42171513041268516
  (0, 7307)	0.42171513041268516
  (0, 7292)	0.21085756520634258
  (0, 7025)	0.42171513041268516
  (0, 6723)	0.21085756520634258
  (0, 6345)	0.24347734406510196
  (0, 6168)	0.21320071635561041
  (0, 6154)	0.17216448105372378
  (0, 6153)	0.42171513041268516
  (0, 6152)	0.42171513041268516
  (0, 6151)	0.42171513041268516
  (0, 6111)	0.42171513041268516
  :	:
  (4096, 16896)	1.0
  (4096, 11038)	1.0
  (4097, 9832)	1.0
  

# Iteration 3

In [11]:
# Iteration 3: fetch the next batch of training interactions (1267 rows in
# the output below) and update the model with them. Per the log, this call
# also caches the evaluation data for step 3.
data = evaluator.get_data(external_model_id)
external_model.fit(data)
# Display the frame that was just used for fitting.
data

DEBUG - Getting data for algorithm bdd640fb-0667-4ad1-9c80-317fa3b1799d
DEBUG - (user x item) shape defined is (4111, 18338)
DEBUG - Shape of dataframe stored in matrix was (1267, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (1267, 6) after masking
DEBUG - Final (user x item) shape defined is (4111, 18338)
DEBUG - Caching evaluation data for step 3
DEBUG - (user x item) shape defined is (4111, 18338)
DEBUG - Shape of dataframe stored in matrix was (1597, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (1597, 6) after masking
DEBUG - Final (user x item) shape defined is (4111, 18338)
DEBUG - (user x item) shape defined is (4111, 18338)
DEBUG - Shape of dataframe stored in matrix was (1597, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (254, 6) after masking
DEBUG - Final (user x item) shape defined is (4309, 18338)
DEBUG - Data cached for step 3 complete
Training data:  [[1. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]


       interactionid   uid    iid          ts  rating  helpful_vote
20972          20972   753  17393  1577895301     5.0             5
20973          20973  3551  17394  1577900463     5.0             0
20974          20974  3551  17395  1577900694     4.0             0
20975          20975  3957  17396  1577912436     5.0             0
20976          20976  3957  17397  1577912562     5.0             2
...              ...   ...    ...         ...     ...           ...
22234          22234  4077  14828  1609103063     5.0             0
22235          22235  4058  18335  1609183165     5.0             1
22236          22236  1265  18336  1609209509     5.0             2
22237          22237  2296  18337  1609217665     5.0             0
22238          22238  4102  17660  1609263547     5.0             0

[1267 rows x 6 columns]

In [12]:
# Fetch the frame to predict for; its item ids are shown as -1 in the
# output below. Then score it with the fitted model.
ul_data = evaluator.get_unlabeled_data(external_model_id)
prediction = external_model.predict(data, ul_data)
# Display the unlabeled frame for inspection.
ul_data

DEBUG - Getting unlabeled data for algorithm bdd640fb-0667-4ad1-9c80-317fa3b1799d
  warn(f"{self.name} missing similar items for {missing} items.")

DEBUG - Performing items_in comparison
In ItemKNNIncremental _predict:  [[1.01720812 0.41263856 0.         ... 0.         0.         0.        ]
 [0.         0.         2.12132034 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]
Max user ID:  4309
Max item ID:  0
X.shape:  (4111, 18338)
Intended shape:  (4309, 18338)
Predict frame:         interactionid   uid  iid          ts  rating  helpful_vote
22239          22239   791   -1  1609352713     2.0             0
22240          22240  2342   -1  1609352784     5.0             1
22241          22241   791   -1

       interactionid   uid  iid          ts  rating  helpful_vote
22239          22239   791   -1  1609352713     2.0             0
22240          22240  2342   -1  1609352784     5.0             1
22241          22241   791   -1  1609352935     1.0             0
22242          22242  3633   -1  1609429831     5.0             0
22243          22243  1127   -1  1609435015     4.0             0
...              ...   ...  ...         ...     ...           ...
23831          23831  4307   -1  1640701893     5.0             0
23832          23832  4307   -1  1640702119     5.0             0
23833          23833  4307   -1  1640703766     5.0             1
23834          23834  4308   -1  1640709415     5.0             2
23835          23835  3959   -1  1640710325     5.0             1

[1597 rows x 6 columns]

In [13]:
# Show the sparse summary (shape, stored elements and a sample of entries).
print(prediction)
# The former `print(prediction.toarray())` was removed: it materialised a
# dense float64 array of shape (4309, 18338), roughly 0.63 GB, only to print
# a truncated view of information the sparse print above already gives.
# Hand the predictions to the evaluator so this step can be scored.
evaluator.submit_prediction(external_model_id, prediction)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 99246 stored elements and shape (4309, 18338)>
  Coords	Values
  (0, 18160)	0.20851441405707477
  (0, 15777)	0.155962715604956
  (0, 15052)	0.20851441405707477
  (0, 14872)	0.4126385592890063
  (0, 14330)	0.2917795234523036
  (0, 13473)	0.4126385592890063
  (0, 13472)	0.4126385592890063
  (0, 13471)	0.4126385592890063
  (0, 11139)	0.130487769776369
  (0, 10564)	0.4126385592890063
  (0, 9331)	0.23823698328352716
  (0, 8790)	0.20851441405707477
  (0, 8789)	0.20851441405707477
  (0, 8788)	0.20851441405707477
  (0, 8635)	0.2917795234523036
  (0, 7690)	0.4126385592890063
  (0, 7307)	0.4126385592890063
  (0, 7292)	0.20631927964450314
  (0, 7025)	0.4126385592890063
  (0, 6723)	0.20631927964450314
  (0, 6345)	0.23823698328352716
  (0, 6168)	0.20851441405707477
  (0, 6154)	0.16845898640920823
  (0, 6153)	0.4126385592890063
  (0, 6152)	0.4126385592890063
  :	:
  (4291, 2277)	1.0
  (4292, 5815)	1.0
  (4293, 15146)	1.0
  (4294, 14523)	1

# Iteration 4

In [14]:
# Iteration 4: fetch the next batch of training interactions (1597 rows in
# the output below) and update the model with them. Per the log, this call
# also caches the evaluation data for step 4.
data = evaluator.get_data(external_model_id)
external_model.fit(data)
# Display the frame that was just used for fitting.
data

DEBUG - Getting data for algorithm bdd640fb-0667-4ad1-9c80-317fa3b1799d
DEBUG - (user x item) shape defined is (4309, 19548)
DEBUG - Shape of dataframe stored in matrix was (1597, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (1597, 6) after masking
DEBUG - Final (user x item) shape defined is (4309, 19548)
DEBUG - Caching evaluation data for step 4
DEBUG - (user x item) shape defined is (4309, 19548)
DEBUG - Shape of dataframe stored in matrix was (1259, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (1259, 6) after masking
DEBUG - Final (user x item) shape defined is (4309, 19548)
DEBUG - (user x item) shape defined is (4309, 19548)
DEBUG - Shape of dataframe stored in matrix was (1259, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (197, 6) after masking
DEBUG - Final (user x item) shape defined is (4424, 19548)
DEBUG - Data cached for step 4 complete
Training data:  [[1. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]


       interactionid   uid    iid          ts  rating  helpful_vote
22239          22239   791  18338  1609352713     2.0             0
22240          22240  2342  18339  1609352784     5.0             1
22241          22241   791  18340  1609352935     1.0             0
22242          22242  3633  18341  1609429831     5.0             0
22243          22243  1127  18342  1609435015     4.0             0
...              ...   ...    ...         ...     ...           ...
23831          23831  4307  19547  1640701893     5.0             0
23832          23832  4307  11670  1640702119     5.0             0
23833          23833  4307  12786  1640703766     5.0             1
23834          23834  4308  12399  1640709415     5.0             2
23835          23835  3959  18308  1640710325     5.0             1

[1597 rows x 6 columns]

In [15]:
# Fetch the frame to predict for; its item ids are shown as -1 in the
# output below. Then score it with the fitted model.
ul_data = evaluator.get_unlabeled_data(external_model_id)
prediction = external_model.predict(data, ul_data)
# Display the unlabeled frame for inspection.
ul_data

DEBUG - Getting unlabeled data for algorithm bdd640fb-0667-4ad1-9c80-317fa3b1799d
  warn(f"{self.name} missing similar items for {missing} items.")

DEBUG - Performing items_in comparison
In ItemKNNIncremental _predict:  [[1.         0.4        0.         ... 0.         0.         0.        ]
 [0.         0.         2.12132034 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         1.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.8660254 ]
 [0.         0.         0.         ... 0.         0.         0.        ]]
Max user ID:  4431
Max item ID:  0
X.shape:  (4309, 19548)
Intended shape:  (4431, 19548)
Predict frame:         interactionid   uid  iid          ts  rating  helpful_vote
23836          23836  4230   -1  1640869031     5.0             1
23837          23837  1548   -1  1640883197     5.0             1
23838          23838  1677   -1

       interactionid   uid  iid          ts  rating  helpful_vote
23836          23836  4230   -1  1640869031     5.0             1
23837          23837  1548   -1  1640883197     5.0             1
23838          23838  1677   -1  1640884401     5.0             4
23839          23839  1677   -1  1640885026     5.0             0
23840          23840  1680   -1  1640885425     3.0             1
...              ...   ...  ...         ...     ...           ...
25090          25090  2489   -1  1672339062     5.0             0
25091          25091  4129   -1  1672362059     5.0             0
25092          25092  4129   -1  1672362059     5.0             0
25093          25093  3250   -1  1672370323     5.0             0
25094          25094  4176   -1  1672374881     5.0             0

[1259 rows x 6 columns]

In [16]:
# Show the sparse summary (shape, stored elements and a sample of entries).
print(prediction)
# The former `print(prediction.toarray())` was removed: it materialised a
# dense float64 array of shape (4431, 19548), roughly 0.69 GB, only to print
# a truncated view of information the sparse print above already gives.
# Hand the predictions to the evaluator so this step can be scored.
evaluator.submit_prediction(external_model_id, prediction)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 110002 stored elements and shape (4431, 19548)>
  Coords	Values
  (0, 19380)	0.4
  (0, 19379)	0.4
  (0, 19257)	0.4
  (0, 18539)	0.2
  (0, 18160)	0.2
  (0, 18091)	0.4
  (0, 17222)	0.4
  (0, 17091)	0.1414213562373095
  (0, 15777)	0.06030226891555273
  (0, 15052)	0.2
  (0, 14872)	0.4
  (0, 14330)	0.282842712474619
  (0, 13473)	0.4
  (0, 13472)	0.4
  (0, 13471)	0.4
  (0, 11139)	0.1264911064067352
  (0, 10564)	0.4
  (0, 9331)	0.23094010767585035
  (0, 8790)	0.2
  (0, 8789)	0.2
  (0, 8788)	0.2
  (0, 8635)	0.282842712474619
  (0, 7690)	0.4
  (0, 7569)	0.282842712474619
  (0, 7307)	0.4
  :	:
  (4419, 3995)	1.0
  (4419, 1952)	1.0
  (4419, 63)	1.0
  (4420, 5789)	1.0
  (4420, 3152)	1.0
  (4420, 714)	1.0
  (4421, 6511)	1.0
  (4421, 4855)	1.0
  (4421, 4785)	1.0
  (4422, 14073)	1.0
  (4422, 10455)	1.0
  (4422, 10377)	1.0
  (4423, 17096)	1.0
  (4423, 14603)	1.0
  (4423, 6633)	1.0
  (4424, 10273)	1.0
  (4425, 12491)	1.0
  (4425, 5614)	1.0
 

# Iteration 5

In [17]:
# Iteration 5: fetch the next batch of training interactions (1259 rows in
# the output below) and update the model with them. Per the log, this call
# also caches the evaluation data for step 5.
data = evaluator.get_data(external_model_id)
external_model.fit(data)
# Display the frame that was just used for fitting.
data

DEBUG - Getting data for algorithm bdd640fb-0667-4ad1-9c80-317fa3b1799d
DEBUG - (user x item) shape defined is (4431, 20527)
DEBUG - Shape of dataframe stored in matrix was (1259, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (1259, 6) after masking
DEBUG - Final (user x item) shape defined is (4431, 20527)
DEBUG - Caching evaluation data for step 5
DEBUG - (user x item) shape defined is (4431, 20527)
DEBUG - Shape of dataframe stored in matrix was (447, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (447, 6) after masking
DEBUG - Final (user x item) shape defined is (4431, 20527)
DEBUG - (user x item) shape defined is (4431, 20527)
DEBUG - Shape of dataframe stored in matrix was (447, 6) before masking
DEBUG - Shape of dataframe stored in matrix is now (105, 6) after masking
DEBUG - Final (user x item) shape defined is (4462, 20527)
DEBUG - Data cached for step 5 complete
Training data:  [[1. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0

       interactionid   uid    iid          ts  rating  helpful_vote
23836          23836  4230  18308  1640869031     5.0             1
23837          23837  1548  18862  1640883197     5.0             1
23838          23838  1677  19548  1640884401     5.0             4
23839          23839  1677  17482  1640885026     5.0             0
23840          23840  1680  19549  1640885425     3.0             1
...              ...   ...    ...         ...     ...           ...
25090          25090  2489  20523  1672339062     5.0             0
25091          25091  4129  20524  1672362059     5.0             0
25092          25092  4129  20524  1672362059     5.0             0
25093          25093  3250  20525  1672370323     5.0             0
25094          25094  4176  20526  1672374881     5.0             0

[1259 rows x 6 columns]

In [18]:
# Fetch the frame to predict for; its item ids are shown as -1 in the
# output below. Then score it with the fitted model.
ul_data = evaluator.get_unlabeled_data(external_model_id)
prediction = external_model.predict(data, ul_data)
# Display the unlabeled frame for inspection.
ul_data

DEBUG - Getting unlabeled data for algorithm bdd640fb-0667-4ad1-9c80-317fa3b1799d
  warn(f"{self.name} missing similar items for {missing} items.")

DEBUG - Performing items_in comparison
In ItemKNNIncremental _predict:  [[1.         0.4        0.         ... 0.         0.         0.        ]
 [0.         0.         2.12132034 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]
Max user ID:  4464
Max item ID:  0
X.shape:  (4431, 20527)
Intended shape:  (4464, 20527)
Predict frame:         interactionid   uid  iid          ts  rating  helpful_vote
25095          25095  4339   -1  1672406677     5.0             0
25096          25096  4339   -1  1672406685     5.0             0
25097          25097  4279   -1

       interactionid   uid  iid          ts  rating  helpful_vote
25095          25095  4339   -1  1672406677     5.0             0
25096          25096  4339   -1  1672406685     5.0             0
25097          25097  4279   -1  1672427352     5.0             0
25098          25098  4279   -1  1672427477     5.0             0
25099          25099  1466   -1  1672440344     5.0             0
...              ...   ...  ...         ...     ...           ...
25537          25537  4462   -1  1691639002     5.0             0
25538          25538  4463   -1  1692200640     5.0             0
25539          25539  4463   -1  1692202028     5.0             0
25540          25540  4463   -1  1692202101     5.0             0
25541          25541  2896   -1  1692267495     5.0             0

[447 rows x 6 columns]

In [19]:
# Show the sparse summary (shape, stored elements and a sample of entries).
print(prediction)
# The former `print(prediction.toarray())` was removed: it materialised a
# dense float64 array of shape (4464, 20527), roughly 0.73 GB, only to print
# a truncated view of information the sparse print above already gives.
# Hand the predictions to the evaluator so this step can be scored.
evaluator.submit_prediction(external_model_id, prediction)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 118091 stored elements and shape (4464, 20527)>
  Coords	Values
  (0, 19876)	0.2
  (0, 19380)	0.4
  (0, 19379)	0.4
  (0, 19257)	0.4
  (0, 18539)	0.17888543819998318
  (0, 18160)	0.2
  (0, 18091)	0.4
  (0, 17222)	0.4
  (0, 17091)	0.1414213562373095
  (0, 15777)	0.06030226891555273
  (0, 15052)	0.2
  (0, 14872)	0.4
  (0, 14330)	0.282842712474619
  (0, 13473)	0.4
  (0, 13472)	0.4
  (0, 13471)	0.4
  (0, 11139)	0.12060453783110546
  (0, 10564)	0.4
  (0, 9331)	0.23094010767585035
  (0, 8790)	0.2
  (0, 8789)	0.2
  (0, 8788)	0.2
  (0, 8635)	0.282842712474619
  (0, 7690)	0.4
  (0, 7569)	0.282842712474619
  :	:
  (4457, 6782)	1.0
  (4458, 18357)	1.0
  (4458, 15159)	1.0
  (4458, 11532)	1.0
  (4458, 8275)	1.0
  (4458, 8092)	1.0
  (4458, 3069)	1.0
  (4459, 15721)	1.0
  (4459, 5758)	1.0
  (4459, 2846)	1.0
  (4460, 17882)	1.0
  (4460, 5648)	1.0
  (4460, 2655)	1.0
  (4461, 16980)	1.0
  (4461, 5307)	1.0
  (4461, 2582)	1.0
  (4461, 1882)	1.0


# Evaluate metrics

In [20]:
# Check the state of every registered algorithm; the output below shows
# COMPLETED once all five predictions have been submitted.
evaluator.get_all_algorithm_status()

{'AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d': <AlgorithmStateEnum.COMPLETED: 'COMPLETED'>}

In [21]:
# Macro-level results: one macro_score per metric, with the number of
# windows it covers (num_window) in the output below.
evaluator.metric_results("macro")

Unnamed: 0_level_0,Unnamed: 1_level_0,macro_score,num_window
Algorithm,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,NDCGK_100,0.013075,5
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,PrecisionK_100,0.000546,5
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,RecallK_100,0.04008,5


In [22]:
# Micro-level results: one micro_score per metric, with the number of
# users it covers (num_user) in the output below.
evaluator.metric_results("micro")


Unnamed: 0_level_0,Unnamed: 1_level_0,micro_score,num_user
Algorithm,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,NDCGK_100,0.013257,761
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,PrecisionK_100,0.000512,761
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,RecallK_100,0.039619,761


In [23]:
# User-level results: one score per (timestamp, metric, user_id), as shown
# in the output below.
evaluator.metric_results("user")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,user_id,score
Algorithm,Timestamp,Metric,Unnamed: 3_level_1,Unnamed: 4_level_1
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1546241746,PrecisionK_100,512,0.0
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1546241746,PrecisionK_100,1024,0.0
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1546241746,PrecisionK_100,3587,0.01
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1546241746,PrecisionK_100,3590,0.0
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1546241746,PrecisionK_100,3079,0.0
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,...,...,...,...
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1672385746,NDCGK_100,3956,0.0
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1672385746,NDCGK_100,4348,0.0
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1672385746,NDCGK_100,4220,0.0
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1672385746,NDCGK_100,4349,0.0


In [24]:
# Window-level results: one window_score per (timestamp, metric), with the
# number of users evaluated in that window (num_user).
evaluator.metric_results("window")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,window_score,num_user
Algorithm,Timestamp,Metric,Unnamed: 3_level_1,Unnamed: 4_level_1
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1546241746,PrecisionK_100,0.000498,201
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1546241746,RecallK_100,0.04063,201
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1546241746,NDCGK_100,0.018236,201
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1577777746,PrecisionK_100,0.000671,164
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1577777746,RecallK_100,0.057927,164
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1577777746,NDCGK_100,0.019442,164
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1609313746,PrecisionK_100,0.000112,178
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1609313746,RecallK_100,0.008427,178
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1609313746,NDCGK_100,0.002329,178
AmazonMusic_ItemKNN_Inc_RandomPad_bdd640fb-0667-4ad1-9c80-317fa3b1799d,t=1640849746,PrecisionK_100,0.000753,146
