# Endpoint Flow Testing
### Dataset: Test
### Algorithm: ItemKNN Incremental
### Padding Method: Popularity

In [1]:
import pandas as pd

In [2]:
from streamsight.algorithms import ItemKNNIncremental

# Value handed to ItemKNNIncremental's K parameter. Presumably the number of
# similar items kept per item (the fit log prints "get_top_K_values") —
# TODO confirm against the streamsight documentation.
KNN_K = 2
external_model = ItemKNNIncremental(K=KNN_K)


INFO - streamsight package loaded.
  from .autonotebook import tqdm as notebook_tqdm



In [3]:
# Placeholder identifiers: replace with the actual stream_id and algorithm_id
# of the run being tested. Every endpoint URL in this notebook is built from
# these two values.
stream_id = "cf42abee-c48c-4013-b229-1c6d2f949f80"
algorithm_id = "bdd640fb-0667-4ad1-9c80-317fa3b1799d"

## Iteration 1

In [4]:
import requests

# Training-data endpoint for this stream/algorithm pair on the local API server.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/training-data"

# Construct the full URL
url = f"{base_url}{endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)}"
print(url)
response = requests.get(url)
print(response.status_code)
print(response.content)

# Fail here on a 4xx/5xx reply (after the status and body have been printed
# for debugging) rather than building a DataFrame from an error payload.
response.raise_for_status()

# Decode the JSON body once and read both fields from the same payload.
payload = response.json()
training_df = pd.DataFrame(payload.get('training_data'))
shape = payload.get('shape')


http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/training-data
200
b'{"shape":[3,3],"training_data":[{"interactionid":0,"uid":0,"iid":0,"ts":0},{"interactionid":1,"uid":1,"iid":0,"ts":1},{"interactionid":2,"uid":2,"iid":1,"ts":2},{"interactionid":3,"uid":0,"iid":2,"ts":3}]}'


In [5]:
from streamsight.matrix import InteractionMatrix

# Wrap the fetched training frame in an InteractionMatrix, naming the
# iid / uid / ts columns as the item, user and timestamp columns and passing
# along the shape reported by the server.
training_im = InteractionMatrix(
    training_df,
    shape=shape,
    user_ix='uid',
    item_ix='iid',
    timestamp_ix='ts',
)

# Fit the model on this iteration's training matrix.
external_model.fit(training_im)

Training data:  [[1 0 1]
 [1 0 0]
 [0 1 0]]
Item similarities:  [[0.         0.         0.70710678]
 [0.         0.         0.        ]
 [0.70710678 0.         0.        ]]
Item similarities after get_top_K_values:  [[0.         0.         0.70710678]
 [0.         0.         0.        ]
 [0.70710678 0.         0.        ]]
  warn(f"{self.name} missing similar items for {missing} items.")

DEBUG - Fitting ItemKNNIncremental complete - Took 0.00994s


In [7]:
import requests

# Unlabeled-data endpoint for this stream/algorithm pair on the local API server.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/unlabeled-data"

# Construct the full URL
url = f"{base_url}{endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)}"
print(url)
response = requests.get(url)

# Fail on a 4xx/5xx reply instead of silently building a frame from an
# error payload.
response.raise_for_status()

# Decode the JSON body once and read both fields from the same payload.
payload = response.json()
ul_df = pd.DataFrame(payload.get('unlabeled_data'))
shape = payload.get('shape')

# Last expression: display the unlabeled frame.
ul_df

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/unlabeled-data


Unnamed: 0,interactionid,uid,iid,ts
0,4,1,-1,4
1,5,1,-1,5
2,6,3,-1,6


In [8]:
# Build the interaction matrix for the unlabeled frame with the same
# iid / uid / ts column mapping used for the training frame.
ul_im = InteractionMatrix(
    ul_df,
    shape=shape,
    user_ix='uid',
    item_ix='iid',
    timestamp_ix='ts',
)

# Score the unlabeled interactions and show the dense prediction matrix.
prediction = external_model.predict(training_im, ul_im)
print(prediction.toarray())

# Flatten the prediction into plain Python values so it can be sent as a JSON
# body. data / indices / indptr look like the fields of a scipy compressed
# sparse matrix — presumably CSR; TODO confirm.
matrix_dict = dict(
    data=prediction.data.tolist(),
    indices=prediction.indices.tolist(),
    indptr=prediction.indptr.tolist(),
    shape=prediction.shape,
)




  warn(f"{self.name} missing similar items for {missing} items.")

DEBUG - Performing items_in comparison
In ItemKNNIncremental _predict:  [[0.70710678 0.         0.70710678]
 [0.         0.         0.70710678]
 [0.         0.         0.        ]]
Max user ID:  4
Max item ID:  0
X.shape:  (3, 3)
Intended shape:  (4, 3)
Predict frame:     interactionid  uid  iid  ts  interactionid
0              0    1   -1   4              4
1              1    1   -1   5              5
2              2    3   -1   6              6
Known user ID:  3
Known item ID:  3
X_pred after adding rows:  [[0.70710678 0.         0.70710678]
 [0.         0.         0.70710678]
 [0.         0.         0.        ]
 [0.         0.         0.        ]]
DEBUG - Padding user ID in range(3, 4) with items
To predict:  uid
1    2
3    1
Name: count, dtype: int64
Popular items:  [1.  0.  0.5]
  self._set_arrayXarray(i, j, x)

X_pred after padding:  [[0.70710678 0.         0.70710678]
 [0.         0.         0.70710678]
 [0. 

In [9]:
import requests

# Target: the predictions endpoint of the locally running API.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/predictions"

# Fill in the stream/algorithm ids and prepend the host.
url = base_url + endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)
print(url)

# Upload the serialised prediction matrix as the JSON request body and show
# the server's status code and raw reply.
response = requests.post(url, json=matrix_dict)
print(response.status_code)
print(response.content)

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/predictions
200
b'{"status":true}'


In [10]:
import requests

# Target: the is-completed endpoint of the locally running API.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/is-completed"

# Fill in the stream/algorithm ids and prepend the host.
url = base_url + endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)
print(url)

# Query the endpoint and print the raw reply body.
response = requests.get(url)
print(response.content)

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/is-completed
b'false'


## Iteration 2

In [11]:
import requests

# Training-data endpoint for this stream/algorithm pair on the local API server.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/training-data"

# Construct the full URL
url = f"{base_url}{endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)}"
print(url)
response = requests.get(url)
print(response.status_code)
print(response.content)

# Fail here on a 4xx/5xx reply (after the status and body have been printed
# for debugging) rather than building a DataFrame from an error payload.
response.raise_for_status()

# Decode the JSON body once and read both fields from the same payload.
payload = response.json()
training_df = pd.DataFrame(payload.get('training_data'))
shape = payload.get('shape')

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/training-data
200
b'{"shape":[4,3],"training_data":[{"interactionid":4,"uid":1,"iid":1,"ts":4},{"interactionid":5,"uid":1,"iid":2,"ts":5},{"interactionid":6,"uid":3,"iid":1,"ts":6}]}'


In [12]:
from streamsight.matrix import InteractionMatrix

# Wrap the fetched training frame in an InteractionMatrix, naming the
# iid / uid / ts columns as the item, user and timestamp columns and passing
# along the shape reported by the server.
training_im = InteractionMatrix(
    training_df,
    shape=shape,
    user_ix='uid',
    item_ix='iid',
    timestamp_ix='ts',
)

# Fit the model on this iteration's training matrix.
external_model.fit(training_im)

Training data:  [[1. 0. 1.]
 [1. 1. 1.]
 [0. 1. 0.]
 [0. 1. 0.]]
Item similarities:  [[0.         0.40824829 1.        ]
 [0.40824829 0.         0.40824829]
 [1.         0.40824829 0.        ]]
Item similarities after get_top_K_values:  [[0.         0.40824829 1.        ]
 [0.40824829 0.         0.40824829]
 [1.         0.40824829 0.        ]]
DEBUG - Fitting ItemKNNIncremental complete - Took 0.0154s


In [13]:
import requests

# Unlabeled-data endpoint for this stream/algorithm pair on the local API server.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/unlabeled-data"

# Construct the full URL
url = f"{base_url}{endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)}"
print(url)
response = requests.get(url)

# Fail on a 4xx/5xx reply instead of silently building a frame from an
# error payload.
response.raise_for_status()

# Decode the JSON body once and read both fields from the same payload.
payload = response.json()
ul_df = pd.DataFrame(payload.get('unlabeled_data'))
shape = payload.get('shape')

# Last expression: display the unlabeled frame.
ul_df

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/unlabeled-data


Unnamed: 0,interactionid,uid,iid,ts
0,7,2,-1,7
1,8,2,-1,8
2,9,3,-1,9
3,10,4,-1,9


In [14]:
# Build the interaction matrix for the unlabeled frame with the same
# iid / uid / ts column mapping used for the training frame.
ul_im = InteractionMatrix(
    ul_df,
    shape=shape,
    user_ix='uid',
    item_ix='iid',
    timestamp_ix='ts',
)

# Score the unlabeled interactions and show the dense prediction matrix.
prediction = external_model.predict(training_im, ul_im)
print(prediction.toarray())

# Flatten the prediction into plain Python values so it can be sent as a JSON
# body. data / indices / indptr look like the fields of a scipy compressed
# sparse matrix — presumably CSR; TODO confirm.
matrix_dict = dict(
    data=prediction.data.tolist(),
    indices=prediction.indices.tolist(),
    indptr=prediction.indptr.tolist(),
    shape=prediction.shape,
)

DEBUG - Performing items_in comparison
In ItemKNNIncremental _predict:  [[1.         0.81649658 1.        ]
 [1.40824829 0.81649658 1.40824829]
 [0.40824829 0.         0.40824829]
 [0.40824829 0.         0.40824829]]
Max user ID:  5
Max item ID:  0
X.shape:  (4, 3)
Intended shape:  (5, 3)
Predict frame:     interactionid  uid  iid  ts  interactionid
0              0    2   -1   7              7
1              1    2   -1   8              8
2              2    3   -1   9              9
3              3    4   -1   9             10
Known user ID:  4
Known item ID:  3
X_pred after adding rows:  [[1.         0.81649658 1.        ]
 [1.40824829 0.81649658 1.40824829]
 [0.40824829 0.         0.40824829]
 [0.40824829 0.         0.40824829]
 [0.         0.         0.        ]]
DEBUG - Padding user ID in range(4, 5) with items
To predict:  uid
2    2
3    1
4    1
Name: count, dtype: int64
Popular items:  [0.  1.  0.5]
  self._set_arrayXarray(i, j, x)

X_pred after padding:  [[1.         0.8164

In [15]:
import requests

# Target: the predictions endpoint of the locally running API.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/predictions"

# Fill in the stream/algorithm ids and prepend the host.
url = base_url + endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)
print(url)

# Upload the serialised prediction matrix as the JSON request body and show
# the server's status code and raw reply.
response = requests.post(url, json=matrix_dict)
print(response.status_code)
print(response.content)

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/predictions
200
b'{"status":true}'


In [16]:
import requests

# Target: the is-completed endpoint of the locally running API.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/is-completed"

# Fill in the stream/algorithm ids and prepend the host.
url = base_url + endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)
print(url)

# Query the endpoint and print the raw reply body.
response = requests.get(url)
print(response.content)

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/is-completed
b'false'


## Iteration 3


In [17]:
import requests

# Training-data endpoint for this stream/algorithm pair on the local API server.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/training-data"

# Construct the full URL
url = f"{base_url}{endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)}"
print(url)
response = requests.get(url)
print(response.status_code)
print(response.content)

# Fail here on a 4xx/5xx reply (after the status and body have been printed
# for debugging) rather than building a DataFrame from an error payload.
response.raise_for_status()

# Decode the JSON body once and read both fields from the same payload.
payload = response.json()
training_df = pd.DataFrame(payload.get('training_data'))
shape = payload.get('shape')

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/training-data
200
b'{"shape":[5,3],"training_data":[{"interactionid":7,"uid":2,"iid":0,"ts":7},{"interactionid":8,"uid":2,"iid":2,"ts":8},{"interactionid":9,"uid":3,"iid":2,"ts":9},{"interactionid":10,"uid":4,"iid":0,"ts":9}]}'


In [18]:
from streamsight.matrix import InteractionMatrix

# Wrap the fetched training frame in an InteractionMatrix, naming the
# iid / uid / ts columns as the item, user and timestamp columns and passing
# along the shape reported by the server.
training_im = InteractionMatrix(
    training_df,
    shape=shape,
    user_ix='uid',
    item_ix='iid',
    timestamp_ix='ts',
)

# Fit the model on this iteration's training matrix.
external_model.fit(training_im)

Training data:  [[1. 0. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [0. 1. 1.]
 [1. 0. 0.]]
Item similarities:  [[0.         0.57735027 0.75      ]
 [0.57735027 0.         0.8660254 ]
 [0.75       0.8660254  0.        ]]
Item similarities after get_top_K_values:  [[0.         0.57735027 0.75      ]
 [0.57735027 0.         0.8660254 ]
 [0.75       0.8660254  0.        ]]
DEBUG - Fitting ItemKNNIncremental complete - Took 0.00412s


In [19]:
import requests

# Unlabeled-data endpoint for this stream/algorithm pair on the local API server.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/unlabeled-data"

# Construct the full URL
url = f"{base_url}{endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)}"
print(url)
response = requests.get(url)

# Fail on a 4xx/5xx reply instead of silently building a frame from an
# error payload.
response.raise_for_status()

# Decode the JSON body once and read both fields from the same payload.
payload = response.json()
ul_df = pd.DataFrame(payload.get('unlabeled_data'))
shape = payload.get('shape')

# Last expression: display the unlabeled frame.
ul_df

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/unlabeled-data


Unnamed: 0,interactionid,uid,iid,ts
0,11,4,-1,10
1,12,4,-1,10


In [20]:
# Build the interaction matrix for the unlabeled frame with the same
# iid / uid / ts column mapping used for the training frame.
ul_im = InteractionMatrix(
    ul_df,
    shape=shape,
    user_ix='uid',
    item_ix='iid',
    timestamp_ix='ts',
)

# Score the unlabeled interactions and show the dense prediction matrix.
prediction = external_model.predict(training_im, ul_im)
print(prediction.toarray())

# Flatten the prediction into plain Python values so it can be sent as a JSON
# body. data / indices / indptr look like the fields of a scipy compressed
# sparse matrix — presumably CSR; TODO confirm.
matrix_dict = dict(
    data=prediction.data.tolist(),
    indices=prediction.indices.tolist(),
    indptr=prediction.indptr.tolist(),
    shape=prediction.shape,
)

DEBUG - Performing items_in comparison
In ItemKNNIncremental _predict:  [[0.75       1.44337567 0.75      ]
 [1.32735027 1.44337567 1.6160254 ]
 [1.32735027 1.44337567 1.6160254 ]
 [1.32735027 0.8660254  0.8660254 ]
 [0.         0.57735027 0.75      ]]
Max user ID:  5
Max item ID:  0
X.shape:  (5, 3)
Intended shape:  (5, 3)
Predict frame:     interactionid  uid  iid  ts  interactionid
0              0    4   -1  10             11
1              1    4   -1  10             12
[[0.75       1.44337567 0.75      ]
 [1.32735027 1.44337567 1.6160254 ]
 [1.32735027 1.44337567 1.6160254 ]
 [1.32735027 0.8660254  0.8660254 ]
 [0.         0.57735027 0.75      ]]


In [21]:
import requests

# Target: the predictions endpoint of the locally running API.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/predictions"

# Fill in the stream/algorithm ids and prepend the host.
url = base_url + endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)
print(url)

# Upload the serialised prediction matrix as the JSON request body and show
# the server's status code and raw reply.
response = requests.post(url, json=matrix_dict)
print(response.status_code)
print(response.content)

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/predictions
200
b'{"status":true}'


In [22]:
import requests

# Target: the is-completed endpoint of the locally running API.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/algorithms/{algorithm_id}/is-completed"

# Fill in the stream/algorithm ids and prepend the host.
url = base_url + endpoint.format(stream_id=stream_id, algorithm_id=algorithm_id)
print(url)

# Query the endpoint and print the raw reply body.
response = requests.get(url)
print(response.content)

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/algorithms/bdd640fb-0667-4ad1-9c80-317fa3b1799d/is-completed
b'true'


In [23]:
import requests

# Target: the stream-level metrics endpoint of the locally running API.
# Unlike the per-algorithm endpoints, this path only takes the stream id.
base_url = "http://localhost:8000"
endpoint = "/streams/{stream_id}/metrics"

# Fill in the stream id and prepend the host.
url = base_url + endpoint.format(stream_id=stream_id)
print(url)

# Fetch the metrics and print the raw reply body.
response = requests.get(url)
print(response.content)

http://localhost:8000/streams/cf42abee-c48c-4013-b229-1c6d2f949f80/metrics
b'{"micro_metrics":[{"algorithm_name":"itemknn_incremental","algorithm_id":"bdd640fb-0667-4ad1-9c80-317fa3b1799d","metric":"DCGK_2","micro_score":0.8769765845238192,"num_user":6},{"algorithm_name":"itemknn_incremental","algorithm_id":"bdd640fb-0667-4ad1-9c80-317fa3b1799d","metric":"HitK_2","micro_score":0.5454545454545454,"num_user":11},{"algorithm_name":"itemknn_incremental","algorithm_id":"bdd640fb-0667-4ad1-9c80-317fa3b1799d","metric":"NDCGK_2","micro_score":0.6021911987942431,"num_user":6},{"algorithm_name":"itemknn_incremental","algorithm_id":"bdd640fb-0667-4ad1-9c80-317fa3b1799d","metric":"PrecisionK_2","micro_score":0.5,"num_user":6},{"algorithm_name":"itemknn_incremental","algorithm_id":"bdd640fb-0667-4ad1-9c80-317fa3b1799d","metric":"RecallK_2","micro_score":0.5833333333333334,"num_user":6}],"macro_metrics":[{"algorithm_name":"itemknn_incremental","algorithm_id":"bdd640fb-0667-4ad1-9c80-317fa3b1799d","m

In [24]:
# Decode the metrics reply held in `response`.
response_data = response.json()

# One row per report section:
# (heading, payload key, score label, score field, count label, count field)
metric_sections = [
    ("Micro Metrics:", "micro_metrics", "Micro Score", "micro_score", "Number of Users", "num_user"),
    ("Macro Metrics:", "macro_metrics", "Macro Score", "macro_score", "Number of Windows", "num_window"),
]

# Print each section's heading followed by one four-line entry per metric.
for heading, payload_key, score_label, score_field, count_label, count_field in metric_sections:
    print(heading)
    for metric in response_data[payload_key]:
        print(f"Algorithm: {metric['algorithm_name']} (ID: {metric['algorithm_id']})")
        print(f"  Metric: {metric['metric']}")
        print(f"  {score_label}: {metric[score_field]}")
        print(f"  {count_label}: {metric[count_field]}\n")

Micro Metrics:
Algorithm: itemknn_incremental (ID: bdd640fb-0667-4ad1-9c80-317fa3b1799d)
  Metric: DCGK_2
  Micro Score: 0.8769765845238192
  Number of Users: 6

Algorithm: itemknn_incremental (ID: bdd640fb-0667-4ad1-9c80-317fa3b1799d)
  Metric: HitK_2
  Micro Score: 0.5454545454545454
  Number of Users: 11

Algorithm: itemknn_incremental (ID: bdd640fb-0667-4ad1-9c80-317fa3b1799d)
  Metric: NDCGK_2
  Micro Score: 0.6021911987942431
  Number of Users: 6

Algorithm: itemknn_incremental (ID: bdd640fb-0667-4ad1-9c80-317fa3b1799d)
  Metric: PrecisionK_2
  Micro Score: 0.5
  Number of Users: 6

Algorithm: itemknn_incremental (ID: bdd640fb-0667-4ad1-9c80-317fa3b1799d)
  Metric: RecallK_2
  Micro Score: 0.5833333333333334
  Number of Users: 6

Macro Metrics:
Algorithm: itemknn_incremental (ID: bdd640fb-0667-4ad1-9c80-317fa3b1799d)
  Metric: DCGK_2
  Macro Score: 1.0026354460317588
  Number of Windows: 3

Algorithm: itemknn_incremental (ID: bdd640fb-0667-4ad1-9c80-317fa3b1799d)
  Metric: HitK_2