## Test the Performance of Approximate vs Exact Matching 

In [11]:
import tensorflow as tf
import time

In [1]:
# Notebook configuration. PROJECT_ID, REGION, MODEL_NAME and MODEL_VERSION are
# the arguments passed to EmbeddingLookup further down in this notebook.
PROJECT_ID = 'ksalama-cloudml'
# Bucket name interpolated into the gs:// paths used by this notebook.
BUCKET = 'ksalama-cloudml'
REGION = 'us-central1'
# Directory handed to ScaNNMatcher as the location of the ScaNN index.
INDEX_DIR = f'gs://{BUCKET}/bqml/scann_index'
MODEL_NAME = 'item_embedding_lookup'
MODEL_VERSION = 'v1'
# NOTE(review): SERVICE_NAME and KIND are not referenced by any cell visible
# in this notebook section — confirm whether they are still needed.
SERVICE_NAME = 'index-server'
KIND = 'song'

In [2]:
# Create the embedding lookup client from the project, region, model name and
# model version configured above. The recorded output shows the constructor
# printing the resolved service name
# (projects/<project>/models/<model>/versions/<version>).
from index_server.lookup import EmbeddingLookup
embedding_lookup = EmbeddingLookup(PROJECT_ID, REGION, MODEL_NAME, MODEL_VERSION)

Service name: projects/ksalama-cloudml/models/item_embedding_lookup/versions/v1


In [3]:
# Look up item '2114402' and keep the first element of the result; this value
# is reused as the query for the ScaNN benchmark below.
# NOTE(review): presumably lookup() returns one embedding per requested id —
# confirm against index_server.lookup.
vector = embedding_lookup.lookup(['2114402'])[0]

## Approximate Matching (ScaNN)

In [4]:
# Build the approximate matcher from the index stored under INDEX_DIR. The
# recorded output shows the constructor logging when index loading starts and
# finishes.
from index_server.matching import ScaNNMatcher
scann_matcher = ScaNNMatcher(INDEX_DIR)

Loading ScaNN index...
ScaNN index is loadded.


In [8]:
# Benchmark approximate matching: issue the same ScaNN query repeatedly and
# report the total and the per-call average latency in seconds.
num_runs = 100  # single source of truth for the loop count and the average

# time.perf_counter() is the monotonic, highest-resolution clock intended for
# measuring short durations. time.time() is wall-clock time: it can jump when
# the system clock is adjusted and may be too coarse for a sub-millisecond
# per-call measurement like this one.
start_time = time.perf_counter()
for _run in range(num_runs):
  # NOTE(review): 50 is presumably the number of matches requested — confirm
  # against ScaNNMatcher.match.
  scann_matcher.match(vector, 50)
end_time = time.perf_counter()
elapsed_time = end_time - start_time

print(f'Elapsed time:{elapsed_time} - average time: {elapsed_time/num_runs}')

Elapsed time:0.026595115661621094 - average time: 0.00026595115661621094


## Exact Matching

In [45]:
class ExactMatcher:
  """Thin wrapper around a TensorFlow SavedModel used for exact matching.

  The model is loaded once at construction time and queried through its
  'serving_default' signature.
  """

  def __init__(self, model_dir):
    """Load the SavedModel found at `model_dir`.

    Args:
      model_dir: Path passed straight to `tf.saved_model.load`.
    """
    print("Loading exact matchg model...")
    self.model = tf.saved_model.load(model_dir)
    print("Exact matchg model is loaded.")

  def match(self, instances):
    """Run the serving signature on `instances` and return its matches.

    Args:
      instances: Values convertible to an int64 tensor (the notebook passes a
        list containing one integer item id).

    Returns:
      The 'predicted_item2_Id' entry of the signature output, converted to a
      NumPy array.
    """
    serving_fn = self.model.signatures['serving_default']
    query_ids = tf.constant(instances, dtype=tf.dtypes.int64)
    predictions = serving_fn(query_ids)
    return predictions['predicted_item2_Id'].numpy()

In [46]:
# Location, under the configured bucket, of the SavedModel that ExactMatcher
# loads. NOTE(review): the name suggests a BigQuery ML export — confirm.
BQML_MODEL_OUTPUT_DIR = f'gs://{BUCKET}/bqml/item_matching_model'
# Constructing the matcher loads the model immediately (see the log lines in
# the recorded output), so load time stays outside the timed loop below.
exact_matcher = ExactMatcher(BQML_MODEL_OUTPUT_DIR)

Loading exact matchg model...
Exact matchg model is loaded.


In [48]:
# Benchmark exact matching: query the SavedModel repeatedly for the same item
# id and report the total and the per-call average latency in seconds.
num_runs = 100  # single source of truth for the loop count and the average

# time.perf_counter() is the monotonic, highest-resolution clock intended for
# measuring short durations. time.time() is wall-clock time and can jump when
# the system clock is adjusted, which would corrupt the measurement.
start_time = time.perf_counter()
for _run in range(num_runs):
  exact_matcher.match([2114402])
end_time = time.perf_counter()
elapsed_time = end_time - start_time

print(f'Elapsed time:{elapsed_time} - average time: {elapsed_time/num_runs}')

Elapsed time:1.2519912719726562 - average time: 0.012519912719726562
