In [None]:
# --- Run configuration -------------------------------------------------------

# Credentials / deployment settings, left blank in this template.
# Only authorizationToken is read by the visible code (it is handed to the
# RelevanceAI Client); api_key, region and project are not referenced again
# in the part of the notebook shown here.
api_key = region = authorizationToken = ""
project = "the-office-series"

# Dataset to vectorize and the document fields whose values get encoded.
dataset_id = "the-office-series"
fields = ["episode_title"]

# Encoder selection: model_id picks the model; encode_type is only consulted
# when model_id is 'clip'.
model_id = "mpnet"
encode_type = "text"

In [None]:

# config = {
#   "dataset_id": "advanced_search_example",
#   "model_id": "mpnet",
#   "encode_type": "text",
#   "fields": [
#     "product_title"
#   ],
#   "region": "xxx",
#   "project": "xxx",
#   "api_key": "xxx",
#   "authorizationToken": "xxx:xxx:xxx:xxx"
# }

# Colab form toggle. When False (the default) Python warnings are silenced.
show_warnings_in_logs = False #@param {type:"boolean"}

import json
import warnings

# Honour the toggle above. Previously the filter was installed
# unconditionally, which made the form parameter a no-op.
if not show_warnings_in_logs:
    warnings.filterwarnings('ignore')


print("Installing RelevanceAI ...")

# Imported after the warnings filter is configured so that the filter is
# already in place while the package is being imported.
from relevanceai import Client 
# Authenticate with the token defined earlier in the notebook.
client = Client(token=authorizationToken)

# Build the encoder `enc` for the configured model_id / encode_type.
#
# Each branch only picks the vectorhub base class and its constructor
# arguments; the `Model` subclass is then defined once. The subclass exists
# solely to make `__name__` report the configured model_id.
#
# Unsupported configurations raise immediately instead of silently leaving
# `enc` undefined (or, in a notebook, stale from a previous run).
try:
    encoder_args = ()
    clip_method = None  # name of the Clip2Vec method to expose as `encode`

    if model_id == 'clip':
        clip_methods = {'image_urls': 'encode_image', 'text': 'encode_text'}
        clip_method = clip_methods.get(encode_type.lower())
        if clip_method is None:
            raise ValueError(
                f"Unsupported encode_type {encode_type!r} for model_id 'clip'; "
                f"expected one of {sorted(clip_methods)}."
            )
        from vectorhub.bi_encoders.text_image.torch import Clip2Vec
        encoder_base = Clip2Vec

    elif model_id == 'mpnet':
        from vectorhub.encoders.text.sentence_transformers import SentenceTransformer2Vec
        encoder_base = SentenceTransformer2Vec
        encoder_args = ("all-mpnet-base-v2",)

    elif model_id == 'multiqampnet':
        from vectorhub.encoders.text.sentence_transformers import SentenceTransformer2Vec
        encoder_base = SentenceTransformer2Vec
        encoder_args = ("multi-qa-mpnet-base-dot-v1",)

    elif model_id == 'bit':
        from vectorhub.encoders.image.tfhub import BitMedium2Vec
        encoder_base = BitMedium2Vec

    else:
        raise ValueError(
            f"Unsupported model_id {model_id!r}; expected one of "
            "'clip', 'mpnet', 'multiqampnet', 'bit'."
        )

    class Model(encoder_base):
        @property
        def __name__(self):
            return model_id

    enc = Model(*encoder_args)
    if clip_method is not None:
        # Route the generic `encode` entry point to the image or text encoder.
        enc.encode = getattr(enc, clip_method)

except Exception as e:
    # Surface the problem in the cell output, then re-raise as ValueError
    # with the original exception attached as the cause.
    print(e)
    raise ValueError(f'{str(e)}') from e

print("Finished installing machine learning models and dependencies to vectorize data.")
# enc.__name__ = config['model_id']

import os
import sys
import warnings

# Discard everything written to stderr for the rest of the run, unless the
# user asked to see warnings via the show_warnings_in_logs toggle. Previously
# the redirect was unconditional, so the toggle had no effect.
# The devnull handle is deliberately left open: sys.stderr keeps pointing at
# it from here on.
if not show_warnings_in_logs:
    f = open(os.devnull, 'w')
    sys.stderr = f

import traceback

def encode_documents(docs):
    """Vectorize a batch of documents with the module-level encoder.

    Args:
        docs: Batch of documents. Forwarded unchanged to
            ``enc.encode_documents`` together with the configured ``fields``.

    Returns:
        Whatever ``enc.encode_documents(fields, docs)`` returns.

    Raises:
        Exception: If encoding fails for any reason. The message contains the
            original error text followed by its formatted traceback, and the
            original exception is attached as ``__cause__``.
    """
    try:
        return enc.encode_documents(fields, docs)
    except Exception as e:
        # The traceback is embedded in the message itself, presumably so it
        # stays visible even where stderr output is not shown.
        trc = traceback.format_exc()
        error_message = "Exception during processing: " + str(e) + "\n" + trc
        raise Exception(f"{error_message}") from e

print("Starting to vectorize your data.")

# NOTE(review): presumably a compatibility shim for code that still calls
# logger.warn instead of logger.warning - confirm it is still needed.
client.logger.warn = client.logger.warning

# One 'exists' filter per configured field. The loop variable is named
# field_name so it does not reuse the name `f`, which elsewhere in this
# script refers to the devnull file handle.
exists_filters = [
    {
        'field': field_name,
        'filter_type': 'exists',
        "condition": "==",
        "condition_value": "",
    }
    for field_name in fields
]

# Pull documents from the dataset in chunks of 100, run encode_documents on
# each chunk, and push the results back.
client.pull_update_push(
    dataset_id,
    encode_documents,
    show_progress_bar=True,
    filters=exists_filters,
    select_fields=fields,
    retrieve_chunk_size=100,
)

print("Finished vectorizing your data, you may close this window.")