# Python Client

In [1]:
# Import ModelBazaar class.
import thirdai
from thirdai.neural_db import ModelBazaar

# Activate your ThirdAI license.
# The string below is a placeholder: replace it with your own activation key.
thirdai.licensing.activate("YOUR-THIRDAI-ACTIVATION-KEY")



In [2]:
# Create the ModelBazaar client, pointing it at the URL where model_bazaar is hosted.
# The base URL must end with `/api/`, joined to the host by a single slash.
bazaar = ModelBazaar(base_url="http://70.233.60.118/api/")

In [None]:
# Register a new account on Model Bazaar.
# An email verification link is sent as part of sign-up.
bazaar.sign_up(
    email="kartik@thirdai.com",
    password="password",
    username="mj3ai",
)

In [3]:
# Authenticate with the email address and password of your account.
bazaar.log_in(
    email="kartik@thirdai.com",
    password="password",
)

{'status': 'success', 'message': 'Successfully logged in via email', 'data': {'user': {'username': 'kartik', 'email': 'kartik@thirdai.com', 'user_id': '01e7e8d8-ab86-411b-8096-8c20d5068a07'}, 'access_token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MDU0OTE0OTgsImVtYWlsIjoia2FydGlrQHRoaXJkYWkuY29tIn0.K5Ab6gODVL3hB-hJ77VQ2u8b9FgFmkYklb6NIN824Jc', 'verified': True, 'admin_domains': ['thirdai.com']}}


In [4]:
# Extra options handed to `bazaar.train` via its `train_extra_options` argument.
train_extra_options = dict(
    num_samples_per_shard=2600,
    allocation_memory=30000,
    # --- shard-agnostic training params ---
    model_cores=43,
    model_memory=100000,
    csv_id_column="DOC_ID",
    csv_strong_columns=["TITLE"],
    csv_weak_columns=["TEXT"],
    csv_reference_columns=["TITLE", "TEXT"],
    fhr=50000,
    embedding_dim=4096,
    output_dim=50000,
    # Further options, currently disabled:
    # extreme_num_hashes=8,
    # learning_rate=0.001,
    # max_in_memory_batches=2000,
)

In [5]:
# Train a new model from scratch on the listed documents.
# Training can run synchronously or asynchronously (the default); for an
# asynchronous job, wait for completion with `bazaar.await_train`.
# `doc_type` is one of "local" (the default), "nfs" or "s3".
unsupervised_docs = ["/model_bazaar/datasets/unsupervised.csv"]
model = bazaar.train(
    model_name="scifact-checkpointing-check-3",
    docs=unsupervised_docs,
    doc_type="nfs",
    sharded=True,
    is_async=True,
    train_extra_options=train_extra_options,
)

{'status': 'success', 'message': 'Successfully submitted the job', 'data': {'model_id': '9dc50f7e-b8bc-455e-9e1c-20bd7842bb98', 'user_id': '01e7e8d8-ab86-411b-8096-8c20d5068a07'}}


In [6]:
# Extra options handed to `bazaar.supervised_train` via its `train_extra_options` argument.
supervised_train_extra_options = dict(
    allocation_memory=30000,
    # --- shard-agnostic training params ---
    model_cores=40,
    model_memory=50000,
    csv_query_column="query",
    csv_id_column="id",
    csv_id_delimiter=":",
    epochs=5,
    learning_rate=0.0005,
    # Further option, currently disabled:
    # max_in_memory_batches=2000,
)

In [7]:
# Submit a supervised training job; `base_model_id` selects the base model for the job.
supervised_docs = ["/model_bazaar/datasets/trn_supervised.csv"]
supervised_model = bazaar.supervised_train(
    model_name="scifact-supervised-final-1-1",
    docs=supervised_docs,
    doc_type="nfs",
    sharded=True,
    train_extra_options=supervised_train_extra_options,
    base_model_id="cf5bcb58-778f-4d2c-a0f8-c10468cce8fb",
)

{'status': 'success', 'message': 'Successfully submitted the supervised train job', 'data': {'base_model_id': 'cf5bcb58-778f-4d2c-a0f8-c10468cce8fb', 'model_id': '4ab9dbe6-800b-4f77-887f-7f53d3dc7607', 'user_id': '01e7e8d8-ab86-411b-8096-8c20d5068a07'}}


In [None]:
# Blocking call: wait until the training job for `model` has finished.
bazaar.await_train(model)

In [None]:
# Deploy the model. `deploy` returns an `ndb_client` that is used to interact with neural_db.
# Deployment can be synchronous or asynchronous (the default); for an asynchronous
# deployment, wait for it with `bazaar.await_deploy`.
ndb_client = bazaar.deploy(
    model_identifier=model.model_identifier,
    deployment_name="deployment-0",
    is_async=True,
)

In [None]:
# Blocking call: wait until the deployment behind `ndb_client` has finished.
bazaar.await_deploy(ndb_client)

In [None]:
# Add new files to the neural_db model.
new_files = [
    "/Users/mjay/Documents/MACH.pdf",
    "/Users/mjay/Documents/OpenMPIInstall.pdf",
]
ndb_client.insert(files=new_files)

In [None]:
# Search the ndb model, requesting the top 5 results.
# `top_k` is a count, so it is passed as an integer rather than the string "5".
results = ndb_client.search(query="who are the authors of this paper", top_k=5)

# Keep the query text and the returned references; the upvote cell below reuses both.
query_text = results["query_text"]
references = results["references"]
for reference in references:
    print(reference["text"])

In [None]:
# [RLHF] Associate: takes a list of dictionaries, each with 'source' and 'target' keys.
association_pairs = [
    ("authors", "contributors"),
    ("paper", "document"),
]
ndb_client.associate(
    [{"source": source, "target": target} for source, target in association_pairs]
)

In [None]:
# [RLHF] Upvote/Downvote: takes a list of dictionaries, each with 'query_text'
# and 'reference_id' keys. Two of the references from the search are upvoted here.
best_answer, good_answer = references[4], references[2]
ndb_client.upvote(
    [
        {"query_text": query_text, "reference_id": answer["id"]}
        for answer in (best_answer, good_answer)
    ]
)

In [None]:
# Undeploy the model served through `ndb_client`.
bazaar.undeploy(ndb_client)

In [None]:
# Delete the model from Model Bazaar, addressed by its model identifier.
bazaar.delete(model_identifier=model.model_identifier)

In [None]:
# List the models on Model Bazaar that are accessible to the user.
bazaar.list_models()

In [None]:
# List all active deployments.
bazaar.list_deployments()

In [None]:
# Connect to an active deployment and bind the result to `ndb_client`.
# NOTE(review): the identifier appears to follow
# `<username>/<model_name>:<username>/<deployment_name>` — confirm against the client docs.
ndb_client = bazaar.connect(deployment_identifier="mj3ai/model-0:mj3ai/deployment-0")

In [None]:
# Push an NDB model from a local path to Model Bazaar,
# under the given model name and access level.
bazaar.push_model(
    model_name="test-upload-2",
    local_path="/Users/mjay/test.ndb",
    access_level="private",
)

In [None]:
# Pull a model from Model Bazaar to the local machine, addressed by its model identifier.
ndb_model = bazaar.pull_model(model_identifier="mj3ai/model-1")