# NeuralDB Python Client

## Necessary imports and License Activation

In [None]:
# Import ModelBazaar class.
import thirdai
from thirdai.neural_db import ModelBazaar

# Activate your license.
# Replace the placeholder string below with your own ThirdAI activation key.
thirdai.licensing.activate("YOUR-THIRDAI-ACTIVATION-KEY")

## Initialise ModelBazaar

In [None]:
# Create the ModelBazaar client from the URL where model_bazaar is hosted.
# The URL must end with `/api/`.
bazaar = ModelBazaar(
    base_url="http://YOUR-NEURALDB-ENTERPRISE-IP/api/",
)

## New user sign-up

In [None]:
# One-time sign-up for new users on model bazaar; an email verification link is sent.
# The admin is signed up automatically at cluster initialization.
bazaar.sign_up(
    email="your-mail-id@domain.com",
    password="password",
    username="username",
)

## User Login

In [None]:
# Log in with your email address and password.
bazaar.log_in(
    email="your-mail-id@domain.com",
    password="password",
)

## Create and train models on ModelBazaar

Models created on ModelBazaar can be trained as a single model or a mixture of models (preferable in case of large documents). 

### 1. Single Model training

In [None]:
"""
Create a model from scratch on ModelBazaar and train it on the given documents.

The call can be synchronous or asynchronous. `is_async=True` is passed here, so
`bazaar.await_train(model)` is used afterwards to block until training is done.
`doc_type` describes the file location: "local" (default), "nfs" or "s3".
"""
model = bazaar.train(
    model_name="model-1",
    # Paths to the unsupervised documents.
    unsupervised_docs=["/path/to/fileA"],
    # Each entry is a (supervised file path, source id) tuple, where the source id
    # identifies the unsupervised file that the supervised file corresponds to.
    supervised_docs=[("/path/to/fileB", "source_id")],
    test_doc="/path/to/fileC",
    doc_type="local",
    is_async=True,
)

### 2. Mixture of Models training

In [None]:
"""
`train_extra_options` holds the parameters required for sharded training:
    1. num_model_shards      : number of shards the data is divided into; one model trains on each data shard.
    2. allocation_memory     : memory (in MBs) to assign to the data sharding job. (Suggested: 10x data size)
    3. model_cores           : cpu cores to allocate for each model train job.
    4. model_memory          : memory (in MBs) to assign for each train job.
    5. fhr                   : input dimension of an individual model.
    6. embedding_dim         : hidden dimension of an individual model.
    7. output_dim            : output dimension of an individual model.
    8. max_in_memory_batches : number of batches to train in one iteration.
    9. priority              : priority (between 1-100) of the train jobs. A higher value means greater priority. (default: 50)

    * When using .csv documents, the user must provide the required values for the `csv_*` fields.
"""
train_extra_options = dict(
    num_model_shards=10,
    allocation_memory=300_000,
    #   ---shard agnostic training params---
    model_cores=20,
    model_memory=200_000,
    csv_id_column="id",
    csv_strong_columns=["title"],
    csv_weak_columns=["abstract"],
    csv_reference_columns=["title", "abstract"],
    fhr=200_000,
    embedding_dim=4096,
    output_dim=100_000,
    max_in_memory_batches=100,
    priority=50,
)

# Train a mixture of models: `sharded=True` together with the sharding parameters
# in `train_extra_options`. `is_async=True` is passed, so `bazaar.await_train(model)`
# is used afterwards to block until training is done.
model = bazaar.train(
    model_name="mixture-model-1",
    # Same keyword as the single-model example: the documents to train on are
    # passed as `unsupervised_docs`.
    unsupervised_docs=["/model_bazaar/datasets/pubmed/complete_pubmed_dataset.csv"],
    doc_type="nfs",
    sharded=True,
    is_async=True,
    train_extra_options=train_extra_options,
)

## Blocking call to wait for model training

In [None]:
# Block until training of `model` has finished.
bazaar.await_train(model)

## Deploy models from ModelBazaar

In [None]:
# Deploy the trained model. The returned ndb_client is used to query, re-index
# and apply RLHF on the deployment.
# The call can be synchronous or asynchronous; with `is_async=True`, follow up
# with `await_deploy` to wait for the deployment.
ndb_client = bazaar.deploy(
    deployment_name="deployment-1",
    model_identifier=model.model_identifier,
    is_async=True,
)

## Blocking call to wait for deployment.

In [None]:
# Block until the deployment behind `ndb_client` is complete.
bazaar.await_deploy(ndb_client)

## Insert new files to index in the deployed neural_db model.

In [None]:

# Index new files in the deployed model. Each document is described by its
# type, its path and the location the path refers to.
ndb_client.insert(
    documents=[
        dict(document_type="PDF", path="/path/to/temp.pdf", location="local"),
        dict(document_type="CSV", path="/path/to/temp.csv", location="local"),
    ]
)

## Query the NDB model

In [None]:
# Search the ndb model. `top_k` is a count, so it is passed as an integer
# rather than as a string.
results = ndb_client.search(query="who are the authors of this paper", top_k=5)

# The response holds the query text and the list of retrieved references;
# both are reused by the RLHF cells further down.
query_text = results["query_text"]
references = results["references"]
for reference in references:
    print(reference["text"])

## RLHF features:

### 1. Associate

In [None]:
# [RLHF] Associate: pass a list of dictionaries, each with a 'source' and a 'target' key.
ndb_client.associate(
    [
        {"source": source, "target": target}
        for source, target in (
            ("authors", "contributors"),
            ("paper", "document"),
        )
    ]
)

### 2. Upvote/Downvote

In [None]:
# [RLHF] Upvote/Downvote: pass a list of dictionaries, each with a 'query_text'
# and a 'reference_id' key.
# `query_text` and `references` come from the search results obtained earlier.
best_answer = references[4]
good_answer = references[2]
ndb_client.upvote(
    [
        {"query_text": query_text, "reference_id": answer["id"]}
        for answer in (best_answer, good_answer)
    ]
)

## Undeploy models

In [None]:
# Undeploy the deployment that `ndb_client` refers to.
bazaar.undeploy(ndb_client)

## Deletes the model from model bazaar.

In [None]:
# Model identifiers can be looked up with `list_models()`.
# An identifier looks like "model-author/model-name".
bazaar.delete(
    model_identifier=model.model_identifier,
)

## List models and active deployments

In [None]:
# Returns a list of the models accessible to the user on model bazaar.
# Each model contains an attribute `model_identifier` that uniquely identifies the model.
bazaar.list_models()

In [None]:
# Returns a list of all active deployments.
# Each deployment contains an attribute `deployment_identifier` that uniquely identifies the deployment.
bazaar.list_deployments()

## Connect to existing active deployment

In [None]:
# Deployment identifiers can be looked up with `list_deployments()`.
# An identifier looks like "model-author/model-name:deployment-author/deployment-name".
ndb_client = bazaar.connect(
    deployment_identifier="your-deployment-identifier",
)

## Push your local NDB model to model bazaar.

In [None]:
# Upload the NDB model stored at `local_path` to model bazaar under `model_name`.
# `access_level` is set to "private" here.
bazaar.push_model(
    model_name="test-upload",
    local_path="path/local/ndb/model.ndb",
    access_level="private",
)

## Pull NDB model from model bazaar to local.

In [None]:
# Note: the object returned here is your local `ndb_model`, not the remote deployed one.
ndb_model = bazaar.pull_model(
    model_identifier="mj3ai/model-1",
)