In [1]:
import requests # type: ignore
from requests.auth import HTTPBasicAuth # type: ignore
import json
import os
from typing import List, Optional
# Host (or host:port) of the platform instance that every helper below talks to.
ip_addr = "localhost"
# Deliberately no trailing slash: each endpoint in this notebook is built as
# f"{BASE_URL}/...", so a trailing slash here put "//" at the start of every
# request path.
BASE_URL = f"http://{ip_addr}"

In [2]:
def signup(username: str, email: str, password: str):
    """
    Register a new account through the email sign-up endpoint.

    The three credentials are sent as a JSON body to
    ``/api/user/email-signup-basic``.

    Returns:
    - The decoded JSON reply (the HTTP status is not inspected).
    """
    credentials = dict(username=username, email=email, password=password)
    reply = requests.post(f"{BASE_URL}/api/user/email-signup-basic", json=credentials)
    return reply.json()

In [3]:
def login(email: str, password: str):
    """
    Log in with HTTP Basic credentials and return the access token.

    Returns:
    - The ``data.access_token`` field of the JSON reply, or ``None`` when the
      reply has no ``data`` entry or no ``access_token`` inside it.
    """
    endpoint = f"{BASE_URL}/api/user/email-login"
    body = requests.get(endpoint, auth=HTTPBasicAuth(email, password)).json()
    return body.get("data", {}).get("access_token")

In [4]:
def list_models(name, token, domain=None, username=None, type=None, sub_type=None, access_level=None):
    """
    Query the model listing endpoint on behalf of an authenticated user.

    Every filter whose value is ``None`` is left out of the query string;
    the remaining ones are forwarded as-is.

    Returns:
    - The decoded JSON reply of ``/api/model/list``.
    """
    candidate_filters = (
        ("name", name),
        ("domain", domain),
        ("username", username),
        ("type", type),
        ("sub_type", sub_type),
        ("access_level", access_level),
    )
    active_filters = {key: value for key, value in candidate_filters if value is not None}

    reply = requests.get(
        f"{BASE_URL}/api/model/list",
        headers={"Authorization": f"Bearer {token}"},
        params=active_filters,
    )
    return reply.json()


In [5]:
def delete_model(model_identifier, token):
    """
    Ask the platform to delete a model.

    ``model_identifier`` has the form ``username/modelname`` and is sent as a
    query parameter of a POST to ``/api/model/delete``.

    Returns:
    - The decoded JSON reply.
    """
    reply = requests.post(
        f"{BASE_URL}/api/model/delete",
        headers={"Authorization": f"Bearer {token}"},
        params={"model_identifier": model_identifier},
    )
    return reply.json()


In [6]:
def create_retriever_model(
    model_name: str,
    token: str,
    base_model_identifier: Optional[str] = None,
    files: Optional[List[str]] = None,  # Local file paths
    s3_urls: Optional[List[str]] = None,  # S3 URLs
    nfs_paths: Optional[List[str]] = None,  # NFS paths
):
    """
    Submits an NDB retriever training job with local, S3, and NFS files.

    Every path from the three sources is listed under ``unsupervised_files``
    in the ``file_info`` form part, tagged with its location. Local paths that
    exist on disk are additionally uploaded as ``files`` parts.

    Parameters:
    - model_name: The name of the model.
    - token: Authorization token.
    - base_model_identifier: (Optional) The identifier of the base model to use.
    - files: (Optional) List of local file paths.
    - s3_urls: (Optional) List of S3 URLs for files.
    - nfs_paths: (Optional) List of NFS paths for files.

    Returns:
    - The ``model_id`` from the reply when the server answers 200; ``None``
      when the server answers with another status or when the request (or
      decoding its reply) raises. Both outcomes are reported with ``print``.
    """
    url = f"{BASE_URL}/api/train/ndb"
    headers = {"Authorization": f"Bearer {token}"}

    # Describe every input document, keeping the local -> s3 -> nfs order.
    sources = (("local", files), ("s3", s3_urls), ("nfs", nfs_paths))
    file_info = {
        "unsupervised_files": [
            {"path": path, "location": location}
            for location, paths in sources
            for path in (paths or [])
        ],
        "supervised_files": [],
        "test_files": [],
    }

    # Handles opened for upload are tracked so they can be closed once the
    # request is over, whether it succeeded, failed or raised.
    open_handles = []
    try:
        upload_files = []
        for file_path in files or []:
            # Paths that are not regular files stay in file_info but are not uploaded.
            if os.path.isfile(file_path):
                handle = open(file_path, "rb")
                open_handles.append(handle)
                upload_files.append(("files", handle))

        upload_files.append(("file_info", (None, json.dumps(file_info), "application/json")))

        try:
            # Send the POST request to the /ndb endpoint
            response = requests.post(
                url,
                headers=headers,
                params={
                    "model_name": model_name,
                    "base_model_identifier": base_model_identifier,
                },
                files=upload_files,
            )

            if response.status_code == 200:
                print("Model training job submitted successfully.")
                body = response.json()
                print(body)
                return body["data"]["model_id"]

            print("Failed to submit the model training job.")
            print(response.json())

        except Exception as e:
            # Best-effort helper: report the problem and fall through to None.
            print(f"An error occurred: {str(e)}")
    finally:
        for handle in open_handles:
            handle.close()

In [7]:
def create_ner_model(
    model_name: str,
    token: str,
    base_model_identifier: Optional[str] = None,
    files: Optional[List[str]] = None,  # Local file paths
    model_options: Optional[dict] = None  # Additional model options for UDT training
):
    """
    Submits a UDT training job with local files.

    Every entry of ``files`` is listed under ``supervised_files`` in the
    ``file_info`` form part with location ``"local"``; entries that exist on
    disk are additionally uploaded as ``files`` parts. ``model_options``, when
    given, is sent as a JSON ``model_options`` form part.

    Parameters:
    - model_name: The name of the model.
    - token: Authorization token.
    - base_model_identifier: (Optional) The identifier of the base model to use.
    - files: (Optional) List of local file paths.
    - model_options: (Optional) Dictionary of additional model options.

    Returns:
    - The ``model_id`` from the reply when the server answers 200; ``None``
      when the server answers with another status or when the request (or
      decoding its reply) raises. Both outcomes are reported with ``print``.
    """
    url = f"{BASE_URL}/api/train/udt"
    headers = {"Authorization": f"Bearer {token}"}

    file_info = {
        "supervised_files": [
            {"path": file_path, "location": "local"} for file_path in (files or [])
        ],
        "test_files": [],
    }

    # Handles opened for upload are tracked so they can be closed once the
    # request is over, whether it succeeded, failed or raised.
    open_handles = []
    try:
        upload_files = []
        for file_path in files or []:
            # Paths that are not regular files stay in file_info but are not uploaded.
            if os.path.isfile(file_path):
                handle = open(file_path, "rb")
                open_handles.append(handle)
                upload_files.append(("files", handle))

        upload_files.append(("file_info", (None, json.dumps(file_info), "application/json")))

        if model_options:
            upload_files.append(
                ("model_options", (None, json.dumps(model_options), "application/json"))
            )

        try:
            # Send the POST request to the /udt endpoint
            response = requests.post(
                url,
                headers=headers,
                params={
                    "model_name": model_name,
                    "base_model_identifier": base_model_identifier,
                },
                files=upload_files,
            )

            if response.status_code == 200:
                print("Model training job submitted successfully.")
                body = response.json()
                print(body)
                return body["data"]["model_id"]

            print("Failed to submit the model training job.")
            print(response.json())

        except Exception as e:
            # Best-effort helper: report the problem and fall through to None.
            print(f"An error occurred: {str(e)}")
    finally:
        for handle in open_handles:
            handle.close()


In [8]:
# Function to wait for training to complete
import time


def await_train(token, model_identifier):
    """
    Block until the training job of ``model_identifier`` finishes.

    Polls ``/api/train/status`` and sleeps 10 seconds between polls while the
    reported ``train_status`` is neither ``"complete"`` nor ``"failed"``.
    There is no upper bound on the number of polls.

    Raises:
    - Exception: when a poll does not answer 200, or when the reported status
      is ``"failed"``.
    """
    endpoint = f"{BASE_URL}/api/train/status"
    auth_header = {"Authorization": f"Bearer {token}"}

    while True:
        reply = requests.get(endpoint, params={"model_identifier": model_identifier}, headers=auth_header)

        if reply.status_code != 200:
            raise Exception(f"Failed to get training status: {reply.status_code}, {reply.text}")

        train_status = reply.json()["data"]["train_status"]

        if train_status == "failed":
            raise Exception("Training failed.")
        if train_status == "complete":
            print("Training completed successfully.")
            return

        print("Training in progress...")
        time.sleep(10)  # fixed pause before the next poll

In [9]:
# Function to deploy the trained model
def deploy_model(token, model_identifier):
    """
    Request deployment of a trained model.

    POSTs to ``/api/deploy/run`` with ``model_identifier`` as a query
    parameter. The HTTP status is not checked; the reply is expected to carry
    ``data.model_id``.

    Returns:
    - The ``data.model_id`` value of the reply, which is also printed.
    """
    reply = requests.post(
        f"{BASE_URL}/api/deploy/run",
        headers={"Authorization": f"Bearer {token}"},
        params={"model_identifier": model_identifier},
    )

    deployment_id = reply.json()["data"]["model_id"]
    print(f"Model deployed successfully. Deployment ID: {deployment_id}")
    return deployment_id

In [10]:
# Function to wait for deployment to complete
def await_deploy(token, model_identifier):
    """
    Block until the deployment of ``model_identifier`` finishes.

    Polls ``/api/deploy/status`` and sleeps 10 seconds between polls while the
    reported ``deploy_status`` is neither ``"complete"`` nor ``"failed"``.
    There is no upper bound on the number of polls.

    Raises:
    - Exception: when a poll does not answer 200, or when the reported status
      is ``"failed"``.
    """
    endpoint = f"{BASE_URL}/api/deploy/status"
    auth_header = {"Authorization": f"Bearer {token}"}

    while True:
        reply = requests.get(endpoint, params={"model_identifier": model_identifier}, headers=auth_header)

        if reply.status_code != 200:
            raise Exception(f"Failed to get deployment status: {reply.status_code}, {reply.text}")

        deploy_status = reply.json()["data"]["deploy_status"]

        if deploy_status == "failed":
            raise Exception("Deployment failed.")
        if deploy_status == "complete":
            print("Deployment completed successfully.")
            return

        print("Deployment in progress...")
        time.sleep(10)  # fixed pause before the next poll

In [11]:
def ndb_deployment_query(token, model_id, query):
    """
    Run a retrieval query against a deployed NDB model.

    POSTs ``query`` with ``top_k`` fixed at 5 and empty constraints to the
    model's ``/predict`` endpoint, then prints the full reply.

    Returns:
    - The ``data.references`` entry of the reply.

    Raises:
    - Exception: when the endpoint does not answer 200.
    """
    payload = {
        "base_params": {"query": query, "top_k": 5},
        "ndb_params": {"constraints": {}},
    }

    reply = requests.post(
        f"{BASE_URL}/{model_id}/predict",
        json=payload,
        headers={"Authorization": f"Bearer {token}"},
    )

    if reply.status_code != 200:
        raise Exception(f"Query failed: {reply.status_code}, {reply.text}")

    results = reply.json()
    print(f"Query results: {results}")
    return results["data"]["references"]

In [12]:
def ndb_upvote_reference(model_id: str, token, query: str, reference_id: str):
    """
    Send one (query, reference) upvote to a deployed NDB model.

    POSTs a single-element ``text_id_pairs`` list to the model's ``/upvote``
    endpoint and prints the decoded JSON reply. Returns nothing.
    """
    upvote = {"query_text": query, "reference_id": reference_id}

    reply = requests.post(
        f"{BASE_URL}/{model_id}/upvote",
        json={"text_id_pairs": [upvote]},
        headers={"Authorization": f"Bearer {token}"},
    )

    print(reply.json())

In [13]:
def ndb_associate_keywords(model_id: str, token, source: str, target: str):
    """
    Send one source/target text association to a deployed NDB model.

    POSTs a single-element ``text_pairs`` list to the model's ``/associate``
    endpoint and prints the decoded JSON reply. Returns nothing.
    """
    association = {"source": source, "target": target}

    reply = requests.post(
        f"{BASE_URL}/{model_id}/associate",
        json={"text_pairs": [association]},
        headers={"Authorization": f"Bearer {token}"},
    )

    print(reply.json())

In [14]:
def create_doc_dict(path: str, doc_type: str):
    """
    Creates a document dictionary for different document types.

    The document type is derived from the file extension of ``path``; the
    extension is matched case-insensitively, so ``report.PDF`` is handled the
    same way as ``report.pdf``.

    Parameters:
    path (str): Path to the document file.
    doc_type (str): Type of the document location.

    Returns:
    dict[str, str]: Dictionary with the keys ``document_type`` ("PDF", "CSV"
    or "DOCX"), ``path`` (unchanged) and ``location`` (``doc_type``).

    Raises:
    Exception: If the file extension is not one of .pdf, .csv or .docx.
    """
    # Extension -> document_type label; extend this table to support more formats.
    known_types = {".pdf": "PDF", ".csv": "CSV", ".docx": "DOCX"}

    _, ext = os.path.splitext(path)
    document_type = known_types.get(ext.lower())
    if document_type is None:
        raise Exception(f"Please add a map from {ext} to document dictionary.")

    return {"document_type": document_type, "path": path, "location": doc_type}

def ndb_insert_document(model_id: str, token, local_file: str):
    """
    Upload one local document into a deployed NDB model.

    The file is sent as a ``files`` form part together with a JSON
    ``documents`` part describing it (see ``create_doc_dict``). The decoded
    JSON reply is printed. Returns nothing.

    Raises:
    Exception: If ``create_doc_dict`` does not support the file's extension.
    OSError: If ``local_file`` cannot be opened for reading.
    """
    headers = {"Authorization": f"Bearer {token}"}
    query_url = f"{BASE_URL}/{model_id}/insert"

    # Build the metadata before touching the file so that an unsupported
    # extension fails without an open handle left behind.
    documents = [create_doc_dict(local_file, "local")]

    # The context manager closes the handle once the upload is over, even if
    # the request raises.
    with open(local_file, "rb") as handle:
        files = [
            ("files", handle),
            ("documents", (None, json.dumps(documents), "application/json")),
        ]

        response = requests.post(
            query_url,
            files=files,
            headers=headers,
        )

        print(response.json())

In [15]:
def ndb_sources(model_id: str, token):
    """
    Print the reply of a deployed NDB model's ``/sources`` endpoint.

    Issues an authenticated GET request and prints the decoded JSON reply.
    Returns nothing.
    """
    reply = requests.get(
        f"{BASE_URL}/{model_id}/sources",
        headers={"Authorization": f"Bearer {token}"},
    )

    print(reply.json())

In [16]:
def ndb_delete_document(model_id: str, token, source_id):
    """
    Ask a deployed NDB model to delete one source.

    POSTs a single-element ``source_ids`` list to the model's ``/delete``
    endpoint and prints the decoded JSON reply. Returns nothing.
    """
    reply = requests.post(
        f"{BASE_URL}/{model_id}/delete",
        json={"source_ids": [source_id]},
        headers={"Authorization": f"Bearer {token}"},
    )

    print(reply.json())

In [17]:
import asyncio
import websockets

# WebSocket endpoint that generate_text_with_openai connects to. It is built
# from the same `ip_addr` host as BASE_URL, but with the ws:// scheme.
WS_URL = f"ws://{ip_addr}/llm-dispatch/generate"

async def generate_text_with_openai(query, model, api_key):
    """
    Stream a completion from the OpenAI provider over the WebSocket endpoint.

    One JSON request is sent after connecting to ``WS_URL``; the ``content``
    of every received frame is concatenated until a frame has a truthy
    ``end_of_stream``.

    Parameters:
    - query: The input text to generate from.
    - model: The model name to use for generation (e.g., 'gpt-3.5-turbo').
    - api_key: Your OpenAI API key.

    Returns:
    - The concatenated generated text, or ``None`` if a frame reports
      ``status == "error"`` (its ``detail`` is printed first).
    """
    async with websockets.connect(WS_URL) as websocket:
        request = {
            "query": query,
            "model": model,
            "provider": "openai",  # this helper always targets OpenAI
            "key": api_key,
        }
        await websocket.send(json.dumps(request))

        generated = ""
        stream_finished = False
        while not stream_finished:
            frame = json.loads(await websocket.recv())

            if frame["status"] == "error":
                print("Error:", frame["detail"])
                return None

            generated = generated + frame["content"]
            stream_finished = bool(frame.get("end_of_stream"))

        return generated


In [18]:
def ner_model_predict(token, model_id, query):
    """
    Run a prediction on a deployed model's ``/predict`` endpoint.

    POSTs ``{"query": query, "top_k": 1}`` as JSON and prints the full reply.

    Returns:
    - The ``data`` entry of the reply.

    Raises:
    - Exception: when the endpoint does not answer 200.
    """
    reply = requests.post(
        f"{BASE_URL}/{model_id}/predict",
        json={"query": query, "top_k": 1},
        headers={"Authorization": f"Bearer {token}"},
    )

    if reply.status_code != 200:
        raise Exception(f"Query failed: {reply.status_code}, {reply.text}")

    results = reply.json()
    print(f"Query results: {results}")
    return results["data"]
    
    

In [23]:
def extract(token, ner_model_id, references):
    """
    Replace tagged words in the reference texts with numbered placeholders.

    Each reference's ``text`` is whitespace-normalised and sent to
    ``ner_model_predict``. Every word whose top prediction is not ``"O"`` gets
    a placeholder such as ``<PER_0>``; a given word maps to the same
    placeholder in every reference, and the tag kept for a word is the one
    with the highest score seen.

    Parameters:
    - token: Authorization token forwarded to ``ner_model_predict``.
    - ner_model_id: Identifier of the deployed NER model.
    - references: Iterable of dicts that each have a ``"text"`` entry.

    Returns:
    - A tuple ``(redacted_text, inverse_map)``: the redacted references joined
      by blank lines, and a dict mapping each placeholder back to the original
      word.
    """
    # Normalise once so that tagging and redaction split the text identically.
    normalized_texts = [" ".join(reference["text"].split()) for reference in references]

    # word -> ("<TAG>", score) for the best non-"O" prediction seen so far.
    best_tag = {}
    for text in normalized_texts:
        # assumes the model returns one entry per whitespace-separated word, whose
        # first element is a (label, score) pair — TODO confirm against the API
        predicted_tags = ner_model_predict(token, ner_model_id, text)
        # The loop variable must not be called `token`: that would overwrite the
        # auth token used for the next reference's request.
        for i, word in enumerate(text.split()):
            top_prediction = predicted_tags[i][0]
            label, score = top_prediction[0], top_prediction[1]
            if label == "O":
                continue
            if word not in best_tag or best_tag[word][1] < score:
                best_tag[word] = (f"<{label}>", score)

    # Number the placeholders per tag, in the order the words were first seen.
    next_index = {}
    word_to_placeholder = {}
    inverse_map = {}
    for word, (tag, _score) in best_tag.items():
        index = next_index.get(tag, 0)
        placeholder = f"{tag[:-1]}_{index}>"  # "<PER>" -> "<PER_0>"
        word_to_placeholder[word] = placeholder
        inverse_map[placeholder] = word
        next_index[tag] = index + 1

    output_text = [
        " ".join(word_to_placeholder.get(word, word) for word in text.split())
        for text in normalized_texts
    ]

    return "\n\n".join(output_text), inverse_map

In [20]:
import re


def strip_non_alphanumeric(word):
    """
    Trim leading and trailing punctuation from ``word``.

    A run of characters at either end is removed when none of them is a
    letter, digit, underscore, ``<``, ``>`` or whitespace. Characters in the
    middle of the string are never touched.
    """
    edge_punctuation = re.compile(r'^[^a-zA-Z0-9_<>\s]+|[^a-zA-Z0-9_<>\s]+$')
    return edge_punctuation.sub('', word)
def restore(text, tag_to_token):
    """
    Put the original words back in place of their placeholders.

    Each whitespace-separated word is matched against ``tag_to_token`` after
    its surrounding punctuation is trimmed with ``strip_non_alphanumeric``.
    Only the placeholder itself is substituted: punctuation around it, and
    every word that is not a placeholder, is kept exactly as written. Runs of
    whitespace are collapsed to single spaces.

    Parameters:
    - text: Text that may contain placeholders such as ``<PER_0>``; ``None``
      or an empty string yields an empty string.
    - tag_to_token: Dict mapping each placeholder to the original word.

    Returns:
    - The text with every recognised placeholder replaced.
    """
    if not text:
        # generate_text_with_openai returns None on error; nothing to restore.
        return ""

    restored_words = []
    for word in text.split():
        core = strip_non_alphanumeric(word)
        if core in tag_to_token:
            # `core` is `word` minus leading/trailing punctuation, so its first
            # occurrence marks where that punctuation ends.
            start = word.find(core)
            restored_words.append(word[:start] + tag_to_token[core] + word[start + len(core):])
        else:
            restored_words.append(word)
    return " ".join(restored_words)

In [None]:
# Usage
# End-to-end walkthrough, step 1: log in and submit the NDB retriever training job.
# NOTE(review): the credentials and the absolute local path below are hard-coded;
# replace them with values for your own deployment before running.

token = login(email="yash@thirdai.com", password="password")

# create_retriever_model returns None when submission fails, so ndb_model_id may be None here.
ndb_model_id = create_retriever_model("ndb_model", token=token, files=["/Users/yashwanthadunukota/ThirdAI-Platform/data/scifact/insert.pdf"])

class Token:
    """Constants used to build the ``udt_options`` passed to ``create_ner_model``."""

    # Value sent as "udt_sub_type".
    sub_type: str = "token"
    # Value sent as "target_labels".
    target_labels: list[str] = ["PER", "ORG"]

    # Value sent as "source_column".
    query_column: str = "source"
    # Value sent as "target_column".
    id_column: str = "target"

# Step 2: submit the NER (UDT token) training job, configured from the Token constants.
model_options = {
    "udt_options": {
        "udt_sub_type": Token.sub_type,
        "target_labels": Token.target_labels,
        "source_column": Token.query_column,
        "target_column": Token.id_column,
    }
}
# NOTE(review): hard-coded absolute local path; create_ner_model returns None when submission fails.
token_model_id = create_ner_model("ner_model", token=token, files=["/Users/yashwanthadunukota/ThirdAI-Platform/data/token/ner.csv"], model_options=model_options)

# Step 3: block until both training jobs finish (each call polls every 10 seconds).
# NOTE(review): the "yash/..." identifiers are hard-coded to one username; they must
# match the account used in login() and the model names submitted above.
await_train(token=token, model_identifier="yash/ndb_model")
await_train(token=token, model_identifier="yash/ner_model")

# Step 4: request deployment of both models (the returned ids are not used here).
deploy_model(token=token, model_identifier="yash/ndb_model")
deploy_model(token=token, model_identifier="yash/ner_model")

# Step 5: block until both deployments report "complete".
await_deploy(token=token, model_identifier="yash/ndb_model")
await_deploy(token=token, model_identifier="yash/ner_model")


async def generate_response(query, token, api_key=""):
    """
    Answer ``query`` with retrieval, redaction, generation and restoration.

    Steps: retrieve references from the deployed NDB model, replace tagged
    words in them with placeholders (``extract``), ask the LLM for an answer,
    then substitute the original words back into the answer (``restore``) and
    print it.

    Relies on the notebook-level globals ``ndb_model_id`` and
    ``token_model_id``.

    Parameters:
    - query: The user question.
    - token: Authorization token for the platform endpoints.
    - api_key: OpenAI API key forwarded to ``generate_text_with_openai``.

    Returns:
    - None. The restored answer is printed; nothing is printed here when the
      generation step reports an error.
    """
    references = ndb_deployment_query(token=token, model_id=ndb_model_id, query=query)

    # extract returns the placeholder -> original-word map as its second value.
    context, tag_to_token = extract(token=token, ner_model_id=token_model_id, references=references)

    # NOTE(review): `references` (the unredacted retrieval results) is interpolated
    # into the prompt next to the redacted `context` — confirm this is intended.
    response = await generate_text_with_openai(
        query=f"query: {query}, answers: {references}, context: {context}",
        model="gpt-3.5-turbo",  # Replace with the specific OpenAI model you want to use
        api_key=api_key  # Replace with your actual OpenAI API key
    )

    if response is None:
        # generate_text_with_openai already printed the error and returned None;
        # there is no text to restore.
        return

    restored_text = restore(response, tag_to_token)

    print(restored_text)


In [None]:
# Top-level `await` relies on the notebook's support for awaiting in a cell; in a
# plain script, wrap the call in asyncio.run(...) instead.
# NOTE(review): api_key is an empty string here — supply a real key before running.
await generate_response(query="alice in wonderland",token=token, api_key="")