In [1]:
import requests
from requests.auth import HTTPBasicAuth
import json
import os
from typing import List, Dict, Optional
import time
import re
import uuid

# Root URL of the deployment, written without a trailing slash: the helpers in this
# file build endpoints as f"{BASE_URL}/<path>", so a trailing slash would yield "//".
BASE_URL = "http://34.236.171.80"

In [2]:
def login(email: str, password: str):
    """
    Authenticate against the email-login endpoint using HTTP basic auth.
    Parameters:
    - email: Account email, sent as the basic-auth username.
    - password: Account password, sent as the basic-auth password.
    Returns the "access_token" found under "data" in the JSON reply, or None
    when either key is missing from the reply.
    """
    credentials = HTTPBasicAuth(email, password)
    reply = requests.get(f"{BASE_URL}/api/user/email-login", auth=credentials)
    payload = reply.json()
    return payload.get("data", {}).get("access_token")

def list_models(
    name: str, token: str, domain=None, username=None, type=None, sub_type=None, access_level=None
):
    """
    Query the model listing endpoint on behalf of an authenticated user.
    Parameters:
    - name: Model name filter.
    - token: Authorization token from login.
    - domain, username, type, sub_type, access_level: Optional filters; a filter
      left as None is not sent to the server.
    Returns:
    - The decoded JSON body of the response.
    """
    candidate_filters = (
        ("name", name),
        ("domain", domain),
        ("username", username),
        ("type", type),
        ("sub_type", sub_type),
        ("access_level", access_level),
    )
    # Only forward the filters the caller actually supplied.
    active_filters = {key: value for key, value in candidate_filters if value is not None}
    reply = requests.get(
        f"{BASE_URL}/api/model/list",
        headers={"Authorization": f"Bearer {token}"},
        params=active_filters,
    )
    return reply.json()

In [3]:
def query_retrieval_model(model_id: str, query: str, token: str, top_k: int = 5):
    """
    Retrieves the top_k most relevant references to the query from the deployed model.
    Parameters:
    - model_id: Identifier of the deployed retrieval model; it is used as the
      first path segment of the request URL.
    - query: The query to search for.
    - token: Authorization token from login
    - top_k: Number of references to request (defaults to 5, the value this
      function previously hard-coded).
    Returns the value stored under data -> references in the JSON response.
    Raises Exception if the server does not answer with HTTP 200.
    """
    headers = {"Authorization": f"Bearer {token}"}
    query_url = f"{BASE_URL}/{model_id}/search"
    response = requests.post(
        query_url,
        json={"query": query, "top_k": top_k},
        headers=headers,
    )
    # Surface the status code and body so a failed query is easy to diagnose.
    if response.status_code != 200:
        raise Exception(f"Query failed: {response.status_code}, {response.text}")
    return response.json()["data"]["references"]

def query_sentiment_model(model_id: str, query: str, token: str, top_k: int = 5):
    """
    Classifies the sentiment of a piece of text with the deployed model.
    Parameters:
    - model_id: Identifier of the deployed sentiment model; it is used as the
      first path segment of the request URL.
    - query: The text to classify.
    - token: Authorization token from login
    - top_k: Maximum number of classes to request (defaults to 5, the value
      this function previously hard-coded).
    Returns the value stored under data -> predicted_classes in the JSON
    response. In the sample run recorded in this file that is a list of
    [label, score] pairs, e.g. [['negative', 0.93], ['positive', 0.04], ...].
    Raises Exception if the server does not answer with HTTP 200.
    """
    headers = {"Authorization": f"Bearer {token}"}
    query_url = f"{BASE_URL}/{model_id}/predict"
    response = requests.post(
        query_url,
        json={"text": query, "top_k": top_k},
        headers=headers,
    )
    # Surface the status code and body so a failed query is easy to diagnose.
    if response.status_code != 200:
        raise Exception(f"Query failed: {response.status_code}, {response.text}")
    return response.json()["data"]["predicted_classes"]

In [4]:
def token_classifier_predict(model_id: str, query: str, token: str, top_k=1):
    """
    Ask a deployed token-classification (NER) model to tag a passage.
    Parameters:
    - model_id: Identifier of the deployed model; it is used as the first path
      segment of the request URL.
    - query: The passage to tag.
    - token: Authorization token from login
    - top_k: How many tags to request for each token.
    Returns the "data" object of the JSON response. In the sample run recorded
    in this file it looks like:
    {
        "query_text": "the passage that was sent",
        "tokens": ["first", "second", ...],
        "predicted_tags": [["TAG_FOR_FIRST_TOKEN"], ["TAG_FOR_SECOND_TOKEN"], ...]
    }
    Raises Exception if the server does not answer with HTTP 200.
    """
    reply = requests.post(
        f"{BASE_URL}/{model_id}/predict",
        json={"text": query, "top_k": top_k},
        headers={"Authorization": f"Bearer {token}"},
    )
    if reply.status_code == 200:
        return reply.json()["data"]
    # Anything other than 200 is reported together with the raw response body.
    raise Exception(f"Query failed: {reply.status_code}, {reply.text}")

def obfuscate_pii(token_classifier_model_id: str, text_chunks: List[str], auth_token: str):
    """
    Obfuscates PII in the given texts by replacing every word the NER model
    flags with a numbered placeholder such as <NAME_0>.
    Parameters:
    - token_classifier_model_id: ID of the deployed token classifier, passed to token_classifier_predict.
    - text_chunks: A list of strings containing the text to obfuscate.
    - auth_token: Authorization token from login
    Returns a tuple containing:
    - A list of strings (one per input chunk, whitespace collapsed to single
      spaces) with the flagged words replaced by placeholders.
    - A dictionary mapping each placeholder back to the original word.
    """
    # Collapse whitespace once; the same normalized text is sent to the model
    # and later rewritten. Assumes the model returns one tag list per
    # whitespace-separated word, in order -- TODO confirm against the service.
    normalized_chunks = [" ".join(text.split()) for text in text_chunks]

    # Pass 1: remember the tag of every distinct word flagged as PII.
    token_to_tag = {}
    for text in normalized_chunks:
        if not text:
            # Nothing to classify; avoid a pointless request.
            continue
        # Argument order is (model_id, query, token).
        prediction = token_classifier_predict(token_classifier_model_id, text, auth_token)
        # zip stops at the shorter sequence, so a tag list that is shorter than
        # the word list cannot cause an IndexError.
        for word, tags in zip(text.split(), prediction["predicted_tags"]):
            tag = tags[0]
            if tag != "O" and word not in token_to_tag:
                token_to_tag[word] = tag

    # Pass 2: number the placeholders per tag (<NAME_0>, <NAME_1>, ...) so that
    # distinct words sharing a tag stay distinguishable and reversible.
    tag_counts = {}
    token_to_placeholder = {}
    inverse_map = {}
    for word, tag in token_to_tag.items():
        index = tag_counts.get(tag, 0)
        tag_counts[tag] = index + 1
        placeholder = f"<{tag}_{index}>"
        token_to_placeholder[word] = placeholder
        inverse_map[placeholder] = word

    # Pass 3: rewrite every chunk, swapping flagged words for their placeholder.
    output_text = [
        " ".join(token_to_placeholder.get(word, word) for word in text.split())
        for text in normalized_chunks
    ]
    return output_text, inverse_map

def restore_pii(text: str, tag_to_token: Dict[str, str]):
    """
    Restores the PII in the text by replacing the placeholders with the original tokens.
    Parameters:
    - text: A string containing the obfuscated PII information.
    - tag_to_token: A dictionary containing the mapping of obfuscated tokens to original tokens.

    Returns a string containing the restored PII information. Words are
    re-joined with single spaces. Punctuation attached to a word (for example
    the "!" in "<NAME_0>!") is kept, and words that are not placeholders are
    returned unchanged.
    """
    restored_text = []
    for word in text.split():
        # Look the word up without its leading/trailing punctuation so that
        # "<NAME_0>." still matches the "<NAME_0>" key.
        core = strip_non_alphanumeric(word)
        if core and core in tag_to_token:
            # core is a contiguous slice of word; swap only that slice so the
            # surrounding punctuation survives.
            start = word.index(core)
            word = word[:start] + tag_to_token[core] + word[start + len(core):]
        restored_text.append(word)
    return " ".join(restored_text)


# Leading or trailing run of characters that are not letters, digits, "_", "<", ">"
# or whitespace. Compiled once because it is applied to every word.
_EDGE_PUNCTUATION = re.compile(r"^[^a-zA-Z0-9_<>\s]+|[^a-zA-Z0-9_<>\s]+$")


def strip_non_alphanumeric(word):
    """
    Removes leading and trailing punctuation from a word.
    Characters that are letters, digits, "_", "<", ">" or whitespace are kept,
    so placeholders such as "<NAME_0>" pass through intact.
    """
    return _EDGE_PUNCTUATION.sub("", word)

In [5]:
def chat(model_id: str, user_input: str, token: str, session_id: Optional[str] = None):
    """
    Sends a chat request to the /chat endpoint of a deployed model.
    Parameters:
    - model_id: Identifier of the deployed model; it is used as the first path
      segment of the request URL.
    - user_input: The message or query from the user.
    - token: Authorization token from login.
    - session_id: (Optional) Session ID for maintaining conversation context.
      It is sent as null when omitted.
    Returns:
    - The decoded JSON body of the chat API response.
    Raises Exception if the server does not answer with HTTP 200.
    """
    chat_url = f"{BASE_URL}/{model_id}/chat"
    headers = {"Authorization": f"Bearer {token}"}
    payload = {
        "user_input": user_input,
        "session_id": session_id,
        "provider": "on-prem",
    }
    response = requests.post(chat_url, json=payload, headers=headers)
    # Surface the status code and body so a failed request is easy to diagnose.
    if response.status_code != 200:
        raise Exception(f"Chat request failed: {response.status_code}, {response.text}")
    return response.json()

In [6]:
# Identifiers of the deployed models used by the cells below.
retrieval_model_id = "250a9631-9627-47c8-bbdc-30ada92ef5f9"  # passed to query_retrieval_model and chat
token_model_id = "40070a21-b6f3-4607-9289-3f6218af9597"  # passed to token_classifier_predict
sentiment_model_id = "ac58ccaf-8031-433a-9ab5-db20af83b978"  # passed to query_sentiment_model

In [7]:
# Log in and keep the access token; the query helpers send it as a Bearer token.
# NOTE(review): the credentials are hard-coded in the notebook.
token = login("admin@thirdai.com", "password")

In [8]:
# Run one sample question through the retrieval model and show the raw references.
results = query_retrieval_model(retrieval_model_id, "how to upgrade ios version", token)

print(results)

[{'id': 9, 'text': "mobile plans home internet & tv packages and much more! | du why have my monthly recurring charges increased? * if there is an increase in your monthly recurring charges it could be any of the following reasons: o you've upgraded your plan to another plan with a higher monthly recurring charge. o you've purchased additional recurring or one-time bundles on your plan such as data minutes roaming bundles etc. o you've purchased a new device with an instalment plan * please refer to the bill guide to understand more about each component in your monthly du bill.", 'context': '', 'source': '/model_bazaar/models/250a9631-9627-47c8-bbdc-30ada92ef5f9/model.ndb/documents/3bbc2f6e-adaf-4d75-9e39-e5bba6b3f1b4/du_faqs.pdf', 'metadata': {'highlight': '{1: [12, 13, 14], 2: [0, 1, 2, 3, 4, 5, 6, 7]}', 'page': '1'}, 'source_id': '7f42a63a-2083-48da-b874-4a920938980d', 'score': 1.0522379875183105}, {'id': 64, 'text': 'you can register your gcc mastercard or visa credit card through:

In [9]:
# Tag a sample sentence containing a name and a phone number with the token classifier.
token_tags = token_classifier_predict(token_model_id, "my name is david and my phone number is 9728172948", token)

print(token_tags)

{'query_text': 'my name is david and my phone number is 9728172948', 'tokens': ['my', 'name', 'is', 'david', 'and', 'my', 'phone', 'number', 'is', '9728172948'], 'predicted_tags': [['O'], ['O'], ['O'], ['NAME'], ['O'], ['O'], ['O'], ['O'], ['O'], ['PHONENUMBER']]}


In [10]:
# Classify a sample sentence; the bare name on the last line makes the notebook display the result.
sentiment_result = query_sentiment_model(sentiment_model_id, "I hate this movie", token)
sentiment_result

[['negative', 0.9343262314796448],
 ['positive', 0.041376352310180664],
 ['neutral', 0.024297324940562248]]

In [11]:
# End-to-end smoke test: log in, then call each of the three models once.
token = login("admin@thirdai.com", "password")

results = query_retrieval_model(retrieval_model_id, "how to upgrade ios version", token)

# results is a list of dictionaries, one per reference
# results[0].keys()
# dict_keys(['id', 'text', 'context', 'source', 'metadata', 'source_id', 'score'])

token_tags = token_classifier_predict(token_model_id, "my name is david and my phone number is 9728172948", token)
# {'query_text': 'my name is david and my phone number is 9728172948', 'tokens': ['my', 'name', 'is', 'david', 'and', 'my', 'phone', 'number', 'is', '9728172948'], 'predicted_tags': [['O'], ['O'], ['O'], ['NAME'], ['O'], ['O'], ['O'], ['O'], ['O'], ['PHONENUMBER']]}
print(token_tags)

sentiment_result = query_sentiment_model(sentiment_model_id, "I hate this movie", token)

print(sentiment_result)

{'query_text': 'my name is david and my phone number is 9728172948', 'tokens': ['my', 'name', 'is', 'david', 'and', 'my', 'phone', 'number', 'is', '9728172948'], 'predicted_tags': [['O'], ['O'], ['O'], ['NAME'], ['O'], ['O'], ['O'], ['O'], ['O'], ['PHONENUMBER']]}
[['negative', 0.9343262314796448], ['positive', 0.041376352310180664], ['neutral', 0.024297324940562248]]


In [12]:
# Example user questions; not referenced by the other cells visible in this file.
sample_queries = [
    "how to pay my bill",
    "can I recharge for a friend",
    "can I recharge for a friend on the website instead of du app",
    "how to get refunds when I unsubscribe",
]

In [13]:
# NOTE(review): the credentials are hard-coded in the notebook.
token = login("admin@thirdai.com", "password")

# Not read by the loop below; kept in case other cells rely on it.
sentiment_map = {"0": "negative", "1": "neutral", "2": "positive"}

# One session id for the whole conversation so every chat call shares it.
session_id = str(uuid.uuid4())

# A polar sentiment is only reported when its score exceeds this value.
SENTIMENT_THRESHOLD = 0.7


def _sentiment_label(predicted_classes, threshold=SENTIMENT_THRESHOLD):
    """
    Reduce the sentiment model output to "negative", "positive" or "neutral".
    Parameters:
    - predicted_classes: Iterable of [label, score] pairs as returned by
      query_sentiment_model.
    - threshold: Score that "negative" or "positive" must exceed to be reported.
    Scores are looked up by label, not by position: in the sample output
    recorded in this file, index 2 holds "neutral", not "positive".
    """
    scores = {entry[0]: entry[1] for entry in predicted_classes}
    negative = scores.get("negative", 0.0)
    positive = scores.get("positive", 0.0)
    if negative > threshold or positive > threshold:
        return "negative" if negative > positive else "positive"
    return "neutral"


while True:
    try:
        user_input = input("Enter your query: ")
    except (EOFError, KeyboardInterrupt):
        # Interrupting the prompt ends the session without a traceback.
        break

    # 1. Report any words the token classifier tags as something other than "O".
    token_tags_result = token_classifier_predict(token_model_id, user_input, token)
    tokens = token_tags_result["tokens"]
    predicted_tags = token_tags_result["predicted_tags"]
    sensitive_tokens = [tok for tok, tags in zip(tokens, predicted_tags) if tags[0] != "O"]
    if sensitive_tokens:
        print("Sensitive Tokens Detected: ", sensitive_tokens, flush=True)
    else:
        print("No Sensitive Tokens Detected.", flush=True)

    # 2. Report the overall sentiment of the message.
    sentiment_result = query_sentiment_model(sentiment_model_id, user_input, token)
    print(f"Sentiment result is {_sentiment_label(sentiment_result)}")

    # 3. Forward the message to the chat endpoint of the retrieval model.
    response = chat(retrieval_model_id, user_input, token, session_id)
    print("Chat Response:", response, flush=True)

No Sensitive Tokens Detected.
Sentiment result is neutral
Chat Response: {'status': 'success', 'message': 'Successful', 'data': {'response': "I'm ready! Please provide me with the user's questions and I'll do my best to answer them."}}
No Sensitive Tokens Detected.
Sentiment result is neutral
Chat Response: {'status': 'success', 'message': 'Successful', 'data': {'response': "To pay your bill, you can use the following methods:\n\n1. Quick pay: Quick and fast way to do the payment without the need to login by using your credit/debit card and international American Express credit cards.\n\n2. Quick pay machines located in most malls and du stores\n\n3. 4200 N. payment self service kiosks installed across all of the UAE to pay all your postpaid and fixed bills using cash or international/uae credit/debit card (American Express/visa/master/jcb)\n\n4. Other channels: You can make your payments through exchange houses, Al Ansa, Fardan, Redha, Al Ansari, Al Razouki, International Exchange, Re