# Answers queries based on Weaviate db search
### 1. Search the Weaviate database for the top k most relevant textContent objects using a hybrid search
### 2. Build a string of context to be fed into the reasoning llm
### 3. Insert the context into the llm and format the answer properly
### 4. Print result to the console

In [None]:
import weaviate
import json
import os
from datetime import datetime
import time
import html2text
import openai


# Connection settings are read from the environment so that credentials are
# never stored in the source file. Set WEAV_API_KEY and OPENAI_API_KEY before
# running; any key that was previously committed in this file should be
# treated as compromised and rotated.
WEAV_CLUSTER_URL = os.environ.get(
    "WEAV_CLUSTER_URL", "https://bu-cluster-2-o5pekqq0.weaviate.network"
)
WEAV_API_KEY = os.environ.get("WEAV_API_KEY", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Configure the OpenAI SDK once, at import time.
openai.api_key = OPENAI_API_KEY


# helper: dump a JSON-compatible dict to the console in indented form
def prettify(json_dict: dict) -> None:
    """
    Pretty-print a JSON dictionary to standard output.

    Args:
        json_dict: The dictionary to serialise; it is rendered with a
            two-space indent.
    """
    rendered = json.dumps(json_dict, indent=2)
    print(rendered)


def connect_to_weaviate(weav_cluster_url: str, weav_api_key: str, openAI_api_key: str) -> weaviate.Client:
    """
    Build a Weaviate client for the given cluster.

    Args:
        weav_cluster_url: URL of the Weaviate cluster to connect to
        weav_api_key: API key used to authenticate against the cluster
        openAI_api_key: OpenAI API key, passed to Weaviate in the
            "X-OpenAI-Api-Key" header

    Returns:
        The constructed weaviate.Client
    """
    credentials = weaviate.AuthApiKey(api_key=weav_api_key)
    extra_headers = {"X-OpenAI-Api-Key": openAI_api_key}

    return weaviate.Client(
        url=weav_cluster_url,
        auth_client_secret=credentials,
        additional_headers=extra_headers,
    )


def query_weaviate(client: weaviate.Client, prompt: str, top_k: int) -> dict:
    """
    This function runs a hybrid search against Weaviate and maps each
    matching text chunk to the title of the webpage it belongs to.

    Args:
        client: The Weaviate client
        prompt: The prompt to query Weaviate with
        top_k: The maximum number of results to request

    Returns:
        A dict mapping each result's "text" to the "title" of its first
        "contentOf" reference, in the order the results were returned.
        The title is None when a result has no linked webpage.

    Raises:
        RuntimeError: If the response holds no result list for the queried
            class (for example because Weaviate reported errors instead).
    """
    class_name = "Jonahs_weaviate_TextContent"

    response = (
        client.query
            .get(class_name, ["text", "contentOf { ... on Jonahs_weaviate_Webpage { title } }"])
            .with_hybrid(
                query=prompt,
                alpha=0.75
            )
            .with_limit(top_k)
            .do()
    )

    # Walk the response defensively: a failed query may omit "data" or
    # return null for the class instead of a list of hits.
    hits = ((response.get("data") or {}).get("Get") or {}).get(class_name)
    if hits is None:
        raise RuntimeError(f"Weaviate query failed: {response.get('errors', response)}")

    result_dict = {}
    for hit in hits:
        # A chunk without a cross-reference has a null/empty "contentOf";
        # keep its text and record the missing title as None.
        references = hit.get("contentOf") or []
        result_dict[hit["text"]] = references[0].get("title") if references else None

    return result_dict


def get_answer(
    client: weaviate.Client,
    query: str,
    top_k: int=3,
    model: str="text-davinci-003",
    max_len_context: int=6000,
    max_tokens_in_response: int=2000,
    size: str="ada",
    debug: bool=False,
    stop_sequence: str=None,
) -> str:
    """
    This function answers a question based on context retrieved from Weaviate.

    Args:
        client: The Weaviate client
        query: The query to use to find the context
        top_k: The number of results to use as context
        model: The OpenAI model to use
        max_len_context: The maximum length of the context, in characters
        max_tokens_in_response: The maximum number of tokens in the response
        size: Unused by this function; kept so existing callers keep working
        debug: Whether to print debug information
        stop_sequence: The sequence to stop the response at

    Returns:
        A string containing the question, the model's answer and the list of
        source titles, or an empty string if the completion request failed.
    """
    # NOTE(review): OpenAI has announced retirement of "text-davinci-003";
    # confirm the model passed in is still served before relying on the default.

    results = query_weaviate(client=client, prompt=query, top_k=top_k)

    # Use at most top_k hits, in the order the search returned them.
    used = list(results.items())[:max(top_k, 0)]

    # One chunk per line, then cap the total number of characters.
    context = "".join(text + "\n" for text, _ in used)[:max_len_context]

    # dict.fromkeys de-duplicates while preserving retrieval order, so the
    # source list is deterministic (a set has arbitrary ordering). Hits
    # without a title are left out of the list.
    lst_sources = list(dict.fromkeys(title for _, title in used if title is not None))

    if debug:
        print("Context:\n" + context)
        print("\n\n")

    prompt = f"Answer the question based on the context below, and if the question can't be answered based on the context, say \"I don't know\"\n\nContext: {context}\n\n---\n\nQuestion: {query}\nAnswer: \n\n "

    try:
        # Create a response using the question and context
        response = openai.Completion.create(
            prompt=prompt,
            temperature=0,
            max_tokens=max_tokens_in_response,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=stop_sequence,
            model=model,
        )
        answer = response["choices"][0]["text"].strip()
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and return an
        # empty answer instead of propagating the error to the caller.
        print(e)
        return ""

    return f"Question: {query}\n\nAnswer: {answer}\n\nSource: {lst_sources}\n\n"
    

def print_answer(query: str) -> None:
    """
    Connect to Weaviate, answer the query and print the result.

    Args:
        query: The query to answer
    """
    # Open a client using the module-level connection settings
    client = connect_to_weaviate(
        weav_cluster_url=WEAV_CLUSTER_URL,
        weav_api_key=WEAV_API_KEY,
        openAI_api_key=OPENAI_API_KEY,
    )

    # Retrieve context, ask the model, and write the formatted answer out
    print(get_answer(client=client, query=query, top_k=3, model="text-davinci-003"))


if __name__ == "__main__":
    # Script entry point: answer a single sample question.
    sample_question = "How do I know if I need to be covered by health insurance?"
    print_answer(sample_question)