This is going to be the frontend to interact with our rag application.

In [1]:
import google.generativeai as genai
from pinecone import Pinecone
import gradio as gr

In [2]:
from google.api_core.exceptions import FailedPrecondition

In [3]:
import os
# INITIALIZATION AND CONFIGURATION
# Gemini
genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
model = genai.GenerativeModel('gemini-pro')

# Pinecone initialization
pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'), environment='gcp-starter')
index_name = pc.list_indexes()[0]['name']
index = pc.Index(index_name)
namespace = 'Econwiki'

In [4]:
# query = "What is life?"
def pinecone_query(query: str, with_expansion: bool = False) -> str:
    # TODO: Introduce more parameters rather than rely on  hardcoded values
    try:
        if with_expansion:
            system_prompt = f"""
                You are a useful informative assistant
                You are to be issued with a question
                You will create similar questions 
                    similar scope, similar topics and themes,
                The point is to expound on the original question
                    with related questions
                No need to write 'Similar questions'
                or do extensive formatting

                The question is attached after this.

            """
            query = system_prompt + query

        query_vector_ = genai.embed_content(content= query,
                                            model='models/embedding-001')
        query_vector = query_vector_['embedding']

        res = index.query(
            top_k=5,
            vector=query_vector,
            include_metadata=True,
            namespace=namespace
        )

        return '\n\n'.join([match['metadata']['text'] for match in res['matches']])
    
    except FailedPrecondition:
        # TODO: Raise descriptive gradio error
        raise gr.Error("Server may be using a VPN. Please disconnect and try again.")

In [5]:
# from pprint import pprint
# # Querying with Expansion
# # Used to refine retrieval by generating related queries that may yield a better result
# def pinecone_query_with_expansion(query):
#     system_prompt = f"""
#         You are a useful informative assistant
#         You are to be issued with a question
#         You will create similar questions 
#             similar scope, similar topics and themes,
#         The point is to expound on the original question
#             with related questions
#         No need to write 'Similar questions'
#         or do extensive formatting

#         The question is attached after this.

#     """
#     augmented_query = system_prompt + query
#     res = model.generate_content(augmented_query)
#     return res.text

# pprint(pinecone_query_with_expansion('What is my name?'))

In [6]:
def llm_answer_query(context: str) -> str:
    prompt = f"""
        You are provided with a text to summarize
        There may be a main topic or theme that you can identify
        Synthesize the various aspects of the texts to create a concise yet informative summary
        The text follows below:

        {context}

    """
    res = model.generate_content(prompt)
    return res.text

In [7]:
# GRADIO APP
with gr.Blocks() as demo:
    input = gr.Textbox('what is the role of the IMF in Kenya?', label='Query', placeholder='Enter your question here.')
    button = gr.Button('Submit')

    llm_output = gr.Markdown(label="Answer")
    expand_button = gr.Button('Expand Answer', value=True)      # Passes True value to query function to enable expaned query
    
    with gr.Accordion("Retrieved Content", open=False):
        retrieved_output = gr.Markdown()

    # button.click(fn=pinecone_query, inputs=[input], outputs=[retrieved_output])
    # Multiple triggers
    gr.on(triggers=[button.click, input.submit],
          fn=pinecone_query,
          inputs=[input], 
          outputs=[retrieved_output],
          )
    expand_button.click(fn=pinecone_query, inputs=[input, expand_button], outputs=[retrieved_output])
    retrieved_output.change(fn=llm_answer_query, inputs=[retrieved_output], outputs=[llm_output])

demo.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "c:\Users\USER\AppData\Local\Programs\Python\Python310\lib\site-packages\google\api_core\grpc_helpers.py", line 75, in error_remapped_callable
    return callable_(*args, **kwargs)
  File "c:\Users\USER\AppData\Local\Programs\Python\Python310\lib\site-packages\grpc\_channel.py", line 1161, in __call__
    return _end_unary_response_blocking(state, call, False, None)
  File "c:\Users\USER\AppData\Local\Programs\Python\Python310\lib\site-packages\grpc\_channel.py", line 1004, in _end_unary_response_blocking
    raise _InactiveRpcError(state)  # pytype: disable=not-instantiable
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
	status = StatusCode.RESOURCE_EXHAUSTED
	details = "Resource has been exhausted (e.g. check quota)."
	debug_error_string = "UNKNOWN:Error received from peer ipv4:172.217.170.170:443 {created_time:"2024-06-14T09:51:14.9191042+00:00", grpc_status:8, grpc_message:"Resource has been exhausted (e.g.

### Remarks
1. Query expansion does not appear to be very succesful. It does not extract the document summaries that RAPTOR produces.
2. App control flow is such that one can only expand answer once. This should be increased by setting better event listening flow.