<h1 align=center> Possible Fixes </h1>

1. Refine vectorstore integration. Retrieval is very weak. It isnt extracting the documents it is meant to.
    - Redefine similarity search using [docs](https://python.langchain.com/docs/integrations/vectorstores/opensearch#maximum-marginal-relevance-search-mmr)

2. Use a different chain type in RetrievalQA,
    either map_reduce

In [1]:
import os
import uuid
import pinecone
import gradio as gr
import google.generativeai as palm

from langchain.chains import RetrievalQA
from langchain.llms import GooglePalm
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import GooglePalmEmbeddings
from langchain.vectorstores.pinecone import Pinecone

  from tqdm.autonotebook import tqdm


### Helper Functions

In [2]:
def get_completion(prompt):
    palm.configure(api_key=os.getenv("PALM_API_KEY"))
    res = palm.chat(prompt=prompt)
    return res.last

In [3]:

def read_txt(file):
    with open(file.name, encoding='utf-8') as f:
        return f.read()

In [4]:
def read_pdf(file):
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    
    text = [page.page_content for page in pages]
    return text[0]

In [5]:
def vectorize(docs, query, temperature=0.7, chain_type='stuff', verbose=False,
              google_api_key=os.getenv("PALM_API_KEY"), pinecone_api_key=os.getenv("PINECONE_API_KEY")):
    """
    Integrate with pinecone vector store and wllow querying.
    """

    pinecone.init(
        api_key=pinecone_api_key,
        environment='gcp-starter'
    )

    index_name = pinecone.list_indexes()[0]
    index = pinecone.Index(index_name)
    index.delete(delete_all=True, namespace='')              # Clear the default namespace everytime for new documents

    embeddings = GooglePalmEmbeddings(google_api_key=google_api_key)

    docsearch = Pinecone.from_texts(docs, embeddings, index_name=index_name)
    docs = docsearch.similarity_search(query)
    doc_text = [doc.page_content for doc in docs][0]

    retriever = docsearch.as_retriever()
    
    llm = GooglePalm(google_api_key=google_api_key, temperature=temperature)
    
    vector_query = RetrievalQA.from_chain_type(
        llm=llm, 
        chain_type=chain_type, 
        retriever=retriever, 
        verbose=True
    )

    templated_query = f"""
        You are a consultant \
        Your job is to ingest data from documents and come up with concise ways of expressing it \
        In other words, you will summarize documents for audiences \
        You will target a novive audience who may not have prior knowledge of the text \
        Whenever you do not have enough information to summarize say explicitly:
            "I do not have enough information to summarize"
        Otherwise provide a well annotated summary \
        If there are points use numbered or bulleted lists \
        Highlight important points \
        Provide an introduction and conclusion whenever necessary \
        
        You are provided with the user query below:

        {query}


    """
    res = vector_query.run(templated_query)

    if verbose:
        print(doc_text)
        print(index.describe_index_stats())

    return res

### Workflow

In [6]:
def main(query, file):
    """
    Wrapper for all functions in the backend that server the gradio ui frontend
    """

    docs = read_pdf(file)

    res = vectorize(docs, query, verbose=True)
    
    return res

In [7]:
default_text = "could you provide a summary of azure with reference to these documents"

In [8]:
# iface = gr.Interface(fn=main, inputs=[gr.Textbox(value=default_text), "file"], outputs=gr.Markdown())
# iface.launch()

In [9]:
def main(query, history, file):
    """
    Wrapper for all functions in the backend that server the gradio ui frontend
    """

    docs = read_pdf(file)

    res = vectorize(docs, query, verbose=True)
    
    yield res

In [10]:
app = gr.ChatInterface(
    fn=main,
    additional_inputs=[gr.File()]
)
app.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.






[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Quick R eview Quick R eview
172
{'dimension': 768,
 'index_fullness': 0.02,
 'namespaces': {'': {'vector_count': 2000}},
 'total_vector_count': 2000}
