## Import Libraries

In [1]:
import os
from langchain.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain, RetrievalQA, ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.llms import CTransformers, HuggingFaceHub
from dotenv import load_dotenv
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

## Load all the env variables

In [7]:
load_dotenv()
os.environ['HUGGINGFACEHUB_API_TOKEN'] = "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"
# os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
# os.environ["NEO4J_URI"] = os.getenv("NEO4J_URI")
# os.environ["NEO4J_USERNAME"] = os.getenv("NEO4J_USERNAME")
# os.environ["NEO4J_PASSWORD"] = os.getenv("NEO4J_PASSWORD")

## We load the Neo4j knowledge graph

In [4]:
graph = Neo4jGraph(
    url=os.environ["NEO4J_URI"],
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"],
)

### Here we load our documents (we only work with on pdf for now, but you can pass as much as you want)

In [3]:
def data_ingestion():
    loader = DirectoryLoader("data/", glob="*.pdf", loader_cls=PyPDFLoader)
    load_data = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    documents = text_splitter.split_documents(load_data)
    return documents

documents = data_ingestion()

In [6]:
# from peft import PeftModel, PeftConfig
# from transformers import AutoModelForCausalLM

# config = PeftConfig.from_pretrained("smit0104/research_summarization-mistral")
# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
# llm = PeftModel.from_pretrained(model, "smit0104/research_summarization-mistral")

### Here we make a langchain pipeline that does the following:

1. Load the embeddings
2. Load our vectordatabase
3. Initialize our LLM
4. Prompt engineering
5. Bind everything together

In [15]:
custom_prompt_template = """Use the following pieces of information from the research paper to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

def set_custom_prompt():
    """
    Prompt template for QA retrieval for each vectorstore
    """
    prompt = PromptTemplate(template=custom_prompt_template,
                            input_variables=['context', 'question'])
    return prompt
def retrieval_qa_chain(llm, prompt, db, memory):
    
    qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, 
                                                     memory=memory, 
                                                     retriever = db.as_retriever(search_kwargs={'k': 2}), 
                                                     combine_docs_chain_kwargs = {'prompt': prompt})

    # qa_chain = RetrievalQA.from_chain_type(llm=llm,
    #                                    chain_type='stuff',
    #                                    retriever=db.as_retriever(search_kwargs={'k': 2}),
    #                                    return_source_documents=True,
    #                                    chain_type_kwargs={'prompt': prompt}
    #                                    )
    return qa_chain

def load_llm():
    # Load the locally downloaded model here
    llm = HuggingFaceHub(repo_id = "mistralai/Mistral-7B-Instruct-v0.1", model_kwargs={'temperature': 0.5, "max_length": 512})
    return llm

def qa_bot(documents):
    embeddings = HuggingFaceInstructEmbeddings(model_name='hkunlp/instructor-base')
    db = FAISS.from_documents(documents, embeddings)
    llm = load_llm()
    qa_prompt = set_custom_prompt()
    memory = ConversationBufferMemory(memory_key = 'chat_history', return_messages=True, output_key='answer')
    qa = retrieval_qa_chain(llm, qa_prompt, db,memory)

    return qa

def final_result(query):
    qa_result = qa_bot()
    response = qa_result({'query': query})
    return response

In [16]:
qa_chain = qa_bot(documents)

load INSTRUCTOR_Transformer
max_seq_length  512




### We create a retreival chain that combines everything for us

In [51]:
query = "What does the paper conclude?"

In [52]:
print(
        qa_chain({"question": query})['answer']
    )

The paper concludes that abstractive summarization is a challenging NLP task, and that there is


### Create a Gradio environment to run on web

In [11]:
import gradio as gr

def get_result(query):
    print(query)
    return qa_chain({"query": query})['result']

demo = gr.Interface(fn=get_result, inputs="text", outputs="text")
    
if __name__ == "__main__":
    demo.launch(show_api=False, share=True)   

Running on local URL:  http://127.0.0.1:7861

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
2023/11/30 11:36:35 [W] [service.go:132] login to server failed: DialTcpByHttpProxy error, StatusCode [403]
