In [78]:
# !pip install langchain
# !pip install langchain_community
# !pip install PyPDF
# !pip install spacy
# !pip install -qU langchain-text-splitters
# !pip install langchain_chroma
# !pip install -U sentence-transformers
# !pip install langchain_huggingface
#!pip install gradio

In [4]:
import os

import chromadb
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Persistent Chroma client; its data lives in the local 'java-vector' directory.
client = chromadb.PersistentClient(path='java-vector')
# get_or_create_collection keeps this cell re-runnable: plain create_collection
# raises when "java-vector" is already present in the on-disk store.
collection = client.get_or_create_collection(name="java-vector")

In [5]:
def loader_pdf(path):
    """Load a PDF through ``PyPDFLoader``.

    Args:
        path: Location of the PDF file, passed straight to ``PyPDFLoader``.

    Returns:
        Whatever ``PyPDFLoader(path).load()`` returns.
    """
    return PyPDFLoader(path).load()

In [6]:
def processing_text(text, chunk_size=1000, chunk_overlap=0):
    """Split loaded documents into smaller chunks.

    Args:
        text: Documents to split (for example the result of ``loader_pdf``);
            handed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Chunk size forwarded to the splitter. Defaults to 1000.
        chunk_overlap: Overlap between consecutive chunks, forwarded to the
            splitter. Defaults to 0.

    Returns:
        The chunked documents produced by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(text)

In [7]:
def create_db(text):
    """Embed the given documents and store them in a Chroma vector store.

    Args:
        text: Documents to index, forwarded as ``documents`` to
            ``Chroma.from_documents``.

    Returns:
        The object returned by ``Chroma.from_documents`` for the
        "java-vector" collection.
    """
    # spaCy-backed embedding function used to vectorise the documents.
    spacy_embedder = SpacyEmbeddings(model_name="pt_core_news_md")
    return Chroma.from_documents(
        documents=text,
        embedding=spacy_embedder,
        collection_name="java-vector",
    )

In [13]:
def search_llm(vector_db, model='llama2', k=2):
    """Build a retrieval question-answering chain on top of ``vector_db``.

    Args:
        vector_db: Vector store exposing ``as_retriever``.
        model: Name of the Ollama model handed to ``ChatOllama``.
            Defaults to 'llama2'.
        k: Value passed as ``search_kwargs={'k': k}`` to the retriever.
            Defaults to 2.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type`` with the 'stuff'
        chain type, configured to return its source documents.
    """
    llm = ChatOllama(model=model)
    retriever = vector_db.as_retriever(search_kwargs={'k': k})

    # The {context} and {question} placeholders are the prompt's input variables.
    PROMPT_TEMPLATE = """Answer the question based ONLY on the following context in portuguese and concise answer: {context}.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Question: {question}"""

    QA_CHAIN_PROMPT = PromptTemplate.from_template(PROMPT_TEMPLATE)

    return RetrievalQA.from_chain_type(
        llm,
        chain_type='stuff',
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': QA_CHAIN_PROMPT},
    )

In [14]:
# Location of the source PDF. Set the JAVA_PDF_PATH environment variable to use
# a different file; the default is the path this notebook was written against.
PDF_PATH = os.environ.get('JAVA_PDF_PATH', '/home/nayra/Downloads/JavaBasico.pdf')

# Pipeline: load the PDF -> split into chunks -> index in Chroma -> build the QA chain.
text = loader_pdf(PDF_PATH)
texto_novo = processing_text(text)
vector_db = create_db(texto_novo)
chain = search_llm(vector_db)

In [None]:
with gr.Blocks() as demo:

    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, chat_history):
        """Answer ``user_message`` with the QA chain and record the exchange.

        Args:
            user_message: Text submitted through the textbox.
            chat_history: Current value of the chatbot component, a list of
                (user message, answer) pairs; ``None`` is treated as empty.

        Returns:
            A pair: an update that empties the textbox, and the chat history
            to display in the chatbot.
        """
        # The Clear button sets the chatbot value to None; start from an empty
        # history in case that None is what reaches this handler.
        chat_history = chat_history or []

        # Skip the LLM round-trip for blank submissions.
        if not user_message or not user_message.strip():
            return gr.update(value=""), chat_history

        # Get the LLM answer through the Q&A chain.
        result = chain.invoke({"query": user_message})

        # Append the user's message and the model's answer to the chat window.
        chat_history.append((user_message, result["result"]))

        return gr.update(value=""), chat_history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(debug=True)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


In [3]:
# Cleanup: remove the "java-vector" collection from the persistent Chroma client.
# Requires `client` from the setup cell to be defined in the kernel.
client.delete_collection(name="java-vector")