In [1]:
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone as PineconeVecDb
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from pinecone import Pinecone

from dotenv import load_dotenv
import os

  from tqdm.autonotebook import tqdm


In [18]:
# Configuration: read the Pinecone settings from environment variables.
# load_dotenv() is called first so that values kept in a local .env file
# (presumably next to the notebook -- confirm) are available via os.environ.
# A variable that is not set yields None here rather than raising.
load_dotenv()
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
# NOTE(review): PINECONE_ENV_NAME is not referenced by any later cell visible here.
PINECONE_ENV_NAME = os.environ.get('PINECONE_ENV_NAME')
PINECONE_INDEX = os.environ.get('PINECONE_INDEX')

### Functions

In [8]:
# FUNCTIONS
def load_data(path: str):
    """Load the PDF files found in a directory.

    Args:
        path: Directory handed to ``DirectoryLoader``; files are selected
            with the glob pattern ``*.pdf`` and parsed with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns -- presumably a list of
        LangChain ``Document`` objects (confirm against the library docs).
    """
    pdf_loader = DirectoryLoader(loader_cls=PyPDFLoader, glob='*.pdf', path=path)
    loaded_documents = pdf_loader.load()
    return loaded_documents

def split_text(documents, chunk_size: int = 500, chunk_overlap: int = 20):
    """Split loaded documents into overlapping chunks ready for embedding.

    Args:
        documents: The documents to split (e.g. the result of ``load_data``).
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the value that used to be
            hard-coded (presumably measured in characters -- confirm).
        chunk_overlap: Forwarded as ``chunk_overlap``. Defaults to 20, the
            previously hard-coded value.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents``
        for ``documents``.
    """
    # Both sizes are parameters so different chunkings can be tried without
    # editing this function; the defaults reproduce the original behaviour.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

### Extracting Data

In [9]:
# Load every PDF from the local 'documents/' folder, then cut the loaded
# documents into chunks; `overlap_data` is what gets embedded and indexed below.
extracted_data = load_data(path='documents/')
overlap_data = split_text(documents=extracted_data)

In [13]:
# Embedding model used both when indexing the chunks and when querying.
# NOTE(review): the Pinecone index dimension presumably has to match this
# model's output size -- confirm before switching models.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/paraphrase-MiniLM-L6-v2',
)

  from .autonotebook import tqdm as notebook_tqdm


In [28]:
# Create the Pinecone client from the API key read in the configuration cell.
# NOTE(review): `pinecone_client` is not referenced by any later cell visible
# here; the vector store below is built from the index name only.
pinecone_client = Pinecone(
    api_key=PINECONE_API_KEY,
)

In [33]:
# Embed the chunks and store them in the Pinecone index named by PINECONE_INDEX.
# NOTE(review): this looks like it re-embeds and re-uploads every chunk each
# time the cell runs -- confirm, and consider guarding it for repeat runs.
docsearch = PineconeVecDb.from_documents(
    overlap_data,
    embeddings,
    index_name=PINECONE_INDEX,
)

In [36]:
# Retrieval smoke test: query the vector store directly (no LLM involved)
# and keep the 3 best-matching chunks in `ans` for inspection.
q = 'What is a micro service?'

ans = docsearch.similarity_search(q, k=3)

In [48]:
# Prompt sent to the LLM for every question. `{context}` and `{question}`
# are the two placeholders declared as input variables on the PromptTemplate.
# The text is runtime data: wording and line breaks are kept exactly as-is.
prompt_template = (
    "\n"
    "Use the following informations to answer the question. "
    "If you don't know the answer, don't try to make it up. "
    "Only use the informations given!\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "\n"
    "Only return helpful answers below and nothing else.\n"
    "Answer:\n"
)

In [49]:
# Wrap the raw template text in a PromptTemplate and package it in the
# keyword-argument dict that is handed to the QA chain constructor.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=prompt)

In [57]:
# Local LLM loaded from a GGUF file through the LlamaCpp wrapper.
# n_gpu_layers / n_batch are passed straight through -- presumably the number
# of layers offloaded to the GPU and the batch size; tune for your hardware.
llama2 = LlamaCpp(
    verbose=False,
    n_batch=256,
    n_gpu_layers=20,
    model_path='models/llama-2.gguf',
)

In [58]:
# Retrieval-augmented QA chain: for each query the retriever fetches the
# k=2 closest chunks from Pinecone and the LLM answers using the custom prompt.
# return_source_documents=True keeps the retrieved chunks in the response
# under 'source_documents' so answers can be checked against them.
qa = RetrievalQA.from_chain_type(
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
    retriever=docsearch.as_retriever(search_kwargs=dict(k=2)),
    chain_type='stuff',
    llm=llama2,
)

In [63]:
# In-domain question. Calling the chain object directly (`qa({...})`) goes
# through the deprecated `Chain.__call__`; `.invoke()` is the supported entry
# point and returns the same dict ('query', 'result', 'source_documents').
res = qa.invoke({"query": "Why golang?"})

In [64]:
# Show only the generated answer text from the response dict.
print(res['result'])

Golang is used because it is a good practice to have a stable Go environment for version 1.0.


In [65]:
# Out-of-domain probe: the indexed PDFs do not cover this topic, so a
# well-behaved chain should decline to answer. The recorded output shows the
# model answering anyway, i.e. the "only use the informations given" prompt
# is not sufficient on its own to prevent ungrounded answers.
# `.invoke()` replaces the deprecated direct call `qa({...})`; same result dict.
res = qa.invoke({'query': 'is pitbull a dog?'})
print(res['result'])

Using the provided information and API, I have tried chaining the programs together and got the following results:

pitbull is a dog.

Therefore, the answer to your question is "Yes", pitbull is a dog.


In [66]:
# Dump the whole response dict ('query', 'result', 'source_documents') to
# inspect which chunks were retrieved. The output is long because it includes
# the full text of every source document.
print(res)

{'query': 'is pitbull a dog?', 'result': 'Using the provided information and API, I have tried chaining the programs together and got the following results:\n\npitbull is a dog.\n\nTherefore, the answer to your question is "Yes", pitbull is a dog.', 'source_documents': [Document(page_content="Chapter 4[ 115 ]confabulation\nschmooze\nNew World chat\nOld World chat\nconversation\nthrush\nwood warbler\nchew the fat\nshoot the breeze\nchitchat\nchatter\nThe results you get will most likely differ from what we have listed here since we're \nhitting a live API, but the important aspect here is that when we give a word or term \nas input to the program, it returns a list of synonyms as output, one per line.\nTry chaining your programs together in various orders to see what", metadata={'page': 129.0, 'source': 'documents/Go Microsevices.pdf'}), Document(page_content='you can. For an up-to-date list of what the vet tool will report on, check out the \ndocumentation at https://godoc.org/code.goo