<a href="https://colab.research.google.com/github/RuthNjeri6/LLM-llama-2-demo/blob/main/reacto_w3_indexing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Indexing using Llama 2 on Colab

In [8]:
# Quietly (-q) install the notebook's dependencies. Most packages are pinned to an
# exact version; torch and faiss-gpu are left unpinned.
!pip -q install langchain==0.0.264 pypdf==3.15.1 torch accelerate==0.21.0 transformers==4.31.0 sentence_transformers==2.2.2 ctransformers==0.2.22 faiss-gpu

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [9]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import JSONLoader, DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Directory that create_vector_db() scans for PDF files.
DATA_PATH = './first_80_papers'
# Directory that create_vector_db() saves the FAISS index to.
DB_FAISS_PATH = 'vectorstore/db_faiss'

In [6]:
# Create vector database
def create_vector_db():
    """Build a FAISS index from the PDFs in ``DATA_PATH`` and save it to ``DB_FAISS_PATH``.

    Steps:
      1. Load every ``*.pdf`` matched by the glob in ``DATA_PATH`` with ``PyPDFLoader``.
      2. Split the loaded documents into 500-character chunks with a
         50-character overlap.
      3. Embed the chunks with ``sentence-transformers/all-MiniLM-L6-v2``.
      4. Store the embeddings in a FAISS index and write it to ``DB_FAISS_PATH``.

    Returns:
        None. A failure while loading, or a corpus that yields no chunks, is
        reported with ``print`` and ends the function before an index is written.
    """
    # Local import: torch is only needed here to choose the embedding device.
    import torch

    # Alternative loader for JSON inputs:
    #loader = DirectoryLoader(DATA_PATH, glob="./*.json", loader_cls=JSONLoader, loader_kwargs = {'jq_schema':'.pages[]'}, show_progress=True, use_multithreading=True)
    try:
        loader = DirectoryLoader(DATA_PATH, glob="./*.pdf", loader_cls=PyPDFLoader, show_progress=True, use_multithreading=True)
        # The files are read by load(), not by the constructor (per LangChain's
        # DirectoryLoader), so load() has to sit inside the try for the handler
        # below to see a missing directory or an unreadable PDF.
        documents = loader.load()
    except Exception as e:
        print(e)
        return

    print(f"Loaded {len(documents)} documents")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)
    if not texts:
        # Nothing to embed: stop here instead of asking FAISS to build an
        # index from an empty list.
        print(f"No text chunks produced from {DATA_PATH}; nothing to index")
        return

    # Use the GPU when one is available, otherwise fall back to the CPU so the
    # cell also runs on a runtime without CUDA.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', model_kwargs={'device': device})

    db = FAISS.from_documents(texts, embeddings)
    db.save_local(DB_FAISS_PATH)

In [10]:
# Build the FAISS index from the PDFs and save it to DB_FAISS_PATH.
create_vector_db()

100%|██████████| 122/122 [01:08<00:00,  1.79it/s]


Loaded 994 documents


In [11]:
from langchain import PromptTemplate
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA

In [12]:
# Location of the saved FAISS index (same value as the earlier DB_FAISS_PATH assignment).
DB_FAISS_PATH = 'vectorstore/db_faiss'

# Prompt text for the QA chain. {context} and {question} are the two placeholders
# that set_custom_prompt() declares as the template's input variables.
custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [13]:
def set_custom_prompt():
    """Wrap ``custom_prompt_template`` in a ``PromptTemplate``.

    Returns:
        A ``PromptTemplate`` whose input variables are ``context`` and
        ``question``.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )

In [14]:
#Retrieval QA Chain
def retrieval_qa_chain(llm, prompt, db, k=2):
    """Build a "stuff" RetrievalQA chain on top of a vector store.

    Args:
        llm: Language model handed to ``RetrievalQA.from_chain_type``.
        prompt: Prompt passed to the chain as ``chain_type_kwargs['prompt']``.
        db: Vector store; its ``as_retriever`` method supplies the retriever.
        k: Value sent to the retriever as ``search_kwargs['k']``. Defaults to
            2, the value that used to be hard-coded, so existing three-argument
            calls behave as before.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type``, created with
        ``return_source_documents=True``.
    """
    retriever = db.as_retriever(search_kwargs={'k': k})
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )
    return qa_chain

In [None]:
#Loading the model
def load_llm():
    """Create the CTransformers LLM for the Llama-2 chat model file.

    Returns:
        A ``CTransformers`` instance for ``llama-2-7b-chat.ggmlv3.q8_0.bin``
        (model type ``llama``) configured with ``max_new_tokens=512`` and
        ``temperature=0.5``.
    """
    # Generation settings belong in `config`: LangChain's CTransformers wrapper
    # forwards only that dict to ctransformers, so passing them as top-level
    # keyword arguments does not apply them to the model.
    generation_config = {'max_new_tokens': 512, 'temperature': 0.5}

    # Load the locally downloaded model here
    llm = CTransformers(
        model="llama-2-7b-chat.ggmlv3.q8_0.bin",
        model_type="llama",
        config=generation_config,
    )
    return llm