In [None]:
from flask import Flask, render_template, jsonify, request
#from src.helper import download_hugging_face_embeddings

#from langchain_pinecone import PineconeVectorStore
from langchain.vectorstores import Pinecone as  PineconeVectorStore
import pinecone
from langchain.vectorstores import Pinecone
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA
from dotenv import load_dotenv
#from src.prompt import *
import os
import sentence_transformers
import chromadb

from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings

from langchain_community.vectorstores import Chroma

In [None]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
#from langchain.chains import LLMChain
from langchain.llms import LlamaCpp
#from langchain.prompts import PromptTemplate

In [None]:
def pdf_loader(pdf_dir):
    """Load the PDF files found in ``pdf_dir``.

    A ``DirectoryLoader`` picks up the files matching the glob ``*.pdf`` and
    parses each of them with ``PyPDFLoader``.

    Args:
        pdf_dir: Path of the directory that holds the PDF files.

    Returns:
        The result of ``DirectoryLoader.load()``: a list of LangChain
        ``Document`` objects (PyPDFLoader is documented to emit one per page).
    """
    return DirectoryLoader(pdf_dir, glob='*.pdf', loader_cls=PyPDFLoader).load()
def text_splitter(exrtacted_doc, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks.

    Sizes are measured in characters, not tokens: no ``length_function`` is
    passed to ``RecursiveCharacterTextSplitter``, so its default (``len``)
    applies.

    Args:
        exrtacted_doc: Documents to split, e.g. the output of ``pdf_loader``.
            The misspelled name is kept so existing keyword callers still work.
        chunk_size: Maximum length of a chunk, in characters. Defaults to 500.
        chunk_overlap: Number of characters shared by neighbouring chunks.
            Defaults to 20.

    Returns:
        The list of chunk documents produced by ``split_documents``.
    """
    splitter_obj = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter_obj.split_documents(exrtacted_doc)
def download_hugging_face_embeddings():
    """Create the embedding model that converts text into vectors.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model. Presumably the
        weights are fetched on first use if they are not cached locally --
        confirm against the sentence-transformers setup in use.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [None]:
# Parse the PDFs under data/ and cut them into chunks (pdf_loader and
# text_splitter are defined in the cell above). `split_doc` is the input for
# building the Chroma index.
extracted_doc = pdf_loader('data/')
split_doc = text_splitter(extracted_doc)

In [None]:
# Embedding model handed to Chroma below. A persisted index has to be opened
# with the same model that was used to build it, otherwise query vectors and
# stored vectors are not comparable.
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

In [None]:
# Smoke test: embed one sentence and print the length of the returned vector,
# i.e. the embedding dimension (presumably 384 for all-MiniLM-L6-v2 -- confirm).
out = embedding_function.embed_query("My name is Tharun")
print(len(out))

In [None]:
# Reuse the persisted Chroma index when one exists; otherwise build it from the
# chunks. Opening a missing or empty directory would only give an empty store,
# and every search against it would come back with no documents.
CHROMA_DIR = "./chroma_db"

if os.path.isdir(CHROMA_DIR) and os.listdir(CHROMA_DIR):
    # Load from disk.
    db = Chroma(persist_directory=CHROMA_DIR, embedding_function=embedding_function)
else:
    # First run: embed the chunks and save the index to disk.
    # NOTE(review): recent chromadb versions write to persist_directory
    # automatically; older ones may need an explicit db.persist() -- confirm
    # for the installed version.
    db = Chroma.from_documents(split_doc, embedding_function, persist_directory=CHROMA_DIR)

In [None]:
# Retrieval smoke test: show the best-matching chunk for a sample question.
query = "What causes allergies?"
result = db.similarity_search(query)
if result:
    print(result[0].page_content)
else:
    # An empty index returns an empty list; indexing [0] would raise IndexError.
    print("No matching chunks found - is the Chroma index populated?")

In [None]:
# Prompt used by the QA chain. It has two placeholders, {context} and
# {question}; the chain built later receives it through `chain_type_kwargs`.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [None]:

# Load the quantised Llama-2 chat weights through ctransformers.
# NOTE(review): the LlamaCpp cell further down assigns `llm` again using the
# same weights file, so running both cells loads the model twice and only the
# later binding is used by the QA chain.
llm = CTransformers(
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config=dict(max_new_tokens=512, temperature=0.8),
)

In [None]:
# Build llama-cpp-python from source with cuBLAS (GPU) support.
# The version is pinned because this notebook loads a GGML v3 weights file
# (llama-2-7b-chat.ggmlv3.q4_0.bin): llama-cpp-python releases from 0.1.79 on
# only read GGUF files, so an unpinned --upgrade installs a build that cannot
# open it. 0.1.78 is the last GGML-compatible release -- confirm it also works
# with the installed LangChain version.
# %pip (rather than !pip) installs into the environment of the running kernel;
# the build flags are passed through the kernel's environment variables.
%env CMAKE_ARGS=-DLLAMA_CUBLAS=on
%env FORCE_CMAKE=1
%pip install --upgrade --force-reinstall --no-cache-dir llama-cpp-python==0.1.78

In [None]:
# Streams generated tokens to stdout while the model is answering.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# GPU offload and batch settings forwarded to llama.cpp.
# NOTE(review): -1 is presumably meant as "offload every layer" -- confirm the
# installed llama-cpp-python interprets -1 that way.
n_gpu_layers = -1
n_batch = 512

# The model path is relative to the notebook's working directory; adjust it
# for your system.
llm = LlamaCpp(
    model_path="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    temperature=0.75,
    max_tokens=512,
    verbose=True,  # must be on for the callback manager to receive output
    callback_manager=callback_manager,
)

In [None]:
# Retrieval-augmented QA chain: the retriever is asked for k=2 entries from the
# Chroma store, the "stuff" chain type is used together with the custom prompt,
# and the retrieved source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(search_kwargs={'k': 2}),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [None]:
# Ask the QA chain a question and print the generated answer.
# `invoke` replaces the direct call `qa({...})`, which LangChain has deprecated.
query = "What is acne?"
result = qa.invoke({'query': query})
print(result["result"])

In [None]:
import vllm