In [None]:
#imports
# standard library
from time import perf_counter
from time import time

# third-party
import langchain
from torch import cuda, bfloat16
import torch
import transformers
from transformers import AutoTokenizer
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import TextLoader,PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain,ConversationalRetrievalChain,StuffDocumentsChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import CTransformers
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain import PromptTemplate, LLMChain

In [None]:
# Load the locally downloaded LLM through CTransformers. The weights are read
# from an absolute path on this machine, so the file must exist there.
llm_model_path = r"C:\Users\Medha\miniconda3\m3_topic_summ\models\llama-2-7b-chat.ggmlv3.q2_K.bin"

# Generation settings handed to the backend as-is.
# NOTE(review): max_new_tokens equals context_length; if the context window has
# to hold the prompt as well as the generated tokens, nothing is left for the
# prompt -- confirm against the ctransformers config documentation.
llm_generation_config = {
    "max_new_tokens": 4096,
    "temperature": 0.01,  # temperature (entropy/randomness in the answer): 1e-2
    "context_length": 4096,
}

llm = CTransformers(
    model=llm_model_path,
    model_type="llama",
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],  # stream generated tokens to stdout
    config=llm_generation_config,
)



In [None]:
# Load a single PDF from the absolute path below; the loaded result is kept
# in `docs` for the splitting step.
transcript_pdf_path = r'C:\Users\Medha\miniconda3\m3_topic_summ\data\lec1\lec1_transcript.pdf'
loader = PyPDFLoader(transcript_pdf_path)
docs = loader.load()

# #load the pdf files from the path
# loader = DirectoryLoader(r'C:\Users\Medha\miniconda3\m3_topic_summ\data\textbooks_extra_materials',glob="*.pdf",loader_cls=PyPDFLoader)
# docs = loader.load()


In [None]:
# Split the loaded documents into overlapping chunks
# (chunk_size=500, chunk_overlap=50).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
text_chunks = text_splitter.split_documents(docs)

# Embedding model: all-MiniLM-L6-v2 from sentence-transformers, pinned to the CPU.
embedding_model_kwargs = {"device": "cpu"}
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs=embedding_model_kwargs,
)

# Build the FAISS vector store from the chunks and the embedding model.
vector_store = FAISS.from_documents(text_chunks, embeddings)


In [None]:
# Retrieval sanity check: run a similarity search for a sample question and
# display how many documents came back (last expression = cell output).
question = "What is t-test?"
sim_docs = vector_store.similarity_search(
    question,
)
len(sim_docs)

In [None]:
# Wrap the vector store in a retriever so the QA chain can query it.
retriever = vector_store.as_retriever()

# Retrieval-QA chain over the local LLM using the "map_reduce" chain type;
# verbose=True is passed through to the chain factory.
qa_chain_options = dict(
    llm=llm,
    chain_type="map_reduce",
    retriever=retriever,
    verbose=True,
)
qa = RetrievalQA.from_chain_type(**qa_chain_options)

In [None]:


def test_rag(qa, query):
    print(f"Query: {query}\n")
    time_1 = time()
    result = qa.run(query)
    time_2 = time()
    print(f"Inference time: {round(time_2-time_1, 3)} sec.")
    print("\nResult: ", result)



In [None]:
# Question put to the QA chain; the text is sent to the model exactly as written.
# NOTE(review): the prompt contains the typo "probelms" and has no space after
# "t-test?" -- left untouched here because changing it changes the model input.
query = (
    "Mention the examples and how they were solved by t-test?"
    "Also mention the key points that were used to solve the probelms in t-test"
)
test_rag(qa, query)

