In [None]:
import getpass
import os
import textwrap

import torch
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

In [None]:
# Hugging Face Hub credential.
# LangChain's Hub integrations look the token up under the upper-case name
# HUGGINGFACEHUB_API_TOKEN. Environment variable names are case-sensitive on
# Linux/macOS, so the previous mixed-case key was not the one being read there.
# The token is prompted for (input hidden) rather than pasted into the notebook,
# and a value that is already exported in the environment is left untouched.
if not os.environ.get('HUGGINGFACEHUB_API_TOKEN'):
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = getpass.getpass('Hugging Face API token: ')

In [None]:
# Shell escape: run the Hugging Face CLI login for this machine.
# The model-loading cell further down passes use_auth_token=True, which presumably
# relies on the credential stored by this login -- confirm for your transformers version.
!huggingface-cli login

In [None]:
# Read the source material into LangChain documents.
# NOTE(review): the loader class is UnstructuredFileLoader, so this placeholder
# presumably has to be replaced with a path to a file rather than a folder -- confirm.
source_path = 'PATH_TO_YOUR_FOLDER'
loader = UnstructuredFileLoader(source_path)
documents = loader.load()

In [None]:
# Split the loaded documents on newlines into overlapping chunks
# (chunk_size=1000, chunk_overlap=50) ready for embedding.
splitter_settings = {
    'separator': '\n',
    'chunk_size': 1000,
    'chunk_overlap': 50,
}
text_splitter = CharacterTextSplitter(**splitter_settings)
text_chunks = text_splitter.split_documents(documents)

In [None]:
# Sentence-embedding model used to vectorise the text chunks.
# Use the GPU when torch can see one and fall back to the CPU otherwise, so this
# cell no longer fails on hosts without CUDA (behaviour on GPU hosts is unchanged).
embedding_device = 'cuda' if torch.cuda.is_available() else 'cpu'
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': embedding_device},
)

In [None]:
# Build the FAISS vector store from the text chunks, embedding them with `embeddings`.
vectorstore = FAISS.from_documents(
    documents=text_chunks,
    embedding=embeddings,
)

In [None]:
# Tokenizer for the Llama-2 chat checkpoint.
# use_auth_token=True mirrors the model-loading call for the same repository so
# that both downloads authenticate the same way.
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    use_auth_token=True,
)

In [None]:
# Load the Llama-2 7B chat checkpoint as a causal LM.
# Options are collected in one mapping so they can be read (and tweaked) at a glance.
model_load_options = {
    'device_map': 'auto',
    'torch_dtype': torch.float16,
    'use_auth_token': True,
    # The original cell kept `load_in_4bit=True` as a commented-out alternative to this flag.
    'load_in_8bit': True,
}
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    **model_load_options,
)

In [None]:
pipeline = pipeline("text-generation",
                model=model,
                tokenizer= tokenizer,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                max_new_tokens = 1024,
                do_sample=True,
                top_k=10,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id
                )

In [None]:
llm=HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature':0.3})

In [None]:
# Question-answering chain: `llm` answers over documents fetched by the vector
# store's retriever, using the "stuff" chain type and returning no source documents.
retriever = vectorstore.as_retriever()
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
)

In [None]:
# Ask one question and show the answer re-wrapped to at most 500 characters per line.
query = "YOUR_QUESTION"
chain_inputs = {"query": query}
result = chain(chain_inputs, return_only_outputs=True)
answer_text = result['result']
wrapped_text = textwrap.fill(answer_text, width=500)
wrapped_text

In [None]:
# Display the answer string under the 'result' key as returned by the chain,
# without the textwrap re-wrapping applied in the previous cell.
result['result']