# Using the HuggingFace "Load model directly" method

* AutoModelForCausalLM

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.retrievers import BM25Retriever, EnsembleRetriever

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

In [None]:
import torch
from transformers import AutoModelForCausalLM

In [None]:
# Load the source PDF from disk. `docs` holds whatever PyPDFLoader.load()
# returns and is what gets split into chunks afterwards.
file_path = "../data/Orca Progressive Learning from Complex.pdf"
data_file = PyPDFLoader(file_path)
docs = data_file.load()

In [None]:
# Split the loaded documents into overlapping chunks (chunk_size=800,
# chunk_overlap=100) so that each piece can be embedded and retrieved on its own.
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_documents(docs)

In [None]:
# Read the HuggingFace API token without echoing it: `input()` would leave the
# secret visible in the terminal scrollback or in the saved notebook output.
from getpass import getpass

HF_TOKEN = getpass("Enter your HuggingFace Token")

In [None]:
# Embedding model used to vectorise the chunks: BAAI/bge-base-en-v1.5, accessed
# through the HuggingFace Inference API client and authenticated with HF_TOKEN.
# Model card: https://huggingface.co/BAAI/bge-base-en-v1.5
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HF_TOKEN, model_name="BAAI/bge-base-en-v1.5"
)

In [None]:
# Number of results requested from each retriever per query; applied to both
# the vector-store retriever and the BM25 retriever below.
k = 5

In [None]:
# Build a Chroma vector store from the chunks, using `embeddings` to vectorise them.
vector_store = Chroma.from_documents(chunks, embeddings)

In [None]:
# Embedding-based retriever over the vector store, limited to k results per query.
vector_retriever = vector_store.as_retriever(search_kwargs={"k": k})

In [None]:
# BM25 retriever built directly from the chunk texts, limited to k results.
# NOTE(review): despite the variable name, BM25 ranks by term overlap
# (keyword/lexical matching), not by semantic similarity; the embedding-based
# retriever is `vector_retriever`. The name is kept because the ensemble
# retriever refers to it.
semantic_retriever = BM25Retriever.from_documents(chunks)
semantic_retriever.k = k

In [None]:
# Hybrid retrieval: combine the embedding-based and BM25 retrievers, giving
# each an equal weight of 0.5 when their results are merged.
ensemble_retriever = EnsembleRetriever(
    retrievers=[vector_retriever, semantic_retriever], weights=[0.5, 0.5]
)

In [None]:
# Load Zephyr-7B directly with AutoModelForCausalLM and expose it to LangChain.
#
# A bare causal-LM module cannot be piped into an LCEL chain: it expects token
# tensors rather than the prompt value emitted by the prompt template, and it
# returns logits rather than text. Pair the model with its tokenizer in a
# text-generation pipeline and wrap that in `HuggingFacePipeline`, which is a
# LangChain LLM that accepts and returns plain strings.
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline

model_id = "HuggingFaceH4/zephyr-7b-beta"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
)

text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # The default generation length is too short for a retrieval-augmented
    # prompt, so bound the answer length explicitly.
    max_new_tokens=512,
)
llm = HuggingFacePipeline(pipeline=text_generator)

In [None]:
# Prompt layout for the model: a system turn carrying the instructions and the
# retrieved context, a user turn carrying the question, then an open assistant
# turn for the model to complete. Each finished turn is closed with </s>.
# The {context} and {query} placeholders are filled in by the chain.
template = """
<|system|>
You are a helpful AI Assistant that follows instructions extremely well.
Use the following context to answer user question.

Think step by step before answering the question.
You will get a $100 tip if you provide correct answer.

CONTEXT: {context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

In [None]:
# The template is one pre-formatted string that already contains the model's
# role tags and is meant for a text-completion LLM. A ChatPromptTemplate would
# wrap it in a single human chat message, which is rendered with a "Human: "
# prefix when converted to text and so corrupts the hand-written layout. A
# plain PromptTemplate passes the formatted string through unchanged.
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate.from_template(template)

# Reduces the model output to a plain string at the end of the chain.
output_parser = StrOutputParser()

In [None]:
def _format_docs(retrieved_docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the list of Document objects would be interpolated into
    the prompt as its Python repr (class names, metadata dicts, escaped
    newlines), which wastes tokens and adds noise to the context.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# RAG chain: the incoming question is sent to the ensemble retriever (whose
# results are flattened to text for {context}) and passed through untouched as
# {query}; the filled prompt goes to the LLM and the output is parsed to a string.
chain = (
    {"context": ensemble_retriever | _format_docs, "query": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)

In [None]:
# Run the full chain on a sample question and print the resulting answer.
print(chain.invoke("What is instruction tuning?"))