# Using a HuggingFace pipeline as a high-level helper

* `from transformers import pipeline`

In [2]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.retrievers import BM25Retriever, EnsembleRetriever

from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

In [3]:
import getpass
import os

import torch
from transformers import pipeline

In [4]:
# Load the source PDF. `docs` holds whatever the loader's load() returns and is
# the input to the chunking step that follows.
file_path = "../data/Orca Progressive Learning from Complex.pdf"
data_file = PyPDFLoader(file_path)
docs = data_file.load()

In [5]:
# Split the loaded documents into overlapping chunks (chunk_size=800,
# chunk_overlap=100); `chunks` feeds both the vector store and the BM25 retriever.
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_documents(docs)

In [6]:
# Prefer the HF_TOKEN environment variable so the notebook can run unattended
# (Restart & Run All). Otherwise prompt with getpass, which does not echo the
# secret the way input() does, keeping the token out of the cell output.
HF_TOKEN = os.environ.get("HF_TOKEN") or getpass.getpass("Enter your HuggingFace Token")

In [7]:
# Embedding client used to build the vector store, authenticated with HF_TOKEN.
# Model card: https://huggingface.co/BAAI/bge-base-en-v1.5
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HF_TOKEN, model_name="BAAI/bge-base-en-v1.5"
)

In [8]:
# Number of results requested from each retriever; applied to both the vector
# retriever and the BM25 retriever.
k = 5

In [9]:
# Index the chunks in a Chroma vector store using `embeddings`, then expose the
# store as a retriever whose search is configured with k.
vector_store = Chroma.from_documents(chunks, embeddings)
vector_retriever = vector_store.as_retriever(search_kwargs={"k": k})

In [10]:
# Build a BM25 retriever over the same chunks and give it the same k as the
# vector retriever.
# NOTE(review): BM25 is a term-matching (keyword) ranking rather than a semantic
# one, so the name `semantic_retriever` looks misleading - consider renaming it
# together with its use in the ensemble retriever.
semantic_retriever = BM25Retriever.from_documents(chunks)
semantic_retriever.k = k

In [11]:
# Combine the vector retriever and the BM25 retriever into a single ensemble,
# giving each an equal weight of 0.5.
ensemble_retriever = EnsembleRetriever(
    retrievers=[vector_retriever, semantic_retriever], weights=[0.5, 0.5]
)

In [13]:
# Text-generation pipeline for HuggingFaceH4/zephyr-7b-beta, requested with
# bfloat16 weights and device_map="auto". `pipe` is used by the chat demo and by
# generate() further down.
pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]



In [14]:
# Describe the conversation as role-tagged messages and let the tokenizer's chat
# template render them into a single prompt string - see
# https://huggingface.co/docs/transformers/main/en/chat_templating
system_message = {
    "role": "system",
    "content": "You are a friendly chatbot who always responds in the style of a pirate",
}
user_message = {
    "role": "user",
    "content": "How many helicopters can a human eat in one sitting?",
}
messages = [system_message, user_message]

prompt = pipe.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Sampling settings for this one-off generation.
sampling_options = dict(
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
)
outputs = pipe(prompt, **sampling_options)

# The printed text contains the rendered prompt followed by the model's reply.
print(outputs[0]["generated_text"])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<|system|>
You are a friendly chatbot who always responds in the style of a pirate</s>
<|user|>
How many helicopters can a human eat in one sitting?</s>
<|assistant|>
Matey, I'm afraid no human can eat a helicopter, as it's not food. Helicopters are machines used for transportation and other purposes, not a source of nourishment. I'd suggest you stick to eating hearty meals of grog, seafood, and maybe some plundered booty if ya fancy it! Arrrr!


_____________________

In [15]:
import textwrap

def wrap_text(text, width=90):
    """Wrap `text` to at most `width` columns while preserving its newlines.

    The text is split on "\n", each resulting line is wrapped on its own with
    `textwrap.fill`, and the wrapped lines are joined back with "\n".
    """
    return "\n".join(
        textwrap.fill(segment, width=width) for segment in text.split("\n")
    )

In [16]:
def generate(input_text, system_prompt="", max_length=512):
    """Generate a chat reply with the notebook's `pipe` and print it wrapped.

    Args:
        input_text: Content of the user message.
        system_prompt: Content of the system message. When empty, a default
            pirate-style chatbot persona is used instead.
        max_length: Passed to the pipeline as `max_new_tokens`.

    Returns:
        None. The reply is printed after being wrapped with `wrap_text`.
    """
    default_system_prompt = (
        "You are a friendly chatbot who always responds in the style of a pirate"
    )
    messages = [
        {
            "role": "system",
            "content": system_prompt or default_system_prompt,
        },
        {"role": "user", "content": input_text},
    ]

    prompt = pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    outputs = pipe(
        prompt,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        # Ask the pipeline for the completion only, instead of stripping the
        # prompt out of the full text afterwards with str.replace.
        return_full_text=False,
    )
    text = outputs[0]["generated_text"]

    print(wrap_text(text))

In [None]:
# Example call: override the system prompt and give the model a dialogue that
# ends at "Bob:" so the reply continues as Bob.
generate(
    """Alice: I don't know why, I'm struggling to maintain focus while studying. Any suggestion? \n\n Bob:""",
    system_prompt="You are Zephyr, a LLM that generates great conversations. continue as Bob here",
    max_length=512,
)