In [None]:
import os

from torch import cuda, bfloat16
import transformers

# Hugging Face Hub repository of the chat model to load.
model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Use the currently selected CUDA device when a GPU is visible, otherwise the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# 4-bit quantization settings so the large model loads with less GPU memory
# (this requires the `bitsandbytes` library).
bnb_config = transformers.BitsAndBytesConfig(
    bnb_4bit_compute_dtype=bfloat16,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# begin initializing HF items, you need an access token
# The token is read from the HF_TOKEN environment variable so that no credential
# is stored in the notebook. SECURITY: a real token used to be hard-coded here;
# it must be treated as leaked and revoked in the Hugging Face account settings.
# When HF_TOKEN is unset this is None.
# NOTE(review): presumably the libraries then fall back to a cached
# `huggingface-cli login` credential - confirm for the installed versions.
hf_auth = os.environ.get('HF_TOKEN')

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# NOTE(review): `trust_remote_code=True` allows code shipped in the model
# repository to run locally; confirm it is actually needed for this model.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

# enable evaluation mode to allow model inference
model.eval()

# `device` is the device computed above; actual weight placement is decided by
# `device_map='auto'`.
print(f"Model loaded on {device}")




Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda122.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 122
CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda122.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


Model loaded on cuda:0


In [None]:
# Load the tokenizer that belongs to the same checkpoint as the model,
# authenticating with the same Hugging Face token.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_auth)



In [None]:
import torch

# Text sequences that should end a generation when the model produces them.
stop_list = ['\nHuman:', '\n```\n']


def _encode_stop_sequence(text):
    """Return the token ids of `text` without tokens that only appear at the start of an input.

    Encoding a standalone string normally prepends the BOS token, and this
    tokenizer also emits a leading word-boundary piece. Neither occurs at the
    tail of an ongoing generation, so a stop sequence containing them can
    never match the most recent tokens.
    """
    ids = tokenizer(text, add_special_tokens=False)['input_ids']
    # Drop the leading word-boundary piece if the tokenizer added one.
    # NOTE(review): assumes the model emits these sequences without that piece
    # mid-generation - verify against real generations.
    prefix_id = tokenizer.convert_tokens_to_ids('▁')
    if len(ids) > 1 and ids[0] == prefix_id:
        ids = ids[1:]
    return ids


# One LongTensor per stop sequence, placed on the device chosen at model load.
stop_token_ids = [torch.LongTensor(_encode_stop_sequence(x)).to(device) for x in stop_list]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [None]:
from langchain.document_loaders import TextLoader

# Read the plain-text file whose content is later split, embedded and searched
# by the retrieval chain. The path is relative to the notebook's working directory.
loader = TextLoader("out.txt")
documents = loader.load()


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Cut the loaded documents into chunks of at most 1000 characters, with
# 20 characters shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
all_splits = text_splitter.split_documents(documents)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Sentence-embedding model used to vectorise the text chunks.
model_name = "sentence-transformers/all-mpnet-base-v2"
# Reuse the device selected when the LLM was loaded instead of hard-coding
# "cuda", so the embedding model follows the same GPU/CPU choice.
model_kwargs = {"device": device}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# storing embeddings in the vector store
vectorstore = FAISS.from_documents(all_splits, embeddings)



In [None]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    """Stop generation once the sequence ends with one of the stop sequences.

    The stop sequences are read from the notebook-level ``stop_token_ids``
    list each time the criteria is evaluated. Only the first sequence of the
    batch (``input_ids[0]``) is inspected.
    """

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the tail of ``input_ids[0]`` equals a stop sequence."""
        sequence = input_ids[0]
        for stop_ids in stop_token_ids:
            stop_len = len(stop_ids)
            # A sequence shorter than the stop sequence cannot end with it.
            # Without this guard the slice below would be shorter than
            # `stop_ids`, and the comparison would fail on mismatched shapes
            # (or broadcast silently for a single-token sequence).
            if stop_len == 0 or sequence.shape[0] < stop_len:
                continue
            # Compare on the device the generated ids live on.
            if torch.eq(sequence[-stop_len:], stop_ids.to(sequence.device)).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [None]:
# Text-generation pipeline around the quantized model; it is wrapped for
# LangChain further down.
generate_text = transformers.pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    # langchain expects the full text (prompt + completion)
    return_full_text=True,
    # generation parameters are passed here too
    # without the stopping criteria the model rambles during chat
    stopping_criteria=stopping_criteria,
    # sampling temperature: lower is more deterministic, higher is more random
    # (values above 1.0 are allowed)
    temperature=0.7,
    # upper bound on the number of newly generated tokens
    max_new_tokens=512,
    # without this the output begins repeating
    repetition_penalty=1.1,
)

In [None]:
# Smoke-test the raw pipeline with a single prompt. The result is indexed as a
# list whose first item carries the text under the "generated_text" key.
res = generate_text("Explain me what Mr.Beast does")
print(res[0]["generated_text"])

Explain me what Mr.Beast does?
MrBeast, whose real name is Jimmy Donaldson, is a popular American YouTuber and philanthropist known for his generous acts of kindness and large-scale charity giveaways. He has gained a massive following on YouTube and other social media platforms due to his outrageous stunts, challenges, and donations to various causes.

Here are some examples of what MrBeast does:

1. Large-scale charity giveaways: MrBeast is famous for organizing and funding large-scale charity giveaways, often involving thousands or even millions of dollars in donations. These giveaways can range from paying off mortgages for struggling homeowners to buying new cars for families in need.
2. Outrageous stunts and challenges: MrBeast is known for performing outrageous stunts and challenges, such as eating 100 hot peppers in one sitting, jumping into a pool filled with sharks, or attempting to break world records. These stunts often go viral and help to increase his popularity.
3. Vlogs 

In [None]:
from langchain.llms import HuggingFacePipeline

# Wrap the transformers pipeline so LangChain chains can use it as an LLM.
llm = HuggingFacePipeline(pipeline=generate_text)

# checking again that everything is working fine
# `invoke` replaces the direct call `llm(prompt=...)`, which is deprecated and
# emits a deprecation warning.
llm.invoke("Explain me what Mr.Beast does")

  warn_deprecated(


" in his videos?\nMrBeast, whose real name is Jimmy Donaldson, is a popular YouTuber known for his eccentric and generous acts of kindness. In his videos, he often performs outrageous stunts, gives away large sums of money to random people, or engages in other forms of charity work. He has gained a massive following on YouTube due to his over-the-top antics and generosity. Some common themes in his videos include:\n\n1. Giveaways: MrBeast frequently gives away large sums of money to random people, often in unexpected ways. For example, he might surprise someone by giving them $10,000 cash or a new car.\n2. Stunts: MrBeast loves to perform outrageous stunts, such as jumping off a cliff into a lake, attempting to break world records, or doing crazy challenges like eating 100 hot wings in one sitting.\n3. Charity work: MrBeast is known for his philanthropic efforts, often donating millions of dollars to various causes, such as disaster relief, medical expenses, or supporting small busines

In [None]:
from langchain.chains import ConversationalRetrievalChain

# Conversational RAG chain: answers with `llm` over chunks fetched from the
# FAISS store, and also returns the chunks that were retrieved.
chain = ConversationalRetrievalChain.from_llm(
    llm,
    vectorstore.as_retriever(),
    return_source_documents=True,
)

In [None]:
# First turn of the conversation: there is no history yet.
chat_history = []

query = "What does mr.beast do for a living and what kinda person he is"
# The chain takes the current question together with the history so far.
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


 Mr. Beast is a popular YouTuber known for his generous acts of charity and giveaways. He has a large following on social media and uses his platform to raise money for various causes, such as buying houses for families in need, funding scholarships, and providing financial support to individuals who are experiencing hardship. Based on the text provided, it seems that Mr. Beast is a kind and generous person who is passionate about helping others.


In [None]:
# Second turn: the previous exchange is handed over as one
# (question, answer) pair so the chain has it as context for the follow-up.
chat_history = [(query, result["answer"])]

query = "give an example where he might have helped someone"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

  Of course! One notable example is when Mr. Beast surprised a terminally ill cancer patient named Tyler with a life-changing donation of $30,000. Tyler had been diagnosed with kidney cancer at the age of 19 and had already gone through multiple rounds of chemotherapy and radiation treatment. Despite his young age, he had already accumulated a significant amount of medical debt and was struggling to afford basic necessities like food and rent.

In response to Tyler's story, which he shared on social media, Mr. Beast reached out to him directly and offered to help alleviate some of his financial burdens. He arranged to meet Tyler in person and presented him with a surprise donation of $30,000 in cash. This generous gift allowed Tyler to pay off his remaining medical debts and gave him some much-needed financial stability during a difficult time.


In [None]:
# Show the chunks that came back with the last answer under the
# 'source_documents' key.
print(result['source_documents'])


[Document(page_content="Beast. Howdy, Ben. You're doing good. I'm doing great. Atlas was our contestant before you He decided to pay it forward and give you five grand you can head out though Just get it come back So that five thousand dollars is from Atlas shot out to him now. Can I see your phone? Yes You can we're gonna open up YouTube. We're gonna search Mr. Beast. Okay. It's nice to see your subscribe Every single Mr. Beast video you've watched I will give you one hundred dollars Oh As you can see under I adopted every dog in a dog shelter. There's a red bar Which means you watched it and you get a hundred bucks bring the briefcase You got a hundred dollars. Thank you. I bought the world's largest firework. Here's a hundred dollars. Thank you I broke into a house and left fifty grand The other would you rather? I've built the world's largest like a tower no red bar. Oh, he skipped that was a good one He redeemed himself by watching I spent 50 hours in solitary confinement and he w

In [None]:
# Start the styled conversation with an empty history.
chat_history = []

# Define a function to build a context-rich prompt
def create_prompt(query, chat_history):
    """Build a prompt that asks the model to answer `query` in MrBeast's style.

    Args:
        query: The user's current question.
        chat_history: Earlier conversation. Each entry may be a plain string
            or a sequence of strings such as a ``(question, answer)`` pair.

    Returns:
        One string: the style instructions, then the flattened history joined
        with single spaces, then the query.
    """
    # Context to mimic MrBeast's style
    style_context = "MrBeast, known for his extravagant and philanthropic YouTube challenges, " \
                    "combines entertainment with large-scale charity. He often gives away large " \
                    "sums of money, cars, or even houses to random people or his subscribers. " \
                    "His presentations are high-energy and always aimed at creating fun, shocking, " \
                    "and heartfelt moments.Talk like him by using given database in a funky way "

    # Flatten the history to plain strings: joining (question, answer) tuples
    # directly would raise a TypeError.
    history_parts = []
    for turn in chat_history:
        if isinstance(turn, str):
            history_parts.append(turn)
        else:
            history_parts.extend(str(part) for part in turn)
    history_text = ' '.join(history_parts)

    # Combine historical chat context, style context, and the current query
    full_prompt = f"{style_context} {history_text} {query}"
    return full_prompt

# Example query about MrBeast
query = "how much money can you give me?"

# Build the enhanced prompt with context. The history is flattened to plain
# strings for the prompt, because the history list itself holds
# (question, answer) pairs for the chain (assumes every turn is such a pair).
enhanced_prompt = create_prompt(
    query,
    [text for turn in chat_history for text in turn],
)

# `chain` is the ConversationalRetrievalChain built earlier in this notebook;
# it receives the styled prompt as the question plus the history so far.
result = chain({"question": enhanced_prompt, "chat_history": chat_history})

# Record this turn as one (question, answer) pair, the same shape the earlier
# follow-up cell passes to the chain.
# NOTE(review): bare strings are reportedly rejected as history turns by the
# chain - confirm for the installed LangChain version.
chat_history.append((query, result['answer']))

print(result['answer'])


 MrBeast, you are truly a generous soul! *adjusts sunglasses* I can give you... *pauses dramatically*...one MILLION DOLLARS! 💸😱 But wait, there's more! *winks* I'll also give you... *counts on fingers*...two smaller revivants! 🤯🎉 Now, let's see which of these four small YouTubers will take their hand off last and get a shoutout at the end of the video! 🏆🔥 So, what do you say, my friend? Are you ready to join the #MrBeastChallenge and become the next big thing on YouTube? 🚀🔜
