# Customised data

In [None]:
!pip3 install -q -U bitsandbytes==0.42.0
!pip3 install -q -U peft==0.8.2
!pip3 install -q -U trl==0.7.10
!pip3 install -q -U accelerate==0.27.1
!pip3 install -q -U datasets==2.17.0
!pip3 install -q -U transformers==4.38.1
!pip3 install langchain sentence-transformers chromadb langchainhub

In [None]:
def make_rag_chain(model, retriever, rag_prompt = None):
    # We will use a prompt template from langchain hub.
    if not rag_prompt:
        rag_prompt = hub.pull("rlm/rag-prompt")

    # And we will use the LangChain RunnablePassthrough to add some custom processing into our chain.
    rag_chain = (
            {
                "context": RunnableLambda(get_question) | retriever | format_docs,
                "question": RunnablePassthrough()
            }
            | rag_prompt
            | model
    )

    return rag_chain


def get_question(input):
    if not input:
        return None
    elif isinstance(input,str):
        return input
    elif isinstance(input,dict) and 'question' in input:
        return input['question']
    elif isinstance(input,BaseMessage):
        return input.content
    else:
        raise Exception("string or dict with 'question' key expected as RAG chain input.")
        
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [None]:
!conda install -y pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.8 -c pytorch -c nvidia

import torch

In [None]:
import torch
torch.cuda.is_available()

In [1]:
import pandas as pd
import os
import torch
torch.cuda.is_available()

from huggingface_hub import login
from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer

from langchain_core.documents.base import Document
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain



# Set up the Hugging Face Hub API token
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"
cache_dir = "./models"
os.environ['HF_HOME'] = './cache/'

if torch.cuda.is_available():
    model_id = "meta-llama/Llama-2-7b-chat-hf"
    model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=cache_dir)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.use_default_system_prompt = False

pipe = pipeline(
     "text-generation", 
     model=model, 
     tokenizer=tokenizer,
     return_tensors='pt',
     max_length=512,
     max_new_tokens=512,
     model_kwargs={"torch_dtype": torch.bfloat16},
     device="cuda"
    )

llm = HuggingFacePipeline(
 pipeline=pipe,
 model_kwargs={"temperature": 0.7, "max_length": 512},
)

df = pd.read_excel("bot2/optymize.xlsx")
data_list = df.values.ravel().tolist()
document_list = []

for content in data_list:
    document = Document(content=content, page_content=content)
    document_list.append(document)
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.split_documents(document_list)
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma.from_documents(docs, embedding_function)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 172.00 MiB (GPU 0; 23.63 GiB total capacity; 21.09 GiB already allocated; 160.69 MiB free; 21.10 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain.chains import RetrievalQA

retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 4, 'fetch_k': 20})
prompt = hub.pull("rlm/rag-prompt")
rag_chain = make_rag_chain(model, retriever, rag_prompt = prompt)

In [None]:
questions = [
        "what is Optymize?",
        "how can i deposite coin on Optymize?",
        "what is Optymize's twitter?",
        "what is gOPZ tokens?",
        "what is Optymize tokenomics?",
        "what is Optymize details tokenomics?",
        "Optymize Vault Model – How does it works?, detail explaination",
        "Optymize Vault Model – How does it works?"
        ]
for q in questions:
    print("\n--- QUESTION: ", q)
    print("* Ans:\n", rag_chain.invoke(q))

In [None]:
question = """The Optymize Protocol is a first-of-its-kind multi-blockchain solution that combines both yield enhancement and risk mitigation for crypto assets.
Explain more on the keywords here"""

template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)

llm_chain = LLMChain(prompt=prompt, llm=llm)
llm_chain.invoke(question)

In [None]:
qs = [{'question': question}]
res = llm_chain.generate(qs)
print(res.generations)