# Customised data

In [1]:
def make_rag_chain(model, retriever, rag_prompt = None):
    # We will use a prompt template from langchain hub.
    if not rag_prompt:
        rag_prompt = hub.pull("rlm/rag-prompt")

    # And we will use the LangChain RunnablePassthrough to add some custom processing into our chain.
    rag_chain = (
            {
                "context": RunnableLambda(get_question) | retriever | format_docs,
                "question": RunnablePassthrough()
            }
            | rag_prompt
            | model
    )

    return rag_chain


def get_question(input):
    if not input:
        return None
    elif isinstance(input,str):
        return input
    elif isinstance(input,dict) and 'question' in input:
        return input['question']
    elif isinstance(input,BaseMessage):
        return input.content
    else:
        raise Exception("string or dict with 'question' key expected as RAG chain input.")

In [2]:
import pandas as pd
import os
import torch

from langchain_core.documents.base import Document
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain



# Set up the Hugging Face Hub API token
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"

# Define the repository ID for the Gemma 2b model
repo_id = "google/gemma-2b-it"

llm = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=1024, temperature=1.5
)


df = pd.read_excel("bot2/optymize.xlsx")
data_list = df.values.ravel().tolist()
document_list = []

for content in data_list:
    document = Document(content=content, page_content=content)
    document_list.append(document)
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.split_documents(document_list)
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma.from_documents(docs, embedding_function)

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.


Created a chunk of size 635, which is longer than the specified 500
Created a chunk of size 868, which is longer than the specified 500
Created a chunk of size 606, which is longer than the specified 500
Created a chunk of size 1257, which is longer than the specified 500
Created a chunk of size 533, which is longer than the specified 500


Token is valid (permission: write).
Your token has been saved to /home/fish/.cache/huggingface/token
Login successful


  return self.fget.__get__(instance, owner)()


In [9]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain.chains import RetrievalQA

retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 4, 'fetch_k': 20})
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 4, 'fetch_k': 20})
prompt = hub.pull("rlm/rag-prompt")
rag_chain = make_rag_chain(model, retriever, rag_prompt = prompt)

NameError: name 'model' is not defined

In [4]:
output_parser = StrOutputParser()
rag_chain = make_rag_chain(llm, retriever) | output_parser

In [10]:
questions = [
        "what is Optymize?",
        "how can i deposite coin on Optymize?",
        "what is Optymize's twitter?",
        "what is gOPZ tokens?",
        "what is Optymize tokenomics?",
        "what is Optymize details tokenomics?",
        "Optymize Vault Model – How does it works?, detail explaination",
        "Optymize Vault Model – How does it works?"
        ]
for q in questions:
    print("\n--- QUESTION: ", q)
    #print("* Ans:\n", rag_chain.invoke(q))


--- QUESTION:  what is Optymize?

--- QUESTION:  how can i deposite coin on Optymize?

--- QUESTION:  what is Optymize's twitter?

--- QUESTION:  what is gOPZ tokens?

--- QUESTION:  what is Optymize tokenomics?

--- QUESTION:  what is Optymize details tokenomics?

--- QUESTION:  Optymize Vault Model – How does it works?, detail explaination

--- QUESTION:  Optymize Vault Model – How does it works?


In [11]:
rag_chain.invoke(questions[0])

<class 'str'>
what is Optymize?
<class 'str'>
what is Optymize?
<class 'str'>
what is Optymize?
<class 'list'>
[Document(page_content='As Optymize would back the creation of Optymize Vault with its stablecoins, this also means the number of Optymize Vaults that can be created would be restrained by the amount of capital Optymize has and the speed users stake to the Optymize Vaults to release Optymize’s stablecoins for further Optymize Vaults creation. In order to speed up the creation of Optymize Vaults to suit the needs of different users, Optymize allows for staking into Liquidity Pools.'), Document(page_content='Essentially users are lending their tokens to Optymize and in return Optymize pays a return to users in the form of OPZ, which users can either sell right away or stake to earn gOPZ and entitle to shar the protocol’s revenue.'), Document(page_content='We encourage our community members to contribute to improve the Optymize protocol. While we will leverage third-party service

' Optymize allows users to contribute to improving the protocol by staking their tokens in Liquidity Pools. This allows users to earn a return in the form of OPZ tokens, which can be sold, staked, or used to earn gOPZ tokens.'

In [7]:
question = """The Optymize Protocol is a first-of-its-kind multi-blockchain solution that combines both yield enhancement and risk mitigation for crypto assets.
Explain more on the keywords here"""

template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)

llm_chain = LLMChain(prompt=prompt, llm=llm)
llm_chain.invoke(question)

{'question': 'The Optymize Protocol is a first-of-its-kind multi-blockchain solution that combines both yield enhancement and risk mitigation for crypto assets.\nExplain more on the keywords here',
 'text': '\n\n* **Multi-blockchain:** This simply means the protocol operates across multiple blockchain networks. More seamlessly moves money around using a few protocols.\n\n\n* **Yield enhancement:** Imagine increasing rewards earned by holding particular crypto assets in a smart contract. For example, someone could utilize yield farm support in yield-focused DeFi offerings like Defied Protocol or Honeybad to benefit from favorable returns. More rewarding due to increased token supply from other holders participating in the smart contract.\n\n\n* **Risk mitigation:** This ensures stable returns, a layer of resilience to market conditions. What if it ends  to accept token doically (Ifa)? Risk equal zero as assets release( MAT) proxy received two times during banking epoch get their faircha

In [8]:
qs = [{'question': question}]
res = llm_chain.generate(qs)
print(res.generations)

[[Generation(text='\n\n* **Multi-blockchain:** This simply means the protocol operates across multiple blockchain networks. More seamlessly moves money around using a few protocols.\n\n\n* **Yield enhancement:** Imagine increasing rewards earned by holding particular crypto assets in a smart contract. For example, someone could utilize yield farm support in yield-focused DeFi offerings like Defied Protocol or Honeybad to benefit from favorable returns. More rewarding due to increased token supply from other holders participating in the smart contract.\n\n\n* **Risk mitigation:** This ensures stable returns, a layer of resilience to market conditions. What if it ends  to accept token doically (Ifa)? Risk equal zero as assets release( MAT) proxy received two times during banking epoch get their fairchains funciton properly handing to security chart projections. mathematically, approximately everyone on a share gets covered. In less destabilized deposits earned like now liquidity, increas