In [1]:
from dotenv import load_dotenv
load_dotenv("/home/marshath/play/chainlink/algovate/.env")

import pickle
import logging
import tiktoken
import pandas as pd
import ipywidgets as widgets
from typing import List, Any, Dict
from IPython.display import display, Markdown, clear_output
from pydantic import BaseModel
from langchain.docstore.document import Document
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain, LLMChain
from langchain.retrievers import TFIDFRetriever
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import BaseRetriever
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

Python-dotenv could not parse statement starting at line 4


In [2]:
with open("/home/marshath/play/chainlink/algovate/algovate/data/combined_documents.pkl", "rb") as f:
    documents = pickle.load(f)

In [3]:
class CustomeSplitter:
    def __init__(self, chunk_threshold=6000, chunk_size=6000, chunk_overlap=50):
        self.chunk_threshold = chunk_threshold
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.enc = tiktoken.get_encoding("cl100k_base")
        self.splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    def token_counter(self, document):
        tokens = self.enc.encode(document.page_content)
        return len(tokens)

    def split(self, documents):
        chunked_documents = []
        for i, doc in enumerate(documents):
            try:
                if self.token_counter(doc) > self.chunk_threshold:
                    chunks = self.splitter.split_documents([doc])
                    chunks = [Document(page_content=chunk.page_content, metadata={"source": f"{chunk.metadata['source']} chunk {i}"}) for i, chunk in enumerate(chunks)]
                    chunked_documents.extend(chunks)
                else:
                    chunked_documents.append(doc)
            except Exception as e:
                chunked_documents.append(doc)
                print(f"Error on document {i}")
                print(e)
                print(doc.metadata["source"])

        return chunked_documents

In [4]:
class CustomRetriever(BaseRetriever, BaseModel):
    full_docs: List[Document]
    base_retriever:BaseRetriever = None
    logger: Any = None

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    @classmethod
    def from_documents(
        cls,
        full_docs: List[Document],
        search_kwargs: Dict[str, Any] = {},
        **kwargs: Any,
    ):
        splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=50)
        split_docs = splitter.split_documents(full_docs)
        vector_store = FAISS.from_documents(split_docs, embedding=OpenAIEmbeddings())

        return cls(full_docs=full_docs, base_retriever=vector_store.as_retriever(**search_kwargs), **kwargs)

    def get_relevant_documents(self, query: str) -> List[Document]:  
        results =  self.base_retriever.get_relevant_documents(query=query)
        self.logger.info(f"Retrieved {len(results)} documents")
        doc_ids = [doc.metadata["source"] for doc in results]
        self.logger.info(f"{doc_ids}")
        full_retrieved_docs = [d for d in chunked_documents if d.metadata["source"] in doc_ids]
        return full_retrieved_docs
        
    async def aget_relevant_documents(self, query: str) -> List[Document]:
        raise NotImplementedError

In [5]:
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

In [6]:
splitter = CustomeSplitter()
chunked_documents = splitter.split(documents)

In [7]:
retriever = CustomRetriever.from_documents(
    chunked_documents, 
    search_kwargs={"k": 5}, 
    logger=logger
)

In [35]:
contains_answer_system_template = """
You are an AI assistant. 
You are helping a user find information about Chainlink.
Your first task is to identify if the document given below contains answer to the user's question.
Please ONLY answer yes or no. 
Answer yes ONLY if you are confident that the document contains the answer.
"""

contains_answer_human_template = """
User's question: {question}

Document: {document}
"""
CONTAINS_ANSWER_PROMPT = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(contains_answer_system_template),
        HumanMessagePromptTemplate.from_template(contains_answer_human_template),
    ]
)

final_answer_system_template = """
You are an AI assistant.
You are helping a user find information about Chainlink.
Given the document below, please answer the user's question.
If the document does not contain the answer, please answer "I don't know".
"""

final_answer_human_template = """
User's question: {question}

Document: {document}

Answer:
"""

FINAL_ANSWER_PROMPT = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(final_answer_system_template),
        HumanMessagePromptTemplate.from_template(final_answer_human_template),
    ]
)

verification_system_template = """
You are an AI assistant.
You are helping a user find information about Chainlink.
Given the a question and an answer pair, please verify if the answer is correct.
Please ONLY answer yes or no.
"""
verification_human_template = """
Question: {question}
Answer: {answer}
"""

VERIFICATION_PROMPT = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(verification_system_template),
        HumanMessagePromptTemplate.from_template(verification_human_template),
    ]
)


In [36]:
llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0.)
chain = LLMChain(llm=llm, prompt=CONTAINS_ANSWER_PROMPT)

In [41]:
question = "give me a sample solidity contract to use Chainlink VRF?"
retrieved_docs = retriever.get_relevant_documents(question)

answer_found = ""
for d in retrieved_docs:
    if not answer_found:
        chain.prompt = CONTAINS_ANSWER_PROMPT
        response = chain.predict(question=question, document=d.page_content)
        if not any(response.lower().startswith(x) for x in ["no", "none", "na", "n/a"]):
            chain.prompt = FINAL_ANSWER_PROMPT
            answer = chain.predict(question=question, document=d.page_content)
            chain.prompt = VERIFICATION_PROMPT
            verification = chain.predict(question=question, answer=answer)
            if verification.lower().startswith("yes"):
                answer_found = answer
                break

Markdown(f"**Question:** {question}\n\n**Answer:** {answer}")

2023-06-21 19:46:27,298 - __main__ - INFO - Retrieved 4 documents
2023-06-21 19:46:27,299 - __main__ - INFO - ['https://github.com/oceanByte/chainlink-education/blob/a0b8886bd664423b40c8bd3661fdb7d61e975ea2/src/api/src/shared/course/courses/vrf102/Chapters/Chapter-4/course.md', 'https://stackoverflow.com/questions/71590473/transaction-fails-when-i-call-getrandomnumber-function-from-chainlink-vrf-v1', 'https://docs.chain.link/vrf/v2/subscription/', 'https://stackoverflow.com/questions/72168793/how-to-fund-your-contract-with-links-for-vrf-v2-subscription']


**Question:** give me a sample solidity contract to use Chainlink VRF?

**Answer:** Here is a sample Solidity contract that uses Chainlink VRF:

```solidity
// SPDX-License-Identifier: MIT
pragma solidity ^0.8.7;

import "@chainlink/contracts/src/v0.8/interfaces/LinkTokenInterface.sol";
import "@chainlink/contracts/src/v0.8/interfaces/VRFCoordinatorV2Interface.sol";
import "@chainlink/contracts/src/v0.8/VRFConsumerBaseV2.sol";

contract VRFv2SubscriptionManager is VRFConsumerBaseV2 {
    VRFCoordinatorV2Interface COORDINATOR;
    LinkTokenInterface LINKTOKEN;

    // Rinkeby coordinator. For other networks,
    // see https://docs.chain.link/docs/vrf-contracts/#configurations
    address vrfCoordinator = 0x6168499c0cFfCaCD319c818142124B7A15E857ab;

    // Rinkeby LINK token contract. For other networks, see
    // https://docs.chain.link/docs/vrf-contracts/#configurations
    address link_token_contract = 0x01BE23585060835E02B77ef475b0Cc51aA1e0709;

    // The gas lane to use, which specifies the maximum gas price to bump to.
    // For a list of available gas lanes on each network,
    // see https://docs.chain.link/docs/vrf-contracts/#configurations
    bytes32 keyHash =
        0xd89b2bf150e3b9e13446986e571fb9cab24b13cea0a43ea20a6049a85cc807cc;

    // A reasonable default is 100000, but this value could be different
    // on other networks.
    uint32 callbackGasLimit = 100000;

    // The default is 3, but you can set this higher.
    uint16 requestConfirmations = 3;

    // For this example, retrieve 2 random values in one request.
    // Cannot exceed VRFCoordinatorV2.MAX_NUM_WORDS.
    uint32 numWords = 2;

    // Storage parameters
    uint256[] public s_randomWords;
    uint256 public s_requestId;
    uint64 public s_subscriptionId;
    address public s_owner;

    constructor() VRFConsumerBaseV2(vrfCoordinator) {
        COORDINATOR = VRFCoordinatorV2Interface(vrfCoordinator);
        LINKTOKEN = LinkTokenInterface(link_token_contract);
        s_owner = msg.sender;
        //Create a new subscription when you deploy the contract.
        createNewSubscription();
    }

    // Assumes the subscription is funded sufficiently.
    function requestRandomWords() external onlyOwner {
        // Will revert if subscription is not set and funded.
        s_requestId = COORDINATOR.requestRandomWords(
            keyHash,
            s_subscriptionId,
            requestConfirmations,
            callbackGasLimit,
            numWords
        );
    }

    function fulfillRandomWords(
        uint256, /* requestId */
        uint256[] memory randomWords
    ) internal override {
        s_randomWords = randomWords;
    }

    // Create a new subscription when the contract is initially deployed.
    function createNewSubscription() private onlyOwner {
        // Create a subscription with a new subscription ID.
        address[] memory consumers = new address[](1);
        consumers[0] = address(this);
        s_subscriptionId = COORDINATOR.createSubscription();
        // Add this contract as a consumer of its own subscription.
        COORDINATOR.addConsumer(s_subscriptionId, consumers[0]);
    }

    // Assumes this contract owns link.
    // 1000000000000000000 = 1 LINK
    function topUpSubscription(uint256 amount) external onlyOwner {
        LINKTOKEN.transferAndCall(
            address(COORDINATOR),
            amount,
            abi.encode(s_subscriptionId)
        );
    }

    function addConsumer(address consumerAddress) external onlyOwner {
        // Add a consumer contract to the subscription.
        COORDINATOR.addConsumer(s_subscriptionId, consumerAddress);
    }

    function removeConsumer(address consumerAddress) external onlyOwner {
        // Remove a consumer contract from the subscription.
        COORDINATOR.removeConsumer(s_subscriptionId, consumerAddress);
    }

    function cancelSubscription(address receivingWallet) external onlyOwner {
        // Cancel the subscription and send the remaining LINK to a wallet address.
        COORDINATOR.cancelSubscription(s_subscriptionId, receivingWallet);
        s_subscriptionId = 0;
    }

    // Transfer this contract's funds to an address.
    // 1000000000000000000 = 1 LINK
    function withdraw(uint256 amount, address to) external onlyOwner {
        LINKTOKEN.transfer(to, amount);
    }

    modifier onlyOwner() {
        require(msg.sender == s_owner);
        _;
    }
}
```

This contract is an example of a consumer contract that also owns and manages the subscription for Chainlink VRF v2. It uses the Chainlink VRFConsumerBaseV2 contract as a base contract and implements the necessary functions to interact with the Chainlink VRF system.

You can deploy this contract on the Rinkeby testnet and use it to generate random numbers using Chainlink VRF.

Please note that you will need to import the necessary Chainlink contracts and interfaces in order to compile and deploy this contract successfully.