In [1]:
model_api_gateway = (
    "https://r6cl9xl3q9.execute-api.us-east-1.amazonaws.com/default/llama2API"
)

In [3]:
import requests

json_body = {
    "inputs": "I believe the meaning of life is",
    "parameters" : {
        "max_new_tokens" : 256,
        "top_p" : 0.9,
        "temperature" : 0.1
    }
}

response = requests.post(model_api_gateway, json=json_body)

print(response.json())

[{'generation': ' to be happy. I believe that happiness is a choice. I believe that happiness is a state of mind. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe that happiness is a state of being. I believe th

In [4]:
!pip install -U -q langchain

In [5]:
from typing import Any, List, Mapping, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM

class Llama2SageMaker(LLM):
    max_new_tokens: int = 256
    top_p: float = 0.9
    temperature: float = 0.1

    @property
    def _llm_type(self) -> str:
        return "Llama2SageMaker"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
    ) -> str:
        if stop is not None:
            raise ValueError("stop kwargs are not permitted.")

        json_body = {
            "inputs" : prompt,
            "parameters" : {
                "max_new_tokens" : self.max_new_tokens,
                "top_p" : self.top_p,
                "temperature" : self.temperature
            }
        }

        response = requests.post(model_api_gateway, json=json_body)

        return response.json()[0]["generation"]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "max_new_tokens" : self.max_new_tokens,
            "top_p" : self.top_p,
            "temperature" : self.temperature
        }

In [6]:
llm = Llama2SageMaker()

In [7]:
llm("How much wood could a woodchuck chuck if a wood chuck could chuck wood?")

'\nHow much wood could a woodchuck chuck if a wood chuck could chuck wood?\nHow much wood could a woodchuck chuck if a wood chuck could chuck wood? How much wood could a woodchuck chuck if a wood chuck could chuck wood? How much wood could a woodchuck chuck if a wood chuck could chuck wood? How much wood could a woodchuck chuck if a wood chuck could chuck wood? How much wood could a woodchuck chuck if a wood chuck could chuck wood? How much wood could a woodchuck chuck if a wood chuck could chuck wood? How much wood could a woodchuck chuck if a wood chuck could chuck wood? How much wood could a woodchuck chuck if a wood chuck could chuck wood? How much wood could a woodchuck chuck if a wood chuck could chuck wood? How much wood could a woodchuck chuck if a wood chuck could chuck wood? How much wood could a woodchuck chuck if a wood chuck could chuck wood? How much wood could a woodchuck chuck if a wood ch'

In [8]:
!pip install -q -U pymupdf

In [9]:
from langchain.document_loaders import PyMuPDFLoader

loader = PyMuPDFLoader("https://arxiv.org/pdf/2005.11401v4.pdf")

In [10]:
!pip install -q -U openai chromadb tiktoken

In [11]:
import getpass
import os

openai_api_key = getpass.getpass("Please provide your OpenAI API Key: ")
os.environ["OPENAI_API_KEY"] = openai_api_key

In [12]:
from langchain.indexes import VectorstoreIndexCreator

index = VectorstoreIndexCreator().from_loaders([loader])

In [13]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm, retriever=index.vectorstore.as_retriever(), return_source_documents=True
)

In [14]:
question = "What is Retrieval Augmented Generation?"
result = qa_chain({"query": question})
result["result"]

' Retrieval Augmented Generation is a technique that combines a pre-trained language model with a retrieval model. The retrieval model is used to retrieve relevant documents from a large corpus of documents. The retrieved documents are then used to augment the pre-trained language model. This augmentation helps the pre-trained language model to generate more accurate and relevant answers to questions.\n\nRetrieval Augmented Generation is a powerful technique that can be used to improve the performance of pre-trained language models on a wide range of NLP tasks.\n\nRetrieval Augmented Generation is a powerful technique that can be used to improve the performance of pre-trained language models on a wide range of NLP tasks.\n\nRetrieval Augmented Generation is a powerful technique that can be used to improve the performance of pre-trained language models on a wide range of NLP tasks.\n\nRetrieval Augmented Generation is a powerful technique that can be used to improve the performance of p

In [15]:
question = "What is Retrieval Augmented Generation?"
result = qa_chain({"query": question})
result

{'query': 'What is Retrieval Augmented Generation?',
 'result': ' Retrieval Augmented Generation is a technique that combines a pre-trained language model with a retrieval model. The retrieval model is used to retrieve relevant information from a knowledge base, and the language model is used to generate a response based on the retrieved information.\n\nRetrieval Augmented Generation is a technique that combines a pre-trained language model with a retrieval model. The retrieval model is used to retrieve relevant information from a knowledge base, and the language model is used to generate a response based on the retrieved information.\n\nRetrieval Augmented Generation is a technique that combines a pre-trained language model with a retrieval model. The retrieval model is used to retrieve relevant information from a knowledge base, and the language model is used to generate a response based on the retrieved information.\n\nRetrieval Augmented Generation is a technique that combines a pr

In [16]:
question_list = ['What is the title of the paper on Retrieval Augmented Generation?', 'What is the title of the paper on Retrieval Augmented Generation mentioned in the context?', 'What is the title of the paper on Retrieval Augmented Generation mentioned in the context information?', 'What is the title of the paper on Retrieval Augmented Generation?', 'What is the task of the Retrieval Augmented Generation (RAG) model according to the context information?', 'What advantages does RAG have over the DPR QA system in terms of generating answers?', 'What are some potential downsides of using RAG, a language model based on Wikipedia, in various scenarios?', 'What is the file name of the paper on Retrieval Augmented Generation?', 'What is the accuracy of RAG models in generating correct answers even when the correct answer is not in any retrieved document?', 'Question: What is an example of how parametric and non-parametric memories work together in BART?', 'What is the title of the paper on Retrieval Augmented Generation?', 'What is the purpose of using multiple answer annotations in open-domain QA?', 'Question: Which novel by Ernest Hemingway is based on his wartime experiences?', 'What advantage do non-parametric memory models like RAG have over parametric-only models like T5 or BART?', 'What is the title of the paper on Retrieval Augmented Generation?', 'What is the title of the paper on Retrieval Augmented Generation?', 'What training setup details are mentioned in the paper on Retrieval Augmented Generation?', 'What is the title of the paper on Retrieval Augmented Generation mentioned in the context information?', 'What is the title of the paper mentioned in the context information?', 'What are the three sections into which the 14th century work "The Divine Comedy" is divided?', 'What are the two components of RAG models described in the context?', 'What is the title of the paper on Retrieval Augmented Generation?', 'What is the benchmark dataset used for question answering research mentioned in the provided context?', 'What are the two models proposed in the paper on Retrieval Augmented Generation?', 'What is the approach used to train the retriever and generator components in the paper on Retrieval Augmented Generation?', 'What is the best performing "closed-book" open-domain QA model mentioned in the context?', 'What is the ratio of distinct to total tri-grams for the generation tasks in the Jeopardy Question Generation Task?', 'What is the main finding of the paper on Retrieval Augmented Generation?', 'What is the main objective of the work presented in the paper on Retrieval Augmented Generation?', 'What are the limitations of large pre-trained language models in accessing and manipulating knowledge in knowledge-intensive tasks?', 'What is the main objective of RAG in the experiments conducted in the paper on Retrieval Augmented Generation?', 'What is the purpose of the MSMARCO NLG task v2.1?']

In [17]:
question_context_answer = []

for question in question_list:
  result = qa_chain({"query": question})
  answer_package = {
      "question" : question,
      "contexts" : [page.page_content for page in result["source_documents"]],
      "answer" : result["result"]
  }
  question_context_answer.append(answer_package)

  print(answer_package)

{'question': 'What is the title of the paper on Retrieval Augmented Generation?', 'contexts': ['[20] Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat, and Ming-Wei Chang. REALM:\nRetrieval-augmented language model pre-training. ArXiv, abs/2002.08909, 2020. URL https:\n//arxiv.org/abs/2002.08909.\n[21] Tatsunori B Hashimoto,\nKelvin Guu,\nYonatan Oren,\nand Percy S Liang.\nA\nretrieve-and-edit\nframework\nfor\npredicting\nstructured\noutputs.\nIn\nS.\nBengio,\nH. Wallach,\nH. Larochelle,\nK. Grauman,\nN. Cesa-Bianchi,\nand R. Garnett,\ned-\nitors,\nAdvances\nin\nNeural\nInformation\nProcessing\nSystems\n31,\npages\n10052–\n10062.\nCurran\nAssociates,\nInc.,\n2018.\nURL\nhttp://papers.nips.cc/paper/\n8209-a-retrieve-and-edit-framework-for-predicting-structured-outputs.\npdf.\n[22] Nabil Hossain, Marjan Ghazvininejad, and Luke Zettlemoyer. Simple and effective retrieve-\nedit-rerank text generation. In Proceedings of the 58th Annual Meeting of the Association for\nComputational Linguis

In [18]:
!pip install -q -U datasets

In [19]:
import datasets
import pandas as pd

dataset = datasets.Dataset.from_pandas(pd.DataFrame(data=question_context_answer))

  from .autonotebook import tqdm as notebook_tqdm


In [20]:
!pip install -q -U ragas

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
googleapis-common-protos 1.60.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0.dev0,>=3.19.5, but you have protobuf 3.20.0 which is incompatible.[0m[31m
[0m

In [21]:
from ragas import evaluate

results = evaluate(dataset)

Downloading (…)lve/main/config.json: 100%|██████████| 647/647 [00:00<00:00, 1.82MB/s]
Downloading pytorch_model.bin: 100%|██████████| 57.4M/57.4M [00:01<00:00, 52.7MB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 517/517 [00:00<00:00, 6.23MB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 9.62MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 112/112 [00:00<00:00, 1.15MB/s]


evaluating with [answer_relevancy]


100%|██████████| 3/3 [00:55<00:00, 18.47s/it]


evaluating with [context_ relevancy]


100%|██████████| 3/3 [02:34<00:00, 51.58s/it]


evaluating with [faithfulness]


100%|██████████| 3/3 [03:13<00:00, 64.66s/it]


In [22]:
results

{'ragas_score': 0.3316, 'answer_relevancy': 0.8997, 'context_ relevancy': 0.1511, 'faithfulness': 0.7578}