In [1]:
from typing import Dict

from langchain import PromptTemplate
from sagemaker_endpoint import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler
from langchain.chains.question_answering import load_qa_chain
import json

from langchain.docstore.document import Document
example_doc_1 = """
Peter and Elizabeth took a taxi to attend the night party in the city. While in the party, Elizabeth collapsed and was rushed to the hospital.
Since she was diagnosed with a brain injury, the doctor told Peter to stay besides her until she gets well.
Therefore, Peter stayed with her at the hospital for 3 days without leaving.
"""

docs = [
    Document(
        page_content=example_doc_1,
    )
]

query = """How long was Elizabeth hospitalized?
"""

prompt_template = """Use the following pieces of context to answer the question at the end.

{context}

Question: {question}
Answer:"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)


class ContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
        input_str = json.dumps({prompt: prompt, **model_kwargs})
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json[0]["generated_text"]

SagemakerEndpoint.model_kwargs = {'custom_attributes':'accept_eula=true'}

content_handler = ContentHandler()
llm = SagemakerEndpoint(
        endpoint_name="meta-textgeneration-llama-2-7b-2023-08-26-18-08-09-987",
        credentials_profile_name="temmie",
        region_name="us-east-1",
        model_kwargs={"temperature": 1e-10},
        content_handler=content_handler
        #custom_attributes="accept_eula=true"
    )
#llm._endpoint_kwargs = {"custom_attributes":"accept_eula=true"}
chain = load_qa_chain(
    llm=llm,
    prompt=PROMPT,
)

chain({"input_documents": docs, "question": query}, return_only_outputs=True)

ModuleNotFoundError: No module named 'langchain.pydantic_v1'

In [14]:

import boto3
import json


# Specify the AWS configuration profile to use
profile_name = 'temmie'

# Create a session with the specified profile
session = boto3.Session(profile_name=profile_name)

# Create a SageMaker runtime client using the session
sagemaker_runtime = session.client('sagemaker-runtime')
  

# Specify the endpoint name and payload
endpoint_name = 'meta-textgeneration-llama-2-7b-2023-08-26-18-08-09-987'
payload = {
    "inputs": "I believe the meaning of life is",
    "parameters": {
        "max_new_tokens": 64,
        "top_p": 0.9,
        "temperature": 0.6,
        "return_full_text": False,
    }
}
payload_json = json.dumps(payload)

# Specify the custom attributes
custom_attributes = 'accept_eula=true'

# Perform inference using the invoke_endpoint method
response = sagemaker_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='application/json',  # Content type for the payload
    CustomAttributes=custom_attributes,
    Body=payload_json.encode('utf-8')
)

# Parse and print the inference response
inference_result = response['Body'].read()
print("Inference Result:", inference_result.decode('utf-8'))



Inference Result: [
  {
    "generation":" to make a difference.\nWhen I was younger, I dreamed of being a doctor. I thought that I would be able to make a difference in people's lives. I thought that I would be able to help people who were sick and injured.\nNow that I am older, I know that I don"
  }
]


In [5]:
SagemakerEndpoint.__dict__

mappingproxy({'__config__': langchain.llms.sagemaker_endpoint.Config,
              '__fields__': {'cache': ModelField(name='cache', type=Optional[bool], required=False, default=None),
               'verbose': ModelField(name='verbose', type=bool, required=False, default_factory='<function _get_verbosity>'),
               'callbacks': ModelField(name='callbacks', type=Union[List[langchain.callbacks.base.BaseCallbackHandler], BaseCallbackManager, NoneType], required=False, default=None),
               'callback_manager': ModelField(name='callback_manager', type=Optional[BaseCallbackManager], required=False, default=None),
               'tags': ModelField(name='tags', type=Optional[List[str]], required=False, default=None),
               'client': ModelField(name='client', type=Optional[Any], required=False, default=None),
               'endpoint_name': ModelField(name='endpoint_name', type=str, required=False, default=''),
               'region_name': ModelField(name='region_name

In [16]:
import boto3
import json

def perform_sagemaker_inference(profile_name, endpoint_name, payload, custom_attributes):
    # Specify the AWS configuration profile to use
    

    # Create a session with the specified profile
    session = boto3.Session(profile_name=profile_name)

    # Create a SageMaker runtime client using the session
    sagemaker_runtime = session.client('sagemaker-runtime')

    # Convert payload to JSON string
    payload_json = json.dumps(payload)

    # Perform inference using the invoke_endpoint method
    response = sagemaker_runtime.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType='application/json',  # Content type for the payload
        CustomAttributes=custom_attributes,
        Body=payload_json.encode('utf-8')
    )

    # Parse and return the inference response
    inference_result = response['Body'].read()
    return inference_result.decode('utf-8')

# Specify the endpoint name and payload
endpoint_name = 'meta-textgeneration-llama-2-7b-2023-08-26-18-08-09-987'
payload = {
    "inputs": "Who is the president of Nigeia?",
    "parameters": {
        "max_new_tokens": 64,
        "top_p": 0.9,
        "temperature": 0.6,
        "return_full_text": False,
    }
}
custom_attributes = 'accept_eula=true'
profile_name = 'temmie'

# Call the function and print the result
result = perform_sagemaker_inference(profile_name, endpoint_name, payload, custom_attributes)
print("Inference Result:", result)


Inference Result: [
  {
    "generation":"\n2016: Who is the president of Nigeia?\n2016: Who is the president of Nigeia? - The answer is\n2016: Who is the president of Nigeia? - The answer is Goodluck Jonathan\n2016: Who"
  }
]


In [20]:
class CallLama(SagemakerEndpoint):
    def __init__(self, profile_name, endpoint_name, payload, custom_attributes):
        super(CallLama, self).__init__(credentials_profile_name=profile_name)
        self.profile_name = profile_name
        self.endpoint_name = endpoint_name
        self.payload = payload
        self.custom_attributes = custom_attributes

    def _call(self):
        session = boto3.Session(profile_name=self.profile_name)

        # Create a SageMaker runtime client using the session
        sagemaker_runtime = session.client('sagemaker-runtime')

        # Convert payload to JSON string
        payload_json = json.dumps(self.payload)

        # Perform inference using the invoke_endpoint method
        response = sagemaker_runtime.invoke_endpoint(
            EndpointName=self.endpoint_name,
            ContentType='application/json',  # Content type for the payload
            CustomAttributes=self.custom_attributes,
            Body=payload_json.encode('utf-8')
        )

        # Parse and return the inference response
        inference_result = response['Body'].read()
        return inference_result.decode('utf-8')
        

In [21]:
chain = load_qa_chain(
    llm=CallLama(profile_name, endpoint_name, payload, custom_attributes),
    prompt=PROMPT,
)

ValidationError: 2 validation errors for CallLama
content_handler
  field required (type=value_error.missing)
__root__
  Could not load credentials to authenticate with AWS client. Please check that credentials in the specified profile name are valid. (type=value_error)