In [4]:
# pip install langchain -i https://pypi.douban.com/simple

In [1]:
from sagemaker.huggingface.model import HuggingFacePredictor

# Build a predictor bound to the named SageMaker endpoint.
predictor = HuggingFacePredictor(endpoint_name="CHATGLM2-6B-INT4-20231204-030044")

  from pandas.core.computation.check import NUMEXPR_INSTALLED


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [7]:
# `predictor.endpoint` is the pre-v2 spelling and emits the
# "endpoint attribute has been renamed in sagemaker>=2" warning;
# `endpoint_name` is the current attribute.
ENDPOINT_NAME = predictor.endpoint_name

The endpoint attribute has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


## Official LangChain guide

https://python.langchain.com/docs/integrations/llms/sagemaker

In [5]:
from langchain.docstore.document import Document

# Short passage used as the only context document for the QA chain.
example_doc_1 = """
Peter and Elizabeth took a taxi to attend the night party in the city. While in the party, Elizabeth collapsed and was rushed to the hospital.
Since she was diagnosed with a brain injury, the doctor told Peter to stay besides her until she gets well.
Therefore, Peter stayed with her at the hospital for 3 days without leaving.
"""

# Wrap the passage in a single-element list of Document objects.
docs = [Document(page_content=example_doc_1)]

In [6]:
import json
from typing import Dict

from langchain.chains.question_answering import load_qa_chain
from langchain.llms import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler
from langchain.prompts import PromptTemplate

# Question posed against the example document (note the trailing newline).
query = "How long was Elizabeth hospitalized?\n"

# Prompt layout: instruction, blank line, context, blank line, question, answer cue.
prompt_template = (
    "Use the following pieces of context to answer the question at the end.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Answer:"
)
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [53]:
class ContentHandler(LLMContentHandler):
    """Translate between LangChain prompts and the endpoint's JSON wire format."""

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
        """Serialize the prompt and generation parameters as a UTF-8 JSON body.

        Args:
            prompt: The fully rendered prompt text.
            model_kwargs: Generation parameters forwarded to the model.

        Returns:
            The JSON request body encoded as UTF-8 bytes.
        """
        # The previous payload was `{prompt: prompt, **model_kwargs}`, which used
        # the prompt *text* as the JSON key, so the request had no stable field
        # name for the model input. Use fixed keys instead.
        # NOTE(review): "inputs"/"parameters" mirrors the single-prompt handler
        # defined later in this notebook; confirm against the endpoint's
        # inference script.
        payload = {"inputs": prompt, "parameters": model_kwargs}
        return json.dumps(payload).encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        """Decode the endpoint response and return the parsed JSON value.

        Args:
            output: Response body; it is consumed via ``.read()``, so despite the
                annotation it must be a readable stream-like object.

        Returns:
            Whatever ``json.loads`` yields for the response body.
            NOTE(review): the response schema is not visible here, so the parsed
            value is returned as-is rather than indexing into it
            (e.g. ``response_json[0]["generated_text"]``).
        """
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json


# Instantiate the handler so it can be passed as `content_handler=` to SagemakerEndpoint.
content_handler = ContentHandler()

In [62]:
# Build a question-answering chain backed by the SageMaker endpoint.
# The lowercase `endpoint_name` / `region` names are only assigned in a later
# cell, so referencing them here raised NameError on a fresh kernel. Use values
# that already exist at this point: ENDPOINT_NAME and the region of the
# predictor's own SageMaker session.
chain = load_qa_chain(
    llm=SagemakerEndpoint(
        endpoint_name=ENDPOINT_NAME,
        region_name=predictor.sagemaker_session.boto_region_name,
        model_kwargs={"max_new_tokens": 700, "top_p": 0.9, "temperature": 0.6},
        endpoint_kwargs={"CustomAttributes": "accept_eula=true"},
        content_handler=content_handler,
    ),
    prompt=PROMPT,
)

# Run the chain over the example documents; only the chain outputs are returned.
chain({"input_documents": docs, "question": query}, return_only_outputs=True)

{'output_text': "{'outputs': 'Elizabeth was hospitalized for 3 days.\\n\\n\\n','source': 'Peter and Elizabeth took a taxi to attend the night party in the city. While in the party, Elizabeth collapsed and was rushed to the hospital. Since she was diagnosed with a brain injury, the doctor told Peter to stay Besides her until she gets well. Therefore, Peter stayed with her at the hospital for 3 days without leaving.\\n\\n\\n','suggested_answer': 'Elizabeth was hospitalized for 3 days.\\n\\n\\n'}"}

## Medium

https://medium.com/@ryanlempka/fundamentals-of-combining-langchain-and-sagemaker-with-a-llama-2-example-694924ab0d92

In [44]:
import boto3
# Resolve the AWS region from the default boto3 session.
session = boto3.session.Session()
REGION = session.region_name
# Print the constant just assigned; the lowercase `region` alias is not
# defined until a later cell, so using it here fails on a fresh kernel.
print("The current region is:", REGION)

# `endpoint_name` replaces the deprecated `endpoint` attribute (sagemaker>=2).
ENDPOINT_NAME = predictor.endpoint_name

The endpoint attribute has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


The current region is: cn-northwest-1


In [45]:
from langchain import LLMChain
from langchain import SagemakerEndpoint
from langchain.prompts import PromptTemplate
from langchain.llms.sagemaker_endpoint import LLMContentHandler
import os
import json

# Lowercase aliases of the constants, used by the cells that build the LLM.
region, endpoint_name = REGION, ENDPOINT_NAME

In [48]:
class ContentHandler(LLMContentHandler):
    """JSON content handler that wraps each prompt in a system + user dialog."""

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        """Build the UTF-8 JSON request: one two-turn dialog plus parameters."""
        system_turn = {"role": "system", "content": "You are a kind robot."}
        user_turn = {"role": "user", "content": prompt}
        request = {
            # A list of dialogs; this handler always sends exactly one.
            "inputs": [[system_turn, user_turn]],
            "parameters": {**model_kwargs},
        }
        return json.dumps(request).encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        """Read the response stream and return the parsed JSON value unchanged.

        A narrower extraction (``response_json[0]["generation"]["content"]``)
        was left disabled by the author; the full parsed value is returned.
        """
        raw_text = output.read().decode("utf-8")
        return json.loads(raw_text)

In [49]:
# Pass-through template: the entire prompt is the single `content` variable.
template = "{content}"

# Build the PromptTemplate from the template string.
prompt = PromptTemplate.from_template(template)

In [50]:
content_handler = ContentHandler()

# Endpoint-backed LLM. No `model_kwargs` or `endpoint_kwargs` are supplied here
# (the author left both disabled), so only the name, region and handler are passed.
llm = SagemakerEndpoint(
    endpoint_name=endpoint_name,
    region_name=region,
    content_handler=content_handler,
)

In [51]:
# Combine the pass-through prompt with the endpoint-backed LLM.
llm_chain = LLMChain(prompt=prompt, llm=llm)

In [52]:
# The previous argument `{"..."}` was a set literal, not a dict, so the template's
# `content` variable was filled with the repr of a set instead of the question.
# Pass a mapping keyed by the template variable name.
llm_chain.run({"content": "How can I travel from New York to Los Angeles?"})

"I'm sorry, but the content of the input for the user role is not defined in the provided information. Could you please provide the content for the user role?"

## kyopark2014 / LLM-LangChain

https://github.com/kyopark2014/LLM-LangChain

In [58]:
from langchain import PromptTemplate, SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler

class ContentHandler(LLMContentHandler):
    """JSON content handler using an ``inputs`` / ``parameters`` request body."""

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        """Encode the prompt and its parameters as a UTF-8 JSON request body."""
        request = {"inputs": prompt, "parameters": model_kwargs}
        return json.dumps(request).encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        """Read the response stream and return the parsed JSON value unchanged.

        Indexing into the result (``response_json[0]["generated_text"]``) was
        left disabled by the author; the full parsed value is returned.
        """
        raw_text = output.read().decode("utf-8")
        return json.loads(raw_text)

In [59]:
# Connection settings and generation parameters for this example.
endpoint_name = ENDPOINT_NAME
aws_region = boto3.Session().region_name
parameters = {"max_new_tokens": 300}
content_handler = ContentHandler()

# Endpoint-backed LLM using the handler and parameters above.
llm = SagemakerEndpoint(
    endpoint_name=endpoint_name,
    region_name=aws_region,
    model_kwargs=parameters,
    content_handler=content_handler,
)

In [61]:
# Call the LLM directly with a raw prompt string (no chain) and print the reply.
print(llm("Tell me a joke"))

Sure, here's a joke for you:

Why don't scientists trust atoms?

Because they make up everything!
