In [1]:
import os
import json
from typing import Dict
import boto3
from langchain_aws.llms import SagemakerEndpoint
from langchain_aws.llms.sagemaker_endpoint import LLMContentHandler
from langchain_core.prompts import PromptTemplate
# Switch the working directory to the parent folder for the rest of the notebook.
# NOTE(review): the path is relative, so re-running this cell climbs one more level each time.
os.chdir("../")

In [89]:
# SageMaker runtime client used for endpoint invocations; no region or credentials are passed explicitly.
client = boto3.client("sagemaker-runtime")

class Llama38BContentHandler(LLMContentHandler):
    """Serialize prompts for, and parse replies from, the text-generation endpoint.

    Both the request and the response are JSON (see ``content_type`` and
    ``accepts``).
    """

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        """Build the UTF-8 encoded JSON request body for ``prompt``.

        Args:
            prompt: Fully formatted prompt text, sent under ``"inputs"``.
            model_kwargs: Optional generation parameters. Its entries override
                the defaults below key by key; an empty dict or ``None``
                produces exactly the defaults.

        Returns:
            The request payload serialized to JSON and encoded as UTF-8.
        """
        # Defaults applied when the caller does not override a setting.
        parameters = {
            "max_new_tokens": 2046,
            "top_p": 0.9,
            "temperature": 0.6,
        }
        # Honour caller-supplied settings (e.g. a smaller max_new_tokens)
        # instead of silently discarding them.
        parameters.update(model_kwargs or {})
        payload = {"inputs": prompt, "parameters": parameters}
        return json.dumps(payload).encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        """Extract the generated text from the endpoint response.

        Args:
            output: Response body, either raw bytes or a file-like object
                exposing ``read()``.

        Returns:
            The ``"generated_text"`` value with surrounding whitespace removed.
        """
        raw = output.read() if hasattr(output, "read") else output
        response_json = json.loads(raw.decode("utf-8"))
        # Tolerate a list-wrapped result ([{"generated_text": ...}]) by taking
        # its first element.
        if isinstance(response_json, list):
            response_json = response_json[0]
        return response_json["generated_text"].strip()

# Handler that converts prompts and responses to and from the endpoint's JSON format.
content_handler = Llama38BContentHandler()

# LLM wrapper bound to the deployed endpoint, reusing the runtime client above.
llm = SagemakerEndpoint(
    endpoint_name="jumpstart-dft-meta-textgeneration-l-20241014-233047",
    client=client,
    content_handler=content_handler,
)

In [84]:
# Example user question paired with a mocked function-calling result.
example_query = "How many times does the letter 'e' appear in the word 'computer'?"

# Mocked function return as valid JSON. Everything between the quotes is sent
# verbatim to the model, so the literal must not contain `#` comment markers.
# NOTE(review): "computer" contains a single 'e'; the value "3" is kept as authored — confirm it is intentional.
example_user = """{
  "result": "3"
}"""

# System instruction that embeds the mocked function return.
query = f"""You are a bot and you should reply to the user based on the function calling return. The return is {example_user}"""



In [85]:
# PromptTemplate is already imported from langchain_core in the notebook's
# first cell, so it is not imported again from the separate `langchain` package.

# Chat layout with a system turn and a user turn, ending on an open assistant
# header so the model continues with its reply.
prompt_template = (
    "<|begin_of_text|>"
    "<|start_header_id|>system<|end_header_id|>\n\n{instruction}<|eot_id|>"
    "<|start_header_id|>user<|end_header_id|>\n\n{question}<|eot_id|>"
    "<|start_header_id|>assistant<|end_header_id|>\n\n"
)

PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["instruction", "question"],
)

# Smoke test: a canned function-call result plus a matching user question.
example_instruct = "You are a bot and you should reply to the user based on the function calling return. The return is {'weather': 'Sunny', 'location': 'USA'}"
example_prompt = "what is the weather in USA now?"
prompt = PROMPT.format(instruction=example_instruct, question=example_prompt)

# Invoke the model using the `llm` object
response = llm.invoke(prompt)

# Print the response
print("Response from model:")
print(response)

Response from model:
According to the current information, the weather in USA is Sunny.


In [86]:
from src.kevin_function import count_letter

# Load the test cases from the JSON file; the parsed content is indexed as
# `text[0]` further down. The file is decoded explicitly as UTF-8 rather than
# with the platform's default encoding.
with open("ks_instruct/test.txt", "r", encoding="utf-8") as file:
    text = json.load(file)



def call_function(info: dict):
    """Run the function named in a test case and pair its result with the query.

    Args:
        info: Test case with a ``"query"`` entry and an ``"answer"`` entry. The
            answer holds the target function's name under ``"function"`` and
            its keyword arguments under ``"arguments"``.

    Returns:
        ``{"query": <query>, "output": <result of the function call>}``.

    Raises:
        NameError: If no module-level object has the requested name.
        TypeError: If the name resolves to something that is not callable.
    """
    query = info["query"]
    answer = info["answer"]
    function_name = answer["function"]

    # SECURITY: the name is read from external data, so it is resolved by a
    # plain lookup in the module namespace rather than with eval(), which
    # would execute any expression stored in that data.
    namespace = globals()
    if function_name not in namespace:
        raise NameError(f"name {function_name!r} is not defined")
    func_to_call = namespace[function_name]
    if not callable(func_to_call):
        raise TypeError(f"{function_name!r} is not callable")

    arguments = answer["arguments"]
    return {"query": query, "output": func_to_call(**arguments)}

def generate_response(query_output, llm):
    """Ask the model to phrase a reply based on a function-call result.

    Args:
        query_output: Mapping holding the user's ``"query"`` and the called
            function's ``"output"``.
        llm: Object exposing ``invoke(prompt)``.

    Returns:
        A new dict with every entry of ``query_output`` plus the model reply
        under ``"response"``. The argument itself is left unmodified.
    """
    instruct = f"You are a bot and you should reply to the user based on the function calling return. The return is {query_output['output']}"
    user_prompt = query_output["query"]
    prompt = PROMPT.format(instruction=instruct, question=user_prompt)
    # Return a fresh dict so the caller's object is not altered and the call
    # can be repeated from the same input.
    return {**query_output, "response": llm.invoke(prompt)}

# Run the first test case end to end: call its function, then have the model word the answer.
query_output = call_function(text[0])
query_response = generate_response(query_output, llm)


In [87]:
# Display the query, the function output and the model's response.
query_response

{'query': "How many times does the letter 'a' appear in the word 'banana'?",
 'output': '{"result": 3}',
 'response': "The letter 'a' appears 3 times in the word 'banana'."}

In [90]:
from deepeval import evaluate
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase
from deepeval.models.base_model import DeepEvalBaseLLM

class AWSBedrock(DeepEvalBaseLLM):
    """Adapter that lets deepeval use a wrapped model as its evaluation LLM.

    The wrapped object only needs ``invoke(prompt)`` and ``ainvoke(prompt)``
    methods.
    """

    def __init__(
        self,
        model
    ):
        # Keep a reference to the wrapped model; nothing else is initialised here.
        self.model = model

    def load_model(self):
        """Return the wrapped model object."""
        return self.model

    def generate(self, prompt: str) -> str:
        """Run ``prompt`` through the wrapped model synchronously.

        Debug aid: the prompt is echoed to stdout and written to ``log.txt``.
        The file is opened in write mode, so it only ever holds the most
        recent prompt.
        """
        print(prompt)
        # Explicit UTF-8 so prompts containing non-ASCII text are written the
        # same way regardless of the platform's default encoding.
        with open("log.txt", "w", encoding="utf-8") as file:
            file.write(prompt)
        chat_model = self.load_model()
        return chat_model.invoke(prompt)

    async def a_generate(self, prompt: str) -> str:
        """Run ``prompt`` through the wrapped model via its ``ainvoke`` coroutine."""
        chat_model = self.load_model()
        res = await chat_model.ainvoke(prompt)
        return res

    def get_model_name(self):
        """Return the fixed label reported for this model."""
        return "llama-3-1-8b"

# Wrap the `llm` object so deepeval can use it as the judging model.
aws_bedrock = AWSBedrock(model=llm)

# Answer-relevancy metric judged by the wrapped model, run synchronously and
# asked to explain its score.
metric = AnswerRelevancyMetric(
    threshold=0.3, model=aws_bedrock, include_reason=True, async_mode=False
)

# Evaluate the question/answer pair produced earlier in the notebook.
test_case = LLMTestCase(
    input=query_response["query"],
    actual_output=query_response["response"],
)

metric.measure(test_case)
print(metric.score)
print(metric.reason)


ERROR:root:Error raised by inference endpoint: Read timeout on endpoint URL: "https://runtime.sagemaker.us-west-2.amazonaws.com/endpoints/jumpstart-dft-meta-textgeneration-l-20241014-233047/invocations"


ReadTimeoutError: Read timeout on endpoint URL: "https://runtime.sagemaker.us-west-2.amazonaws.com/endpoints/jumpstart-dft-meta-textgeneration-l-20241014-233047/invocations"

In [47]:
# Send a statement-extraction prompt straight to the endpoint to inspect the raw completion.
llm.invoke("""Given the text, breakdown and generate a list of statements presented. Ambiguous statements and single words can also be considered as statements.

Example:
Example text: Shoes. The shoes can be refunded at no extra cost. Thanks for asking the question!

{
    "statements": ["Shoes.", "Shoes can be refunded at no extra cost", "Thanks for asking the question!"]
}
===== END OF EXAMPLE ======
        
**
IMPORTANT: Please make sure to only return in JSON format, with the "statements" key mapping to a list of strings. No words or explanation is needed.
**

Text:
The letter 'a' appears 3 times in the word 'banana'.

JSON:""")

'{\n    "statements": ["The letter \'a\' appears 3 times in the word \'banana\'."]\n} \n===== END OF TEXT'

In [54]:
from sagemaker.predictor import retrieve_default
# Predictor for the endpoint named below.
endpoint_name = "jumpstart-dft-llama-3-1-8b-instruct-20241014-225126"
predictor = retrieve_default(endpoint_name)

# Single user turn followed by an open assistant header, plus sampling settings.
payload = dict(
    inputs=(
        "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
        "what is the recipe of mayonnaise?<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    ),
    parameters=dict(max_new_tokens=256, top_p=0.9, temperature=0.6),
)
response = predictor.predict(payload)

In [58]:
# System instruction plus one user turn, ending on an open assistant header.
payload = dict(
    inputs=(
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        "Always answer with Haiku<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\n\n"
        "I am going to Paris, what should I see?<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    ),
    parameters=dict(max_new_tokens=256, top_p=0.9, temperature=0.6),
)
response = predictor.predict(payload)
print(response)

{'generated_text': "Eiffel Tower high\nRiver Seine's gentle waters\nLove in every stone"}
