In [53]:
import os
import json
import pandas as pd
import boto3

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_text_splitters import MarkdownHeaderTextSplitter

import giskard
from giskard.rag import KnowledgeBase, generate_testset, QATestset, evaluate
from giskard.llm.client.bedrock import ClaudeBedrockClient
from giskard.llm.embeddings import set_default_embedding
from giskard.llm.embeddings.bedrock import BedrockEmbedding


In [54]:
def read_md_file(file_path):
    """
    Reads a Markdown file and returns its content as a string.

    Failures are raised instead of being returned as text: the returned string
    is handed directly to the text splitters, so an error message returned as
    "content" would be chunked and indexed as if it were the document itself.

    :param file_path: Relative or absolute path to the Markdown file.
    :return: Content of the file as a string (decoded as UTF-8).
    :raises FileNotFoundError: If no file exists at ``file_path``.
    :raises OSError: If the file cannot be opened or read.
    :raises UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

In [55]:
def split_markdown_document(file_path, headers_to_split_on, chunk_size, chunk_overlap):
    """
    Load a markdown file and chunk it in two passes: by header, then by size.

    :param file_path: Path of the markdown file to load.
    :param headers_to_split_on: List of (header marker, header name) tuples, e.g. ("#", "Header 1").
    :param chunk_size: Maximum chunk size handed to the character splitter.
    :param chunk_overlap: Overlap between consecutive chunks handed to the character splitter.
    :return: The list of chunks produced by the second (size-based) pass.
    """
    markdown_text = read_md_file(file_path)

    # Pass 1: cut along the requested headers; strip_headers=False keeps the
    # header lines inside the section text.
    header_splitter = MarkdownHeaderTextSplitter(
        headers_to_split_on=headers_to_split_on,
        strip_headers=False,
    )
    header_sections = header_splitter.split_text(markdown_text)

    # Pass 2: cut each header section down to the requested size/overlap.
    size_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return size_splitter.split_documents(header_sections)

In [56]:
def setup_bedrock_clients(profile_name_env, region_env, claude_model, embed_model):
    """
    Set up AWS Bedrock clients for the language model and the embedding model,
    and register them as the Giskard default LLM client and default embedding.

    The profile name and region are looked up with ``os.getenv``. An unset
    variable therefore yields ``None``, which is passed on to boto3 unchanged;
    no ``KeyError`` is raised by this function.
    NOTE(review): with ``None`` boto3 presumably falls back to its own default
    profile/region resolution - confirm this is the intended behaviour.

    :param profile_name_env: The name of the environment variable that contains the AWS profile name.
    :param region_env: The name of the environment variable that contains the AWS region.
    :param claude_model: The name or identifier of the Claude language model to be used.
    :param embed_model: The name or identifier of the embedding model to be used.
    :return: None. The function works purely through side effects (default boto3
        session, Giskard default client and default embedding).
    """
    aws_profile = os.getenv(profile_name_env)
    aws_region = os.getenv(region_env)

    boto3.setup_default_session(profile_name=aws_profile)
    bedrock = boto3.client("bedrock-runtime", region_name=aws_region)

    # The same runtime client backs both the chat model and the embedding model.
    claude_client = ClaudeBedrockClient(bedrock, model=claude_model)
    embed_client = BedrockEmbedding(bedrock, model=embed_model)

    giskard.llm.set_default_client(claude_client)
    set_default_embedding(embed_client)

In [57]:

def answer_question(question, model_id="anthropic.claude-3-sonnet-20240229-v1:0", max_tokens=2000, temperature=0.8, top_p=0.5, client=None):
    """
    Generate an answer for a given question using the Claude model on Bedrock.

    :param question: The question to be answered.
    :param model_id: The model ID to be used for generating the answer. Default is "anthropic.claude-3-sonnet-20240229-v1:0".
    :param max_tokens: The maximum number of tokens to generate. Default is 2000.
    :param temperature: The sampling temperature. Default is 0.8.
    :param top_p: The cumulative probability of token selections. Default is 0.5.
    :param client: Object exposing ``invoke_model(body=..., modelId=...)``.
        When omitted, the notebook-level global ``bedrock`` is used.
    :return: The generated answer, or the string "An error occurred: <details>"
        if the request or the response parsing fails.
    :raises NameError: If ``client`` is omitted and no global ``bedrock`` exists.
    """
    # Resolve the client outside the try block: a missing global is a setup
    # mistake and must fail loudly instead of being returned as the "answer"
    # to every question.
    if client is None:
        client = bedrock

    try:
        # Request body for the Anthropic messages format on Bedrock
        request_body = json.dumps({
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": question
                        }
                    ]
                }
            ]
        })

        # Invoke the model
        response = client.invoke_model(body=request_body, modelId=model_id)

        # The response body is a stream; read it fully before decoding the JSON
        response_body = json.loads(response.get('body').read())

        # The answer is the text of the first content item
        return response_body['content'][0]['text']

    except Exception as e:
        # Best effort per question: report the failure as text so one failed
        # call does not abort a long evaluation run
        return f"An error occurred: {e}"

In [58]:
# Import markdown file and split into chunks.
# The path is assembled with os.path.join so the separator matches the host OS;
# a literal with backslashes only resolves on Windows.
file_path = os.path.join('..', 'data', 'amazon-sagemaker-toolkits.md')

# (markdown header marker, name given to that header level) pairs to split on
headers_to_split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    ("###", "Header 3"),
]

# Size limit and overlap passed to the character-based splitter
chunk_size = 300
chunk_overlap = 30

splits = split_markdown_document(file_path, headers_to_split_on, chunk_size, chunk_overlap)

In [59]:
# One row per chunk, holding the chunk's text in a single "text" column
data = pd.DataFrame(
    data=[chunk.page_content for chunk in splits],
    columns=['text'],
)
data

Unnamed: 0,text
0,# Using the SageMaker Training and Inference T...
1,The [SageMaker Training](https://github.com/aw...
2,"deploy models on SageMaker\. When installed, t..."
3,+ The locations for storing code and other res...
4,+ Other information that a container needs to ...
5,## SageMaker Toolkits Containers Structure<a n...
6,│ │ └── resourceConfig.json\n│ └── data\...
7,"When you run a model *training* job, the SageM..."
8,contains files that specify the channels throu...
9,script should write the model generated by you...


In [60]:
# Bedrock configuration: the first two values are the NAMES of the environment
# variables that hold the AWS profile and region, not the values themselves
profile_name_env = 'profile_name'
region_env = 'bedrock_region'

# Model identifiers for answer generation and for embeddings
claude_model = "anthropic.claude-3-sonnet-20240229-v1:0"
embed_model = "amazon.titan-embed-text-v1"

setup_bedrock_clients(
    profile_name_env=profile_name_env,
    region_env=region_env,
    claude_model=claude_model,
    embed_model=embed_model,
)

In [61]:
# Create the Giskard knowledge base from the chunk DataFrame, using its "text" column
knowledge_base = KnowledgeBase.from_pandas(data, columns=["text"])

In [62]:
# Generate a question/answer testset from the knowledge base.
# num_questions is the number of samples requested for the whole testset.
testset = generate_testset(
    knowledge_base,
    agent_description="An assistant chatbot for Amazon Web Services",
    language='en',  # optional, we'll auto detect if not provided
    num_questions=20,
)

2024-06-05 14:54:40,858 pid:1348 MainThread giskard.rag  INFO     Finding topics in the knowledge base.


  warn(


2024-06-05 14:54:48,654 pid:1348 MainThread giskard.rag  INFO     Found 1 topics in the knowledge base.


Generating questions:  25%|██▌       | 5/20 [00:13<00:46,  3.13s/it]





Generating questions: 100%|██████████| 20/20 [01:53<00:00,  5.67s/it]


In [63]:
# Save the generated testset.
# The target directory is created up front so the save also works on a fresh
# checkout (NOTE(review): save() presumably does not create missing parent
# directories; makedirs with exist_ok=True is harmless either way).
testset_path = "evaluation/testset.jsonl"
os.makedirs(os.path.dirname(testset_path), exist_ok=True)
testset.save(testset_path)

# You can easily load it back
loaded_testset = QATestset.load(testset_path)

In [64]:
# Convert the reloaded testset to a pandas DataFrame and show it for inspection
df = loaded_testset.to_pandas()
df

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42fc41c7-4017-4504-b9e6-9d90ba727763,What is the potential issue with providing two...,Providing two Docker images can increase stora...,Document 14: + Providing two Docker images can...,[],"{'question_type': 'simple', 'seed_document_id'..."
a0a33256-2354-46a2-9648-90f838de6ddf,What is required for a model to make real-time...,When you host a trained model on SageMaker to ...,Document 10: When you *host* a trained model o...,[],"{'question_type': 'simple', 'seed_document_id'..."
fa2dfecc-70d1-4f5a-b18f-3f80560957b2,Where are the model files located in a hosting...,"In a hosting or batch transform container, the...",Document 11: In a hosting or batch transform c...,[],"{'question_type': 'simple', 'seed_document_id'..."
43ea8abd-a470-4f0e-9561-4e51984143d8,"According to the given context, what is a comm...",This is especially common when you use GPUs fo...,Document 16: + You might be able to write an i...,[],"{'question_type': 'simple', 'seed_document_id'..."
ff7ffdd2-a859-43b1-82dd-b5455823c2c8,When running a model training job on Amazon Sa...,When running a model training job on SageMaker...,Document 7: When you run a model *training* jo...,[],"{'question_type': 'complex', 'seed_document_id..."
9af9e48d-0e0a-4f8a-a664-b2aa30b953b1,What potential issue could arise from offering...,Providing two Docker images can increase stora...,Document 14: + Providing two Docker images can...,[],"{'question_type': 'complex', 'seed_document_id..."
92c5e54f-713c-49da-ba4a-bd34d8b7edd6,What is the specific path where model files ar...,"In a hosting or batch transform container, the...",Document 11: In a hosting or batch transform c...,[],"{'question_type': 'complex', 'seed_document_id..."
af1f653e-a460-4654-aa34-a6a34b4ebcd9,Given that SageMaker Training and SageMaker In...,The SageMaker Training and SageMaker Inference...,Document 1: The [SageMaker Training](https://g...,[],"{'question_type': 'complex', 'seed_document_id..."
fcacbdfe-f576-4cfe-8714-15b462833e57,Given that SageMaker containers have specific ...,They provide lists of important environmental ...,Document 19: repositories on GitHub\. They als...,[],"{'question_type': 'distracting element', 'seed..."
ea4f4097-0078-4c05-8e3c-8367e71b8aa8,For an engineering student with limited mobili...,"When SageMaker trains a model, it creates the ...",Document 5: ## SageMaker Toolkits Containers S...,[],"{'question_type': 'distracting element', 'seed..."


In [65]:
# Notebook-level Bedrock runtime client; answer_question() looks up the global
# name `bedrock`, so this must exist before the evaluation starts
bedrock = boto3.client("bedrock-runtime", region_name=os.getenv(region_env))

# Ask answer_question every testset question and score the answers
report = evaluate(
    answer_question,
    testset=testset,
    knowledge_base=knowledge_base,
)

Asking questions to the agent:   0%|          | 0/20 [00:00<?, ?it/s]

Asking questions to the agent: 100%|██████████| 20/20 [04:06<00:00, 12.32s/it]
CorrectnessMetric evaluation: 100%|██████████| 20/20 [01:26<00:00,  4.32s/it]




In [66]:
# Render the evaluation report inline in the notebook
display(report)

In [67]:
# Correctness score broken down by question type
report.correctness_by_question_type()

Unnamed: 0_level_0,correctness
question_type,Unnamed: 1_level_1
complex,1.0
conversational,0.333333
distracting element,0.0
double,0.666667
simple,0.5
situational,0.0
