# Let's chunk Plato and Aristotle

In [None]:
%load_ext autoreload
%autoreload 2

from langchain_aws import BedrockLLM
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from pydantic import BaseModel, Field
from typing import Optional, List, Dict
import os
import json

# Define your structured output model
# class TransferNewsGrader(BaseModel):
#     binary_score: str = Field(description="The article is about Magnus, 'yes' or 'no'")

# Load the AWS credentials from config.json (resolved against the current
# working directory) and export them as environment variables.
# The explicit encoding keeps the read independent of the platform default.
with open('config.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Fail early with a message that names every absent credential, instead of a
# bare KeyError that only mentions the first one and not the file.
_missing_keys = [
    key
    for key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if key not in config
]
if _missing_keys:
    raise KeyError(
        f"config.json is missing required key(s): {', '.join(_missing_keys)}"
    )

# Set the state variables
os.environ["AWS_ACCESS_KEY_ID"] = config["AWS_ACCESS_KEY_ID"]
os.environ["AWS_SECRET_ACCESS_KEY"] = config["AWS_SECRET_ACCESS_KEY"]

# Read the values back so later code can pass them around explicitly.
aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")


class SectionMetadata(BaseModel):
    # Where a chunk sits in the work: book, chapter and section numbers.
    # All three fields are optional and default to None.
    book_number: Optional[int] = Field(
        default=None,
        description="The number of the book in the series.",
    )
    chapter_number: Optional[int] = Field(
        default=None,
        description="The number of the chapter.",
    )
    section_number: Optional[int] = Field(
        default=None,
        description="The number of the section within the chapter.",
    )

    def to_dict(self) -> Dict[str, Optional[int]]:
        """
        Return the three numbering fields as a plain dictionary.
        """
        field_names = ("book_number", "chapter_number", "section_number")
        return {name: getattr(self, name) for name in field_names}

# A piece of extracted text paired with the SectionMetadata describing it.
class DocumentChunk(BaseModel):
    text: str = Field(
        description="The extracted chunk of text.",
    )
    metadata: SectionMetadata = Field(
        description="The metadata related to the text chunk.",
    )

# Demo: build one chunk by hand and show its contents
if __name__ == "__main__":
    # Sample text standing in for a real extracted passage
    text_chunk = "This is an example text for chapter four."

    # The sample is placed at Book 1, Chapter 4, Section 1
    metadata = SectionMetadata(
        book_number=1,
        chapter_number=4,
        section_number=1,
    )
    document_chunk = DocumentChunk(metadata=metadata, text=text_chunk)

    # Show the text first, then its metadata as a dictionary, one per line
    print(
        document_chunk.text,
        document_chunk.metadata.to_dict(),
        sep="\n",
    )



In [None]:

# Bedrock-hosted Llama 3 70B Instruct model, authenticated with the
# credentials read from the environment earlier in the notebook.
bedrock_llm = BedrockLLM(
    model_id="meta.llama3-70b-instruct-v1:0",
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

# Prompt with a single placeholder, {input}, that receives the chunk text
prompt_template = PromptTemplate(
    input_variables=["input"],
    template="Give me the metadata for this chunk: {input}"
)

# Chain that formats the prompt and sends it to the model.
# NOTE: the variable name is a leftover from an earlier template; it is kept
# unchanged so any other cell that refers to it keeps working.
transfer_news_chain = LLMChain(
    llm=bedrock_llm,
    prompt=prompt_template
)

# Placeholder input: replace with the text of the chunk to be described
input_text = "  "

# invoke() supersedes the deprecated run(); LLMChain returns its completion
# under the "text" key of the output dictionary.
result = transfer_news_chain.invoke({"input": input_text})["text"]

# The reply is free-form text, so it is printed as-is. It was previously
# wrapped in MetaDataGrader, a class that is not defined anywhere in this
# notebook, which made the cell fail with NameError.
print(result)