# Information Extraction using Claude v2 on Amazon Bedrock

### This notebook has been tested on the SageMaker Studio ***Base Python 3.0*** kernel and the SageMaker notebooks ***conda_python3*** kernel.
### To get started with Amazon Bedrock and the associated IAM role needed to run from SageMaker, please follow the instructions in the docs: https://docs.aws.amazon.com/bedrock/latest/userguide/api-setup.html

In [None]:
#%pip install --quiet langchain==0.0.304

In [None]:
import json
import os
import sys
import boto3
import botocore
from langchain.chains import LLMChain
from langchain.llms.bedrock import Bedrock
from langchain.prompts import PromptTemplate
from langchain.embeddings import BedrockEmbeddings

In [None]:
# Bedrock runtime client; later cells reuse it for the LangChain wrapper.
boto3_bedrock = boto3.client(service_name='bedrock-runtime')

# Target model and the MIME types for the request/response payloads.
modelId = 'anthropic.claude-v2'
accept = 'application/json'
contentType = 'application/json'

# JSON request payload: the prompt plus sampling parameters.
body = json.dumps(
    {
        "prompt": "\n\nHuman:explain black holes to 8th graders\n\nAssistant:",
        "max_tokens_to_sample": 300,
        "temperature": 0.1,
        "top_p": 0.9,
    }
)

response = boto3_bedrock.invoke_model(
    body=body,
    modelId=modelId,
    accept=accept,
    contentType=contentType,
)

# The returned 'body' is read in full and decoded as JSON; the generated
# text is looked up under the 'completion' key.
raw_payload = response.get('body').read()
response_body = json.loads(raw_payload)
print(response_body.get('completion'))

In [None]:
#boto3_bedrock.list_foundation_models()

In [None]:
from langchain.chains import create_extraction_chain
from langchain.llms import Bedrock


# Sampling parameters forwarded to the model on every call made through `llm`.
claude_model_kwargs = {
    "temperature": 0.0,
    "top_p": .5,
    "max_tokens_to_sample": 500,
}

# LangChain wrapper around the Bedrock client created earlier in the notebook.
llm = Bedrock(
    model_id="anthropic.claude-v2",
    client=boto3_bedrock,
    region_name="us-east-1",
    model_kwargs=claude_model_kwargs,
)

# Quick smoke test of the wrapper.
smoke_test_answer = llm("what is SageMaker?")
print(smoke_test_answer)

In [None]:
#pip install pydantic --upgrade

# Extraction - Basic approach - via prompt engineering

In [None]:
# Sample text used by the extraction prompts below. Adjacent string literals
# are concatenated into a single string; each piece is one sentence.
email_sample = (
    "Hello Zhang Wei, I am John. "
    "Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0008 has a minimum payment of $24.53 that is due by July 31st. "
    "Based on your autopay settings, we will withdraw your payment on the due date from your bank account number XXXXXX1111 with the routing number XXXXX0000. "
    "Customer feedback for Sunshine Spa, 123 Main St, Anywhere. "
    "Send comments to Alice at alic_aa@anycompany.com. "
    "I enjoyed visiting the spa. "
    "It was very comfortable but it was also very expensive. "
    "The amenities were ok but the service made the spa a great experience."
)

In [None]:
# Email-extraction prompt, assembled line by line. The sample text sits
# between the code fences, and the string ends on the "Assistant:" turn.
prompt = "\n".join([
    "",
    "",
    "Human: Please precisely copy any email addresses from the following text and then write them, one per line. Only write an email address if it's precisely spelled out in the input text. If there are no email addresses in the text, write \"N/A\". Do not say anything else.",
    "",
    "```",
    f"{email_sample}",
    "```",
    "",
    "Assistant:",
])

In [None]:
# `prompt` is already fully rendered (the sample text was interpolated when it
# was built), so it is used as-is. A second str.format() pass would re-parse
# the text and fail on, or silently alter, any literal braces it contains.
query = prompt
result = llm(query)
print(result.strip())

In [None]:
# PII-redaction prompt, assembled line by line. The sample text is wrapped in
# <text> tags and the reply is requested inside <response> tags.
prompt = "\n".join([
    "",
    "",
    "",
    "Human: Here is some text. We want to remove all personally identifying information from this text and replace it with XXX. It's very important that names, phone numbers, and email addresses, gets replaced with XXX.",
    "Here is the text, inside <text></text> XML tags",
    "",
    "<text>",
    f"{email_sample}",
    "</text>",
    "",
    "Please put your sanitized version of the text with PII removed in <response></response> XML tags.",
    "",
    "Assistant:",
])

In [None]:
# `prompt` is already fully rendered (the sample text was interpolated when it
# was built), so it is used as-is. A second str.format() pass would re-parse
# the text and fail on, or silently alter, any literal braces it contains.
query = prompt
result = llm(query)
print(result.strip())

In [None]:
# Named-entity extraction prompt, assembled line by line. The sample text is
# wrapped in <text> tags and the reply is requested as JSON key/value pairs.
prompt = "\n".join([
    "",
    "",
    "",
    "Human: Here is some text. We want to extract all the key name entities from the text. If there are no named entities say N/A. Here is the text, inside <text></text> XML tags.",
    "",
    "<text>",
    f"{email_sample}",
    "</text>",
    "",
    "Please put your response in a Json format as a key value pair.Key is the type of the entity and value represents the entity itself. Say nothing else.",
    "",
    "Assistant:",
])

In [None]:
# `prompt` is already fully rendered (the sample text was interpolated when it
# was built), so it is used as-is. A second str.format() pass would re-parse
# the text and fail on, or silently alter, any literal braces it contains.
query = prompt
result = llm(query)
print(result.strip())

In [None]:
# import pydantic
# pydantic.__version__

# Output parsers in Langchain and Pydantic

In [None]:
from typing import Sequence, Optional
from langchain.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)
#from langchain.llms import OpenAI
from pydantic import BaseModel, Field, validator
from langchain.output_parsers import PydanticOutputParser

# One extracted organisation record; `company` below holds a sequence of these.
class org(BaseModel):
    # Fields that must be present in the parsed output.
    company_name: str
    founder: str
    year_founded: int
    # Explicit None defaults keep these fields optional under both pydantic v1
    # and v2. In v2 a bare Optional[...] annotation without a default is
    # nullable but still required.
    location: Optional[str] = None
    products: Optional[str] = None

# Top-level schema passed to PydanticOutputParser in the script below.
# NOTE(review): the docstring presumably ends up in the schema text that the
# parser's format instructions send to the model - confirm before rewording it.
class company(BaseModel):
    """Identifying information about the company in a text."""
    # Every organisation record extracted from the input text.
    company: Sequence[org]

        
# Parser that checks the model's reply against the `company` schema and
# provides the formatting instructions embedded in the prompt.
parser = PydanticOutputParser(pydantic_object=company)

# Free-text passage from which the structured company details are extracted.
query = """Apple was founded in 1976 by Steve Jobs and his friend Zwynak. They wanted to make computers small so people could put them in their homes or offices. Apple is an American company that manufactures personal computers and accessories. It was the first company, a successful personal computer company, and began expanding and developing it to include manufacturing phones Portable, Bluetooth headsets, and many other products from Apple. This organization is headquartered in California."""

# Prompt template: the format instructions are bound up front, so only
# `query` has to be supplied when the prompt is rendered.
format_instructions = parser.get_format_instructions()
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": format_instructions},
)

# Render the prompt, send it to the Bedrock LLM, and parse the reply. The
# parsed object is the cell's final expression so the notebook displays it.
model = llm
_input = prompt.format_prompt(query=query)
output = model(_input.to_string())
parser.parse(output)