# Lambda 1: Extracting Entities for Form Enrollment

In [33]:
import boto3
import json
import os
import sys
import base64
import time
import tzlocal
import re
from botocore.exceptions import ClientError
import logging

bedrock_runtime = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
)

bedrock = boto3.client(
    service_name='bedrock', 
    region_name='us-east-1'
)

##Loading JSON to read data
file_path = "Conversation_2_14_formatted.json"
# Open the JSON file for reading
with open(file_path, 'r') as file:
    # Parse the JSON file
    data = json.load(file)

##Processing data to fetch role and content
def data_preprocessing(transcription):
    convo = ""
    for i in range(len(transcription['transcriptions'])):
        convo = convo + transcription['transcriptions'][i]['ParticipantRole'] + ": " + transcription['transcriptions'][i]['Content']
        convo += "\n"
    return convo

final_transcript = data_preprocessing(data)

##Prompt Retreival
def get_prompt(bucket,file,prompt_category,required_prompt):
    s3 = boto3.client('s3') 
    response = s3.get_object(Bucket=bucket,Key=file)
    content = response['Body'].read().decode('utf-8')
    json_content = json.loads(content)
    prompt = json_content[prompt_category][required_prompt]
    
    return prompt
###Below is the LLM model to extract entities

#Defining function to connect to Bedrock LLM
def load_claude2(bedrock_runtime , prompt , temp , top_p,top_k):
    try:
        body = {
            "prompt": prompt,
            "temperature": temp,
            "top_p": top_p,
            "top_k":top_k,
            "max_tokens_to_sample": 1000
            }

        response = bedrock_runtime.invoke_model(
            modelId="anthropic.claude-v2", body=json.dumps(body), accept="application/json", contentType="application/json"
                 )
        
        response_body = json.loads(response["body"].read())
        completion = response_body.get("completion")

        return completion

    except ClientError:
        logging.error("Couldn't invoke Claude")
        raise

#Defining prompt generator for entity extraction  -- to be removed post prompt library retreival implementation
entities = "name of patient, status of insurance, insurance number, demographic details"
def enrollment_prompt_generator(conversation,entities):
    prompt_claude = """Human: \"""" + conversation + """\" 

    The above is a transcript between a call center agent and an insurance subscriber or patient. Identify and extract key entities such as \"""" + entities + """\" from the transcript. Include only the information present.

    Output the results as a structured JSON containing only the extracted fields.

    Assistant:
    """

    return prompt_claude

bucket = 'ch-agent-assist-prompt-library-bucket'
file_key = 'prompts_library.json' 
# Lambda handler to intgerate with AWS
def lambda_handler1(data):
    final_transcript = data_preprocessing(data)
    prompt_enrollment = get_prompt(bucket,file_key,"entity_extraction","enrollment_form_claude")#enrollment_prompt_generator(final_transcript,entities)
    #print(prompt_enrollment)
    enrollment_data = load_claude2(bedrock_runtime,prompt_enrollment,0,0.9,1)

    pattern = re.compile(r"```json\n([\s\S]*?)\n```", re.MULTILINE)
    match = pattern.search(enrollment_data)
    if match:
        json_content = match.group(1)
        #print(json_content)
    else:
        print("No JSON content found.")
    #enrollment_json_object = json.loads(enrollment_data)
    return json_content#{"statusCode": 200,"body": json.dumps(enrollment_json_object)}

d1 = lambda_handler1(data)
print(d1)

{
  "patient_name": "John Doe",
  "insurance_status": "active",  
  "appointment_date": "2023-02-17" 
}


In [20]:
d1

'{\n  "patient_name": "Jordan Andrew Smith",\n  "patient_gender": "Male",\n  "patient_dob": "March 14, 1989", \n  "patient_phone": "555-123-4567",\n  "patient_email": "jordan.smith@email.com",\n  "insurance_provider": "HealthCare Plus",\n  "insurance_policy_number": "HP123456789",\n  "insurance_group_number": "987654"\n}'

In [30]:
bucket = 'ch-agent-assist-prompt-library-bucket'
file_key = 'prompts_library.json' 
type(file_key)

str

In [31]:
s3 = boto3.client('s3') 
s3.get_object(Bucket=bucket,Key=file_key)

{'ResponseMetadata': {'RequestId': '6PXA0FX7KZ8SSG22',
  'HostId': 'jwR0Dl/AoHNQHgt96dmuupCD7/CQ4jSk2oIGphIXs4bJYTP1FR6+T+h5rniMDvqx4hFmL1cgPPA=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'jwR0Dl/AoHNQHgt96dmuupCD7/CQ4jSk2oIGphIXs4bJYTP1FR6+T+h5rniMDvqx4hFmL1cgPPA=',
   'x-amz-request-id': '6PXA0FX7KZ8SSG22',
   'date': 'Tue, 27 Feb 2024 15:52:08 GMT',
   'last-modified': 'Tue, 27 Feb 2024 15:23:19 GMT',
   'etag': '"152252b67a5a1e96ef9800945a1437d3"',
   'x-amz-server-side-encryption': 'AES256',
   'x-amz-version-id': 'EMKl8xz7z2HAsR8S0EZbSjPX.nHuXtYB',
   'accept-ranges': 'bytes',
   'content-type': 'application/json',
   'server': 'AmazonS3',
   'content-length': '2086'},
  'RetryAttempts': 0},
 'AcceptRanges': 'bytes',
 'LastModified': datetime.datetime(2024, 2, 27, 15, 23, 19, tzinfo=tzutc()),
 'ContentLength': 2086,
 'ETag': '"152252b67a5a1e96ef9800945a1437d3"',
 'VersionId': 'EMKl8xz7z2HAsR8S0EZbSjPX.nHuXtYB',
 'ContentType': 'application/json',
 'ServerSideEncry

# Testing

In [25]:
import logging

In [6]:
data2 = {'stream': 'TRANSCRIPTION',
 'contactId': 'aa621db9-934b-462c-bc7a-85f2e01c4c9f',
 'transcriptions': [{'ParticipantId': 'CUSTOMER',
   'ParticipantRole': 'CUSTOMER',
   'Content': "Hello I'm Mark",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []}]}

d2 = lambda_handler1(data2)
print(d2)

 {
  "name": "Mark"
}


In [7]:
data3 = {'stream': 'TRANSCRIPTION',
 'contactId': 'aa621db9-934b-462c-bc7a-85f2e01c4c9f',
 'transcriptions': [{'ParticipantId': 'CUSTOMER',
   'ParticipantRole': 'CUSTOMER',
   'Content': "Thank you, Jordan. May I ask which gender you identify with? This information helps us to tailor our care to your specific needs, but please know that providing this information is entirely optional",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []}]}

d3 = lambda_handler1(data3)
print(d3)

 {
  "agent_name": "Jordan"
}


In [8]:
data4 = {'stream': 'TRANSCRIPTION',
 'contactId': 'aa621db9-934b-462c-bc7a-85f2e01c4c9f',
 'transcriptions': [{'ParticipantId': 'CUSTOMER',
   'ParticipantRole': 'CUSTOMER',
   'Content': "Hello",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []}]}

d4 = lambda_handler1(data4)
print(d4)

 {
  "name": null,
  "insurance_status": null, 
  "insurance_number": null,
  "demographics": null
}


In [1]:
data2 = {'stream': 'TRANSCRIPTION',
 'contactId': 'aa621db9-934b-462c-bc7a-85f2e01c4c9f',
 'transcriptions': [{'ParticipantId': 'CUSTOMER',
   'ParticipantRole': 'CUSTOMER',
   'Content': "Hello I'm Mark",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []}]}



# Prompt Testing

In [3]:
pip install anthropic

Collecting anthropic
  Downloading anthropic-0.16.0-py3-none-any.whl.metadata (16 kB)
Collecting distro<2,>=1.7.0 (from anthropic)
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting httpx<1,>=0.23.0 (from anthropic)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->anthropic)
  Downloading httpcore-1.0.4-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->anthropic)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading anthropic-0.16.0-py3-none-any.whl (846 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m846.4/846.4 kB[0m [31m39.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading distro-1.9.0-py3-none-any.whl (20 kB)
Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.4-py3-non

In [4]:
import anthropic
from anthropic import HUMAN_PROMPT, AI_PROMPT

In [60]:
print(HUMAN_PROMPT)



Human:


In [32]:
data = {'stream': 'TRANSCRIPTION',
 'contactId': 'aa621db9-934b-462c-bc7a-85f2e01c4c9f',
 'transcriptions': [{'ParticipantId': 'CUSTOMER',
   'ParticipantRole': 'CUSTOMER',
   'Content': "Hello I'm Mark",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []},
   {'ParticipantId': 'AGENT',
   'ParticipantRole': 'AGENT',
   'Content': "Hello,may I know who is your Insurance with?",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []},
    {'ParticipantId': 'CUSTOMER',
   'ParticipantRole': 'CUSTOMER',
   'Content': "Sure,it's with Anthem INC.",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []},
    {'ParticipantId': 'AGENT',
   'ParticipantRole': 'AGENT',
   'Content': "Hello,may I know what is your Insurance Number and your age too?",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []},
   {'ParticipantId': 'CUSTOMER',
   'ParticipantRole': 'CUSTOMER',
   'Content': "Sure,it's AG1248K",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []},
    {'ParticipantId': 'AGENT',
   'ParticipantRole': 'AGENT',
   'Content': "Thanks for contacting us,we'll get back to you shortly.",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []}]}

In [33]:
data

{'stream': 'TRANSCRIPTION',
 'contactId': 'aa621db9-934b-462c-bc7a-85f2e01c4c9f',
 'transcriptions': [{'ParticipantId': 'CUSTOMER',
   'ParticipantRole': 'CUSTOMER',
   'Content': "Hello I'm Mark",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []},
  {'ParticipantId': 'AGENT',
   'ParticipantRole': 'AGENT',
   'Content': 'Hello,may I know who is your Insurance with?',
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []},
  {'ParticipantId': 'CUSTOMER',
   'ParticipantRole': 'CUSTOMER',
   'Content': "Sure,it's with Anthem INC.",
   'BeginOffsetMillis': 1257,
   'EndOffsetMillis': 9697,
   'Id': 'fb29489e-d06b-48b1-8e0b-519f17c4a68e',
   'Sentiment': 'NEUTRAL',
   'IssuesDetected': []},
  {'ParticipantId': 'AGENT',
   'ParticipantRole': 'AGENT',
   'Content': 'Hello,may I kno

In [34]:
print(data_preprocessing(data))

CUSTOMER: Hello I'm Mark
AGENT: Hello,may I know who is your Insurance with?
CUSTOMER: Sure,it's with Anthem INC.
AGENT: Hello,may I know what is your Insurance Number and your age too?
CUSTOMER: Sure,it's AG1248K
AGENT: Thanks for contacting us,we'll get back to you shortly.



In [35]:
example_transcript1 = "CUSTOMER : Hello I'm Mark"
example_response1 = '''{
  "name": "Mark",
}'''

In [51]:
entities = "name of patient, status of insurance, insurance number, demographic details"
def enrollment_prompt_generator(conversation,entities):
    prompt_claude = f"""{HUMAN_PROMPT} Human: {conversation}

    The above is a transcript between a call center agent and an insurance subscriber or patient. Identify and extract key entities such as {entities} from the transcript. Include only the information present.

    Output the results as a structured JSON containing only the extracted fields.
    
    Strictly Follow the rules to provide ouput in JSON format and do not provide the extra sentence 'Here are the key entities extracted from the conversation before the JSON' as part of your response.

    Assistant:{AI_PROMPT}
    """

    return prompt_claude

In [52]:
final_transcript = data_preprocessing(data)
print(final_transcript)
print("\n")
prompt_enrollment = enrollment_prompt_generator(final_transcript,entities)
enrollment_data = load_claude2(bedrock_runtime,prompt_enrollment,0,0.9,1)
print(enrollment_data)

CUSTOMER: Hello I'm Mark
AGENT: Hello,may I know who is your Insurance with?
CUSTOMER: Sure,it's with Anthem INC.
AGENT: Hello,may I know what is your Insurance Number and your age too?
CUSTOMER: Sure,it's AG1248K
AGENT: Thanks for contacting us,we'll get back to you shortly.



 Here are the key entities extracted from the conversation:

```json
{
  "name": "Mark",
  "insurance_company": "Anthem INC.",
  "insurance_number": "AG1248K"
}
```


In [53]:
print(enrollment_data)

 Here are the key entities extracted from the conversation:

```json
{
  "name": "Mark",
  "insurance_company": "Anthem INC.",
  "insurance_number": "AG1248K"
}
```


In [58]:
import re
import json

text = """
"Here are the key entities extracted from the conversation:

```json
{
  "name": "Mark",
  "insurance_company": "Anthem INC.",
  "insurance_number": "AG1248K"
}
``` "
"""

# Use regular expression to extract JSON
json_pattern = r'```json(.*?)```'
json_text = re.search(json_pattern, text, re.DOTALL).group(1)

# Remove extra whitespace
json_text = json_text.strip()

# Load JSON
json_data = json.loads(json_text)

print(json.dumps(json_data, indent=2))

{
  "name": "Mark",
  "insurance_company": "Anthem INC.",
  "insurance_number": "AG1248K"
}


In [59]:
json_pattern = r'```json(.*?)```'
extracted_json = re.search(json_pattern, enrollment_data, re.DOTALL).group(1)
extracted_json = extracted_json.strip()
print(extracted_json)

{
  "name": "Mark",
  "insurance_company": "Anthem INC.",
  "insurance_number": "AG1248K"
}


In [56]:
input1 = enrollment_data
data_cleansing_prompt = f'''Human :{HUMAN_PROMPT} Remove all unnecessary lines like 'Here are the key entities extracted from the 
                            conversation' from the provided input : {input1}
                            
                            Assistant:{AI_PROMPT}
                            '''
cleansed_data = load_claude2(bedrock_runtime,data_cleansing_prompt,0,0.9,1)
print(cleansed_data)

 Here is the cleaned up input:

```json
{
  "name": "Mark",
  "insurance_company": "Anthem INC.",
  "insurance_number": "AG1248K"
}
```


In [None]:
d1 = {"Patient Name" : '',
      "Age": '',
      "Phone Number" : '',
      "Email Address":'',
      "Insurance Provider":'',
      "Insurance Number":'',
      "Status Of Insurance":'',
      "Other demographic Details" : ''
     }

# Final Code

In [74]:
# import boto3
import json
import os
import sys
import base64
import time
import tzlocal
import re
from botocore.exceptions import ClientError
import logging

bedrock_runtime = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
)

bedrock = boto3.client(
    service_name='bedrock', 
    region_name='us-east-1'
)

##Processing data to fetch role and content
def data_preprocessing(transcription):
    convo = ""
    for i in range(len(transcription['transcriptions'])):
        convo = convo + transcription['transcriptions'][i]['ParticipantRole'] + ": " + transcription['transcriptions'][i]['Content']
        convo += "\n"
    return convo

#final_transcript = data_preprocessing(data)

##Prompt Retreival
def get_prompt(bucket,file,prompt_category,required_prompt):
    s3 = boto3.client('s3') 
    response = s3.get_object(Bucket=bucket,Key=file)
    content = response['Body'].read().decode('utf-8')
    json_content = json.loads(content)
    prompt = json_content[prompt_category][required_prompt]
    
    return prompt
###Below is the LLM model to extract entities

#Defining function to connect to Bedrock LLM
def load_claude2(bedrock_runtime , prompt , temp , top_p,top_k):
    try:
        body = {
            "prompt": prompt,
            "temperature": temp,
            "top_p": top_p,
            "top_k":top_k,
            "max_tokens_to_sample": 1000
            }

        response = bedrock_runtime.invoke_model(
            modelId="anthropic.claude-v2", body=json.dumps(body), accept="application/json", contentType="application/json"
                 )
        
        response_body = json.loads(response["body"].read())
        completion = response_body.get("completion")

        return completion

    except ClientError:
        logging.error("Couldn't invoke Claude")
        raise

#Defining prompt generator for entity extraction  -- to be removed post prompt library retreival implementation
entities = "name of patient, status of insurance, insurance number, demographic details"
def enrollment_prompt_generator(conversation,entities):
    prompt_claude = f"""{HUMAN_PROMPT} Human: {conversation}

    The above is a transcript between a call center agent and an insurance subscriber or patient. Identify and extract key entities such as {entities} from the transcript. Include only the information present.

    Output the results as a structured JSON containing only the extracted fields.
    
    Strictly Follow the rules to provide ouput in JSON format and do not provide the extra sentence 'Here are the key entities extracted from the conversation before the JSON' as part of your response.

    Assistant:{AI_PROMPT}
    """

    return prompt_claude

bucket = 'ch-agent-assist-prompt-library-bucket'
file_key = 'prompts_library.json' 
# Lambda handler to intgerate with AWS
def lambda_handler1(data):
    final_transcript = data_preprocessing(data)
    #prompt_enrollment = get_prompt(bucket,file_key,"entity_extraction","enrollment_form_claude")
    prompt_enrollment = enrollment_prompt_generator(final_transcript,entities)
    #print(prompt_enrollment)
    enrollment_data = load_claude2(bedrock_runtime,prompt_enrollment,0,0.9,1)

    pattern = re.compile(r"```json\n([\s\S]*?)\n```", re.MULTILINE)
    json_content = {}
    match = pattern.search(enrollment_data)
    if match:
        json_content = match.group(1)
        #print(json_content)
    else:
        print("No JSON content found.")
    #enrollment_json_object = json.loads(enrollment_data)
    return enrollment_data#json_content#{"statusCode": 200,"body": json.dumps(enrollment_json_object)}

d1 = lambda_handler1(data)
print(d1)

 Here are the key entities extracted from the conversation:

```json
{
  "name": "Mark",
  "insurance_company": "Anthem INC.", 
  "insurance_number": "AG1248K"
}
```


# Dynamic Array Testing

In [61]:
entity_dict = {
  "patient_information": {
    "first_name": "",
    "middle_initial": "",
    "last_name": "",
    "dob": "",
    "gender": "",
    "preferred_language": ""
  },
  "address": {
    "street_address": "",
    "street_name": "",
    "city": "",
    "state": "",
    "zip_code": ""
  },
  "contact_information": {
    "email": "",
    "phone": "",
    "phone_type": ""
  },
  "medical_information": {
    "prior_therapy": "",
    "diagnosis": {
      "diagnosis_name": "",
      "diagnosis_icd_10_code": "",
      "date_of_diagnosis": ""
    }
  },
  "insurance_information": {
    "primary_rx_insurance": {
      "payer_name": "",
      "insurance_id": "",
      "plan_type": "",
      "effective_date": "",
      "expiry_date": ""
    },
    "record_type": "",
    "pharmacy_benefit_information": {
      "rx_bin": "",
      "rx_group": "",
      "rx_pcn": ""
    },
    "card_holder_information": {
      "relationship_with_patient": "",
      "name": "",
      "dob": ""
    }
  },
  "prescriber_information": {
    "prescriber_name": "",
    "specialty": "",
    "address": "",
    "facility_name": ""
  },
  "specialty_pharmacy_information": {
    "sp_name": "",
    "sp_phone": "",
    "sp_fax": ""
  }
}

In [65]:
entity_category = list(entity_dict.keys())
sub_category = [list(entity_dict[val].keys()) for val in entity_category]
print(sub_category)

[['first_name', 'middle_initial', 'last_name', 'dob', 'gender', 'preferred_language'], ['street_address', 'street_name', 'city', 'state', 'zip_code'], ['email', 'phone', 'phone_type'], ['prior_therapy', 'diagnosis'], ['primary_rx_insurance', 'record_type', 'pharmacy_benefit_information', 'card_holder_information'], ['prescriber_name', 'specialty', 'address', 'facility_name'], ['sp_name', 'sp_phone', 'sp_fax']]


In [67]:
for value in entity_category:
    print(value)

patient_information
address
contact_information
medical_information
insurance_information
prescriber_information
specialty_pharmacy_information


In [69]:
for val in sub_category:
        print(val)

['first_name', 'middle_initial', 'last_name', 'dob', 'gender', 'preferred_language']
['street_address', 'street_name', 'city', 'state', 'zip_code']
['email', 'phone', 'phone_type']
['prior_therapy', 'diagnosis']
['primary_rx_insurance', 'record_type', 'pharmacy_benefit_information', 'card_holder_information']
['prescriber_name', 'specialty', 'address', 'facility_name']
['sp_name', 'sp_phone', 'sp_fax']


In [76]:
entity_prompt = "Identify and extract the following sub category entities under the following categories from the transcript :" + "\n"
for key in entity_category:
    entity_prompt += key + " : " +"\n"
    for value in sub_category:
        for val in value:
            entity_prompt += val + "\n"#+ " : " + "\n"
    entity_prompt += "\n" + "\n"
print(entity_prompt)

Identify and extract the following sub category entities under the following categories from the transcript :
patient_information : 
first_name
middle_initial
last_name
dob
gender
preferred_language
street_address
street_name
city
state
zip_code
email
phone
phone_type
prior_therapy
diagnosis
primary_rx_insurance
record_type
pharmacy_benefit_information
card_holder_information
prescriber_name
specialty
address
facility_name
sp_name
sp_phone
sp_fax


address : 
first_name
middle_initial
last_name
dob
gender
preferred_language
street_address
street_name
city
state
zip_code
email
phone
phone_type
prior_therapy
diagnosis
primary_rx_insurance
record_type
pharmacy_benefit_information
card_holder_information
prescriber_name
specialty
address
facility_name
sp_name
sp_phone
sp_fax


contact_information : 
first_name
middle_initial
last_name
dob
gender
preferred_language
street_address
street_name
city
state
zip_code
email
phone
phone_type
prior_therapy
diagnosis
primary_rx_insurance
record_type


In [77]:
def enrollment_prompt_generator(conversation,entity_prompt):
    prompt_claude = f"""{HUMAN_PROMPT} Human: {conversation}

    The above is a transcript between a call center agent and an insurance subscriber or patient.{entity_prompt}.Include only the information present in the provided transcript.

    Output the results as a structured JSON containing only the extracted fields.
    
    Strictly Follow the rules to provide ouput in JSON format and do not provide the extra sentence 'Here are the key entities extracted from the conversation before the JSON' as part of your response.

    Assistant:{AI_PROMPT}
    """

    return prompt_claude

In [78]:
final_transcript = data_preprocessing(data)
print(final_transcript)
print("\n")
prompt_enrollment = enrollment_prompt_generator(final_transcript,entity_prompt)
enrollment_data = load_claude2(bedrock_runtime,prompt_enrollment,0,0.9,1)
print(enrollment_data)

CUSTOMER: Hello I'm Mark
AGENT: Hello,may I know who is your Insurance with?
CUSTOMER: Sure,it's with Anthem INC.
AGENT: Hello,may I know what is your Insurance Number and your age too?
CUSTOMER: Sure,it's AG1248K
AGENT: Thanks for contacting us,we'll get back to you shortly.



 Here are the key entities extracted from the conversation:

{
  "patient_information": {
    
  },
  "address": {
    
  },  
  "contact_information": {
    "first_name": "Mark"
  },
  "medical_information": {
    
  },
  "insurance_information": {
    "primary_rx_insurance": "Anthem INC."
  },
  "prescriber_information": {
    
  },
  "specialty_pharmacy_information": {
    
  }
}


# Removing Categories and Testing

In [81]:
entity_dict_v1 = {
   "first_name": "",
    "middle_initial": "",
    "last_name": "",
    "dob": "",
    "gender": "",
    "preferred_language": "",
    "street_address": "",
    "street_name": "",
    "city": "",
    "state": "",
    "zip_code": "",
    "email": "",
    "phone": "",
    "phone_type": "",
    "prior_therapy": "",
    "diagnosis_name": "",
    "diagnosis_icd_10_code": "",
    "date_of_diagnosis": "",
    "payer_name": "",
    "insurance_id": "",
    "plan_type": "",
    "effective_date": "",
    "expiry_date": "",
    "record_type": "",
    "rx_bin": "",
    "rx_group": "",
    "rx_pcn": "",
    "card_holder_relationship_with_the_patient":"",
    "card_holder_name":"",
    "card_holder_dob":"",
    "prescriber_name": "",
    "specialty": "",
    "address": "",
    "facility_name": "",
    "sp_name": "",
    "sp_phone": "",
    "sp_fax": ""
  }

In [83]:
entity_list = list(entity_dict_v1.keys())
entities = ""
for val in entity_list:
    entities += val + "," 
entities = entities[:len(entities)-1]
print(entities)

first_name,middle_initial,last_name,dob,gender,preferred_language,street_address,street_name,city,state,zip_code,email,phone,phone_type,prior_therapy,diagnosis_name,diagnosis_icd_10_code,date_of_diagnosis,payer_name,insurance_id,plan_type,effective_date,expiry_date,record_type,rx_bin,rx_group,rx_pcn,card_holder_relationship_with_the_patient,card_holder_name,card_holder_dob,prescriber_name,specialty,address,facility_name,sp_name,sp_phone,sp_fax


In [86]:
def enrollment_prompt_generator(conversation,entities):
    prompt_claude = f"""Human: {conversation}

    The above is a transcript between a call center agent and an insurance subscriber or patient. Identify and extract key entities such as {entities} from the transcript. Include only the information present.

    Output the results as a structured JSON containing only the extracted fields.
    
    Strictly Follow the rules to provide ouput in JSON format and do not provide the extra sentence 'Here are the key entities extracted from the conversation before the JSON' as part of your response.

    Assistant:
    """

    return prompt_claude

In [87]:
final_transcript = data_preprocessing(data)
print(final_transcript)
print("\n")
prompt_enrollment = enrollment_prompt_generator(final_transcript,entities)
enrollment_data = load_claude2(bedrock_runtime,prompt_enrollment,0,0.9,1)
print(enrollment_data)

CUSTOMER: Hello I'm Mark
AGENT: Hello,may I know who is your Insurance with?
CUSTOMER: Sure,it's with Anthem INC.
AGENT: Hello,may I know what is your Insurance Number and your age too?
CUSTOMER: Sure,it's AG1248K
AGENT: Thanks for contacting us,we'll get back to you shortly.



 {
  "first_name": "Mark",
  "last_name": null,
  "middle_initial": null,
  "dob": null,
  "gender": null,
  "preferred_language": null,
  "street_address": null,
  "street_name": null,
  "city": null,
  "state": null,
  "zip_code": null,
  "email": null,
  "phone": null,
  "phone_type": null,
  "prior_therapy": null,
  "diagnosis_name": null,
  "diagnosis_icd_10_code": null,
  "date_of_diagnosis": null,
  "payer_name": "Anthem INC",
  "insurance_id": "AG1248K",
  "plan_type": null,
  "effective_date": null,
  "expiry_date": null,
  "record_type": null,
  "rx_bin": null,
  "rx_group": null,
  "rx_pcn": null,
  "card_holder_relationship_with_the_patient": null,
  "card_holder_name": null,
  "card_holder_dob": nu

In [88]:
print(enrollment_data)

 {
  "first_name": "Mark",
  "last_name": null,
  "middle_initial": null,
  "dob": null,
  "gender": null,
  "preferred_language": null,
  "street_address": null,
  "street_name": null,
  "city": null,
  "state": null,
  "zip_code": null,
  "email": null,
  "phone": null,
  "phone_type": null,
  "prior_therapy": null,
  "diagnosis_name": null,
  "diagnosis_icd_10_code": null,
  "date_of_diagnosis": null,
  "payer_name": "Anthem INC",
  "insurance_id": "AG1248K",
  "plan_type": null,
  "effective_date": null,
  "expiry_date": null,
  "record_type": null,
  "rx_bin": null,
  "rx_group": null,
  "rx_pcn": null,
  "card_holder_relationship_with_the_patient": null,
  "card_holder_name": null,
  "card_holder_dob": null,
  "prescriber_name": null,
  "specialty": null,
  "address": null,
  "facility_name": null,
  "sp_name": null,
  "sp_phone": null,
  "sp_fax": null
}


In [89]:
prompt_enrollment = enrollment_prompt_generator(final_transcript,entities)
enrollment_data = load_claude2(bedrock_runtime,prompt_enrollment,0,0.9,1)
print(enrollment_data)

 {
  "first_name": "Mark",
  "last_name": null,
  "middle_initial": null,
  "dob": null,
  "gender": null,
  "preferred_language": null,
  "street_address": null,
  "street_name": null,
  "city": null,
  "state": null,
  "zip_code": null,
  "email": null,
  "phone": null,
  "phone_type": null,
  "prior_therapy": null,
  "diagnosis_name": null,
  "diagnosis_icd_10_code": null,
  "date_of_diagnosis": null,
  "payer_name": "Anthem INC",
  "insurance_id": "AG1248K",
  "plan_type": null,
  "effective_date": null,
  "expiry_date": null,
  "record_type": null,
  "rx_bin": null,
  "rx_group": null,
  "rx_pcn": null,
  "card_holder_relationship_with_the_patient": null,
  "card_holder_name": null,
  "card_holder_dob": null,
  "prescriber_name": null,
  "specialty": null,
  "address": null,
  "facility_name": null,
  "sp_name": null,
  "sp_phone": null,
  "sp_fax": null
}


In [90]:
#json_pattern = r'```json(.*?)```'
#extracted_json = re.search(json_pattern, enrollment_data, re.DOTALL).group(1)
extracted_json = enrollment_data.strip()
print(extracted_json)

{
  "first_name": "Mark",
  "last_name": null,
  "middle_initial": null,
  "dob": null,
  "gender": null,
  "preferred_language": null,
  "street_address": null,
  "street_name": null,
  "city": null,
  "state": null,
  "zip_code": null,
  "email": null,
  "phone": null,
  "phone_type": null,
  "prior_therapy": null,
  "diagnosis_name": null,
  "diagnosis_icd_10_code": null,
  "date_of_diagnosis": null,
  "payer_name": "Anthem INC",
  "insurance_id": "AG1248K",
  "plan_type": null,
  "effective_date": null,
  "expiry_date": null,
  "record_type": null,
  "rx_bin": null,
  "rx_group": null,
  "rx_pcn": null,
  "card_holder_relationship_with_the_patient": null,
  "card_holder_name": null,
  "card_holder_dob": null,
  "prescriber_name": null,
  "specialty": null,
  "address": null,
  "facility_name": null,
  "sp_name": null,
  "sp_phone": null,
  "sp_fax": null
}
