<p style="background-color:orange; padding:15px;"> <b>Write File: </b>Exceptions File</p>


In [1]:
%%writefile exception.py

import sys

def format_error_message(error, error_detail: sys):
    """
    Build a red (ANSI-coloured) error message that names the file and line
    taken from the traceback of the exception currently being handled.

    :param error: Exception object (or any value); ``str(error)`` is embedded in the message
    :param error_detail: Object exposing ``exc_info()`` -- in practice the ``sys`` module itself,
        not the tuple returned by ``sys.exc_info()``
    :return: Formatted error message as a string
    """
    _, _, exc_tb = error_detail.exc_info()

    if exc_tb is None:
        # No exception is being handled (e.g. the helper was called outside an
        # ``except`` block), so there is no traceback to read a location from.
        file_name, line_number = "<unknown>", "?"
    else:
        file_name = exc_tb.tb_frame.f_code.co_filename
        line_number = exc_tb.tb_lineno

    # ANSI escape codes for colors
    color_red = "\033[91m"
    color_end = "\033[0m"  # Reset color to default

    error_message = "{color_red}Error occurred in Python script '{0}', line {1}: {2}{color_end}".format(
        file_name, line_number, str(error), color_red=color_red, color_end=color_end
    )

    return error_message

class CustomException(Exception):
    """Exception whose string form is the location-annotated message from ``format_error_message``."""

    def __init__(self, error_message, error_detail: sys):
        """
        :param error_message: Error message (or exception) to report
        :param error_detail: Object exposing ``exc_info()``; it is forwarded to
            ``format_error_message`` unchanged
        """
        formatted = format_error_message(error_message, error_detail=error_detail)

        # The raw message still goes to the base class so ``args`` carries it.
        super().__init__(error_message)
        self.error_message = formatted

    def __str__(self):
        """
        Return the pre-formatted message instead of the default ``Exception`` text.

        :return: Formatted error message
        """
        return self.error_message


Overwriting exception.py


<p style="background-color:orange; padding:15px;"> <b>Write File: </b>Logger File</p>

In [2]:
%%writefile logger.py

import os
import logging
from datetime import datetime
import webbrowser


# One log file per import of this module, named after the moment of import.
LOG_FILE = datetime.now().strftime('%m_%d_%Y_%H_%M_%S') + ".log"

# Logs live in ./logs relative to the working directory; create it on first use.
LOG_DIR = os.path.join(os.getcwd(), "logs")
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE_PATH = os.path.join(LOG_DIR, LOG_FILE)

# Root logger: INFO and above, written to the file chosen above.
logging.basicConfig(
    level=logging.INFO,
    filename=LOG_FILE_PATH,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="[%(asctime)s] %(levelname)s [%(module)s:%(lineno)d] - %(message)s",
)
def open_log():
    """
    Open the current run's log file (``LOG_FILE_PATH``) with ``webbrowser``.

    ``webbrowser.open`` is specified for URLs; handing it a bare filesystem
    path is documented as neither supported nor portable, so the path is
    converted to a ``file://`` URI first.

    :return: None
    """
    from pathlib import Path  # local import: only this helper needs it

    webbrowser.open(Path(LOG_FILE_PATH).resolve().as_uri())


Overwriting logger.py


<p style="background-color: #153E7E; padding:15px;"> <b></b> Import Modules</p>

In [19]:
import os
import pprint

from tqdm import tqdm
from dotenv import load_dotenv

from langchain.chat_models import ChatOpenAI
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain import PromptTemplate
from langchain_core.prompts import PromptTemplate, load_prompt
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers.json import SimpleJsonOutputParser 

In [4]:
from logger import logging
from exception import CustomException

In [5]:
import warnings
# Installs a blanket "ignore" filter: no Python warning is shown from here on,
# including deprecation notices from the imported libraries.
warnings.filterwarnings("ignore")

# First load_dotenv() call of the notebook. It runs before the cell that
# (re)writes .env further down, so it reads whatever .env already exists.
load_dotenv()

True

In [6]:
%%writefile .env

# For "gemma 7b"
NVIDIA_API_KEY = "<Insert Key>"

# For "meta/llama3-8b-instruct"

OPENAI_API_KEY= "<Insert Key>"
GOOGLE_API_KEY = "<Insert Key>"

Overwriting .env


In [7]:
import os 
from dotenv import load_dotenv

# Re-read .env (it may have just been rewritten by the cell above) and fail
# fast when the NVIDIA key is missing or empty.
load_dotenv()
nvidia_api_key = os.getenv("NVIDIA_API_KEY")
if not nvidia_api_key:
    raise ValueError("NVIDIA_API_KEY not found in environment variables.")

# Backward-compatible alias: this value used to be stored under the misleading
# name `openai_api_key` even though it holds the NVIDIA key.
openai_api_key = nvidia_api_key


In [9]:
# pip install -U langchain-nvidia-ai-endpoints

In [11]:
import pandas as pd

# Load the shuffled call transcripts from the working directory. The displayed
# frame has ID, Transcript, Category and "DX code" columns plus "Unnamed" columns
# (presumably index columns written by an earlier save -- TODO confirm).
transcript_df = pd.read_excel("transcript_shuffled.xlsx")
transcript_df

Unnamed: 0.1,Unnamed: 0,ID,Transcript,Category,DX code
0,245,653,"""This is Nurse Thompson at Lakeside Hospital. ...",Level 2 interfacility transfer,Dx01123
1,469,71,"""My friend has a known allergy to soy and acci...",Anaphylaxis,Dx1010
2,317,213,"""My wife just stopped seizing, but she's confu...",Fitting now,Dx0103
3,20,17,"""A man at the restaurant just fell and isn't r...",Cardiac Arrest,Dx010
4,318,378,I slipped on the ice and my leg buckled. I thi...,Potential Broken Arm/Leg,Dx02
...,...,...,...,...,...
795,633,171,My wife is unconscious! She's been complaining...,Unconsciousness,Dx0107
796,766,759,"Hello, I need an ambulance. My wife, she's bee...",Unknown,UNK
797,205,345,"""It's not a big deal, I'll be fine.""",Refused care,Dx0321
798,305,681,"""This is Nurse Miller from Willow Creek Hospit...",Level 2 interfacility transfer,Dx01123


In [12]:
# from langchain_nvidia_ai_endpoints import ChatNVIDIA

# llm = ChatNVIDIA(model="google/gemma-7b") #"mixtral_8x7b")
# result = llm.invoke("Write a ballad about LangChain.")
# print(result.content)

In [13]:
# File names the two prompt templates are saved under by save_prompt and later
# handed to CallDataProcessor as prompt paths.
CALL_PROMPT_FILE_NAME: str = "CALL_PROMPT.yaml"
CALL_SUMMARIZATION_PROMPT_FILENAME: str = "SUMMARIZATION_PROMPT.yaml"
# Model identifier passed to CallDataProcessor; it also prefixes the result column names.
DEFAULT_MODEL_NAME: str = "gpt-4-turbo"
# Other model names that were tried:
# "google/gemma-7b"
#"gpt-4-turbo"

In [14]:
# Classifier prompt template. `{Transcript}` is its only placeholder.
# The JSON keys requested here ("Issue", "Issue Summary", "Severity",
# "Clinician to Contact") are read by name in CallDataProcessor.process_input,
# so renaming a key in this text requires the same change there.
# The wording is deliberately left untouched: it is runtime input to the model
# and any edit can change the model's answers.
# NOTE(review): "Suicidal" is listed under Low Severity -- confirm with the clinical owner.
# NOTE(review): the "Clinician to Contact" line offers (general, support, technical) but
#   then asks for the literal "Clinician to Contact Patient"; the intended values are unclear.
# NOTE(review): "related to a reporting an issue" in the Note line is a grammar slip.
CALL_PROMPT = """ 

Role: As a Language Model Assistant in an ambulance healthcare setting, your role is to assist with predicting call categories by accurately analyzing patient call transcripts to expedite the triage process.
    Vision:  You are extracting key information based on the Patient transcripts.
    Mission: Provide concise, relevant, and clear summaries of patient issues based on the patient transcripts, ensuring that they are classified appropriately and directed to the correct team for immediate attention. 

    While answering, for Severity Levels, you can choose a severity based on the context below:

    Low Severity:
        - Genito-Urinary conditions
        - Dental problem
        - Failed contraception
        - Palliative Care
        - Suicidal
    Medium Severity:
        - Level 2 interfacility transfer
        - Acute coronary syndrome
        - Refused Ambulance disposition
        - Potential Broken Arm/Leg
        - Covid symptoms with respiratory distress
    High Severity:
        - Major Blood Loss
        - Fitting now
        - Unconsciousness
        - Anaphylaxis
        - Cardiac arrest
        
    Provide the output in JSON format with the following keys:
        - Issue: (string) - Brief description of the patient's concern.
        - Issue Summary: (string, less than 20 words) - Concise overview of the problem.
        - Severity: ( Low , Medium , High ) - Urgency of the patient's situation.
        - Clinician to Contact: (general, support, technical) - If the transcript contains 3 or more “Not Sure” Keywords or “I don’t know” In it then output “Clinician to Contact Patient”
    
    Note: Before answering a question, ensure that the provided transcript is related to a reporting an issue or a similar scenario and not anything else apart from this. If not, then output 'Invalid' for all keys.

    Transcript:
    {Transcript}

"""

# Free-text summary prompt template. `{Transcript}` is its only placeholder.
# The wording is left untouched because it is runtime input to the model.
# NOTE(review): the first sentence caps the summary at under 10 words, while the
#   last sentence asks for symptoms, background, current situation and requests;
#   the two instructions compete -- confirm which one is intended.
SUMMARIZATION_PROMPT = \
"""Please summarize the following text, capturing the key points and main concerns expressed in less than 10 words:

{Transcript}

Make sure to include relevant details, such as symptoms, background information, current situations, and any specific requests or questions posed.
"""

In [15]:
# os.environ['GOOGLE_API_KEY'] = ""
# os.environ['OPENAI_API_KEY'] = ""
# os.environ['NVIDIA_API_KEY'] = ""

In [16]:
def save_prompt(template: str, input_variables: list, file_name: str):
    """
    Wrap a template string in a ``PromptTemplate`` and write it to ``file_name``.

    The prompt is only saved here; it is not loaded back.

    Args:
        template (str): The template string for the prompt.
        input_variables (list): Names of the placeholders used in the template.
        file_name (str): Path of the file the prompt template is saved to.

    Returns:
        int: Always 0.
    """
    PromptTemplate(input_variables=input_variables, template=template).save(file_name)
    print(f"Prompt template saved to {file_name}")

    return 0

In [17]:
# Both templates contain exactly one placeholder, {Transcript}; declare it
# instead of passing an empty list so the saved prompt files do not depend on
# the library inferring the variable from the template text.
save_prompt(CALL_PROMPT, ["Transcript"], CALL_PROMPT_FILE_NAME)
save_prompt(SUMMARIZATION_PROMPT, ["Transcript"], CALL_SUMMARIZATION_PROMPT_FILENAME)

Prompt template saved to CALL_PROMPT.yaml
Prompt template saved to SUMMARIZATION_PROMPT.yaml


0

In [18]:
# Module-level accumulators filled by CallDataProcessor.process_input:
# structured result rows, raw summary strings, raw classifier dicts.
PROCESSED_DATA, RAW_SUMMARY_OUTPUT, RAW_CLASSIFIER_OUTPUT = [], [], []

In [20]:

class CallDataProcessor:
    """
    Run call transcripts through two LLM chains: a JSON classifier and a
    free-text summariser.

    Each successful call appends to the module-level lists ``PROCESSED_DATA``,
    ``RAW_SUMMARY_OUTPUT`` and ``RAW_CLASSIFIER_OUTPUT``.

    The chat clients and chains are built once, on the first ``process_input``
    call, and reused afterwards. Changing ``model_name`` after that first call
    does not rebuild them; create a new processor instead.
    """

    def __init__(self, model_name, summarization_prompt_path, classifier_prompt_path):
        """
        :param model_name: Model identifier passed to the chat client; also used
            as the prefix of the result keys
        :param summarization_prompt_path: Path of the saved summarisation prompt
        :param classifier_prompt_path: Path of the saved classifier prompt
        """
        self.summarization_prompt = load_prompt(summarization_prompt_path)
        self.classifier_prompt = load_prompt(classifier_prompt_path)
        self.model_name = model_name

        # Built lazily so that constructing a processor stays free of client
        # set-up, and any set-up failure is reported by process_input.
        self._classifier_chain = None
        self._summary_chain = None

    def _build_chains(self):
        """Create the chat clients and the classifier / summary chains once."""
        llm = ChatOpenAI(model=self.model_name, temperature=0)
        llm_json = ChatOpenAI(model=self.model_name, temperature=0, model_kwargs={'response_format': {"type": "json_object"}})

        # llm = ChatNVIDIA(model=self.model_name, temperature=0) #Uncomment ChatNVIDIA and Comment ChatOPENAI, if using with NVDIA API
        # llm_json = ChatNVIDIA(model=self.model_name, temperature=0, model_kwargs={'response_format': {"type": "json_object"}})

        self._classifier_chain = self.classifier_prompt | llm_json | SimpleJsonOutputParser()
        self._summary_chain = self.summarization_prompt | llm

    def process_input(self, input_data):
        """
        Classify and summarise one transcript.

        :param input_data: Transcript text
        :return: Dict with the transcript, the summary and the four classifier
            fields (keys prefixed with the model name), or ``None`` when the
            transcript is empty / not text or when processing fails
        """
        # Missing spreadsheet cells arrive as NaN (a float) and blank cells as
        # empty strings; neither is worth two model calls.
        if not isinstance(input_data, str) or not input_data.strip():
            logging.warning("Skipping transcript with no text: %r", input_data)
            print("Skipping transcript with no text.")
            return None

        try:
            if self._classifier_chain is None or self._summary_chain is None:
                self._build_chains()

            # Name the template variable explicitly rather than relying on the
            # prompt coercing a bare string into its single input variable.
            payload = {"Transcript": input_data}

            classifier_result = self._classifier_chain.invoke(payload)
            summary_result = self._summary_chain.invoke(payload).content

            logging.info(
                "Transcript: %s | %s - Summary Output: %s | %s - Classifier Output: %s",
                input_data, self.model_name, summary_result, self.model_name, classifier_result,
            )

            # Structure the results. A missing key raises KeyError, which is
            # handled below so that an incomplete answer is never recorded.
            result = {
                "Transcript": input_data,
                f"{self.model_name} - Summary": summary_result,
                f"{self.model_name} - Issue": classifier_result['Issue'],
                f"{self.model_name} - Issue Summary": classifier_result['Issue Summary'],
                f"{self.model_name} - Severity": classifier_result['Severity'],
                f"{self.model_name} - Clinician to Contact": classifier_result['Clinician to Contact']
            }

            RAW_SUMMARY_OUTPUT.append(summary_result)
            RAW_CLASSIFIER_OUTPUT.append(classifier_result)
            PROCESSED_DATA.append(result)

            return result

        except Exception as e:
            # Best effort: one bad transcript must not stop the batch, but the
            # failure is now written to the log with its traceback as well.
            logging.exception("Failed to process transcript: %r", input_data)
            print(f"An error occurred: {e}")
            return None

In [21]:
# Prompt files handed to CallDataProcessor: summariser first, classifier second.
summarization_prompt_path, classifier_prompt_path = (
    CALL_SUMMARIZATION_PROMPT_FILENAME,
    CALL_PROMPT_FILE_NAME,
)

# processor = CallDataProcessor(DEFAULT_MODEL_NAME, summarization_prompt_path, classifier_prompt_path)

In [22]:
# input = "Please hurry, she's unconscious on the floor! I tried checking her pulse, but there's nothing. We've already called 911, but she needs help now! I think she might have hit her head when she fell. Is there anything we can do while we wait for the ambulance?"

In [23]:
# llm_response = processor.process_input(input)
# llm_response

In [None]:
# Keep the model name consistent with the client CallDataProcessor builds
# (ChatOpenAI) and with the recorded gpt-4-turbo results below. The
# NVIDIA-hosted names listed here only work after switching the processor to
# ChatNVIDIA, so none of them is assigned.
DEFAULT_MODEL_NAME: str = "gpt-4-turbo"
# "meta/llama3-8b-instruct"
# "meta/llama3-70b"
# "meta/llama3-70b-instruct" 
# "meta/llama3-70b"
#"mixtral_8x7b"

# "google/gemma-7b"

In [25]:
# Number of transcripts to process; set to None to use the whole dataset.
NUM_RECORDS = 200

# .iloc[:None] selects every row, so this one line covers both the capped and
# the full run (previously, commenting out the slice left `df` undefined).
# reset_index() returns a new frame rather than mutating a slice of
# transcript_df in place, and still keeps the old labels in an "index" column.
df = transcript_df.iloc[:NUM_RECORDS].reset_index()
df

Unnamed: 0.1,index,Unnamed: 0,ID,Transcript,Category,DX code
0,0,245,653,"""This is Nurse Thompson at Lakeside Hospital. ...",Level 2 interfacility transfer,Dx01123
1,1,469,71,"""My friend has a known allergy to soy and acci...",Anaphylaxis,Dx1010
2,2,317,213,"""My wife just stopped seizing, but she's confu...",Fitting now,Dx0103
3,3,20,17,"""A man at the restaurant just fell and isn't r...",Cardiac Arrest,Dx010
4,4,318,378,I slipped on the ice and my leg buckled. I thi...,Potential Broken Arm/Leg,Dx02
...,...,...,...,...,...,...
195,195,513,135,"""My friend's dental implant fell out and is ca...",Dental Problem,Dx019
196,196,493,151,My husband just collapsed! He's not breathing ...,Unconsciousness,Dx0107
197,197,38,137,"""My partner's dental filling came out and is c...",Dental Problem,Dx019
198,198,255,448,"""My girlfriend's contraceptive shot wore off, ...",Failed Contraception,Dx329


In [27]:
# Start every processing run from empty accumulators. All three lists are
# reset together: clearing only PROCESSED_DATA left the raw summary and
# classifier lists carrying entries from earlier runs, out of step with it.
PROCESSED_DATA = []
RAW_SUMMARY_OUTPUT = []
RAW_CLASSIFIER_OUTPUT = []

[]

In [28]:
# One processor for the whole batch; results accumulate in the module-level lists.
processor = CallDataProcessor(
    model_name=DEFAULT_MODEL_NAME,
    summarization_prompt_path=summarization_prompt_path,
    classifier_prompt_path=classifier_prompt_path,
)

# Return values are ignored here: process_input records each result itself.
progress = tqdm(df.iterrows(), total=len(df), desc="Processing")
for _, row in progress:
    processor.process_input(row['Transcript'])

Processing: 100%|██████████| 200/200 [12:39<00:00,  3.80s/it]


In [29]:
# Echo the structured result dicts appended by CallDataProcessor.process_input (the whole list is displayed).
PROCESSED_DATA

[{'Transcript': '"This is Nurse Thompson at Lakeside Hospital. We have a patient, Mr. David Brown, in room 205, who requires an urgent transfer. He\'s exhibiting signs of a stroke."',
  'gpt-4-turbo - Summary': 'Urgent transfer needed for stroke patient in room 205.',
  'gpt-4-turbo - Issue': 'Urgent patient transfer required for potential stroke',
  'gpt-4-turbo - Issue Summary': 'Patient showing stroke symptoms, needs urgent transfer.',
  'gpt-4-turbo - Severity': 'High',
  'gpt-4-turbo - Clinician to Contact': 'general'},
 {'Transcript': '"My friend has a known allergy to soy and accidentally ingested some. Please send help fast!"',
  'gpt-4-turbo - Summary': 'Friend ingested soy, has allergy, needs immediate help.',
  'gpt-4-turbo - Issue': 'Accidental ingestion of allergen (soy)',
  'gpt-4-turbo - Issue Summary': 'Friend ingested soy, has known allergy',
  'gpt-4-turbo - Severity': 'High',
  'gpt-4-turbo - Clinician to Contact': 'general'},
 {'Transcript': '"My wife just stopped s

In [30]:
# Echo the raw summary strings appended by CallDataProcessor.process_input (the whole list is displayed).
RAW_SUMMARY_OUTPUT

['Urgent transfer needed for stroke patient in room 205.',
 'Friend ingested soy, has allergy, needs immediate help.',
 'Post-seizure confusion; memory loss; seeks normalcy confirmation.',
 'Man fell, unresponsive, no pulse at restaurant; needs urgent help.',
 'Slipped on ice, leg injury, possible break.',
 'Daughter grunts during seizure; parent questions normalcy.',
 'Person fears hospital solitude; seeks companionship.',
 'Accident victim bleeding, urgently needs help.',
 'Daughter with asthma history has chest pain, breathing issues.',
 'Friend had seizure, memory loss, disoriented.',
 'Sister unconscious, previously hallucinating.',
 "Sister ate sesame, can't breathe, needs ambulance.",
 'Elderly parent with severe flank pain, needs ambulance now.',
 'Parent lacks childcare support.',
 'Child with tooth abscess in pain; needs ambulance.',
 'Wife fell, not breathing, no pulse; needs urgent ambulance.',
 'Requesting transfer for severely dehydrated patient with electrolyte imbalance

In [31]:
# Echo the parsed classifier dicts appended by CallDataProcessor.process_input (the whole list is displayed).
RAW_CLASSIFIER_OUTPUT

[{'Issue': 'Urgent patient transfer required for potential stroke',
  'Issue Summary': 'Patient showing stroke symptoms, needs urgent transfer.',
  'Severity': 'High',
  'Clinician to Contact': 'general'},
 {'Issue': 'Accidental ingestion of allergen (soy)',
  'Issue Summary': 'Friend ingested soy, has known allergy',
  'Severity': 'High',
  'Clinician to Contact': 'general'},
 {'Issue': "Post-seizure confusion in patient's wife",
  'Issue Summary': 'Wife experienced seizure, now confused and amnesic.',
  'Severity': 'High',
  'Clinician to Contact': 'general'},
 {'Issue': 'Man unresponsive with no pulse at a restaurant',
  'Issue Summary': 'Unresponsive man, no pulse, emergency required',
  'Severity': 'High',
  'Clinician to Contact': 'general'},
 {'Issue': 'Possible broken leg due to slipping on ice',
  'Issue Summary': 'Patient suspects broken leg after slipping',
  'Severity': 'Medium',
  'Clinician to Contact': 'general'},
 {'Issue': 'Daughter experiencing seizure with grunting n

In [32]:
# One row per successfully processed transcript. Transcripts for which
# process_input returned None are absent, so the row count can be lower than len(df).
dfs = pd.DataFrame(PROCESSED_DATA)
dfs

Unnamed: 0,Transcript,gpt-4-turbo - Summary,gpt-4-turbo - Issue,gpt-4-turbo - Issue Summary,gpt-4-turbo - Severity,gpt-4-turbo - Clinician to Contact
0,"""This is Nurse Thompson at Lakeside Hospital. ...",Urgent transfer needed for stroke patient in r...,Urgent patient transfer required for potential...,"Patient showing stroke symptoms, needs urgent ...",High,general
1,"""My friend has a known allergy to soy and acci...","Friend ingested soy, has allergy, needs immedi...",Accidental ingestion of allergen (soy),"Friend ingested soy, has known allergy",High,general
2,"""My wife just stopped seizing, but she's confu...",Post-seizure confusion; memory loss; seeks nor...,Post-seizure confusion in patient's wife,"Wife experienced seizure, now confused and amn...",High,general
3,"""A man at the restaurant just fell and isn't r...","Man fell, unresponsive, no pulse at restaurant...",Man unresponsive with no pulse at a restaurant,"Unresponsive man, no pulse, emergency required",High,general
4,I slipped on the ice and my leg buckled. I thi...,"Slipped on ice, leg injury, possible break.",Possible broken leg due to slipping on ice,Patient suspects broken leg after slipping,Medium,general
...,...,...,...,...,...,...
195,"""My friend's dental implant fell out and is ca...","Dental implant fell out; causing pain, bleedin...",Dental implant fell out causing pain and bleeding,Intense pain and bleeding from dental implant ...,Low,general
196,My husband just collapsed! He's not breathing ...,"Husband collapsed, not breathing, unresponsive.",Husband collapsed and is unresponsive,"Husband collapsed, not breathing, unresponsive",High,general
197,"""My partner's dental filling came out and is c...","Partner's filling out, in severe pain, needs a...",Dental filling came out causing severe pain,Excruciating pain from lost dental filling.,Low,general
198,"""My girlfriend's contraceptive shot wore off, ...",Girlfriend fears pregnancy after contraceptive...,Concern about failed contraception,"Girlfriend's contraceptive shot wore off, worr...",Low,general


In [33]:
# Build the file name from the model and record count actually used, so a run
# with another model cannot overwrite or mislabel the gpt-4-turbo export.
# "/" (as in "google/gemma-7b") is replaced because it is a path separator.
# For the default run this is still "gpt-4-turbo-shuffled_transcript-F200.xlsx".
output_file = f"{processor.model_name.replace('/', '_')}-shuffled_transcript-F{len(df)}.xlsx"

# index=False keeps the row index from being written as an extra column, which
# shows up as "Unnamed: 0" when the workbook is read back.
dfs.to_excel(output_file, index=False)

In [None]:
# import pandas as pd
# from datetime import datetime
# import getpass

# def save_data(df,comments):
#     try:
#         current_datetime = datetime.now().strftime('%Y%m%d_%H%M%S')
#         username = getpass.getuser()
#         filename = f'{comments}_output{username}_{current_datetime}.xlsx'
#         df = pd.DataFrame(PROCESSED_DATA)
#         df.to_csv(filename, index=False)
#         print(f"DataFrame saved as {filename}")
#     except Exception as e:
#         print(f"An error occurred: {e}")



In [None]:
# save_data(dfs,"mixtral_8x7b")

In [None]:
# transcript_df_shuffled = transcript_df.sample(frac=1).reset_index(drop=True)
# transcript_df_shuffled.to_excel("transcript_shuffled.xlsx", index=False)
# transcript_df_shuffled