#### Import Libraries

In [63]:
import numpy as np 
import pandas as pd
import json
import os 
from dotenv import load_dotenv  

In [65]:
# Read key/value pairs from a local .env file into the process environment so
# the API keys used below are available through os.getenv / os.environ.
load_dotenv()

True

In [68]:
# Validate that every API key is present before any client is built.
# Copying os.getenv(key) back into os.environ[key] is a no-op when the key is
# set, and raises an opaque "str expected, not NoneType" TypeError when it is
# not, so check explicitly and report all missing keys in one clear error.
_REQUIRED_API_KEYS = ('GEMINI_API_KEY', 'OPENAI_API_KEY', 'GROQ_API_KEY')
_missing_api_keys = [_key for _key in _REQUIRED_API_KEYS if os.getenv(_key) is None]
if _missing_api_keys:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_api_keys)
        + ". Define them in your .env file or shell environment."
    )

#### Load Data 

In [2]:
# Build the path with os.path.join so it resolves on every platform; the
# previous literal 'Data\...' only worked on Windows, because on POSIX systems
# the backslash is an ordinary filename character.
maude_data = pd.read_excel(os.path.join('Data', 'fda_device_data - Subset.xlsx'))

In [3]:
maude_data.head(2)

Unnamed: 0,event_type,date_of_event,product_problems,complaint_txt,follow_up,manufacturer_narrative,device_generic_name,device_manufacturer_name,device_model_number,device_lot_number,device_report_product_code,expiration_date_of_device,date_returned_to_manufacturer,device_availability,device_operator,device_name
0,Malfunction,20200418,"['Battery Problem', 'Power Problem']",INFORMATION RECEIVED BY MEDTRONIC INDICATED TH...,,(B)(4). CURRENTLY IT IS UNKNOWN WHETHER OR NOT...,"ARTIFICIAL PANCREAS DEVICE SYSTEM, THRESHOLD S...",MEDTRONIC PUERTO RICO OPERATIONS CO.,MMT-1715K,HG1B44Y,OZO,,20200429.0,Device was returned to manufacturer,LAY USER/PATIENT,"Automated Insulin Dosing , Threshold Suspend"
1,Malfunction,20200421,"['Excess Flow or Over-Infusion', 'Battery Prob...",INFORMATION RECEIVED BY MEDTRONIC INDICATED TH...,,CURRENTLY IT IS UNKNOWN WHETHER OR NOT THE DEV...,"PUMP, INFUSION, INSULIN, TO BE USED WITH INVAS...",MEDTRONIC MINIMED,MMT-XXX,,OYC,,,No,LAY USER/PATIENT,"Pump, Infusion, Insulin, To Be Used With Invas..."


#### Data Pre-Processing

In [4]:
# Keep only the free-text columns that feed the hazard extraction.
# .copy() makes this an independent frame: a later cell adds an 'event_info'
# column, and writing to a bare column selection triggers
# SettingWithCopyWarning.
maude_subset = maude_data[
    ['product_problems', 'complaint_txt', 'follow_up', 'manufacturer_narrative']
].copy()
maude_subset.head(2)

Unnamed: 0,product_problems,complaint_txt,follow_up,manufacturer_narrative
0,"['Battery Problem', 'Power Problem']",INFORMATION RECEIVED BY MEDTRONIC INDICATED TH...,,(B)(4). CURRENTLY IT IS UNKNOWN WHETHER OR NOT...
1,"['Excess Flow or Over-Infusion', 'Battery Prob...",INFORMATION RECEIVED BY MEDTRONIC INDICATED TH...,,CURRENTLY IT IS UNKNOWN WHETHER OR NOT THE DEV...


In [17]:
# Build one labelled text blob per event from the four source columns.
# Whole-column string concatenation replaces the per-row `.loc` writes inside
# `iterrows`, which were slow and raised SettingWithCopyWarning. The resulting
# strings are unchanged, including missing values rendered as 'nan' via str().
_EVENT_FIELDS = (
    # (source column, label placed before the value)
    ('product_problems', 'product_problems - '),
    ('complaint_txt', 'complaint_text-'),
    ('follow_up', 'complaint_follow_up-'),
    ('manufacturer_narrative', 'manufacturer_narrative-'),
)
_event_parts = [
    _label + ' ' + maude_subset[_column].map(str)
    for _column, _label in _EVENT_FIELDS
]
_event_info = _event_parts[0]
for _part in _event_parts[1:]:
    _event_info = _event_info + ', ' + _part

# assign() returns a new frame, so this works whether or not maude_subset is
# still a selection of maude_data.
maude_subset = maude_subset.assign(event_info=_event_info)

In [18]:
maude_subset.head(2)

Unnamed: 0,product_problems,complaint_txt,follow_up,manufacturer_narrative,event_info
0,"['Battery Problem', 'Power Problem']",INFORMATION RECEIVED BY MEDTRONIC INDICATED TH...,,(B)(4). CURRENTLY IT IS UNKNOWN WHETHER OR NOT...,"product_problems - ['Battery Problem', 'Power..."
1,"['Excess Flow or Over-Infusion', 'Battery Prob...",INFORMATION RECEIVED BY MEDTRONIC INDICATED TH...,,CURRENTLY IT IS UNKNOWN WHETHER OR NOT THE DEV...,product_problems - ['Excess Flow or Over-Infu...


#### Schema Definition & Prompt Template Creation

In [60]:
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_groq import ChatGroq
from phi.model.google import Gemini
from phi.tools.duckduckgo import DuckDuckGo
from tqdm import tqdm

In [72]:
# Instruction text for the first extraction prompt (used to build prompt_one).
# It names the four fields to extract: Hazard, Hazardous Situation,
# Harm/Potential Harm and Manufacturer Name.
# NOTE(review): the "Format the output as follows" section describes a plain
# "Field: value" layout, while the parser's format instructions are appended
# after this text when the template is built — confirm the two do not conflict.
prompt_info = '''You are an expert in identifying and categorizing technical & medical device related risks and issues in text. 
You are tasked with extracting structured information from text related to medical device failures. 
Your goal is to extract and organize the following fields from the provided content:

Hazard: A single-word or concise description of the unique hazard (e.g., "Battery Depletion"). This should not be a sentence.
Hazardous Situation: Detailed description of the situation, capturing the context from complaint text, follow-up, and manufacturer narrative.
Harm/Potential Harm: A paragraph explaining the potential harm or adverse outcomes that could result from the issue in the given context.
Manufacturer Name: A list of all unique manufacturer names mentioned in the provided data.
Format the output as follows:

Hazard: [Single word or short phrase]
Hazardous Situation: [Detailed description]
Harm/Potential Harm: [Paragraph explaining potential harm]
Manufacturer Name: [Unique manufacturer names as a list]
'''

In [94]:
# Alternative, markdown-structured instruction text (used to build prompt_two).
# It covers the same four fields as prompt_info and adds an optional
# "Contextual Research" section.
# NOTE(review): a later cell rebinds the name `prompt_info_new` to a different
# string; on a top-to-bottom run, prompt_two is built from this version.
prompt_info_new = '''

You are a highly skilled medical device safety expert with extensive knowledge in risk assessment and failure analysis. Analyze the provided data and structure your response as follows:

### 1. Hazard
- Identify the unique hazard using a single word or a concise phrase.
- Avoid full sentences and ensure it is a precise descriptor.

### 2. Hazardous Situation
- Provide a detailed description of the situation, including context from complaint text, follow-up, and manufacturer narrative.
- Focus on capturing the specific conditions leading to the hazard.

### 3. Harm/Potential Harm
- Write a paragraph explaining the potential harm or adverse outcomes that could result from the identified hazard in the given context.
- Be thorough in describing the potential risks and their impacts.

### 4. Manufacturer Name
- Extract and list all unique manufacturer names mentioned in the data.
- Ensure the list includes only distinct names with no duplicates.

### 5. Contextual Research (Optional)
- If requested, use external sources to:
  - Provide examples of similar hazards in medical devices.
  - Search for risk mitigation strategies.
  - Cite recent articles or research supporting the analysis.

Format your response using clear markdown headers and bullet points. Be precise, structured, and thorough in your analysis.'''

In [74]:
# (field name, description) pairs for a single extracted risk record.
_RISK_FIELD_SPECS = (
    ("Hazard",
     "A unique descriptor of the hazard, typically a single word or concise phrase."),
    ("Hazardous Situation",
     "A detailed description of the situation derived from the complaint text, follow-up, and manufacturer narrative."),
    ("Harm/Potential Harm",
     "A paragraph explaining the potential adverse outcomes or harm caused by the identified hazard in the given context."),
    ("Manufacturer Name",
     "A list of unique manufacturer names mentioned in the provided data."),
)

# One ResponseSchema per field, in the order listed above.
independent_schema = [
    ResponseSchema(name=_field, description=_text)
    for _field, _text in _RISK_FIELD_SPECS
]


In [75]:
# Shape of each element of the top-level list: an object whose properties are
# the per-record field schemas defined in `independent_schema`.
_risk_item_spec = {"type": "object", "properties": independent_schema}

# Single top-level field: an array holding one risk record per event.
# NOTE(review): confirm that ResponseSchema actually uses the `items` argument
# when format instructions are generated.
_maude_risk_list_schema = ResponseSchema(
    name="MaudeRiskList",
    description="A list of all failure mode and hazard information extracted from the maude data",
    type="array",
    items=_risk_item_spec,
)

response_schemas = [_maude_risk_list_schema]

In [76]:
# Create an output parser from the response schemas; the same parser is used
# later to turn the raw model reply into a Python structure.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
# Text describing the expected output format; it is injected into the prompt
# templates as the `format_instructions` partial variable.
format_instructions = output_parser.get_format_instructions()


In [78]:
# Define the prompt templates
def _make_extraction_prompt(instructions):
    """Wrap instruction text in the shared extraction template.

    The template is the instruction text, then the parser's format
    instructions (pre-filled as a partial variable), then the event context
    supplied at invoke time through the `paragraph` input variable.
    """
    return PromptTemplate(
        template=instructions + "\n{format_instructions}\n\nContext: {paragraph}",
        input_variables=["paragraph"],
        partial_variables={"format_instructions": format_instructions},
    )


prompt_one = _make_extraction_prompt(prompt_info)
prompt_two = _make_extraction_prompt(prompt_info_new)

#### LLM Setup

In [85]:
def _groq_chat(model_name):
    """Return a ChatGroq client for `model_name` with temperature fixed at 0.

    The API key is read from the GROQ_API_KEY environment variable.
    """
    return ChatGroq(
        api_key=os.getenv('GROQ_API_KEY'),
        model_name=model_name,
        temperature=0,
    )


# Groq-hosted chat models; they differ only in the model name.
llama_vers_llm = _groq_chat("llama-3.3-70b-versatile")
llama_spectdoc_llm = _groq_chat("llama-3.3-70b-specdec")
llama_8192_llm = _groq_chat("llama3-70b-8192")
mixtral_8x7b_llm = _groq_chat("mixtral-8x7b-32768")
gemma_llm = _groq_chat("gemma2-9b-it")

# Gemini model handle, keyed by GEMINI_API_KEY.
gemini_model = Gemini(
    api_key=os.getenv('GEMINI_API_KEY'),
    id="gemini-2.0-flash-exp",
)


#### Verify the Data Before the LLM Call

In [20]:
# Work on an independent copy so later steps do not modify maude_subset.
maude_hazard_extraction = maude_subset.copy()

In [25]:
display(maude_hazard_extraction[['event_info']].head(2))

Unnamed: 0,event_info
0,"product_problems - ['Battery Problem', 'Power..."
1,product_problems - ['Excess Flow or Over-Infu...


In [39]:
maude_hazard_extraction.shape

(10, 5)

In [36]:
# Combine every event summary into the single context string sent to the LLM.
# A blank line now separates records; previously they were concatenated with
# no delimiter, so one event's manufacturer narrative ran straight into the
# next event's "product_problems" label. str.join also avoids rebuilding the
# string on every iteration, and map(str) guards against non-string cells.
eventinfo_all = '\n\n'.join(maude_hazard_extraction['event_info'].map(str))

In [52]:
maude_hazard_extraction

Unnamed: 0,product_problems,complaint_txt,follow_up,manufacturer_narrative,event_info
0,"['Battery Problem', 'Power Problem']",INFORMATION RECEIVED BY MEDTRONIC INDICATED TH...,,(B)(4). CURRENTLY IT IS UNKNOWN WHETHER OR NOT...,"product_problems - ['Battery Problem', 'Power..."
1,"['Excess Flow or Over-Infusion', 'Battery Prob...",INFORMATION RECEIVED BY MEDTRONIC INDICATED TH...,,CURRENTLY IT IS UNKNOWN WHETHER OR NOT THE DEV...,product_problems - ['Excess Flow or Over-Infu...
2,"['Failure to Charge', 'Battery Problem']",FOLLOWING THE BATTERY PERFORMANCE ALERT (BPA) ...,,THE RESULTS/METHOD AND CONCLUSION CODES ALONG ...,"product_problems - ['Failure to Charge', 'Bat..."
3,['Battery Problem'],IT WAS REPORTED THAT THE PUMP BATTERY WAS DEPL...,,NO PRODUCT WAS RETURNED FOR EVALUATION. SHOULD...,"product_problems - ['Battery Problem'], compl..."
4,['Battery Problem'],IT WAS REPORTED THAT THIS DEVICE TRIPPED ERI O...,,THE DEVICE WAS NOT RETURNED FOR ANALYSIS. THE ...,"product_problems - ['Battery Problem'], compl..."
5,"['No Display/Image', 'Battery Problem', 'Devic...",INFORMATION RECEIVED BY MEDTRONIC INDICATED TH...,,"UPDATED H9: 2032227-060322-002-C MEDTRONIC, IN...","product_problems - ['No Display/Image', 'Batt..."
6,['Battery Problem'],FOLLOWING THE BATTERY PERFORMANCE ALERT (BPA) ...,,THE DEVICE IS INCLUDED IN THE BATTERY PERFORMA...,"product_problems - ['Battery Problem'], compl..."
7,"['Battery Problem', 'Insufficient Information']",ADDITIONAL INFORMATION INDICATES THE IPG WAS E...,IT WAS REPORTED THE IPG HAS REACHED END OF SER...,THE RESULTS OF THE INVESTIGATION ARE INCONCLUS...,"product_problems - ['Battery Problem', 'Insuf..."
8,['Battery Problem'],INFORMATION WAS RECEIVED FROM A CONSUMER (CON)...,ADDITIONAL INFORMATION WAS RECEIVED FROM THE P...,"IF INFORMATION IS PROVIDED IN THE FUTURE, A SU...","product_problems - ['Battery Problem'], compl..."
9,"['Circuit Failure', 'Battery Problem', 'Materi...",INFORMATION RECEIVED BY MEDTRONIC INDICATED TH...,,(B)(4). CURRENTLY IT IS UNKNOWN WHETHER OR NOT...,"product_problems - ['Circuit Failure', 'Batte..."


#### Extract the information

In [80]:
from langchain.schema import BaseOutputParser, AIMessage 

In [81]:
class InfoListParser(BaseOutputParser):
    """Pass-through parser whose `parse` returns its argument unchanged."""

    def parse(self, text: dict) -> dict:
        """Return `text` as-is.

        In this notebook it is called with the structure already produced by
        `output_parser.parse`, so no further transformation is applied.
        """
        return text

In [82]:
### Chaining with LCEL
# prompt -> Groq Llama model -> plain string reply
chain = prompt_one | llama_vers_llm | StrOutputParser()

# One call covering every event: the whole combined text is the context.
result = chain.invoke({"paragraph": eventinfo_all})

# Decode the raw reply with the structured parser, then pass it through
# InfoListParser (which returns its input unchanged).
parser = InfoListParser()
_structured_reply = output_parser.parse(result)
parsed_output = parser.parse(_structured_reply)


In [83]:
result

'```json\n{\n    "MaudeRiskList": [\n        {\n            "Hazard": "Battery Depletion",\n            "Hazardous Situation": "The insulin pump alarmed with a power system error, which did not prevent the pump from running. The customer experienced a power error for 3 days and was able to clear the alarm. The device was returned for analysis, and the investigation revealed a battery issue due to connector resistance on the electrical board.",\n            "Harm/Potential Harm": "The potential harm associated with this issue is that the insulin pump may not function properly, leading to incorrect insulin delivery, which can cause serious health complications, including hypoglycemia or hyperglycemia. If the device fails to deliver the correct amount of insulin, it can lead to serious health consequences, including seizures, coma, or even death.",\n            "Manufacturer Name": ["Medtronic"]\n        },\n        {\n            "Hazard": "Over-Infusion",\n            "Hazardous Situati

In [84]:
parsed_output

{'MaudeRiskList': [{'Hazard': 'Battery Depletion',
   'Hazardous Situation': 'The insulin pump alarmed with a power system error, which did not prevent the pump from running. The customer experienced a power error for 3 days and was able to clear the alarm. The device was returned for analysis, and the investigation revealed a battery issue due to connector resistance on the electrical board.',
   'Harm/Potential Harm': 'The potential harm associated with this issue is that the insulin pump may not function properly, leading to incorrect insulin delivery, which can cause serious health complications, including hypoglycemia or hyperglycemia. If the device fails to deliver the correct amount of insulin, it can lead to serious health consequences, including seizures, coma, or even death.',
   'Manufacturer Name': ['Medtronic']},
  {'Hazard': 'Over-Infusion',
   'Hazardous Situation': 'The insulin pump and reservoir had an over-delivery issue, but no harm requiring medical intervention was

#### Gemini LLM 

In [95]:
from phi.agent import Agent

In [96]:
# Gemini model instance dedicated to the agent; the key is read with
# os.environ[...] so a missing GEMINI_API_KEY raises KeyError here.
_agent_model = Gemini(
    id="gemini-2.0-flash-exp",
    api_key=os.environ['GEMINI_API_KEY'],
)

# Agent with the DuckDuckGo tool attached and markdown output enabled.
maude_agent = Agent(
    model=_agent_model,
    tools=[DuckDuckGo()],
    markdown=True,
)

In [103]:
# Bundle the raw event text and the raw reply from the earlier chain call
# (`result`) into one markdown-style block for the agent.
eventinfo_combined = f"""
### Event Information:
{eventinfo_all}

### Result Information:
{result}
"""
# NOTE: this rebinds `prompt_info_new`, which an earlier cell defined as a
# plain instruction string. `prompt_two` was built by concatenating the earlier
# value, so it is not affected by this reassignment. This version drops the
# optional research section and embeds the combined data directly.
prompt_info_new = f"""
You are a highly skilled medical device safety expert with extensive knowledge in risk assessment and failure analysis. Analyze the provided data and structure your response as follows:

### 1. Hazard
- Identify the unique hazard using a single word or a concise phrase.
- Avoid full sentences and ensure it is a precise descriptor.

### 2. Hazardous Situation
- Provide a detailed description of the situation, including context from complaint text, follow-up, and manufacturer narrative.
- Focus on capturing the specific conditions leading to the hazard.

### 3. Harm/Potential Harm
- Write a paragraph explaining the potential harm or adverse outcomes that could result from the identified hazard in the given context.
- Be thorough in describing the potential risks and their impacts.

### 4. Manufacturer Name
- Extract and list all unique manufacturer names mentioned in the data.
- Ensure the list includes only distinct names with no duplicates.

### Provided Data:
{eventinfo_combined}
"""

In [104]:
# Send the combined prompt to the Gemini-backed agent; the reply text is read
# from `response.content` in the next cell.
response = maude_agent.run(prompt_info_new)

In [106]:
response.content

'### 1. Hazard\n- Battery Depletion\n- Over-Infusion\n- Failure to Charge\n- Low Battery\n- Premature Battery Depletion\n- Battery Issue\n- Circuit Failure\n\n### 2. Hazardous Situation\n\n- **Battery Depletion:** An insulin pump exhibited power system errors, with the user experiencing persistent errors for three days. Though the pump continued to function, analysis revealed a battery issue stemming from connector resistance on the electrical board, causing premature battery depletion.\n\n- **Over-Infusion:** An insulin pump and its reservoir experienced an over-delivery of insulin. No medical intervention was required, however, the device was not returned for analysis, leaving the cause undetermined.\n\n- **Failure to Charge:** A device exhibited battery performance issues, resulting in a clinician receiving a battery performance alert. The device was explanted and replaced. Further analysis showed the capacitor exceeded the charged time and the issue was caused by premature battery 