#### Importing Libraries

In [1]:
import numpy as np 
import pandas as pd

In [2]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.schema import BaseOutputParser, AIMessage  
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import LLMChain
from ollama import chat
from langchain.chains import RetrievalQA
import json
from datetime import datetime
from langchain_core.runnables import RunnableSequence
from langchain.llms import OpenAI

##### Maude Data Processing

In [3]:
### Load the Maude data for battery problem with LWP
maude_data = pd.read_excel('H:\Interview Preparation\Coding\Agentic AI\Tryouts\Maude DB Analysis\Maude.xlsx')
maude_data.describe()
maude_data.drop('Web Address', axis=1, inplace=True)
maude_data.drop('Exemption Number', axis=1, inplace=True)
maude_data.info()
maude_data.head()

  maude_data = pd.read_excel('H:\Interview Preparation\Coding\Agentic AI\Tryouts\Maude DB Analysis\Maude.xlsx')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Report Number     45 non-null     object        
 1   Event Date        45 non-null     datetime64[ns]
 2   Event Type        45 non-null     object        
 3   Manufacturer      45 non-null     object        
 4   Date Received     45 non-null     datetime64[ns]
 5   Product Code      45 non-null     object        
 6    Brand Name       45 non-null     object        
 7    Device Problem   45 non-null     object        
 8   Patient Problem   45 non-null     object        
 9   PMA/PMN Number    39 non-null     object        
 10  Number of Events  45 non-null     int64         
 11  Event Text        45 non-null     object        
dtypes: datetime64[ns](2), int64(1), object(9)
memory usage: 4.3+ KB


Unnamed: 0,Report Number,Event Date,Event Type,Manufacturer,Date Received,Product Code,Brand Name,Device Problem,Patient Problem,PMA/PMN Number,Number of Events,Event Text
0,2124215-2024-75434,2024-11-05 05:00:00,Injury,BOSTON SCIENTIFIC CORPORATION,2024-11-29 05:00:00,LWP,INCEPTA ICD,Premature Discharge of Battery; Battery Problem,"No Clinical Signs, Symptoms or Conditions",P960040,1,Event Description: IT WAS REPORTED THAT THIS I...
1,2124215-2024-75483,2023-09-08 04:00:00,Injury,BOSTON SCIENTIFIC CORPORATION,2024-11-29 05:00:00,LWP,ACCOLADE MRI EL DR,Premature Discharge of Battery; Battery Problem,"No Clinical Signs, Symptoms or Conditions",P150012/S000,1,Event Description: IT WAS REPORTED THAT THE BA...
2,2124215-2024-75490,2024-10-25 04:00:00,Injury,BOSTON SCIENTIFIC CORPORATION,2024-11-29 05:00:00,LWP,ENERGEN CRT-D,Premature Discharge of Battery; Delayed Charg...,Loss of consciousness; Syncope/Fainting,P010012,1,Event Description: IT WAS REPORTED THAT THIS C...
3,2124215-2024-75234,2024-11-08 05:00:00,Malfunction,BOSTON SCIENTIFIC CORPORATION,2024-11-28 05:00:00,LWP,PUNCTUA ICD,High impedance; Off-Label Use; Battery Problem,"No Clinical Signs, Symptoms or Conditions",P960040,1,Event Description: IT WAS REPORTED THAT ASSIST...
4,2124215-2024-75371,2024-11-22 05:00:00,Injury,BOSTON SCIENTIFIC CORPORATION,2024-11-28 05:00:00,LWP,ACCOLADE MRI DR,Signal Artifact/Noise; Low impedance; Battery...,"No Clinical Signs, Symptoms or Conditions",P150012,1,Event Description: IT WAS REPORTED THAT THIS R...


In [4]:
# Normalize dates to a standard format (ISO 8601: YYYY-MM-DD)
maude_data ['Event Date'] = pd.to_datetime(maude_data ['Event Date']).dt.strftime('%Y-%m-%d')
maude_data ['Date Received'] = pd.to_datetime(maude_data ['Date Received']).dt.strftime('%Y-%m-%d')

In [5]:
maude_data.head(2)

Unnamed: 0,Report Number,Event Date,Event Type,Manufacturer,Date Received,Product Code,Brand Name,Device Problem,Patient Problem,PMA/PMN Number,Number of Events,Event Text
0,2124215-2024-75434,2024-11-05,Injury,BOSTON SCIENTIFIC CORPORATION,2024-11-29,LWP,INCEPTA ICD,Premature Discharge of Battery; Battery Problem,"No Clinical Signs, Symptoms or Conditions",P960040,1,Event Description: IT WAS REPORTED THAT THIS I...
1,2124215-2024-75483,2023-09-08,Injury,BOSTON SCIENTIFIC CORPORATION,2024-11-29,LWP,ACCOLADE MRI EL DR,Premature Discharge of Battery; Battery Problem,"No Clinical Signs, Symptoms or Conditions",P150012/S000,1,Event Description: IT WAS REPORTED THAT THE BA...


In [9]:
def create_json_summary(row):
    return json.dumps({
        "Report Number": row['Report Number'],
        "Event Type": row['Event Type'],
        "Manufacturer": row['Manufacturer'],
        "Product Code": row['Product Code'],
        "Brand Name": row[' Brand Name'],
        "Device Problem": row[' Device Problem'],
        "Patient Problem": row['Patient Problem'],
        "PMA/PMN Number": row['PMA/PMN Number'],
        "Number of Events": row['Number of Events'],
        "Event Text": row['Event Text'],
        "Event Date": row['Event Date'],
        "Date Received": row['Date Received']
    })

In [10]:
maude_data.columns

Index(['Report Number', 'Event Date', 'Event Type', 'Manufacturer',
       'Date Received', 'Product Code', ' Brand Name', ' Device Problem',
       'Patient Problem', 'PMA/PMN Number', 'Number of Events', 'Event Text'],
      dtype='object')

In [11]:
maude_data['summary'] = maude_data.apply(create_json_summary, axis=1)

In [12]:
maude_data.head(2)

Unnamed: 0,Report Number,Event Date,Event Type,Manufacturer,Date Received,Product Code,Brand Name,Device Problem,Patient Problem,PMA/PMN Number,Number of Events,Event Text,summary
0,2124215-2024-75434,2024-11-05,Injury,BOSTON SCIENTIFIC CORPORATION,2024-11-29,LWP,INCEPTA ICD,Premature Discharge of Battery; Battery Problem,"No Clinical Signs, Symptoms or Conditions",P960040,1,Event Description: IT WAS REPORTED THAT THIS I...,"{""Report Number"": ""2124215-2024-75434"", ""Event..."
1,2124215-2024-75483,2023-09-08,Injury,BOSTON SCIENTIFIC CORPORATION,2024-11-29,LWP,ACCOLADE MRI EL DR,Premature Discharge of Battery; Battery Problem,"No Clinical Signs, Symptoms or Conditions",P150012/S000,1,Event Description: IT WAS REPORTED THAT THE BA...,"{""Report Number"": ""2124215-2024-75483"", ""Event..."


In [13]:
maude_data['summary'][0]

'{"Report Number": "2124215-2024-75434", "Event Type": "Injury", "Manufacturer": "BOSTON SCIENTIFIC CORPORATION", "Product Code": "LWP", "Brand Name": "INCEPTA ICD", "Device Problem": " Premature Discharge of Battery; Battery Problem", "Patient Problem": " No Clinical Signs, Symptoms or Conditions", "PMA/PMN Number": "P960040", "Number of Events": 1, "Event Text": "Event Description: IT WAS REPORTED THAT THIS IMPLANTABLE CARDIOVERTER DEFIBRILLATOR (ICD) EXHIBITED PREMATURE BATTERY DEPLETION (PBD). THIS DEVICE WAS EXPLANTED AND REPLACED. NO ADDITIONAL ADVERSE PATIENT EFFECTS WERE REPORTED.", "Event Date": "2024-11-05", "Date Received": "2024-11-29"}'

### Maude Data - Prepare for LLM Input - Vector Store

In [14]:
# Create documents with metadata from the dataframe
documents = [
    Document(
        page_content=row['summary'],  # Store the JSON summary as content
        metadata={
            "Report Number": row['Report Number'],
            "Event Type": row['Event Type'],
            "Manufacturer": row['Manufacturer']
        }
    )
    for _, row in maude_data.iterrows()
]

embeddings = OpenAIEmbeddings()
# Store the documents in a FAISS vector store
vectorstore = FAISS.from_documents(documents, embeddings)

# Save the FAISS index locally
vectorstore.save_local("faiss_index")

  embeddings = OpenAIEmbeddings()


In [15]:
llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    temperature=0.7
)
#llm = OpenAI(model="gpt-4")

### RAG Implementation

In [15]:
# # Create Output Schema for LLM response
# independent_schema = [
#     ResponseSchema(name="FailureMode", description="The failure mode available in the maude data"),
#     ResponseSchema(name="PotentialCause", description="The potential cause available in the maude data"),
#     ResponseSchema(name="Hazard", description="The hazardavailable in the maude data"),
# ]

# response_schemas = [
#     ResponseSchema(
#         name="MaudeList",
#         description="A list of all failure mode details from maude data and its related details",
#         type="array",
#         items={"type": "object", "properties": independent_schema},
#     )
# ]

In [16]:
# # Create an output parser
# output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
# format_instructions = output_parser.get_format_instructions()

In [17]:
# # Creating prompt template
# prompt_template = PromptTemplate(
#     template=(
#         "You are given the following retrieved context:\n\n"
#         "{context}\n\n"
#         "Based on this, provide a structured response with the following fields:\n\n"
#         "{format_instructions}\n\n"
#         "Make sure to follow the schema strictly."
#     ),
#     input_variables=["context"],
#     partial_variables={"format_instructions": format_instructions},
# )

In [18]:
# # Setup the retriever to narrow the context
# retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

In [19]:
# rag_chain = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever, verbose = False, return_source_documents = True, chain_type_kwargs={'prompt':prompt_template})

In [20]:
# # Step 6: Query the system with structured output
# query = "What are the differnt failre mode available in the data and also shae the potential cause , hazard happen due to that of the failure in a given response format ?"
# result = rag_chain.invoke(query)

In [21]:
# raw_result = result['result']

In [22]:
# # Step 2: Clean the result to remove ```json tags
# cleaned_result = raw_result.strip("```json").strip()

# # Step 3: Parse the cleaned JSON into a Python dictionary
# parsed_result = json.loads(cleaned_result)

# # Step 4: Access specific fields in the parsed result
# maude_list = parsed_result.get("MaudeList", [])

# # Print the parsed result
# print(json.dumps(maude_list, indent=4))

In [23]:
# prompt = PromptTemplate(
#     template="Extract the information from the given context.\n{format_instructions}\n\nContext: {paragraph}",
#     input_variables=["paragraph"],
#     partial_variables={"format_instructions": format_instructions},
# )

In [24]:
# chain = RunnableSequence(prompt | llm)

In [25]:
# result = chain.invoke({"paragraph": retriever})

#### Simple Prompting without RAG - Structured Output

In [16]:
short_maude_data = maude_data[:15]

In [17]:
short_maude_data.shape

(15, 13)

In [18]:
summary_all = ''
for _, row in short_maude_data.iterrows():
    summary_all += row['summary']
    #print(row['summary'])

In [19]:
summary_all

'{"Report Number": "2124215-2024-75434", "Event Type": "Injury", "Manufacturer": "BOSTON SCIENTIFIC CORPORATION", "Product Code": "LWP", "Brand Name": "INCEPTA ICD", "Device Problem": " Premature Discharge of Battery; Battery Problem", "Patient Problem": " No Clinical Signs, Symptoms or Conditions", "PMA/PMN Number": "P960040", "Number of Events": 1, "Event Text": "Event Description: IT WAS REPORTED THAT THIS IMPLANTABLE CARDIOVERTER DEFIBRILLATOR (ICD) EXHIBITED PREMATURE BATTERY DEPLETION (PBD). THIS DEVICE WAS EXPLANTED AND REPLACED. NO ADDITIONAL ADVERSE PATIENT EFFECTS WERE REPORTED.", "Event Date": "2024-11-05", "Date Received": "2024-11-29"}{"Report Number": "2124215-2024-75483", "Event Type": "Injury", "Manufacturer": "BOSTON SCIENTIFIC CORPORATION", "Product Code": "LWP", "Brand Name": "ACCOLADE MRI EL DR", "Device Problem": " Premature Discharge of Battery; Battery Problem", "Patient Problem": " No Clinical Signs, Symptoms or Conditions", "PMA/PMN Number": "P150012/S000", "Nu

In [22]:
# Create Output Schema for LLM response
independent_schema = [
    ResponseSchema(name="FailureMode", description="The failure mode available in the maude data"),
    ResponseSchema(name="PotentialCause", description="The potential cause available due to the given failure mode"),
    ResponseSchema(name="Hazard", description="The hazard available due to the given failure mode"),
    ResponseSchema(name="HazardousSituation", description="The hazardous situation available due to the given failure mode"), 
    ResponseSchema(name="Harm", description="The harm available due to the given failure mode")
]

response_schemas = [
    ResponseSchema(
        name="MaudeList",
        description="A list of all failure mode details from maude data and its related details",
        type="array",
        items={"type": "object", "properties": independent_schema},
    )
]

# Parse the example output
# class PetListParser(BaseOutputParser):
#     def parse(self, text: dict) -> dict:
#         return text  # Just returning the text as structured data

# Parse the example output
class PersonListParser(BaseOutputParser):
    def parse(self, text: dict) -> dict:
        return text  # Returning the adjusted structured data

# Create an output parser
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# Define the prompt template
prompt = PromptTemplate(
    template="Extract the information from the given context.What are the differnt unique failure mode available in the data and also share the potential cause , hazard, actual hazardous situation and harm happen due to that of the failure in a given response format.\n{format_instructions}\n\nContext: {paragraph}",
    input_variables=["paragraph"],
    partial_variables={"format_instructions": format_instructions},
)

# Example paragraph
paragraph = summary_all


# Combine the prompt and LLM into a chain
chain = LLMChain(prompt=prompt, llm=llm)

# Run the chain with the paragraph
result = chain.run({"paragraph": paragraph})

# Parse the output using the output parser
parser = PersonListParser()
parsed_output = parser.parse(output_parser.parse(result))


print(parsed_output)


{'MaudeList': [{'Failure Mode': 'Premature Discharge of Battery', 'Potential Cause': 'Battery Problem', 'Hazard': 'Loss of device function', 'Actual Hazardous Situation': 'Device malfunction', 'Harm': 'No adverse patient effects reported'}, {'Failure Mode': 'Premature Discharge of Battery', 'Potential Cause': 'Battery Problem', 'Hazard': 'Loss of device function', 'Actual Hazardous Situation': 'Device malfunction', 'Harm': 'No adverse patient effects reported'}, {'Failure Mode': 'Premature Discharge of Battery; Delayed Charge Time', 'Potential Cause': 'Battery Problem', 'Hazard': 'Loss of device function', 'Actual Hazardous Situation': 'Device malfunction', 'Harm': 'Patient experienced two syncopal episodes'}, {'Failure Mode': 'High impedance; Off-Label Use', 'Potential Cause': 'Battery Problem', 'Hazard': 'Loss of device function', 'Actual Hazardous Situation': 'Device malfunction', 'Harm': 'No adverse patient effects reported'}, {'Failure Mode': 'Signal Artifact/Noise; Low impedance;

In [21]:
maude_list = parsed_output.get("MaudeList", [])

# Print the parsed result
print(json.dumps(maude_list, indent=4))

[
    {
        "Failure Mode": "Premature Discharge of Battery",
        "Potential Cause": "Battery Problem",
        "Hazard": "Loss of device functionality",
        "Hazardous Situation": "Patient may experience loss of consciousness or syncope",
        "Harm": "Injury or death"
    },
    {
        "Failure Mode": "Delayed Charge Time",
        "Potential Cause": "Battery Problem",
        "Hazard": "Device malfunction",
        "Hazardous Situation": "Patient may experience loss of consciousness or syncope",
        "Harm": "Injury or death"
    },
    {
        "Failure Mode": "High Impedance",
        "Potential Cause": "Device malfunction",
        "Hazard": "Inappropriate therapy",
        "Hazardous Situation": "Patient may experience inappropriate shocks or pacing",
        "Harm": "Injury or death"
    },
    {
        "Failure Mode": "Low Impedance",
        "Potential Cause": "Device malfunction",
        "Hazard": "Inappropriate therapy",
        "Hazardous Situation"

#### Simple Prompting with RAG - Structured Output