**ePartogram-GPT using Zephyr 7B Beta and Pytorch NLP**

**System Setup**

In [1]:
#install required packages
%%capture
!pip install -q transformers peft accelerate bitsandbytes safetensors sentencepiece streamlit chromadb langchain langchain-community langchain-core langchain-huggingface sentence-transformers gradio pypdf SpeechRecognition
!pip install haystack-ai transformers accelerate bitsandbytes sentence_transformers bitsandbytes

In [2]:
# import audio packages
%%capture
!apt-get update
!apt-get install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
!pip install pyaudio
!pip install spacy
!python -m spacy download en_core_web_sm

In [3]:
# fixing unicode error in google colab
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [4]:
# import dependencies
import os
import gradio as gr
from google.colab import drive
import csv

import chromadb
#from langchain.llms import HuggingFacePipeline

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from langchain_huggingface import HuggingFacePipeline
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain import HuggingFacePipeline
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

In [5]:
# Prompt Engineering Packages
from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import HuggingFaceLocalGenerator

In [6]:
# Speech Recognition and Chat History Packages
import speech_recognition as sr
import re
import csv
from datetime import datetime, timedelta
# from pocketsphinx import LiveSpeech

**Define and Train Model**

In [7]:
def speech_to_text():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)

    try:
        text = recognizer.recognize_google(audio, language="en-US")
        return text
    except sr.UnknownValueError:
        return "Speech recognition could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results from speech recognition service; {e}"

def process_audio(audio_file):
    if audio_file is not None:
        try:
            recognizer = sr.Recognizer()
            with sr.AudioFile(audio_file) as source:
                audio_data = recognizer.record(source)
            text = recognizer.recognize_google(audio_data)
            return text
        except Exception as e:
            return f"Error in speech recognition: {str(e)}"
    return "No audio input received"

In [8]:
# specify model huggingface mode name
model_name = "HuggingFaceH4/zephyr-7b-beta"

# function for loading 4-bit quantized model
def load_quantized_model(model_name: str):
    """
    :param model_name: Name or path of the model to be loaded.
    :return: Loaded quantized model.
    """
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        quantization_config=bnb_config
    )
    return model

In [9]:
# function for initializing tokenizer
def initialize_tokenizer(model_name: str):
    """
    Initialize the tokenizer with the specified model_name.

    :param model_name: Name or path of the model for tokenizer initialization.
    :return: Initialized tokenizer.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.bos_token_id = 1  # Set beginning of sentence token id
    return tokenizer

In [10]:
!pip install -U bitsandbytes
# load model
model = load_quantized_model(model_name)

# initialize tokenizer
tokenizer = initialize_tokenizer(model_name)

# specify stop token ids
stop_token_ids = [0]



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


RuntimeError: CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend

In [None]:
# mount google drive and specify folder path
drive.mount('/content/drive')
folder_path = '/content/drive/MyDrive/Colab Notebooks/My_PDFs_AIML'
!ls '/content/drive/MyDrive/Colab Notebooks/My_PDFs_AIML'

**Retrieval Augmented Generation (RAG)**

In [None]:
# load pdf files
loader = PyPDFDirectoryLoader(folder_path)
documents = loader.load()
print(documents)

# split the documents in small chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) #Change the chunk_size and chunk_overlap as needed
all_splits = text_splitter.split_documents(documents)

# specify embedding model (using huggingface sentence transformer)
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}
embeddings = HuggingFaceEmbeddings(model_name = embedding_model_name, model_kwargs = model_kwargs) #TO DO

#embed document chunks
vectordb = Chroma.from_documents(documents = all_splits, embedding = embeddings, persist_directory="chroma_db") #TO DO

# specify the retriever
retriever = vectordb.as_retriever()

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline

model_name = "HuggingFaceH4/zephyr-7b-beta"
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)

text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    top_k=5,
    num_return_sequences=1
)

# max_length = 2048

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

**Prompt Engineering**

In [None]:
from langchain.prompts import PromptTemplate

prompt_template = """<|system|>
You are an AI assistant specialized in analyzing and interpreting medical documents. Provide accurate and helpful information based on the given context and question. Always prioritize patient safety and adhere to medical best practices in your responses.
</s>
<|user|>
Context: {context}
Question: {question}
</s>
<|assistant|>
"""

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [None]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": PROMPT}
)

**NLP Processing of Chat Input and AI Output**

In [None]:
import spacy

# Load the English language model
nlp = spacy.load("en_core_web_sm")

def extract_relevant_info(text):
    # Process the text with spaCy
    doc = nlp(text)

    # Define the entities we want to extract
    entities = {
        'Companion': None,
        'Pain Relief': None,
        'Oral fluid': None,
        'Posture': None,
        'Baseline FHR': None,
        'FHR Decceleration': None,
        'Amniotic fluid': None,
        'Fetal position': None,
        'Caput': None,
        'Moulding': None,
        'Pulse': None,
        'Systolic BP': None,
        'Diastolic BP': None,
        'Temperature': None,
        'Urine': None,
        'Contractions per 10 min': None,
        'Duration of contractions': None,
        'Cervix': None,
        'Descent': None,
        'Medication': None,
        'Assessment': None,
        'Plan': None
    }

    extracted_info = {}
    for key, pattern in entities.items():
        match = re.search(pattern, text, re.IGNORECASE)
        extracted_info[key] = match.group(1).strip() if match else None

    return entities

In [None]:
def save_chat_to_csv(chat_history: list, filename: str = '/content/drive/MyDrive/digital_lcg.csv'):
    # Define parameters to LCG
    fields = ['Timestamp', 'Companion', 'Pain Relief', 'Oral fluid', 'Posture', 'Baseline FHR',
              'FHR Decceleration', 'Amniotic fluid', 'Fetal position', 'Caput', 'Moulding',
              'Pulse', 'Systolic BP', 'Diastolic BP', 'Temperature', 'Urine',
              'Contractions per 10 min', 'Duration of contractions', 'Cervix', 'Descent',
              'Medication', 'Assessment', 'Plan']

    # Read existing data from digital LCG
    try:
        with open(filename, 'r', newline='', encoding='utf-8') as file:
            existing_data = list(csv.DictReader(file))
    except FileNotFoundError:
        existing_data = []

    # Extract data from chat history
    new_entries = []
    for query, response in chat_history:
        info = extract_relevant_info(response)
        new_entry = {field: '' for field in fields}
        new_entry['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        new_entry.update(info)
        new_entries.append(new_entry)

    # Write all data back to the CSV
    with open(filename, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fields)
        writer.writeheader()
        writer.writerows(existing_data + new_entries)

**UI Development**

In [None]:
def get_response(query: str, chat_history: list):
    result = qa_chain({"question": query, "chat_history": chat_history})
    return result["answer"]

In [None]:
def create_conversation(message, history):
    try:
        response = get_response(message, history)
        assistant_response = response.split("<|assistant|>")[-1].strip()
        formatted_response = f"Question: {message}\n\nHelpful Answer: {assistant_response}"
        history.append((message, formatted_response))

        save_to_csv_message = message

        # Segment the message into the WHO Labor Care Guide
        save_chat_to_csv(save_to_csv_message)
        return "", history
    except Exception as e:
        error_message = f"Question: {message}\n\nHelpful Answer: An error occurred: {str(e)}"
        history.append((message, error_message))
        return "", history

with gr.Blocks() as demo:
    gr.Markdown("ePartogram-GPT")

    chatbot = gr.Chatbot(label="Chat History")
    msg = gr.Textbox(label="Enter your question", placeholder="Type your question here...")
    audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Or speak your question")
    clear = gr.ClearButton([msg, chatbot])

    msg.submit(create_conversation, inputs=[msg, chatbot], outputs=[msg, chatbot])
    msg.submit(lambda: "", inputs=[], outputs=[msg])
    audio_input.change(process_audio, inputs=[audio_input], outputs=[msg])

demo.launch()

**Test Cases**

**Test Query 1:** Mrs. A, Para 0+0 presents in labor at 39 weeks , uneventful ANC, Contractions 1/10 each 20 seconds, Pulse 88, BP 130/80, FHR 140/min, no decelerations, Head all up, Cervix mostly effaced, membranes intact, 4 cm dilatated. Assess the state of the patient.

In [None]:
# TEST QUERY 1
query = "Mrs. A, Para 0+0 presents in labor at 39 weeks , uneventful ANC, Contractions 1/10 each 20 seconds, Pulse 88, BP 130/80, FHR 140/min, no decelerations, Head all up, Cervix mostly effaced, membranes intact, 4 cm dilatated. Assess the state of the patient."
chat_history = []  # Initialize an empty chat history
response = get_response(query, chat_history)
print(response)

In [None]:
# Actual AI Result
response_filtered = response.split("<|assistant|>")[-1].strip()
print(response_filtered)

**Test Query 2:** Mrs. A, Para 0+0 presents in labor at 39 weeks , uneventful ANC, Contractions 1/10 each 20 seconds, Pulse 88, BP 130/80, FHR 140/min, no decelerations, Head all up, Cervix mostly effaced, membranes intact, 4 cm dilated. Provide a plan of care.

In [None]:
# TEST QUERY 2:
query = "Mrs. A, Para 0+0 presents in labor at 39 weeks , uneventful ANC, Contractions 1/10 each 20 seconds, Pulse 88, BP 130/80, FHR 140/min, no decelerations, Head all up, Cervix mostly effaced, membranes intact, 4 cm dilated. Provide a plan of care."
chat_history = []  # Initialize an empty chat history
response = get_response(query, chat_history)
print(response)

In [None]:
# Actual AI Result
response_filtered = response.split("<|assistant|>")[-1].strip()
print(response_filtered)

**Test Query 3:** Mrs. C, 20 years old Para 0+1, TOP at 12 weeks 2 years ago, iron deficiency anemia on treatment with iron. Last hemoglobin 10.5g. Presents at 39 weeks with labor pains for 4 hours, Examination  revealed active labor, ARM done, clear amniotic fluid, normal progress, Buscopan given, instructed her sister to walk with Mrs. C. Comment on plan

In [None]:
query = "Mrs. C, 20 years old Para 0+1, TOP at 12 weeks 2 years ago, iron deficiency anemia on treatment with iron. Last hemoglobin 10.5g. Presents at 39 weeks with labor pains for 4 hours, Examination  revealed active labor, ARM done, clear amniotic fluid, normal progress, Buscopan given, instructed her sister to walk with Mrs. C. Comment on plan"
chat_history = []  # Initialize an empty chat history
response = get_response(query, chat_history)
print(response)

In [None]:
# Actual AI Result
response_filtered = response.split("<|assistant|>")[-1].strip()
print(response_filtered)

**Test Query 4:** (Bony Obstruction) A 28-year-old woman presents to the labor and delivery unit at 41 weeks gestation with regular contractions. After 18 hours of labor, she has made minimal cervical progress despite adequate uterine contractions. The fetus remains high in the pelvis, and the cervix is 6 cm dilated. Assess state of patient.

In [None]:
query = "A 28-year-old woman presents to the labor and delivery unit at 41 weeks gestation with regular contractions. After 18 hours of labor, she has made minimal cervical progress despite adequate uterine contractions. The fetus remains high in the pelvis, and the cervix is 6 cm dilated. Assess state of patient."
chat_history = []  # Initialize an empty chat history
response = get_response(query, chat_history)
print(response)

In [None]:
# Actual AI Result
response_filtered = response.split("<|assistant|>")[-1].strip()
print(response_filtered)

Issue with Test Query 4: Oxytocin would actually make things worse, so we need to add specific RAG rules here

**Test Query 5:**

In [None]:
query = ""
chat_history = []  # Initialize an empty chat history
response = get_response(query, chat_history)
response_filtered = response.split("<|assistant|>")[-1].strip()
print(response_filtered)

In [None]:
query = ""
chat_history = []  # Initialize an empty chat history
response = get_response(query, chat_history)
response_filtered = response.split("<|assistant|>")[-1].strip()
print(response_filtered)

In [None]:
query = ""
chat_history = []  # Initialize an empty chat history
response = get_response(query, chat_history)
response_filtered = response.split("<|assistant|>")[-1].strip()
print(response_filtered)