In [1]:
import os

from llama_index.llms.openai import OpenAI
from llama_index.core.schema import MetadataMode
import openai
from openai import OpenAI as OpenAIOG
import logging
import sys
llm = OpenAI(temperature=0.0, model="gpt-3.5-turbo")
client = OpenAIOG()

from langdetect import detect
from langdetect import DetectorFactory
DetectorFactory.seed = 0
from deep_translator import GoogleTranslator

# Load index
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
from llama_index.core import load_index_from_storage
storage_context = StorageContext.from_defaults(persist_dir="arv_metadata")
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine(similarity_top_k=3, llm=llm)

import gradio as gr

Current app

In [2]:
def nishauri(question: str, conversation_history: list[str]):
    
    context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])

    lang_question = detect(question)
    
    if lang_question=="sw":
        question = GoogleTranslator(source='sw', target='en').translate(question)
        
    response = query_engine.query(question)

    background = ("The person who asked the question is a person living with HIV."
                  " If the person says sasa or niaje, that is swahili slang for hello."
            " Recognize that they already have HIV and do not suggest that they have to get tested"
            " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."  
            " Do not suggest anything that is not relevant to someone who already has HIV."
             " Do not mention in the response that the person is living with HIV."
            " The following information about viral loads is authoritative for any question about viral loads:"
           " Under 50 copies/ml is low detectable level,"
           " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
           " 1000 and above is suspected treatment failure." 
           " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
           " A suppressed viral load is one below 200 copies / ml.")

    question_final = (
        f"The user previously asked and answered the following: {context}"
        f" The user just asked the following question: {question}"
        f" The following response was generated in response: {response}"
        f" Please update the response provided only if needed, based on the following background information {background}"
    )

    completion = client.chat.completions.create(
      model="gpt-3.5-turbo",
        messages=[
        {"role": "user", "content": question_final}
      ]
    )

    reply_to_user = completion.choices[0].message.content
    
    if lang_question=="sw":
        reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)
    
    conversation_history.append({"user": question, "chatbot": response.response})   

    return reply_to_user, conversation_history

demo = gr.Interface(
    title = "Nishauri Chatbot Demo",
    fn=nishauri,
    inputs=["text", gr.State(value=[])],
    outputs=["text", gr.State()],
)

demo.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




IMPORTANT: You are using gradio version 4.19.2, however version 4.29.0 is available, please upgrade.
--------


Retrying llama_index.embeddings.openai.base.get_embedding in 0.5778727161644901 seconds as it raised APIConnectionError: Connection error..
Traceback (most recent call last):
  File "C:\Users\jonathan.friedman\OneDrive - Palladium International, LLC\Documents\Data.FI\HOP_CLM\llama10\lib\site-packages\httpx\_transports\default.py", line 66, in map_httpcore_exceptions
    yield
  File "C:\Users\jonathan.friedman\OneDrive - Palladium International, LLC\Documents\Data.FI\HOP_CLM\llama10\lib\site-packages\httpx\_transports\default.py", line 228, in handle_request
    resp = self._pool.handle_request(req)
  File "C:\Users\jonathan.friedman\OneDrive - Palladium International, LLC\Documents\Data.FI\HOP_CLM\llama10\lib\site-packages\httpcore\_sync\connection_pool.py", line 216, in handle_request
    raise exc from None
  File "C:\Users\jonathan.friedman\OneDrive - Palladium International, LLC\Documents\Data.FI\HOP_CLM\llama10\lib\site-packages\httpcore\_sync\connection_pool.py", line 196, in ha

Set up DB

In [113]:
from sqlalchemy import (
    create_engine,
    MetaData,
    Table,
    Column,
    String,
    Integer,
    Date,
    select,
    column,
    insert,
    text
)

import pandas as pd

In [4]:
patient_file = 'patient_information.csv'
df = pd.read_csv(patient_file)

In [8]:
df = df.drop(['patient_id', 'gender', 'date_of_birth'], axis = 1)
df.columns

Index(['ccc_no', 'visit_date', 'visit_type', 'regimen', 'viral_load'], dtype='object')

In [10]:
df['visit_date'] = pd.to_datetime(df['visit_date'], format='%d/%m/%Y')
df

Unnamed: 0,ccc_no,visit_date,visit_type,regimen,viral_load
0,1234567891,2024-03-28,FOLLOWUP,TDF+3TC+NVP,150
1,1287602614,2024-05-06,FOLLOWUP,TDF/3TC/DTG,< LDL copies/ml
2,1287601459,2024-04-16,PHARMACY_REFILL,TDF/3TC/DTG,150
3,1287602420,2024-03-12,PHARMACY_REFILL,TDF/3TC/DTG,150
4,1287604453,2024-04-18,FOLLOWUP,TDF/3TC/DTG,150
...,...,...,...,...,...
1995,15815000197,2023-10-18,CLINICAL,CF2G,100 cp/mL
1996,1613800567,2023-10-11,CLINICAL,AF2E,100 cp/mL
1997,1582807378,2023-10-17,CLINICAL,AF2E,100 cp/mL
1998,1607500140,2023-09-05,CLINICAL,CF5X,100 cp/mL


In [108]:
ccc_user = "1287600796"

Let's create the DB

In [46]:
engine = create_engine('sqlite:///nishauri.db')
with engine.connect() as connection:
    connection.execute(
        text("""
        CREATE TABLE IF NOT EXISTS nishauri (
            ccc_no TEXT,
            visit_date DATE,
            visit_type TEXT,
            regimen TEXT,
            viral_load TEXT
        )
        """)
    )

# Step 3: Insert data from the CSV into the database
df.to_sql('nishauri', con=engine, index=False, if_exists='append')

# Step 4: Close the connection
engine.dispose()

Refinement

In [60]:
def nishauri(question: str, conversation_history: list[str]):
    
    context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])

    # Get patient info from DB
    engine = create_engine('sqlite:///nishauri.db')
    
    with engine.connect() as connection:
        # Select data using a parameterized query
        result = connection.execute(
            text("SELECT visit_date, visit_type, regimen, viral_load FROM nishauri WHERE ccc_no = :ccc_no"),
            {"ccc_no": ccc_user}
        )
    
    # Fetch and print results
    row = result.fetchall()

    last_appt = row[0][0]
    appt_purpose = row[0][1]
    regimen = row[0][2]
    vl_result = row[0][3]
    
    
    # Detect language of question - if Swahili, translate to English
    # only do this if there are at least 5 words in the text, otherwise lang detection is unreliable
    
    # Split the string into words
    words = question.split()

    # Count the number of words
    num_words = len(words)

    lang_question = "en"
    
    if num_words > 4:
        lang_question = detect(question)
    
#     lang_question = detect(question)

    if lang_question=="sw":
        question = GoogleTranslator(source='sw', target='en').translate(question)
    
    context_initial = ("Here is context for the user's question:"
                       f" The person's last appointment was on {last_appt} and the purpose was {appt_purpose}. "
                       f" The person is on the following regimen for HIV {regimen}. "
                       f" The person's most recent viral load result was {vl_result}. "
                       "Here is the user's question: ")
                    
    
    response = query_engine.query(question)

    print(response)
    
    # https://docs.llamaindex.ai/en/stable/examples/prompts/prompts_rag/
    # based on "refine" template
    
    # Refine the answer if needed
    
    background = ("The person who asked the question is a person living with HIV."
                  " If the person says sasa or niaje, that is swahili slang for hello."
            " Recognize that they already have HIV and do not suggest that they have to get tested"
            " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."  
            " Do not suggest anything that is not relevant to someone who already has HIV."
            " Do not mention in the response that the person is living with HIV."
#             f" The person's last appointment was on {last_appt} and the purpose was {appt_purpose}. "
#             f" The person is on the following regimen for HIV {regimen}. "
#             f" The person's most recent viral load result was {vl_result}. "
            " The following information about viral loads is authoritative for any question about viral loads:"
           " Under 50 copies/ml is low detectable level,"
           " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
           " 1000 and above is suspected treatment failure." 
           " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
           " A suppressed viral load is one below 200 copies / ml.")

    question_final = (
        f" The user previously asked and answered the following: {context}. "
        f" The user just asked the following question: {question}."
        f" The following response was generated in response: {response}."
        f" Please update the response provided only if needed, based on the following context: {background}"
    )

#     print(question_final)
    
    completion = client.chat.completions.create(
      model="gpt-3.5-turbo",
        messages=[
        {"role": "user", "content": question_final}
      ]
    )

    reply_to_user = completion.choices[0].message.content
    
    
    # If initial question was in Swahili, translate response back to Swahili
    if lang_question=="sw":
        reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)
    
    conversation_history.append({"user": question, "chatbot": reply_to_user})   

    return reply_to_user, conversation_history

demo = gr.Interface(
    title = "Nishauri Chatbot Demo",
    fn=nishauri,
    inputs=["text", gr.State(value=[])],
    outputs=["text", gr.State()],
)

demo.launch()

Running on local URL:  http://127.0.0.1:7875

To create a public link, set `share=True` in `launch()`.




IMPORTANT: You are using gradio version 4.19.2, however version 4.29.0 is available, please upgrade.
--------
sw
I'm here to help. What specific question or topic would you like assistance with?


Retrieval + Synthesizer

In [116]:
retriever = index.as_retriever(k = 3)

In [135]:
def nishauri(question: str, ccc_user: str, conversation_history: list[str]):
    
    context = " ".join([item["user"] + " " + item["chatbot"] for item in conversation_history])

    # Get patient info from DB
    engine = create_engine('sqlite:///nishauri.db')
    
    with engine.connect() as connection:
        # Select data using a parameterized query
        result = connection.execute(
            text("SELECT visit_date, visit_type, regimen, viral_load FROM nishauri WHERE ccc_no = :ccc_no"),
            {"ccc_no": ccc_user}
        )
    
    # Fetch and print results
    row = result.fetchall()

    last_appt = row[0][0]
    appt_purpose = row[0][1]
    regimen = row[0][2]
    vl_result = row[0][3]
    
    
    # Detect language of question - if Swahili, translate to English
    # only do this if there are at least 5 words in the text, otherwise lang detection is unreliable
    
    # Split the string into words
    words = question.split()

    # Count the number of words
    num_words = len(words)

    lang_question = "en"
    
    if num_words > 4:
        lang_question = detect(question)
    
#     lang_question = detect(question)

    if lang_question=="sw":
        question = GoogleTranslator(source='sw', target='en').translate(question)                   
    
    sources = retriever.retrieve(question)
    source0 = sources[0].text
    source1 = sources[1].text
    
    background = ("The person who asked the question is a person living with HIV."
                  " If the person says sasa or niaje, that is swahili slang for hello. Just say hello back and ask how you can help."
                  " Recognize that they already have HIV and do not suggest that they have to get tested"
                  " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."  
                  " Do not suggest anything that is not relevant to someone who already has HIV."
                  " Do not mention in the response that the person is living with HIV."
                  f" The person's next appointment is on {last_appt} and the purpose is {appt_purpose}. "
                  f" The person is on the following regimen for HIV: {regimen}. "
                  f" The person's most recent viral load result was {vl_result}. "
                  " The following information about viral loads is authoritative for any question about viral loads:"
                  " Under 50 copies/ml is low detectable level,"
                  " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
                  " 1000 and above is suspected treatment failure." 
                  " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
                  " A suppressed viral load is one below 200 copies / ml."
                  " For questions about when patients should get their viral loads taken," 
                  " if they are newly initiated on ART, the first viral load sample should be taken after 3 months of"
                  " taking ART. Otherwise, if they are not new on ART, then if their previous result was below 50 to 199 cp/ml,"
                  " their viral load should be taken after every 12 months. If  their previous result was above 200cp/ml,"
                  " then viral load sample should be taken after three months.")

    question_final = (
        f" The user previously asked and answered the following: {context}. "
        f" The user just asked the following question: {question}."
        f" Please use the following content to generate a response: {source0} {source1}."
        f" The following background on the user should also inform the response as needed: {background}"
        " Keep answers brief and limited to the question that was asked."
        " Do not provide information the user did not ask about. If they start with a greeting, just greet them in return and don't share anything else."
    )
    
    completion = client.chat.completions.create(
      model="gpt-4-turbo",
        messages=[
        {"role": "user", "content": question_final}
      ]
    )

    reply_to_user = completion.choices[0].message.content
    
    
    # If initial question was in Swahili, translate response back to Swahili
    if lang_question=="sw":
        reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)
    
    conversation_history.append({"user": question, "chatbot": reply_to_user})   

    return reply_to_user, conversation_history


demo = gr.Interface(
    title = "Nishauri Chatbot Demo",
    fn=nishauri,
    inputs=[gr.Textbox(label="question", placeholder="Type your question here..."),
            gr.Textbox(label="CCC", placeholder="Type your ccc here..."),
            gr.State(value = [])],
    outputs=["text", gr.State()],
)

demo.launch()

Running on local URL:  http://127.0.0.1:7893

To create a public link, set `share=True` in `launch()`.




IMPORTANT: You are using gradio version 4.19.2, however version 4.29.0 is available, please upgrade.
--------


In [136]:
pip list sqlalchemy

Package                                  VersionNote: you may need to restart the kernel to use updated packages.
---------------------------------------- -----------
accelerate                               0.27.2
aiofiles                                 23.2.1
aiohttp                                  3.9.3
aiosignal                                1.3.1
alembic                                  1.13.1
altair                                   5.2.0
annotated-types                          0.6.0
anyio                                    4.3.0
asgiref                                  3.7.2
asttokens                                2.4.1
async-timeout                            4.0.3
attrs                                    23.2.0
backoff                                  2.2.1
bcrypt                                   4.1.2
beautifulsoup4                           4.12.3
blinker                                  1.7.0
Bottleneck                               1.3.8
bs4                          

In [105]:
sources[2].text

IndexError: list index out of range