In [None]:
import os

from llama_index.llms.openai import OpenAI
from llama_index.core.schema import MetadataMode
import openai
from openai import OpenAI as OpenAIOG
import logging
import sys
# LlamaIndex LLM wrapper; handed to the query engine created further down.
llm = OpenAI(temperature=0.0, model="gpt-3.5-turbo")
# Client class from the openai package itself, imported as OpenAIOG so it does
# not shadow the LlamaIndex OpenAI class imported above.
# NOTE(review): no API key is passed to either client here; presumably both
# pick it up from the environment - confirm.
client = OpenAIOG()

from lingua import Language, LanguageDetectorBuilder
from langdetect import detect
from langdetect import DetectorFactory
# Pin langdetect's seed; intended to make repeated detections reproducible.
DetectorFactory.seed = 0
from deep_translator import GoogleTranslator
# NOTE(review): repeats the lingua import a few lines up; redundant but harmless.
from lingua import Language, LanguageDetectorBuilder

# Load the index that was persisted to the "arv_metadata" directory.
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
from llama_index.core import load_index_from_storage
storage_context = StorageContext.from_defaults(persist_dir="arv_metadata")
index = load_index_from_storage(storage_context)
# Both the query engine and the retriever are configured with similarity_top_k=3.
# NOTE(review): query_engine is not referenced in the cells visible here; only
# retriever is used by the chatbot function.
query_engine = index.as_query_engine(similarity_top_k=3, llm=llm)
retriever = index.as_retriever(similarity_top_k = 3)

import gradio as gr


In [None]:
import re

# Words/phrases treated as a plain acknowledgment of the previous answer
# (Swahili list first, then English).
acknowledgment_keywords_sw = [
    "sawa", "ndiyo", "naam", "hakika", "asante",
    "nimeelewa", "nimekupata", "ni kweli", "kwa hakika", "nimesikia",
]
acknowledgment_keywords_en = [
    "thanks", "thank you", "thx", "ok", "okay",
    "great", "got it", "appreciate", "good", "makes sense",
]

# Words that indicate the message carries a further question; the first row is
# English, the second row Swahili.
follow_up_keywords = [
    "but", "also", "and", "what", "how", "why", "when",
    "lakini", "pia", "na", "nini", "vipi", "kwanini", "wakati",
]

# Words/phrases treated as a greeting (Swahili list first, then English).
greeting_keywords_sw = [
    "sasa", "niaje", "habari", "mambo", "jambo", "shikamoo",
    "marahaba", "hujambo", "hamjambo", "salama", "vipi",
]
greeting_keywords_en = [
    "hi", "hello", "hey", "how's it", "what's up", "yo", "howdy",
]

def contains_exact_word_or_phrase(text, keywords):
    """Tell whether any keyword appears in ``text`` as a whole word or phrase.

    ``text`` is lower-cased before matching; the keywords are used exactly as
    given, so they are expected to be lower-case already. Each keyword is
    regex-escaped and wrapped in ``\\b`` word boundaries, which prevents a
    keyword such as "hi" from matching inside "this".

    Args:
        text: The message to inspect.
        keywords: Iterable of words or multi-word phrases to look for.

    Returns:
        True if at least one keyword matches, otherwise False.
    """
    lowered = text.lower()
    return any(
        re.search(rf"\b{re.escape(candidate)}\b", lowered) is not None
        for candidate in keywords
    )

def contains_greeting_sw(question):
    """Return True if ``question`` contains any entry of ``greeting_keywords_sw``.

    Matching is delegated to ``contains_exact_word_or_phrase`` (whole-word,
    with the question lower-cased).
    """
    return contains_exact_word_or_phrase(text=question, keywords=greeting_keywords_sw)

def contains_greeting_en(question):
    """Return True if ``question`` contains any entry of ``greeting_keywords_en``.

    Matching is delegated to ``contains_exact_word_or_phrase`` (whole-word,
    with the question lower-cased).
    """
    return contains_exact_word_or_phrase(text=question, keywords=greeting_keywords_en)

def contains_acknowledgment_sw(question):
    """Return True if ``question`` contains any entry of ``acknowledgment_keywords_sw``.

    Matching is delegated to ``contains_exact_word_or_phrase`` (whole-word,
    with the question lower-cased).
    """
    return contains_exact_word_or_phrase(text=question, keywords=acknowledgment_keywords_sw)

def contains_acknowledgment_en(question):
    """Return True if ``question`` contains any entry of ``acknowledgment_keywords_en``.

    Matching is delegated to ``contains_exact_word_or_phrase`` (whole-word,
    with the question lower-cased).
    """
    return contains_exact_word_or_phrase(text=question, keywords=acknowledgment_keywords_en)

def contains_follow_up(question):
    """Return True if ``question`` contains any entry of ``follow_up_keywords``.

    Matching is delegated to ``contains_exact_word_or_phrase`` (whole-word,
    with the question lower-cased).
    """
    return contains_exact_word_or_phrase(text=question, keywords=follow_up_keywords)

def process_acknowledgment_response(question):
    """Return a canned reply when ``question`` is a bare acknowledgment.

    Args:
        question: The user's message.

    Returns:
        The English reply if an English acknowledgment keyword is present,
        otherwise the Swahili reply if a Swahili acknowledgment keyword is
        present. None when neither matches or when the message also contains
        a follow-up keyword.
    """
    # Any follow-up indicator vetoes both canned replies.
    if contains_follow_up(question):
        return None
    # English is checked before Swahili, so it wins when both match.
    if contains_acknowledgment_en(question):
        return "You're welcome! Is there anything else I can help with?"
    if contains_acknowledgment_sw(question):
        return "Karibu! Kuna kitu kingine chochote ninachoweza kusaidia?"
    return None

def process_greeting_response(question):
    """Return a canned greeting when ``question`` contains a greeting keyword.

    Args:
        question: The user's message.

    Returns:
        The English greeting if an English greeting keyword is present,
        otherwise the Swahili greeting if a Swahili greeting keyword is
        present, otherwise None.
    """
    # English is checked before Swahili, so it wins when both match.
    if contains_greeting_en(question):
        return "Hi! Can I assist with any question related to HIV?"
    if contains_greeting_sw(question):
        # "ukimwi" (not "ukimwe") is the Swahili spelling; the phrase
        # "virusi vya ukimwi" is the Swahili term for HIV.
        return "Habari! Je, ninaweza kusaidia kwa swali lolote linalohusiana na virusi vya ukimwi?"
    return None


# lingua detector for short messages; built on first use and then reused,
# because constructing it on every request is needless repeated work.
_short_text_detector = None


def _get_short_text_detector():
    """Return the shared lingua detector restricted to English and Swahili."""
    global _short_text_detector
    if _short_text_detector is None:
        _short_text_detector = LanguageDetectorBuilder.from_languages(
            Language.ENGLISH, Language.SWAHILI
        ).build()
    return _short_text_detector


def detect_language(question):
    """Return a language code for ``question``.

    Messages with fewer than 5 whitespace-separated words are classified with
    lingua, restricted to English and Swahili. Longer messages go through
    langdetect, which can return codes other than "en" and "sw".

    Args:
        question: The user's message.

    Returns:
        "sw" or "en" for short messages lingua can classify, whatever code
        langdetect returns for longer messages, or "unknown" when detection
        fails or is inconclusive.
    """
    if len(question.split()) < 5:
        detected_language = _get_short_text_detector().detect_language_of(question)
        if detected_language == Language.SWAHILI:
            return "sw"
        if detected_language == Language.ENGLISH:
            return "en"
        # lingua could not decide; report it the same way as a langdetect failure
        # instead of falling through and returning None.
        return "unknown"
    try:
        return detect(question)
    except Exception as e:
        print(f"Error with langdetect: {e}")
        return "unknown"


def nishauri(question: str, conversation_history: list[dict]):
    """Answer one chat turn and return the reply with the updated history.

    Greetings and bare acknowledgments get canned replies. Anything else is
    translated to English when detected as Swahili, enriched with retrieved
    passages and fixed background guidance, and sent to the chat model; the
    reply is translated back to Swahili when the question was Swahili.

    Args:
        question: The user's message.
        conversation_history: Earlier turns as dicts with "user" and
            "chatbot" keys. The list is appended to in place.

    Returns:
        A ``(reply, conversation_history)`` tuple.
    """
    # Process greeting
    greet_response = process_greeting_response(question)
    if greet_response:
        conversation_history.append({"user": question, "chatbot": greet_response})
        return greet_response, conversation_history

    # Process acknowledgment
    ack_response = process_acknowledgment_response(question)
    if ack_response:
        conversation_history.append({"user": question, "chatbot": ack_response})
        return ack_response, conversation_history

    # Flatten the earlier turns into one string for the prompt.
    context = " ".join(item["user"] + " " + item["chatbot"] for item in conversation_history)

    lang_question = detect_language(question)

    # Retrieval and the prompt work on English text, so Swahili questions are
    # translated first. From here on `question` holds the English version,
    # which is also what gets stored in the history below.
    if lang_question == "sw":
        question = GoogleTranslator(source='sw', target='en').translate(question)

    # Join however many passages the retriever returned, rather than indexing
    # three fixed positions, so fewer than three results cannot raise IndexError.
    sources = retriever.retrieve(question)
    source_text = " ".join(source.text for source in sources)

    background = ("The person who asked the question is a person living with HIV."
                  " If the person says sasa or niaje, that is swahili slang for hello."
                  " They are asking questions about HIV. Do not talk about anything that is not related to HIV. "
                  " Recognize that they already have HIV and do not suggest that they have to get tested"
                  " for HIV or take post-exposure prophylaxis, as that is not relevant, though their partners perhaps should."
                  " Do not suggest anything that is not relevant to someone who already has HIV."
                  " Do not mention in the response that the person is living with HIV."
                  " The following information about viral loads is authoritative for any question about viral loads:"
                  " Under 50 copies/ml is low detectable level,"
                  " 50 - 199 copies/ml is low level viremia, 200 - 999 is high level viremia, and "
                  " 1000 and above is suspected treatment failure."
                  " A high viral load or non-suppressed viral load is any viral load above 200 copies/ml."
                  " A suppressed viral load is one below 200 copies / ml.")

    question_final = (
        f" The user previously asked and answered the following: {context}. "
        f" The user just asked the following question: {question}."
        f" Please use the following content to generate a response: {source_text}."
        f" Please consider the following background information when generating a response: {background}."
        " Keep answers brief and limited to the question that was asked."
        " If they start with a greeting, just greet them in return and don't share anything else."
        " Do not change the subject or address anything the user didn't directly ask about."
        # Disabled prompt instructions, kept for reference:
        # " Do not give any affirmations."
        # " The user is not quitting anything."
        " If they respond with an acknowledgement, simply thank them ask if there is anything else that you can help with."
    )

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": question_final}
        ]
    )

    reply_to_user = completion.choices[0].message.content

    # Answer in the language the user wrote in.
    if lang_question == "sw":
        reply_to_user = GoogleTranslator(source='auto', target='sw').translate(reply_to_user)

    conversation_history.append({"user": question, "chatbot": reply_to_user})

    return reply_to_user, conversation_history

# Gradio UI around the chatbot function: one text box in, one text box out,
# plus a State pair that carries the conversation history between calls.
# The history State starts as an empty list.
demo = gr.Interface(
    fn=nishauri,
    inputs=["text", gr.State(value=[])],
    outputs=["text", gr.State()],
    title="Nishauri Chatbot Demo",
)

demo.launch()

In [9]:
pip show deep-translator

Name: deep-translator
Version: 1.11.4
Summary: A flexible free and unlimited python tool to translate between different languages in a simple way using multiple translators
Home-page: https://github.com/nidhaloff/deep_translator
Author: Nidhal Baccouri
Author-email: nidhalbacc@gmail.com
License: MIT
Location: C:\Users\Yoni\anaconda3\Lib\site-packages
Requires: beautifulsoup4, requests
Required-by: 
Note: you may need to restart the kernel to use updated packages.
