In [1]:
import gradio as gr
import importlib
import re
import os
import pickle
from IPython.display import Markdown
from python_scripts import llm_rag, machine_translation, text_to_speech, whisper_setup, get_audio, utils
import numpy as np
from scipy.io.wavfile import write
import librosa

# NOTE(security): a literal API token (presumably a Replicate token, judging by
# its prefix -- confirm) used to be pasted here as a comment. Credentials must
# never live in the notebook: revoke the published token and supply the new one
# through an environment variable (e.g. os.environ) instead.

# Define paths dynamically.
# os.getcwd() already returns a path in the platform's native format, so no
# separator rewriting is needed here.
PATH = os.getcwd()

# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load a symptom_list.pkl that comes from a trusted source.
with open('symptom_list.pkl', 'rb') as f:
    symptom_list = pickle.load(f)

# Build dataset locations with os.path.join so that they resolve on every OS
# (hard-coded backslashes only work as separators on Windows).
root_path = os.path.join(PATH, 'Datasets', 'MeDAL')
audio_path = os.path.join(PATH, 'Datasets', 'Audio_Files')

In [4]:
def _build_symptom_pattern(symptoms):
    """Build a whole-word regex that matches any known symptom.

    Every entry of ``symptoms`` becomes an alternative. Entries written as
    ``"a or b"`` additionally contribute each individual part (``a``, ``b``)
    so that either one is recognised on its own.

    Args:
        symptoms: iterable of symptom strings.

    Returns:
        The regex source string, or ``None`` when there is nothing to match
        (an empty alternation would match the empty string everywhere).
    """
    alternatives = set()
    for symptom in symptoms:
        symptom = symptom.strip()
        if not symptom:
            continue
        alternatives.add(symptom)
        if ' or ' in symptom:
            alternatives.update(part.strip() for part in symptom.split(' or ') if part.strip())

    if not alternatives:
        return None

    # Regex alternation picks the first alternative that matches, so longer
    # phrases go first; otherwise a short symptom would shadow a longer one
    # that starts with the same words. The secondary key keeps the order stable.
    ordered = sorted(alternatives, key = lambda s: (-len(s), s))
    return r'\b(?:' + '|'.join(map(re.escape, ordered)) + r')\b'


def SMTS(Query, model_option = None):
    """Gradio callback: spoken symptoms in, written and spoken advice out.

    The recording is saved to a WAV file, transcribed with the 'tiny' Whisper
    model, scanned for known symptoms, sent to the selected LLM pipeline,
    translated, and finally converted back to speech.

    Args:
        Query: ``(sample_rate, samples)`` pair as delivered by ``gr.Audio``.
        model_option: optional key selecting the pipeline: ``'mixtral'``
            (Pipeline 1, no fine-tuning), ``'lora_model'`` or
            ``'nous-hermes2'`` (Pipeline 2). When ``None`` the choice is asked
            for on stdin, as before. ``input()`` blocks until a line is typed,
            so callers serving a web UI should pass this explicitly.

    Returns:
        ``(text, translated_text, (sr, arr))``: the lower-cased transcript, the
        translated model answer, and the synthesised answer as loaded by
        ``librosa.load``.

    Raises:
        gr.Error: when no audio was supplied or any pipeline step fails, so
            the failure is reported to the user instead of returning ``None``
            for an interface that expects three outputs.
    """
    if Query is None:
        raise gr.Error('No audio was received. Please record your symptoms and try again.')

    try:
        # Process the audio input
        sample_rate, samples = Query[0], Query[1]
        file_path = 'output_testing.wav'
        write(file_path, data = np.array(samples, dtype = np.int16), rate = sample_rate)
        audio_processed = utils.preprocess_audio(file_path)

        # Transcribe Query to English
        whisper_model = 'tiny'
        transcript = whisper_setup.transcribe_audio(audio_processed, [whisper_model])
        whisper_result = transcript[whisper_model]
        # Index 0 is used below as the translation target language and index 2
        # as the transcript text -- layout defined by whisper_setup (TODO confirm).
        target_language = whisper_result[0]
        text = (whisper_result[2]).lower()

        # Extract symptoms from the query
        pattern = _build_symptom_pattern(symptom_list)
        extracted_symptoms = re.findall(pattern, text, flags = re.IGNORECASE) if pattern else []

        # Feed query into the LLM
        models = {
            'llama_ours': 'ubaidtariq8/llama2-med-genai', # fine tuned model from replicate
            'lora_model': 'nehals_fine_tuned_model',      # fine tuned model from unsloth
            'nous-hermes2': 'maryams_fine_tuned_model',   # fine tuned model from gradientai - currently not supported with gradio due to version compatibility issues (detail in fine_tuning_gradientAI.ipynb notebook in our repo)
            'mixtral': 'mistralai/mixtral-8x7b-instruct-v0.1' # Used for Pipeline 1 with no fine tuning
        }
        # Note: the gradientAI fine-tuned model is wired in below, but it is not
        # expected to work with gradio because of the compatibility issues above.

        if model_option is None:
            fine_tune = input('Please specify which pipeline to use. Press 1 for Pipeline 1 (No fine-tuning), 2 for Pipeline 2 \n')
            if fine_tune == '2':
                model_option = 'lora_model' if input('Please specify which fine-tuned model to use. Press 1 for Mistral 7B, 2 for Nous-Hermes2 \n') == '1' else 'nous-hermes2'
            else:
                model_option = 'mixtral'

        # Only these keys have a query path below; reject anything else early
        # instead of silently routing it to the wrong pipeline.
        if model_option not in ('mixtral', 'lora_model', 'nous-hermes2'):
            raise ValueError('Unsupported model_option: {!r}'.format(model_option))

        model = llm_rag.DocumentEmbeddingPipeline(model_version = models[model_option], chroma_path = root_path)
        model.setup_environment()
        model.prepare_documents(collection_name = "muqeem", joining = True, persistent = True)
        model.embed_and_index()

        instructions = 'You are a medical doctor. A patient has come to you for desperate need of help. Give as accurate diagnosis as possible on the symptoms listed. '
        input_lora = ', '.join(extracted_symptoms) + '. Also consider the whole query ' + text + ' ' + 'Give also suggestions for mitigating the problem.'
        query = instructions + input_lora

        if model_option == 'mixtral':
            # Pipeline 1: the answer text is read from the `.response` attribute
            response_text = model.query_data(query).response
        elif model_option == 'lora_model':
            # Pipeline 2 (unsloth fine-tune)
            response_text = model.setup_lora_model("lora_model", instructions, input_lora)
        else:
            # Pipeline 2 (nous-hermes2); clean response if needed and bring it into pure string format
            response_text = model.setup_nous_hermes2(query)

        # Translate the answer into the user's language
        translated_text = machine_translation.translate_text(text = response_text, src_lang = 'en', trg_lang = target_language)

        # Now speak the response in the user's language
        audio_answer_path = os.path.join(audio_path, 'audio.wav')
        text_to_speech.multilingual_text_to_speech(text = translated_text, filepath = audio_answer_path)
        utils.sasti_harkat(audio_answer_path)  # project helper applied to the generated file
        arr, sr = librosa.load(audio_answer_path)

        return text, translated_text, (sr, arr)
    except Exception as e:
        print("An error occurred:", e)
        # Re-raise as gr.Error so Gradio reports the failure to the user.
        raise gr.Error('An error occurred: {}'.format(e)) from e

In [5]:
# Assemble the SymptoCare Gradio front end piece by piece, then launch it.

# Single microphone input handed to SMTS
voice_input = gr.Audio(label = 'Get your Voice Heard! 🔍', sources = ["microphone"])

# Three outputs, in the order SMTS returns them
report_outputs = [
    gr.Textbox(label = "We have heard your Voice! 👂"),
    gr.Textbox(label = "This is what we recommend: 📋"),
    gr.Audio(label = 'Press Play to listen to your medical report: 🔊'),
]

# Markdown passed as the interface `description`
intro_markdown = '''## Welcome to SymptoCare! 🌟
    Discover the power of seamless communication in healthcare with SymptoCare, your personalized healthcare assistant!
    ### How It Works:
    1. 🎤 *Speak your symptoms.*
    2. 🔄 *Let SymptoCare translate them into actionable insights.*
    3. 🗨️ *Engage with your healthcare provider like never before!*'''

# Markdown passed as the interface `article`
outro_markdown = '''### What We Offer:
    - 🗣️ *Breaking language barriers with ease.*
    - 📲 *Translating your symptoms into accurate diagnoses.*
    - 🤝 *Empowering your healthcare journey with personalized care.*

    ### Join Us Today:
    Get started now and take control of your healthcare journey! Check our [Github](https://github.com/CS-5302/CS-5302-Project-Group-15) here! Do give us a star if you like our work! 😀'''

demo = gr.Interface(
    fn = SMTS,
    inputs = [voice_input],
    outputs = report_outputs,
    title = '''SymptoCare 🤖''',
    description = intro_markdown,
    article = outro_markdown,
    theme = 'gradio/base',
    allow_flagging = 'never',
)

demo.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


