This file is used for voice translation

In [None]:
#Downloading required libraries:
#pip install gtts
#!pip install googletrans==4.0.0-rc1 ##used deep translator instead of this one, can avoid this one
#!pip install pydub
#!brew install ffmpeg
#!pip install playsound
#!brew install portaudio
#!pip install pyaudio
#!pip3 install PyObjC
#!pip install SpeechRecognition
#!pip install openai-whisper
#!pip install deep-translator
#!pip install spacy
#!python3 -m spacy download en_core_web_sm
#!pip install schedule


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.8.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


Following code is used for live audio translation during a call with patient.
Considering the patient to be on call with doctor and the doctor has put the speaker ON.

In [33]:
import speech_recognition as sr
from deep_translator import GoogleTranslator
from gtts import gTTS
from pydub import AudioSegment
from pydub.playback import play
import tempfile

# Initialize Recognizer
recognizer = sr.Recognizer()

def recognize_speech(lang="auto"):
    """Captures and recognizes speech using Google API, waits for clear input"""
    while True:  # Keep retrying until speech is clear
        with sr.Microphone() as source:
            print(f"Listening ({lang})... Speak now!")
            recognizer.adjust_for_ambient_noise(source)
            audio = recognizer.listen(source)
        try:
            # Recognize speech
            text = recognizer.recognize_google(audio, language=lang)
            print(f"Recognized ({lang}): {text}")
            return text  # Return only when speech is recognized clearly
        except sr.UnknownValueError:
            print("Could not understand audio. Please speak again.")
        except sr.RequestError:
            print("API unavailable. Check internet.")

def translate_text(text, src_lang, dest_lang):
    """Translates text between languages"""
    translator = GoogleTranslator(source=src_lang, target=dest_lang)
    translated_text = translator.translate(text)
    print(f"Translated ({src_lang} → {dest_lang}): {translated_text}")
    return translated_text

def text_to_speech(text, lang):
    """Converts text to speech and plays it"""
    tts = gTTS(text=text, lang=lang)
    with tempfile.NamedTemporaryFile(delete=True, suffix=".mp3") as fp:
        tts.save(fp.name)
        audio = AudioSegment.from_file(fp.name, format="mp3")
        play(audio)

# Main Loop: Real-Time Translation Between Patient & Doctor
while True:
    print("\n--- Patient's Turn (Speaking Local Language) ---")
    patient_text = recognize_speech(lang="te")  # Telugu input
    translated_to_english = translate_text(patient_text, src_lang="te", dest_lang="en")
    print("\n--- Doctor's Turn (Reply in English) ---")
    doctor_text = recognize_speech(lang="en")  # English input
    translated_to_patient = translate_text(doctor_text, src_lang="en", dest_lang="te")
    print("\n--- Speaking Translation to Patient ---")
    text_to_speech(translated_to_patient, lang="te")  # Doctor's response in Telugu


--- Patient's Turn (Speaking Local Language) ---
Listening (te)... Speak now!
Could not understand audio. Please speak again.
Listening (te)... Speak now!
Recognized (te): హలో ఎలా ఉన్నారు
Translated (te → en): Hello how are you

--- Doctor's Turn (Reply in English) ---
Listening (en)... Speak now!
Could not understand audio. Please speak again.
Listening (en)... Speak now!
Recognized (en): Hi how are you
Translated (en → te): హాయ్ మీరు ఎలా ఉన్నారు

--- Speaking Translation to Patient ---

--- Patient's Turn (Speaking Local Language) ---
Listening (te)... Speak now!
Could not understand audio. Please speak again.
Listening (te)... Speak now!


KeyboardInterrupt: 

Following code utilises Natural Language Processing for extracting content from doctors voice regarding prescription and converting to text. The output text can be sent as an input to any SMS tool for sending the message to patient.

In [34]:
# set remainders
import speech_recognition as sr
import spacy
import schedule
import time
from gtts import gTTS
from playsound import playsound
import tempfile

nlp = spacy.load('en_core_web_sm')  # Loading spacy

# mapping for time words to actual times
TIME_MAPPING = {
    "morning": "08:00",
    "afternoon": "14:00",
    "evening": "18:00",
    "night": "21:00",
    "before breakfast": "07:30",
    "after breakfast": "09:00",
    "before lunch": "12:30",
    "after lunch": "13:30",
    "before dinner": "19:30",
    "after dinner": "20:30",
    "every 6 hours": ["06:00", "12:00", "18:00", "00:00"], 
}
 
recognizer = sr.Recognizer() # Initializing speech recognizer

def recognize_speech():
    """Captures and converts doctor's speech to text"""
    with sr.Microphone() as source:
        print("Doctor, please dictate the prescription...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)
    try:
        text = recognizer.recognize_google(audio)
        print(f"Recognized Speech: {text}")
        return text
    except sr.UnknownValueError:
        print("Could not understand audio.")
        return None
    except sr.RequestError:
        print("Speech service unavailable.")
        return None

def extract_medicine_schedule(text):
    """Extracts medicines, dosages, and schedules using NLP"""
    medicines = {}
    doc = nlp(text.lower())

    current_medicine = None
    for token in doc:
        # Detect medicine name (Assumption: Proper nouns or words in medical vocabulary)
        if token.pos_ in ["NOUN", "PROPN"] and len(token.text) > 2:
            current_medicine = token.text
            medicines[current_medicine] = {"dosage": "Unknown", "times": []}
        # Detect dosage
        if token.like_num or "mg" in token.text or "ml" in token.text or "tablet" in token.text:
            if current_medicine:
                medicines[current_medicine]["dosage"] = token.text
        # Detect time expressions
        for key, time_value in TIME_MAPPING.items():
            if key in token.text:
                if isinstance(time_value, list):  # handles repetitive values
                    medicines[current_medicine]["times"].extend(time_value)
                else:
                    medicines[current_medicine]["times"].append(time_value)
    return medicines

def schedule_reminders(medicines):
    """Schedules medicine intake reminders"""
    for medicine, details in medicines.items():
        for time_slot in details["times"]:
            schedule.every().day.at(time_slot).do(send_notification, medicine, time_slot)

def send_notification(medicine_name, time_slot):
    """Actual notification push"""
    message = f"Reminder: Take your {medicine_name} now ({time_slot})."
    print(message)
    tts = gTTS(text=message, lang="en") # converts text to voice message for notification
    with tempfile.NamedTemporaryFile(delete=True, suffix=".mp3") as fp:
        tts.save(fp.name)
        playsound(fp.name)

# Main process
spoken_text = recognize_speech()
if spoken_text:
    extracted_data = extract_medicine_schedule(spoken_text)
    print("Extracted Prescription:", extracted_data)
    schedule_reminders(extracted_data)

    print("Reminder system is running... (Press Ctrl+C to stop)")
    while True:
        schedule.run_pending()
        time.sleep(1)  # Prevent excessive CPU usage


Doctor, please dictate the prescription...
Recognized Speech: hi please take paracetamol at 9:00 a.m. everyday
Extracted Prescription: {'paracetamol': {'dosage': 'Unknown', 'times': []}, 'a.m.': {'dosage': 'Unknown', 'times': []}, 'everyday': {'dosage': 'Unknown', 'times': []}}
Reminder system is running... (Press Ctrl+C to stop)


KeyboardInterrupt: 