In [8]:
import random
import fitz
import re
import json
import pandas as pd

In [9]:
def load_data(path):
    """Load intent patterns and responses from an intents JSON file.

    Args:
        path: Path to a JSON file whose top-level object has an ``intents``
            list. Every entry is read through its ``tag``, ``patterns`` and
            ``responses`` keys.

    Returns:
        A ``(intents_dict, response_dict)`` tuple. Both dictionaries are keyed
        by tag; the first holds each tag's ``patterns`` value and the second
        its ``responses`` value.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(path, 'r', encoding='utf-8') as file:
        intents = json.load(file)['intents']

    intents_dict = {}
    response_dict = {}
    for intent in intents:
        intents_dict[intent['tag']] = intent['patterns']
        response_dict[intent['tag']] = intent['responses']

    return intents_dict, response_dict

In [10]:
# Load the seed intents and report how many distinct tags they define.
intents_dict, response_dict = load_data('../raw_data/intents.json')
n_categories = len(intents_dict)
print(n_categories)

14


# Programmatically expand intents sections, starting with greeting

Load a thesaurus (PDF) so that synonyms are easy to look up.

In [11]:
# Load the thesaurus PDF and concatenate the plain text of every page.
pdf_path = "../raw_data/thesaurus.pdf"
# The context manager closes the document once its text has been extracted.
with fitz.open(pdf_path) as doc:
    # str.join avoids the quadratic cost of growing a string with += in a loop.
    text = "".join(page.get_text("text") for page in doc)

We don't need the introductory text, so drop everything before the first thesaurus entry.

In [12]:
# Keep only the text from the first occurrence of "aback" onward.
first_entry = text.find("aback")
if first_entry == -1:
    # str.find returns -1 on a miss; slicing with -1 would silently keep only
    # the last character of the text, so fail loudly instead.
    raise ValueError("Could not find the first thesaurus entry ('aback') in the extracted text")
trimmed_text = text[first_entry:]
print(trimmed_text[:100])

aback [adv] taken unawares confused, surprised,
thrown off, thrown off guard*; concept 403
abaft [ad


In [13]:
# An entry starts on a line shaped like "<word> [<tag>] <text>" and runs until a
# line containing a "concept NNN" marker; everything in between is its definition.
start_pattern = r"(\w+)\s\[\w+\d?\]\s(.+)"
end_pattern = r";? ?concepts? \d{1,3}"
lines = trimmed_text.split("\n")
start_text = ""
text_cleaned = []

for position, line in enumerate(lines):
    header = re.match(start_pattern, line)
    if header is None:
        continue

    word, start_text = header.groups()

    # Append following lines verbatim until one carries the end marker; from
    # that line keep only the part before the marker. If no marker is ever
    # found, the definition runs to the end of the text.
    for follow_up in range(position + 1, len(lines)):
        continuation = lines[follow_up]
        if re.search(end_pattern, continuation) is None:
            start_text += continuation
            continue
        start_text += re.split(end_pattern, continuation)[0]
        break

    text_cleaned.append((word, start_text))

In [14]:
# look for word in the text
def find_syns(word_to_find, context, entries=None):
    """Collect the synonyms listed for a headword in a given sense.

    Args:
        word_to_find: Headword to look up (exact, case-sensitive match).
        context: Substring that must occur in an entry's definition for the
            entry to be used; it selects the intended sense of the word.
        entries: Optional iterable of ``(word, definition)`` pairs to search.
            When omitted, the notebook-level ``text_cleaned`` list is used.

    Returns:
        The comma-separated pieces of every matching definition, stripped of
        surrounding whitespace, in the order they were encountered.
    """
    if entries is None:
        # Resolved at call time so the parsed thesaurus is picked up lazily.
        entries = text_cleaned

    syns = []
    for word, definition in entries:
        if word == word_to_find and context in definition:
            syns.extend(piece.strip() for piece in definition.split(","))

    return syns

# Gets every entry for the word "good". Not every sense of "good" fits our use case,
# so a context word ("excellent") is passed to select the intended sense.
# For now, I'll just edit the resulting list manually.
print(find_syns("good", "excellent"))

['pleasant', 'ﬁne acceptable', 'ace*', 'admirable', 'agreeable', 'bad', 'boss*', 'bully', 'capital', 'choice', 'commendable', 'congenial', 'crack*', 'deluxe', 'excellent', 'exceptional', 'favorable', 'ﬁrst-class', 'ﬁrst-rate', 'gnarly*', 'gratifying', 'great', 'honorable', 'marvelous', 'neat*', 'nice', 'pleasing', 'positive', 'precious', 'prime', 'rad*', 'recherché*', 'reputable', 'satisfactory', 'satisfying', 'select', 'ship-shape*', 'sound', 'spanking*', 'splendid', 'sterling', 'stupendous', 'super', 'superb', 'super-eminent', 'super-excellent', 'superior', 'tip-top*', 'up to snuff*', 'valuable', 'welcome', 'wonderful', 'worthy;']


# adding greeting intents

Get rid of synonyms that don't make sense for our purposes. Copied from above and manually edited. Prepare lists for forming greeting phrases and responses.

In [15]:
import itertools

# Pair every greeting with every follow-up question; the empty follow-up yields the bare greeting.
greetings = ["Hello", "Hi", "Hey", "Howdy", "Greetings", "Good morning", "Good afternoon", "Good evening", "Good day", "Hi there", "Hello there", "Hey hey"]
additional = ["how are you?", "how are you doing?", "how's it going?", "what's up?", "how do you do?", "How's it hangin?", "what's going on?", ""]
combinations = list(itertools.product(greetings, additional))
combinations_joined = [
    opener if follow_up == "" else f"{opener}, {follow_up}"
    for opener, follow_up in combinations
]
print(len(combinations_joined))

96


In [16]:
# Responses are built in the same order as the greeting patterns: one opener per
# greeting and one follow-up per question, so the two lists line up by index.
responses = ["Well hello", "Hi there", "Hey yourself", "Howdy there", "Greetings", "Good morning", "Good afternoon", "Good evening","Good day", "Hi", "Hello", "Hey yourself"]
help_inquire = " How may I help you today?" # for conciseness
# The final ". " entry is a sentinel meaning "no follow-up, just end the sentence".
responses_additional = ["I'm doing well, thank you.", "I'm doing well, thanks for asking.", "It's going well, thank you.", "Not much, thanks for asking.",
                         "I'm doing well, thank you.", "It's hangin'.", "Not much thanks for asking.", ". "]
responses_combined = list(itertools.product(responses, responses_additional))
# For the sentinel, close the opener with a period only. Appending the sentinel's
# trailing space as well would leave a double space before help_inquire.
responses_joined = [
    f"{opener}." if extra == ". " else f"{opener}. {extra}"
    for opener, extra in responses_combined
]
responses_with_inquiry = [f"{response}{help_inquire}" for response in responses_joined]
print(len(responses_joined))

96


Save updated intents

In [17]:
intents_dict['greeting'] = combinations_joined
response_dict['greeting'] = responses_with_inquiry

# adding goodbye intents

In [18]:
find_syns("goodbye", "farewell")

['farewell statement adieu',
 'adios',
 'bye-bye',
 'cheerio',
 'ciao',
 'godspeed*',
 'leave-tak-ing',
 'parting',
 'so long*',
 'swan song*',
 'toodle-oo*;']

In [19]:
# Start from every synonym found for the "farewell" sense of "goodbye", then drop
# the ones that do not suit the chatbot. list.remove raises ValueError when the
# value is absent, so each string must match the parsed thesaurus text exactly.
goodbye_syns = find_syns("goodbye", "farewell")
goodbye_syns.remove("leave-tak-ing")
goodbye_syns.remove("godspeed*") # debated on keeping this
goodbye_syns.remove("swan song*")
goodbye_syns.remove("toodle-oo*;")
goodbye_syns.remove("parting")
print(goodbye_syns)

['farewell statement adieu', 'adios', 'bye-bye', 'cheerio', 'ciao', 'so long*']


In [20]:
goodbye_syns[0] = goodbye_syns[0].split(" ")[0] # keep only the first word of the first entry
# strip '*' characters from both ends of every entry
goodbye_syns = [word.strip("*") for word in goodbye_syns]
print(goodbye_syns)

['farewell', 'adios', 'bye-bye', 'cheerio', 'ciao', 'so long']


In [21]:
# for correct grammar
def a_or_an(word):
    """Prefix ``word`` with the indefinite article matching its first letter.

    Words that start with a vowel letter (a, e, i, o, u; case-insensitive) get
    "an"; everything else, including the empty string, gets "a". The rule looks
    at spelling only, so words whose pronunciation disagrees with their first
    letter (e.g. "hour", "unique") are not handled.

    Args:
        word: The word to prefix.

    Returns:
        ``"an " + word`` or ``"a " + word``.
    """
    starts_with_vowel = word.lower().startswith(("a", "e", "i", "o", "u"))
    article = "an" if starts_with_vowel else "a"
    return f"{article} {word}"

good_syns = ["good", "great", "excellent", "amazing", "fantastic", "superb", "terrific", "outstanding", "exceptional", "marvelous", "splendid", "wonderful"]

goodbye_templates = []
response_templates = []
# Add a few farewells by hand. The membership check keeps this cell idempotent:
# re-running it does not append the same phrases a second time.
for extra_goodbye in ("goodbye", "till next time", "see you later", "bye"):
    if extra_goodbye not in goodbye_syns:
        goodbye_syns.append(extra_goodbye)

times = ["one", "day", "evening", "night", "rest of your day", "rest of your night", "rest of your evening", "rest of your morning"]

# Phrases that may follow the goodbye word. The three "Thanks ..." entries come
# first and the empty string (bare goodbye) comes last.
goodbye_additional = ["Thanks for your help.", "Thanks again.", "Thanks for helping.", "Take care."]
for syn in good_syns:
    for time_span in times:
        goodbye_additional.append(f"Have {a_or_an(syn)} {time_span}.")
    goodbye_additional.append(f"You've been {a_or_an(syn)} help.")

goodbye_additional.append("")

In [22]:
# Rebuild both lists from scratch so re-running this cell does not duplicate entries.
goodbye_templates = []
response_templates = []
for gb_syn in goodbye_syns:
    for gb_add in goodbye_additional:
        # strip() drops the trailing space left behind when gb_add is empty.
        goodbye_templates.append(f"{gb_syn}{random.choice(['!','.'])} {gb_add}".strip())
        if gb_add.startswith("Thanks"):
            # Decide by content rather than by a running counter, so the
            # thank-you reply is used for every goodbye word and not only
            # for the first one.
            response_templates.append("It's my pleasure. I'm glad I could be of assistance. Goodbye!")
        elif gb_add == "":
            response_templates.append("Goodbye! Have a great day!") # just a goodbye
        else:
            response_templates.append("Goodbye! Please come again!")

In [23]:
intents_dict['goodbye'] = goodbye_templates
response_dict['goodbye'] = response_templates

# Adding thanks intents

In [24]:
print(intents_dict['thanks'])

['Thanks', 'Thank you', "That's helpful", 'Awesome, thanks', 'Thanks for helping me']


In [25]:
print(response_dict['thanks'])

['Happy to help!', 'Any time!', 'My pleasure']


In [26]:
# Hand-written thank-you phrases, followed by the patterns currently stored under
# the 'thanks' tag. A later cell overwrites intents_dict['thanks'], so re-running
# this cell after that point picks up the generated patterns instead.
thanks = ['thank you very much', 'thanks a lot', 'thank you so much', 'thanks so much', 'thank you kindly', 'thank you for that.']
thanks = thanks + intents_dict['thanks']
print(thanks)

['thank you very much', 'thanks a lot', 'thank you so much', 'thanks so much', 'thank you kindly', 'thank you for that.', 'Thanks', 'Thank you', "That's helpful", 'Awesome, thanks', 'Thanks for helping me']


In [27]:
second_part_nouns = ["lifesaver", "star", "gem", "legend", "hero", "saint", "miracle worker", "godsend", "savior", "treasure"]
second_part = [" You've been a great help.", " You're a great help", " I don't know what I'd do without you."]

# Each noun contributes one compliment plus one empty entry; the empty entries
# produce bare thank-yous once combined with the phrases (for variation).
for noun in second_part_nouns:
    second_part.extend([f" You're a {noun}.", ""])

In [28]:
# capitalize the first letter of each element
thanks = [word.capitalize() for word in thanks]
print(thanks)

['Thank you very much', 'Thanks a lot', 'Thank you so much', 'Thanks so much', 'Thank you kindly', 'Thank you for that.', 'Thanks', 'Thank you', "That's helpful", 'Awesome, thanks', 'Thanks for helping me']


In [29]:
# Pair every thank-you phrase with every optional second sentence.
combinations = list(itertools.product(thanks, second_part))
# The second sentences carry a leading space and some phrases already end with a
# period, so a plain '. '.join produced ".  " and "..  ". Normalise both sides
# before joining; an empty second part leaves the phrase unchanged.
combinations_joined = [
    phrase if extra == "" else f"{phrase.rstrip('.')}. {extra.strip()}"
    for phrase, extra in combinations
]
print(len(combinations_joined))

253


In [30]:
# Every thank-you pattern gets the same reply, one entry per pattern.
welcome_reply = "You're very welcome. Is there anything else I can assist you with today?"
res = [welcome_reply] * len(combinations_joined)

intents_dict['thanks'] = combinations_joined
response_dict['thanks'] = res

# adding options intents

In [31]:
print(intents_dict['options'])

['How you could help me?', 'What you can do?', 'What help you provide?', 'How you can be helpful?', 'What support is offered']


In [32]:
print(response_dict['options'])

['I can guide you through Adverse drug reaction list, Blood pressure tracking, Hospitals and Pharmacies', 'Offering support for Adverse drug reaction, Blood pressure, Hospitals and Pharmacies']


# Todo: make responses more varied (?)

In [33]:
options = []
responses = []
end_part = ["?", " here?", " today?", " right now?", " now?", " this evening?", " this morning?", " this afternoon?"]
# Named help_verbs rather than `help` so the builtin help() is not shadowed.
help_verbs = ["help", "assist", "support", "aid"]
choices = ["options", "choices"]
response_options = ['I can guide you through the adverse drug reaction list, blood pressure tracking, or find hospitals and pharmacies.',
                    'I can offer you support by giving you information on the adverse drug reaction list, blood pressure tracking, or finding hospitals and pharmacies.',
                    'I can help you with the adverse drug reaction list, blood pressure tracking, or finding hospitals and pharmacies.']

# Build every question once per sentence ending.
for part in end_part:
    options.append(f"What can I do{part}")
    options.append(f"What can you do for me{part}")
    for choice in choices:
        options.append(f"What are my {choice}{part}")

    for help_choice in help_verbs:
        options.append(f"What can you {help_choice} me with{part}")
        options.append(f"How can you {help_choice} me{part}")
        options.append(f"What can you do to {help_choice} me{part}")

# One randomly chosen response per pattern keeps both lists the same length.
for option in options:
    responses.append(random.choice(response_options))

In [34]:
len(options)

128

Save the updated options

In [35]:
intents_dict['options'] = options
response_dict['options'] = responses

# adding adverse_drug intents

In [36]:
intents_dict['adverse_drug']

['How to check Adverse drug reaction?',
 'Open adverse drugs module',
 'Give me a list of drugs causing adverse behavior',
 'List all drugs suitable for patient with adverse reaction',
 'Which drugs dont have adverse reaction?']

In [37]:
response_dict['adverse_drug']

['Navigating to Adverse drug reaction module']

In [38]:
def generate_adverse_drug_questions(medications_path='../clean_data/medications.csv'):
    """Generate user questions about adverse drug reactions.

    Args:
        medications_path: CSV file with a ``medication`` column. Defaults to
            the project's cleaned medication list.

    Returns:
        A list of question strings. For every combination of medication
        synonym, adjective ("bad"/"adverse") and interaction noun, five
        questions are produced. Two of them mention a randomly drawn pair of
        medications.
    """
    inquiries = []
    # data collected using code in named_entity_recognition.ipynb
    medication_names = pd.read_csv(medications_path)['medication'].tolist()
    choices = ['bad', 'adverse']
    interactions = ['interactions', 'effects', 'side effects', 'behavior']
    display_verbs = ['display', 'show', 'list', 'provide', 'give']
    med_syns = ['medications', 'drugs', 'pharmaceuticals', 'prescriptions', 'meds', 'medicines']
    together_phrases = ['at the same time?', 'simultaneously?', 'together?', 'with each other?', 'concurrently?']

    def pick_pair():
        """Draw two medications from different rows whenever the list allows it."""
        if len(medication_names) >= 2:
            # sample() never returns the same row twice, which avoids
            # questions such as "Do X and X have bad interactions?".
            return random.sample(medication_names, 2)
        return [random.choice(medication_names), random.choice(medication_names)]

    for syn in med_syns:
        for choice in choices:
            for interaction in interactions:
                inquiries.append(f"Give me a list of {syn} causing {choice} {interaction}")
                inquiries.append(f"Which {syn} have {choice} {interaction}?")
                inquiries.append(f"{random.choice(display_verbs).capitalize()} all {syn} with {choice} {interaction}")
                first_med, second_med = pick_pair()
                inquiries.append(f"Do {first_med} and {second_med} have {choice} {interaction}?")
                first_med, second_med = pick_pair()
                inquiries.append(f"Is it safe to take {first_med} and {second_med} {random.choice(together_phrases)}")

    return inquiries

# Generate the patterns and give each one the same navigation response.
inquiries = generate_adverse_drug_questions()
intents_dict['adverse_drug'] = inquiries
response_dict['adverse_drug'] = ["Navigating to Adverse drug reaction module"] * len(inquiries)

# adding blood-pressure intents

In [39]:
intents_dict['blood_pressure']

['Open blood pressure module',
 'Task related to blood pressure',
 'Blood pressure data entry',
 'I want to log blood pressure results',
 'Blood pressure data management']

In [40]:
response_dict['blood_pressure']

['Navigating to Blood Pressure module']

In [41]:
inquiries = []
data = ['results', 'data', 'information', 'readings', 'numbers', 'stats']
open_module = ['Open', 'Show', 'Display', 'Start', 'Run', 'Begin', 'Launch']
module = ['module', 'manager', 'system', 'app', 'tool', 'program', 'application', 'software']
log = ['log', 'record', 'track', 'read', 'view', 'monitor', 'check', 'watch', 'see']
i = 0 # not read by any of the loops below

# "<verb> the <module> for blood pressure" / "<verb> blood pressure <module>"
for opener in open_module:
    for module_name in module:
        inquiries.extend([
            f"{opener} the {module_name} for blood pressure",
            f"{opener} blood pressure {module_name}",
        ])

# "Blood pressure <data> <module>" followed by "<verb> blood pressure <data>"
for data_name in data:
    inquiries.extend(f"Blood pressure {data_name} {module_name}" for module_name in module)
    inquiries.extend(f"{opener} blood pressure {data_name}" for opener in open_module)

# Logging/viewing requests, with and without a trailing data noun
for action in log:
    inquiries.extend([
        f"{action} my blood pressure",
        f"{action} blood pressure",
        f"I want to {action} my blood pressure",
    ])
    for data_name in data:
        inquiries.extend([
            f"{action} my blood pressure {data_name}",
            f"{action} blood pressure {data_name}",
            f"I want to {action} my blood pressure {data_name}",
        ])

# One identical navigation response per inquiry.
responses = ['Navigating to Blood Pressure module'] * len(inquiries)

intents_dict['blood_pressure'] = inquiries
response_dict['blood_pressure'] = responses

In [42]:
len(inquiries)

391

# adding blood_pressure_search intents

In [43]:
intents_dict['blood_pressure_search']

['I want to search for blood pressure result history',
 'Blood pressure for patient',
 'Load patient blood pressure result',
 'Show blood pressure results for patient',
 'Find blood pressure results by ID']

In [44]:
response_dict['blood_pressure_search']

['Please provide Patient ID', 'Patient ID?']

In [45]:
search_words = ['search for', 'find', 'locate', 'view', 'show', 'display', 'pull up', 'load']
result_words = ['results', 'data', 'information', 'readings', 'numbers', 'stats', 'history', 'logs', 'records']
end_part = ['for patient', 'by ID', 'for patient by ID', 'for patient by name', 'for patient by name and ID', 'for patient by name or ID', 'for patient by ID or name']

searches = []
for result_word, end in itertools.product(result_words, end_part):
    subject = f"blood pressure {result_word} {end}"
    # Two phrasings per search verb...
    for search_word in search_words:
        searches.append(f"I want to {search_word} {subject}")
        searches.append(f"{search_word.capitalize()} {subject}")
    # ...plus four fixed phrasings per (result word, ending) pair.
    searches.append(f"Show me {subject}")
    searches.append(f"I need {result_word} {end}")
    searches.append(f"I want {result_word} {end}")
    searches.append(f"Blood pressure {result_word} {end}")

intents_dict['blood_pressure_search'] = searches
# Every search pattern is answered with the same request for a patient ID.
responses = ['Please provide patient ID'] * len(searches)

response_dict['blood_pressure_search'] = responses

# adding search_blood_pressure_by_patient_id intents

In [46]:
intents_dict['search_blood_pressure_by_patient_id']

[]

In [47]:
response_dict['search_blood_pressure_by_patient_id']

['Loading Blood pressure result for Patient']

In [48]:
def generate_random_patient_ids(num_ids=10, prefix="P", id_length=6):
    """Return ``num_ids`` random identifiers of the form ``prefix`` + digits.

    Args:
        num_ids: How many identifiers to generate.
        prefix: String placed in front of the digits.
        id_length: Number of random decimal digits in each identifier.

    Returns:
        A list of strings. Identifiers are drawn independently, so duplicates
        are possible.
    """
    def one_id():
        digits = ""
        for _ in range(id_length):
            digits += str(random.randint(0, 9))
        return prefix + digits

    return [one_id() for _ in range(num_ids)]

# Generate 100 random patient IDs
random_patient_ids = generate_random_patient_ids(num_ids=100)

In [49]:
searches = []
search_words = ['search for', 'find', 'locate', 'view', 'show', 'display', 'pull up', 'load']
result_words = ['results', 'data', 'information', 'readings', 'numbers', 'stats', 'history', 'logs', 'records']
for result_word, search_word in itertools.product(result_words, search_words):
    # The list elements are evaluated in order, so the patient IDs are drawn in
    # the same sequence as with four separate append calls.
    searches.extend([
        f"I want to {search_word} blood pressure {result_word} for patient {random.choice(random_patient_ids)}",
        f"{search_word.capitalize()} blood pressure {result_word} for patient {random.choice(random_patient_ids)}",
        f"{random.choice(random_patient_ids)} blood pressure {result_word}",
        random.choice(random_patient_ids),  # a bare patient ID is a valid query too
    ])

intents_dict['search_blood_pressure_by_patient_id'] = searches
responses = ['Loading blood pressure data'] * len(searches)

response_dict['search_blood_pressure_by_patient_id'] = responses

# adding pharmacy_search intents

In [50]:
intents_dict['pharmacy_search']

['Find me a pharmacy',
 'Find pharmacy',
 'List of pharmacies nearby',
 'Locate pharmacy',
 'Search pharmacy']

In [51]:
response_dict['pharmacy_search']

['Please provide pharmacy name']

In [52]:
searches = []
search_words = ['search for', 'find', 'locate', 'show', 'navigate to']
location_words = ['location', 'address', 'home', 'position']
pharmacy_words = ['pharmacy', 'drugstore', 'pharmacies', 'drugstores']
for search_word in search_words:
    searches.append(f"I want to {search_word} a pharmacy")
    searches.append(f"{search_word.capitalize()} a pharmacy")
    for pharmacy_word in pharmacy_words:
        for location_word in location_words:
            searches.append(f"{search_word.capitalize()} {pharmacy_word} near my {location_word}")

        searches.append(f"{search_word.capitalize()} {pharmacy_word}")
        searches.append(f"{search_word} {pharmacy_word} near me")
        # "nearby" was misspelled "nearyby", which put the typo into every generated pattern
        searches.append(f"{search_word.capitalize()} nearby {pharmacy_word}")

intents_dict['pharmacy_search'] = searches
# Each pattern is answered with the same prompt for a pharmacy name.
responses = ['Please provide pharmacy name'] * len(searches)

response_dict['pharmacy_search'] = responses

# adding search_pharmacy_by_name intents

In [53]:
intents_dict['search_pharmacy_by_name']

[]

In [54]:
response_dict['search_pharmacy_by_name']

['Loading pharmacy details']

In [55]:
search_words = ['search for', 'find', 'locate', 'show', 'navigate to']
names = ['CVS', 'Walgreens', 'Rite Aid', 'Walmart', 'Costco', 'Kroger', 'Publix', 'Safeway', 'Albertsons', 'Target']
searches = []
for search_word in search_words:
    capitalized = search_word.capitalize()
    for name in names:
        # Five phrasings per (verb, pharmacy chain) pair
        searches.extend([
            f"I want to {search_word} {name}",
            f"{capitalized} {name}",
            f"{search_word} {name} near me",
            f"{capitalized} {name} nearby",
            f"{capitalized} {name} near my location",
        ])
intents_dict['search_pharmacy_by_name'] = searches
# One identical response per generated search pattern.
responses = ['Loading pharmacy details'] * len(searches)

response_dict['search_pharmacy_by_name'] = responses

# adding hospital_search intents

In [56]:
intents_dict['hospital_search']

['Lookup for hospital',
 'Searching for hospital to transfer patient',
 'I want to search hospital data',
 'Hospital lookup for patient',
 'Looking up hospital details']

In [57]:
response_dict['hospital_search']

['Please provide hospital name or location']

In [58]:
searches = []
search_words = ['search for', 'find', 'locate', 'view', 'show', 'display', 'pull up', 'load', 'lookup']
hospital = ['hospital', 'hospitals']
location_words = ['location', 'address', 'area', 'position']


# Singular/plural is picked at random for each pattern to vary the wording.
for search_word in search_words:
    searches.append(f"I want to {search_word} a hospital")
    searches.append(f"{search_word.capitalize()} a hospital")
    searches.append(f"{search_word.capitalize()} {random.choice(hospital)}")
    searches.append(f"{search_word.capitalize()} {random.choice(hospital)} near me")
    # "nearby" was misspelled "nearyby", which put the typo into every generated pattern
    searches.append(f"{search_word.capitalize()} nearby {random.choice(hospital)}")
    for location_word in location_words:
        searches.append(f"{search_word.capitalize()} {random.choice(hospital)} near my {location_word}")

    searches.append(f"{search_word.capitalize()} {random.choice(hospital)}")

intents_dict['hospital_search'] = searches
# Every pattern is answered with the same prompt for search criteria.
responses = ['Please provide hospital name, location, or type'] * len(searches)

response_dict['hospital_search'] = responses

# adding search_hospital_by_params intents

In [59]:
intents_dict['search_hospital_by_params']

[]

In [60]:
response_dict['search_hospital_by_params']

['Please provide hospital type']

In [61]:
# list of hospital names
names = ['Mayo Clinic', 'Cleveland Clinic', 'Johns Hopkins Hospital', 'Massachusetts General Hospital',
         'UCSF Medical Center', 'UCLA Medical Center', 'New York-Presbyterian Hospital', 'Stanford Health Care-Stanford Hospital',
         'Hospitals of the University of Pennsylvania-Penn Presbyterian', 'Cedars-Sinai Medical Center', 'Northwestern Memorial Hospital',
         'UPMC Presbyterian Shadyside', 'University of Michigan Hospitals-Michigan Medicine', 'Mount Sinai Hospital']
# list of hospital addresses
addresses = ['200 1st St SW, Rochester, MN 55905', '9500 Euclid Ave, Cleveland, OH 44195', '1800 Orleans St, Baltimore, MD 21287',
             '55 Fruit St, Boston, MA 02114', '505 Parnassus Ave, San Francisco, CA 94143', '757 Westwood Plaza, Los Angeles, CA 90095',
             '525 East 68th St, New York, NY 10065', '300 Pasteur Dr, Stanford, CA 94305', '51 N 39th St, Philadelphia, PA 19104',
             '8700 Beverly Blvd, Los Angeles, CA 90048', '251 E Huron St, Chicago, IL 60611', '200 Lothrop St, Pittsburgh, PA 15213',
             '1500 E Medical Center Dr, Ann Arbor, MI 48109', '1 Gustave L Levy Pl, New York, NY 10029']

full_list = names + addresses
searches = []
# The empty search word stands for a query that is just the hospital name or address.
search_words = ['search for', 'find', 'locate', 'lookup', 'where is', 'show details for', '']

combinations = list(itertools.product(search_words, full_list))
# The empty element is the search word (index 0), not the target (index 1).
# Testing the target never matched, so bare queries got a leading space.
combinations_joined = [
    target if search_word == "" else f"{search_word} {target}"
    for search_word, target in combinations
]
print(len(combinations_joined))

196


In [62]:
intents_dict['search_hospital_by_params'] = combinations_joined

# One identical response per stored pattern.
response_dict['search_hospital_by_params'] = ['Loading hospital details'] * len(intents_dict['search_hospital_by_params'])

# adding search_hospital_by_type intents

In [63]:
# list of hospital types
types = ['general', 'community', 'teaching', 'specialty', 'clinic', 'psychiatric', 'rehabilitation', 'children', 'geriatric', 'maternity']
searches = []

# The loop variable is not called `type`: that name would replace the builtin
# type() for every cell executed after this one.
for hospital_type in types:
    searches.append(f"Find me a {hospital_type} hospital")
    searches.append(f"{hospital_type} hospital")
    searches.append(f"Find me a {hospital_type} hospital near me")
    searches.append(f"{hospital_type} hospital near me")
    searches.append(f"Find me a {hospital_type} hospital near my location")
    searches.append(f"{hospital_type} hospital near my location")
    searches.append(f"Find me a {hospital_type} hospital near my address")
    searches.append(f"What's the phone number for the nearest {hospital_type} hospital?")
    # This slot used to repeat "<type> hospital". It now holds the short form of
    # the "near my address" query, so all ten patterns per type are distinct.
    searches.append(f"{hospital_type} hospital near my address")
    searches.append(f"What are the details for the nearest {hospital_type} hospital?")

intents_dict['search_hospital_by_type'] = searches
# One identical response per generated pattern.
response_dict['search_hospital_by_type'] = ['Loading hospital details'] * len(searches)

# New intents

Fill prescription intents

In [64]:
medications = pd.read_csv('../clean_data/medications.csv')
print(medications.head())

      medication
0  acetaminophen
1      acyclovir
2       Adderall
3      albuterol
4    alendronate


In [65]:
# randomly generate dosage data: 300 strings of the form "<amount> <unit>"
units = ['mg', 'g', 'mL', 'L']
amounts = range(5, 1001, 5)  # multiples of 5, from 5 up to and including 1000

# The amount is drawn before the unit for every dosage.
dosages = [f"{random.choice(amounts)} {random.choice(units)}" for _ in range(300)]

print(dosages[:10])

['785 L', '365 mL', '560 mL', '175 mL', '635 g', '345 L', '240 L', '875 g', '510 g', '780 g']


In [66]:
script_queries = []
responses = []
# Work on a separately named array so the `medications` DataFrame stays intact
# and this cell can be re-run.
medication_names = medications['medication'].values
first_names = ['John', 'Jane', 'Michael', 'Emily', 'David', 'Sarah', 'James', 'Laura', 'Robert', 'Jennifer']
last_names = ['Smith', 'Johnson', 'Williams', 'Jones', 'Brown', 'Davis', 'Miller', 'Wilson', 'Moore', 'Taylor']

medications_for_response = []
patients_for_response = []

for dosage in dosages:
    medication = random.choice(medication_names)
    first_name = random.choice(first_names)
    last_name = random.choice(last_names)
    patient = f'{first_name} {last_name}'

    new_queries = [
        f"Prescribe {dosage} of {medication} for {patient}",
        f"Write a prescription for {dosage} of {medication} for {patient}",
        f"{patient} needs a prescription for {dosage} of {medication}",
    ]
    script_queries.extend(new_queries)

    # Build the response next to its queries. The earlier separate pass advanced
    # its index every fourth query although each dosage has three queries, so
    # responses drifted onto the wrong dosage, medication and patient.
    response = f'Dosage: {dosage}. Medication: {medication}. Patient: {patient}\nSend to which pharmacy?'
    responses.extend([response] * len(new_queries))

    medications_for_response.append(medication)
    patients_for_response.append(patient)


intents_dict['prescribe_medication'] = script_queries
response_dict['prescribe_medication'] = responses

symptom_check intents

In [67]:
symptom_queries = []
symptoms = pd.read_csv('../clean_data/symptoms.csv')
print(symptoms.head())

                                  symptom
0  Acute Myeloid Leukaemia (AML) Detected
1        CD19 Antigen Negative Expression
2                                 Fatigue
3          Myeloperoxidase (MPO) Positive
4                     Swollen Lymph Nodes


In [68]:
print(len(symptoms))

1108


In [69]:
def generate_symptom_queries(symptoms, num_iters=100):
    """Build questions about randomly chosen symptoms, with matching responses.

    Args:
        symptoms: Object indexable by ``'symptom'`` whose result exposes
            ``.values`` (the notebook passes a pandas DataFrame).
        num_iters: Number of symptoms to draw (with replacement).

    Returns:
        ``(symptom_queries, responses)``: six queries per drawn symptom and one
        response per query, so both lists have ``6 * num_iters`` entries.
    """
    templates = (
        "What is the symptom {} indicative of?",
        "What does {} mean?",
        "Explain {}",
        "Define {}",
        "What is the meaning of {}?",
        "What could be the cause of {}?",
    )
    symptom_pool = symptoms['symptom'].values

    symptom_queries, responses = [], []
    for _ in range(num_iters):
        symptom = random.choice(symptom_pool)
        symptom_queries.extend(template.format(symptom) for template in templates)
        # One response per query keeps the two lists aligned.
        responses.extend([f'Loading symptom details for {symptom}'] * len(templates))

    return symptom_queries, responses

symptom_queries, responses = generate_symptom_queries(symptoms)
intents_dict['symptom_check'] = symptom_queries
response_dict['symptom_check'] = responses

Check to make sure data generated is equal in length for response generator training

In [70]:
# Report every tag whose number of patterns differs from its number of responses.
mismatched_tags = [
    tag for tag in intents_dict
    if len(intents_dict[tag]) != len(response_dict[tag])
]
for tag in mismatched_tags:
    print(tag)

noanswer


In [73]:
print(len(intents_dict['search_hospital_by_type']), len(intents_dict['search_hospital_by_params']),
      len(intents_dict['hospital_search']), len(intents_dict['search_pharmacy_by_name']), len(intents_dict['pharmacy_search']),
      len(intents_dict['search_blood_pressure_by_patient_id']), len(intents_dict['blood_pressure_search']), len(intents_dict['blood_pressure']),
      len(intents_dict['adverse_drug']), len(intents_dict['options']), len(intents_dict['thanks']), len(intents_dict['goodbye']),
      len(intents_dict['greeting']), len(intents_dict['prescribe_medication']), len(intents_dict['symptom_check']))

100 196 90 250 150 288 1260 391 240 128 253 1130 96 900 600


In [76]:
for intent in intents_dict:
    print(intent, len(intents_dict[intent]))

greeting 96
goodbye 1130
thanks 253
noanswer 0
options 128
adverse_drug 240
blood_pressure 391
blood_pressure_search 1260
search_blood_pressure_by_patient_id 288
pharmacy_search 150
search_pharmacy_by_name 250
hospital_search 90
search_hospital_by_params 196
search_hospital_by_type 100
prescribe_medication 900
symptom_check 600


In [78]:
# Start from the original JSON document so its top-level structure is preserved.
with open('../raw_data/intents.json', 'r') as source_file:
    data = json.load(source_file)

# Replace the intents with the enriched patterns and responses, leaving out the
# 'noanswer' intent.
data['intents'] = [
    {
        'tag': tag,
        'patterns': intents_dict[tag],
        'responses': response_dict[tag]
    }
    for tag in intents_dict
    if tag != 'noanswer'
]

# Write the updated JSON structure to the enriched output file.
with open('../clean_data/intents_enriched.json', 'w') as target_file:
    json.dump(data, target_file, indent=4)