In [40]:
import pandas as pd
import numpy as np
import ast

In [41]:
# Datasets: both CSVs are read from paths relative to the working directory.

DOCTORS_CSV = 'sharp_medical_professionals.csv'
DISEASES_CSV = 'data/diseases.csv'

doctors = pd.read_csv(DOCTORS_CSV)
diseases = pd.read_csv(DISEASES_CSV)

In [42]:
# Dictionary to convert diseases to specializations.
#
# Keys are the disease names expected in `disease_inputs`; values are the
# medical specialization that handles that disease. Both sides are written in
# lowercase: specialties_to_scores lower-cases each doctor's specialty before
# comparing it with these values, so a value must equal the lower-cased
# specialty label used in the doctors data to ever produce a match.

disease_to_specialization = {
    'vertigo': 'neurology',
    'aids': 'infectious disease',
    'acne': 'dermatology',
    'alcoholic hepatitis': 'hepatology',
    'allergy': 'allergy and immunology',
    'arthritis': 'rheumatology',
    'bronchial asthma': 'pulmonology',
    'cervical spondylosis': 'orthopedic surgery',
    'chicken pox': 'infectious disease',
    'chronic cholestasis': 'gastroenterology',
    'common cold': 'general medicine',
    'dengue': 'infectious disease',
    'diabetes': 'endocrinology',
    'dimorphic hemorrhoids(piles)': 'general surgery',
    'drug reaction': 'allergy and immunology',
    'fungal infection': 'infectious disease',
    'gerd': 'gastroenterology',
    'gastroenteritis': 'gastroenterology',
    'heart attack': 'cardiology',
    'hepatitis b': 'hepatology',
    'hepatitis c': 'hepatology',
    'hepatitis d': 'hepatology',
    'hepatitis e': 'hepatology',
    'hypertension': 'cardiology',
    'hyperthyroidism': 'endocrinology',
    'hypoglycemia': 'endocrinology',
    'hypothyroidism': 'endocrinology',
    'impetigo': 'dermatology',
    'jaundice': 'hepatology',
    'malaria': 'infectious disease',
    'migraine': 'neurology',
    'osteoarthritis': 'orthopedic surgery',
    'paralysis (brain hemorrhage)': 'neurology',
    'peptic ulcer disease': 'gastroenterology',
    'pneumonia': 'pulmonology',
    'psoriasis': 'dermatology',
    'tuberculosis': 'infectious disease',
    'typhoid': 'infectious disease',
    'urinary tract infection': 'urology',
    'varicose veins': 'vascular surgery',
    'hepatitis a': 'hepatology'
}


In [43]:
# Clean specialties

def convert_specialties(specialty):
    """Parse one raw 'Specialty' cell into a collection of specialty names.

    The CSV stores each doctor's specialties as the text of a Python list
    (e.g. "['Gastroenterology', 'Hepatology']"); this turns that text back
    into a real list using ``ast.literal_eval`` (safe: literals only).

    Parameters
    ----------
    specialty : str, list, tuple, None or NaN
        The raw cell value.

    Returns
    -------
    list (or whatever container the literal evaluates to)
        * list/tuple input -> returned as a new list, so applying the
          function twice is harmless;
        * None / NaN / empty or whitespace-only string -> ``[]``;
        * a string holding a single quoted name -> one-element list, so
          callers never iterate over the characters of a bare string;
        * any other string -> the value produced by ``ast.literal_eval``.

    Raises
    ------
    ValueError, SyntaxError
        If the value is not a valid Python literal (unchanged behaviour).
    """
    # Already parsed: just hand back a list.
    if isinstance(specialty, (list, tuple)):
        return list(specialty)

    if not isinstance(specialty, str):
        # Missing cells (None / NaN) mean "no specialties listed".
        if specialty is None or pd.isna(specialty):
            return []
        # Anything else is unexpected; let literal_eval raise as it always did.
        return ast.literal_eval(specialty)

    text = specialty.strip()
    if not text:
        return []

    parsed = ast.literal_eval(text)

    # "'Cardiology'" evaluates to a plain str; wrap it so iteration yields
    # specialty names rather than single characters.
    if isinstance(parsed, str):
        return [parsed]
    return parsed

In [44]:
# Calculates score for every doctor based on disease_inputs

def specialties_to_scores(specialties, disease_inputs, specialization_map=None):
    """Score one doctor's specialties against weighted disease predictions.

    Every (specialty, disease) pair whose disease maps to that specialty adds
    the disease's probability to the score.

    Parameters
    ----------
    specialties : iterable of str
        The doctor's specialty names; compared after stripping surrounding
        whitespace and lower-casing.
    disease_inputs : iterable of (disease, probability)
        Predicted diseases with their weights. May be a one-shot iterator.
    specialization_map : dict, optional
        Mapping of disease name -> lowercase specialization. Defaults to the
        module-level ``disease_to_specialization``.

    Returns
    -------
    number
        Sum of the matching probabilities; ``0`` when nothing matches.

    Notes
    -----
    A disease that is not in the mapping (after trying the exact key and then
    the stripped, lower-cased key) contributes nothing instead of raising
    ``KeyError``.
    """
    if specialization_map is None:
        specialization_map = disease_to_specialization

    # Resolve every disease to its specialization once, up front. This avoids
    # repeating the lookup per specialty and keeps generator inputs usable
    # across the outer loop.
    weighted_targets = []
    for disease, probability in disease_inputs:
        target = specialization_map.get(disease)
        if target is None and isinstance(disease, str):
            # Tolerate labels such as 'Pneumonia' or ' pneumonia '.
            target = specialization_map.get(disease.strip().lower())
        if target is None:
            continue
        weighted_targets.append((target, probability))

    score = 0

    for specialty in specialties:
        specialty = specialty.strip().lower()

        for target, probability in weighted_targets:
            if specialty == target:
                score += probability

    return score


In [48]:
# Outputs a df with all of the doctors with positive scores, best match first.
# Pass top_n (e.g. 3) to keep only the highest-scoring rows.
# TODO: maybe differentiate between equally scored doctors with star reviews.

def calculate_recommendation(disease_inputs, top_n=None):
    """Rank doctors by how well their specialties match the predicted diseases.

    Parameters
    ----------
    disease_inputs : iterable of (disease, probability)
        Predicted diseases with their weights.
    top_n : int, optional
        If given, only the first ``top_n`` rows of the ranking are returned.
        ``None`` (default) returns every doctor with a positive score.

    Returns
    -------
    pandas.DataFrame
        A copy of ``doctors`` with an added ``Score`` column, restricted to
        rows where ``Score > 0`` and sorted by ``Score`` descending. Doctors
        with equal scores keep their relative order from ``doctors``.
    """
    # Materialise once: the inputs are re-read for every doctor row, which a
    # one-shot iterator would not survive.
    disease_inputs = list(disease_inputs)

    cleaned_specialties = doctors['Specialty'].apply(convert_specialties)
    scores = cleaned_specialties.apply(lambda x: specialties_to_scores(x, disease_inputs))
    scored_doctors = doctors.assign(Score=scores)

    # mergesort is a stable algorithm, so ties come out in a predictable order.
    ranked = scored_doctors[scored_doctors['Score'] > 0].sort_values(
        'Score', ascending=False, kind='mergesort'
    )

    if top_n is not None:
        ranked = ranked.head(top_n)
    return ranked

# New df with scores

In [49]:
# Test case: a list of (disease, probability) pairs passed to
# calculate_recommendation; the last expression displays the ranked doctors.
#
# NOTE(review): this rebinds `diseases`, which the Datasets cell assigned to
# the DataFrame read from 'data/diseases.csv'. After this cell runs, `diseases`
# is a list, so the meaning of the name depends on execution order. Consider a
# distinct name for the sample input.
# NOTE(review): in the recorded output the highest Score is 0.15, i.e. no row
# received the 0.75 pneumonia weight. Presumably no doctor's specialty label
# lower-cases to 'pulmonology' -- verify against the doctors data.

diseases = [('pneumonia', .75), ('tuberculosis', .15), ('jaundice', .10)]

calculate_recommendation(diseases)


Unnamed: 0.1,Unnamed: 0,Name,Location,Specialty,Picture,Score
214,214,"Raymond Chinn, MD",['8010 Frost St'],['Infectious disease'],https://images.ctfassets.net/pxcfulgsd9e2/4vtf...,0.15
395,395,"Joshua Minuto, MD",['2020 Genesee Ave.'],['Infectious disease'],https://images.ctfassets.net/pxcfulgsd9e2/4vtf...,0.15
396,396,"George Sakoulas, MD",['2020 Genesee Ave.'],['Infectious disease'],https://images.ctfassets.net/pxcfulgsd9e2/4vtf...,0.15
401,401,"Ariella Goldblatt, MD",['2020 Genesee Ave.'],['Infectious disease'],https://images.ctfassets.net/pxcfulgsd9e2/5Bzm...,0.15
402,402,"Norihiro Yogo, MD",['2020 Genesee Ave.'],['Infectious disease'],https://images.ctfassets.net/pxcfulgsd9e2/69LQ...,0.15
449,449,"Gonzalo Ballon-Landa, MD",['4136 Bachman Pl'],['Infectious disease'],https://images.ctfassets.net/pxcfulgsd9e2/3S4m...,0.15
611,611,"Michael Butera, MD",['6699 Alvarado Rd'],['Infectious disease'],https://images.ctfassets.net/pxcfulgsd9e2/4vtf...,0.15
733,733,"Braden Hale, MD",[],['Infectious disease'],https://images.ctfassets.net/pxcfulgsd9e2/4vtf...,0.15
795,795,"Fadi Haddad, MD",['8860 Center Dr'],['Infectious disease'],https://images.ctfassets.net/pxcfulgsd9e2/3fh1...,0.15
616,616,"Nabil Baig, DO","['6216 Brockton Avenue', '303 H St.', '131 Ora...","['Gastroenterology', 'Hepatology', 'Internal m...",https://images.ctfassets.net/pxcfulgsd9e2/4vtf...,0.1
