In [3]:
import pandas as pd
import numpy as np
import ast
import json
from pathlib import Path

In [4]:
# Datasets: both CSV files are read from the Data directory one level above
# the working directory.

DATA_DIR = Path('..') / 'Data'

doctors = pd.read_csv(DATA_DIR / 'sharp_medical_professionals.csv')
diseases = pd.read_csv(DATA_DIR / 'diseases.csv')

In [5]:
# Dictionary to convert diseases to specializations.
#
# Keys are disease names and values are the specialization that handles them;
# every key and value is written in lowercase. specialties_to_scores indexes
# this dict directly by the disease name it is given and compares the value
# against each doctor's specialty after lowercasing that specialty, so new
# entries must stay lowercase to match.

disease_to_specialization = {
    'vertigo': 'neurology',
    'aids': 'infectious disease',
    'acne': 'dermatology',
    'alcoholic hepatitis': 'hepatology',
    'allergy': 'allergy and immunology',
    'arthritis': 'rheumatology',
    'bronchial asthma': 'pulmonology',
    'cervical spondylosis': 'orthopedic surgery',
    'chicken pox': 'infectious disease',
    'chronic cholestasis': 'gastroenterology',
    'common cold': 'general medicine',
    'dengue': 'infectious disease',
    'diabetes': 'endocrinology',
    'dimorphic hemorrhoids(piles)': 'general surgery',
    'drug reaction': 'allergy and immunology',
    'fungal infection': 'infectious disease',
    'gerd': 'gastroenterology',
    'gastroenteritis': 'gastroenterology',
    'heart attack': 'cardiology',
    'hepatitis b': 'hepatology',
    'hepatitis c': 'hepatology',
    'hepatitis d': 'hepatology',
    'hepatitis e': 'hepatology',
    'hypertension': 'cardiology',
    'hyperthyroidism': 'endocrinology',
    'hypoglycemia': 'endocrinology',
    'hypothyroidism': 'endocrinology',
    'impetigo': 'dermatology',
    'jaundice': 'hepatology',
    'malaria': 'infectious disease',
    'migraine': 'neurology',
    'osteoarthritis': 'orthopedic surgery',
    'paralysis (brain hemorrhage)': 'neurology',
    'peptic ulcer disease': 'gastroenterology',
    'pneumonia': 'pulmonology',
    'psoriasis': 'dermatology',
    'tuberculosis': 'infectious disease',
    'typhoid': 'infectious disease',
    'urinary tract infection': 'urology',
    'varicose veins': 'vascular surgery',
    'hepatitis a': 'hepatology'
}


In [6]:
# Clean specialties

def convert_specialties(specialty):
    """Turn one raw 'Specialty' cell into a list of specialty names.

    The CSV stores each doctor's specialties as the text of a Python list
    literal (e.g. "['Infectious disease']"), which is parsed with
    ast.literal_eval.

    Parameters
    ----------
    specialty : str, list, tuple, None or float NaN
        The raw cell value.

    Returns
    -------
    list
        The parsed specialties. Missing cells (None / NaN) give an empty
        list, an already-parsed list or tuple is returned as a new list,
        and a cell that parses to a single string is wrapped in a list.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ast.literal_eval when the text is not a valid
        Python literal.
    """
    # Already parsed (e.g. the cell was re-run on cleaned data): pass through.
    if isinstance(specialty, (list, tuple)):
        return list(specialty)

    # Empty CSV cells are read as NaN (a float that is not equal to itself).
    if specialty is None or (isinstance(specialty, float) and specialty != specialty):
        return []

    parsed = ast.literal_eval(specialty)

    # A lone quoted string would otherwise be iterated character by character
    # by downstream code that expects a list of names.
    if isinstance(parsed, str):
        return [parsed]
    return parsed

In [7]:
# Calculates score for every doctor based on disease_inputs

def specialties_to_scores(specialties, disease_inputs, specialization_map=None):
    """Score one doctor against a set of candidate diseases.

    The score is the sum of the probabilities of every disease whose mapped
    specialization is one of the doctor's specialties.

    Parameters
    ----------
    specialties : iterable of str
        The doctor's specialties; compared case-insensitively and with
        surrounding whitespace ignored.
    disease_inputs : iterable of (str, number)
        (disease, probability) pairs. Disease names are stripped and
        lowercased before lookup.
    specialization_map : dict, optional
        Mapping from lowercase disease name to lowercase specialization.
        Defaults to the module-level ``disease_to_specialization``.

    Returns
    -------
    number
        The accumulated probability; 0 when nothing matches.

    Notes
    -----
    A disease that is missing from the mapping contributes nothing instead of
    raising KeyError, and a specialty listed more than once for the same
    doctor is only counted once.
    """
    if specialization_map is None:
        specialization_map = disease_to_specialization

    # A set removes duplicate specialties so a disease is never counted twice.
    doctor_specialties = {name.strip().lower() for name in specialties}

    score = 0
    for disease, probability in disease_inputs:
        # .get returns None for unknown diseases; None is never in the set.
        specialization = specialization_map.get(disease.strip().lower())
        if specialization in doctor_specialties:
            score += probability

    return score


In [13]:
# Outputs a df with all of the doctors with positive scores (can get top three if necessary, maybe differentiate between them with star reviews)

def calculate_recommendation(disease_inputs, doctors_df=None):
    """Rank doctors by how well their specialties match the given diseases.

    Parameters
    ----------
    disease_inputs : iterable of (str, number)
        (disease, probability) pairs, passed on to specialties_to_scores.
    doctors_df : pandas.DataFrame, optional
        Frame with a 'Specialty' column holding list-literal strings.
        Defaults to the module-level ``doctors`` frame.

    Returns
    -------
    pandas.DataFrame
        The rows whose score is greater than 0, with an added 'Score'
        column, sorted by 'Score' in descending order. The 'Unnamed: 0'
        column is removed when present and every remaining missing value is
        replaced with -1.
    """
    if doctors_df is None:
        doctors_df = doctors

    parsed_specialties = doctors_df['Specialty'].apply(convert_specialties)
    scores = parsed_specialties.apply(
        lambda parsed: specialties_to_scores(parsed, disease_inputs)
    )
    scored = doctors_df.assign(Score=scores)

    matches = scored[scored['Score'] > 0].sort_values('Score', ascending=False)

    # 'Unnamed: 0' only exists when the CSV was written with its index;
    # errors='ignore' keeps the function working when it is absent.
    return matches.drop(columns='Unnamed: 0', errors='ignore').fillna(-1)

# New df with scores

In [14]:
# Test case: three (disease, probability) pairs scored against the doctors.
# NOTE(review): this rebinds `diseases`, which the data-loading cell assigned
# to the DataFrame read from diseases.csv -- confirm nothing later needs that
# DataFrame, or give this list its own name.
diseases = [
    ('pneumonia', 0.75),
    ('tuberculosis', 0.15),
    ('jaundice', 0.10),
]
calculate_recommendation(diseases).to_dict(orient='records')

[{'Name': 'Joshua Minuto, MD',
  'Location': "['2020 Genesee Ave.']",
  'Specialty': "['Infectious disease']",
  'Picture': 'https://images.ctfassets.net/pxcfulgsd9e2/4vtfSBGmp3LQhKtvjw2JIC/d0f2094db56d33c832190f5e281156e5/Placeholder_image.png?f=face&fit=fill&fm=webp&h=2792&q=35&w=2792',
  'Phone Number': '[18586168091]',
  'Rating': -1.0,
  'Score': 0.15},
 {'Name': 'George Sakoulas, MD',
  'Location': "['2020 Genesee Ave.']",
  'Specialty': "['Infectious disease']",
  'Picture': 'https://images.ctfassets.net/pxcfulgsd9e2/4vtfSBGmp3LQhKtvjw2JIC/d0f2094db56d33c832190f5e281156e5/Placeholder_image.png?f=face&fit=fill&fm=webp&h=2792&q=35&w=2792',
  'Phone Number': '[18586168091]',
  'Rating': -1.0,
  'Score': 0.15},
 {'Name': 'Ariella Goldblatt, MD',
  'Location': "['2020 Genesee Ave.']",
  'Specialty': "['Infectious disease']",
  'Picture': 'https://images.ctfassets.net/pxcfulgsd9e2/5BzmJVevvbOVUTjjvmcTmj/faeedea2733d883655355e1c604c661b/goldblatt_arilella_94519_2022.jpg?f=face&fit=fill