In [1]:
import random
import numpy as np
from numpy.random import choice
from pandas import Series, DataFrame
import pandas as pd
import string

In [2]:
# Disease module
# https://acpinternist.org/archives/2011/10/CVD.htm
resp_diseases = ['emphysema', 'asthma','bronchitis'] # chronic only
acute_resp = ['influenza','sinusitis','rhinitis']
circ_diseases = [
    'diabetes','coronary artery disease','cerebrovascular disease','peripheral arterial disease (PAD)',
    'congestive heart failure','chronic kidney disease','chronic kidney disease','dyslipidemia','hypertension',
    'angina pectoris'
]
# https://www.newenglandnightingales.com/home-care-in-canton-ct-5-causes-mobility-impairment-seniors/
amb_diseases = ['arthritis','fibromyalgia',"Parkinson's Disease",'osteoporosis','hip/knee-replacement (recovery)']
cogImp_diseases = ["Alzheimer's Disease",'dementia','macular degeneration']

# Cancer risk
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
# https://stackoverflow.com/questions/19103624/load-csv-to-pandas-multiindex-dataframe
# https://www.google.com/search?q=cancer+incidence+rate+by+type+age+gender&rlz=1C1CHBF_enUS803US805&oq=cancer+incidence+rate+by+type+age+gender&aqs=chrome..69i57.13839j0j8&sourceid=chrome&ie=UTF-8
filepath = 'CancerData.csv'
dfC = pd.read_csv(filepath, sep=',',index_col=[0,1])
dfC2=dfC.reorder_levels(('Gender','Affected area'), axis=0)
dfC_male = dfC2.loc['Male'].drop('All sites',axis=0).drop('Breast',axis=0).drop('Uterine cervix',axis=0).drop('Uterine corpus',axis=0)
dfC_female = dfC2.loc['Female'].drop('All sites',axis=0).drop('Prostate',axis=0)

In [3]:
class Patient(object):
    def __init__(self,name,gender):
        # basic personal data
        self.name = name
        self.gender = gender.lower()
        age = random.randint(65,85)
        self.age = age
        
        # respiratory diseases
        patient_resp = random.choice(resp_diseases)
        self.resp_disease = patient_resp

        # circulatory diseases
        k_circ = random.randint(1, 4)
        patient_circ = list(random.sample(circ_diseases, k_circ))
        self.circ_disease = patient_circ

        # ambulatory diseases
        k_amb = random.randint(1, 2)
        patient_amb = list(random.sample(amb_diseases, k_amb))
        self.amb_disease = patient_amb

        # cognitive impairment diseases
        k_cogImp = random.randint(0, 2)
        patient_cogImp = list(random.sample(cogImp_diseases, k_cogImp))
        self.cogImp_disease = patient_cogImp
        
        # https://stackoverflow.com/questions/3679694/a-weighted-version-of-random-choice
        #smoker y/n? https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3771341/ 8.3%
        patient_smokesYN = choice(['smoker','non-smoker'], 1, p=[0.083,0.917])
        self.smokerYN = patient_smokesYN
        
        #drinker y/n? https://www.niaaa.nih.gov/alcohol-health/special-populations-co-occurring-disorders/older-adults 40%
        patient_drinksYN = choice(['drinker','non-drinker'], 1, p=[0.4,0.6])
        self.drinkerYN = patient_drinksYN
        
        # Cancer risk
        # https://surveillance.cancer.gov/statistics/types/prevalence.html
        # https://www.cdc.gov/cancer/uscs/
        # https://www.cancer.org/content/dam/cancer-org/research/cancer-facts-and-statistics/annual-cancer-facts-and-figures/2017/cancer-facts-and-figures-2017.pdf
        
        # step 1 -  find age bracket
        if self.age <= 49:
            age_bucket = '0 - 49'
        elif 49 < self.age <= 59:
            age_bucket = '50 - 59'
        elif 59 < self.age <= 69:
            age_bucket = '60 - 69'
        else:
            age_bucket = '70 - 99'
        self.age_bucket = age_bucket

        #step 2 - select gender set
        if self.gender == 'male':
            stats = dict(dfC_male[age_bucket])
        else:
            stats = dict(dfC_female[age_bucket])
        #step 3 - test for cancer
        patient_stat = {}
        keys = list(stats.keys())
        values = []
        for key in keys:
            v = choice(['yes','no'], 1, p=[stats[key],(1-stats[key])])
            values.append(v[0])
        patient_stat = dict(zip(keys,values))
        if 'yes' in values:
            self.cancer = patient_stat.keys()[patient_stat.values().index('yes')]
        else:
            self.cancer = 'None'
    
    def JSONify_stats(self):
        pass
        
    def print_stats(self):
        print('%s is a %s, aged %s, with these respiratory diseases:%s' %(self.name,self.gender,self.age,self.resp_disease))
        print('%s also suffers from %s, %s and %s' %(self.name,self.circ_disease,self.amb_disease,self.cogImp_disease))
        print('%s has also been diagnosed with the following cancer: %s' %(self.name,self.cancer))

In [4]:
p1 = Patient('Dave','male')
p1.print_stats()

Dave is a male, aged 68, with these respiratory diseases:bronchitis
Dave also suffers from ['congestive heart failure'], ["Parkinson's Disease", 'hip/knee-replacement (recovery)'] and []
Dave has also been diagnosed with the following cancer: None


In [5]:
p2 = Patient('Karen','female')
p2.print_stats()

Karen is a female, aged 85, with these respiratory diseases:bronchitis
Karen also suffers from ['hypertension', 'chronic kidney disease', 'chronic kidney disease'], ["Parkinson's Disease"] and []
Karen has also been diagnosed with the following cancer: None
