<h1>Population generation</h1>

In [1]:
#necessary imports
import numpy as np
import pandas as pd
import scipy.stats
from numpy import random
from scipy.stats import truncnorm

In [7]:
# Fields that can be generated. Generation is based on intersecting this set (Ω)
# with the set of fields requested for generation (A ⊂ Ω).
populationFields = ["age", "gender", "race", "BMI", "physicalActivity", "otherIllnesses"]
# Illness names available for the "otherIllnesses" field.
otherIllnesses = ["hypertension","asthma","cancer","diabetes","depression","angina","myocardialInfraction","irritableBowelSyndrome","stroke","migraine"]
# Wrap the illness names in a one-column DataFrame; the bare name on the last line displays it as the cell output.
ser = pd.Series(otherIllnesses)
comorbid = pd.DataFrame({"otherIllnesses" : ser})
comorbid

Unnamed: 0,otherIllnesses
0,hypertension
1,asthma
2,cancer
3,diabetes
4,depression
5,angina
6,myocardialInfraction
7,irritableBowelSyndrome
8,stroke
9,migraine


In [37]:
# Every sampler below returns `amount` values, i.e. one value per member of the population.
def normal(amount, spec):
    """Draw `amount` samples from a normal distribution.

    spec[0] is used as the mean and spec[1] as the standard deviation.
    Returns a 1-D numpy array of length `amount`.
    """
    mean = spec[0]
    std = spec[1]
    return np.random.normal(loc=mean, scale=std, size=amount)
def choice(amount, spec):
    """Pick `amount` values from spec[0], with replacement.

    spec[1] holds the selection probability of each entry of spec[0].
    Returns a 1-D numpy array of length `amount`.
    """
    options, weights = spec[0], spec[1]
    return np.random.choice(options, size=amount, replace=True, p=weights)
def minmax (amount, spec):
    """Draw `amount` samples from a normal distribution truncated to [spec[0], spec[1]].

    spec is indexed as [min_val, max_val, mean, std].
    Returns a 1-D numpy array of length `amount`.
    """
    lower, upper, mean, std = spec[0], spec[1], spec[2], spec[3]
    # truncnorm expects its clipping bounds expressed in standard deviations from the mean
    a = (lower - mean) / std
    b = (upper - mean) / std
    return truncnorm(a, b, loc=mean, scale=std).rvs(amount)

def generateComorbidities(types, vals, amount) :
    """Generate a comma-separated list of comorbidities for every member of a population.

    Each illness is drawn independently for each person: illness ``types[i]`` is
    present with probability ``vals[i]``. Several illnesses can therefore occur
    together, and a person can have none.

    Parameters
    ----------
    types : sequence of str
        Illness names.
    vals : sequence of float
        Probability of each illness, index-aligned with `types`; each must lie
        in [0, 1]. If the two sequences differ in length, the surplus entries of
        the longer one are ignored.
    amount : int
        Number of people (rows) to generate.

    Returns
    -------
    pandas.DataFrame
        One column, "otherIllnesses", with `amount` rows. Each cell holds the
        person's illnesses joined with "," in the order given by `types`, or an
        empty string when the person has none.

    Raises
    ------
    ValueError
        If any probability is outside [0, 1].
    """
    pairs = list(zip(types, vals))
    names = [name for name, _ in pairs]
    weights = np.asarray([weight for _, weight in pairs], dtype=float)

    if weights.size and not np.all((weights >= 0) & (weights <= 1)) :
        raise ValueError("probabilities must lie in [0, 1]")

    # One uniform draw per (person, illness); the illness is present when the
    # draw falls below its probability. Draws lie in [0, 1), so a probability of
    # 1.0 always hits and 0.0 never does. The (amount, 0) shape produced by an
    # empty `types` simply yields empty strings.
    present = np.random.random((amount, len(names))) < weights

    joined = [
        ",".join(name for name, hit in zip(names, row) if hit)
        for row in present
    ]
    return pd.DataFrame({"otherIllnesses" : joined})

# Example & sanity check: 10 people, four illnesses, each paired with its own probability.
generateComorbidities(["hypertension", "asthma", "cancer", "diabetes"],[0.78, 0.12, 0.01, 0.09], 10)

Unnamed: 0,otherIllnesses
0,hypertension
1,hypertension
2,hypertension
3,"hypertension,asthma"
4,hypertension
5,"hypertension,asthma"
6,"hypertension,diabetes"
7,hypertension
8,hypertension
9,hypertension


In [15]:
# Example population generation using a subset of the features.

testspec = ["age", "gender", "race", "BMI", "physicalActivity"]
testweights = [[56, 12.5],#age[mean, standard_deviation] - normal dist.
            [["f", "m"],[0.52, 0.48]],#gender[vals, weights] - weighted single choice
            [["white","black","other"],[0.66, 0.22, 0.12]],#race[vals, weights] - weighted single choice
              [12.1212, 44.8364, 27.3397, 4.77216 ], #BMI [min_val, max_val, mean, std] - truncated normal (sampled by minmax)
              [0, 510, 127.095, 101.904]] #activityMinutes [min_val, max_val, mean, std] - truncated normal (sampled by minmax)
            # otherIllnesses will be modelled with the weighted single choice method,
            # choosing the fractions according to each illness's prevalence in the population.
# Pass the sampler functions themselves rather than strings naming them.
testdistrmethods = [normal, choice, choice, minmax, minmax]
#testdistrmethods = [normal, choice, choice, normal]
# Bundle as [field names, per-field spec, per-field sampler]; the three lists are index-aligned.
testdata = [testspec, testweights, testdistrmethods]


In [16]:
class Generator :
    """Stores study metadata plus a generation spec, and builds synthetic populations.

    Metadata is given positionally in the order
    (author, title, link, population, positive_population, multimorbid);
    trailing values may be omitted and then keep the class-level defaults.
    """
    #metadata section (class-level defaults, overridden per instance in __init__)
    author = ""
    title = ""
    link = ""
    population = 0
    positive_population = 0
    multimorbid = 0

    #generation spec values (replaced per instance in __init__)
    data = []

    #order in which positional metadata values are interpreted
    _METADATA_FIELDS = ("author", "title", "link", "population",
                        "positive_population", "multimorbid")

    def __init__(self, data, metadata) :
        """Store the generation spec and the metadata on the instance.

        Parameters
        ----------
        data : object
            Generation spec, stored as-is in ``self.data``.
        metadata : sequence
            Up to six values, interpreted in the order of ``_METADATA_FIELDS``.

        Raises
        ------
        ValueError
            If more than six metadata values are supplied.
        """
        metadata = tuple(metadata)
        if len(metadata) > len(self._METADATA_FIELDS) :
            raise ValueError(
                "metadata accepts at most %d values, got %d"
                % (len(self._METADATA_FIELDS), len(metadata))
            )
        # Assign onto the instance; unpacking into local names would discard the values.
        for name, value in zip(self._METADATA_FIELDS, metadata) :
            setattr(self, name, value)

        self.data = data

    @staticmethod
    def populate(amount, speclist, allowed_fields=None) :
        """Generate a population DataFrame with one column per requested field.

        Declared as a staticmethod so that both ``Generator.populate(amount, speclist)``
        and ``instance.populate(amount, speclist)`` work.

        Parameters
        ----------
        amount : int
            Number of rows to generate; passed to every sampler.
        speclist : sequence
            ``[field names, per-field spec, per-field sampler]``, index-aligned.
            Each sampler is called as ``sampler(amount, spec)``.
        allowed_fields : iterable of str, optional
            Fields that may be generated. Defaults to the module-level
            ``populationFields``.

        Returns
        -------
        pandas.DataFrame
            Columns are the requested fields that are also allowed, in the
            order they appear in ``speclist[0]``.
        """
        if allowed_fields is None :
            allowed_fields = populationFields
        #intersection of the requested fields and all possible fields
        columns = set(speclist[0]).intersection(allowed_fields)

        df = pd.DataFrame()
        for index, vals, method in zip(speclist[0], speclist[1], speclist[2]) :
            # skip fields outside the intersection instead of generating them anyway
            if index not in columns :
                continue
            df[index] = method(amount, vals)

        return df

    def getSpecs(self) :
        """Return the metadata as a tuple in the order of ``_METADATA_FIELDS``."""
        return tuple(getattr(self, name) for name in self._METADATA_FIELDS)


In [28]:
# Scratch check: building a DataFrame from a tuple-valued column and a scalar column;
# in the recorded output the scalar 2 is repeated on every row.
tup = ("hypertension","asthma","cancer","diabetes","depression","angina","myocardialInfraction","irritableBowelSyndrome","stroke","migraine")
pd.DataFrame({"a" : tup, "b" : 2})

Unnamed: 0,a,b
0,hypertension,2
1,asthma,2
2,cancer,2
3,diabetes,2
4,depression,2
5,angina,2
6,myocardialInfraction,2
7,irritableBowelSyndrome,2
8,stroke,2
9,migraine,2


In [110]:
# NOTE(review): this redefines generateComorbidities. When the notebook is run top to bottom
# it shadows the earlier definition of the same name, so later calls get this version.
def generateComorbidities(types, vals, amount) :
    """Scratch experiment with DataFrame.replace; `types` and `vals` are not used.

    Returns a one-column DataFrame ("otherIllnesses") with `amount` rows.
    """
    # start from the values 0 .. amount-1
    ret = pd.DataFrame({"otherIllnesses" : np.arange(amount)})
    # In-place replace driven by a Series of zeros; in the recorded output every value became 0.0.
    # NOTE(review): presumably the Series is read as an index-label -> value mapping — confirm against the pandas docs.
    ret.replace(pd.Series(np.zeros(amount)), inplace=True)
    # replace works here
    return ret
generateComorbidities(["hypertension", "asthma", "cancer", "diabetes"],[0.78, 0.12, 0.01, 0.09], 10)

Unnamed: 0,otherIllnesses
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0
5,0.0
6,0.0
7,0.0
8,0.0
9,0.0
