## Data Synthesis - BRFSS Cleansing for Integration

In [9]:
import pandas as pd
import numpy as np
import random as rd

In [10]:
# Read the first 10,000 rows of the AIHS data set that has been integrated with
# the raw BRFSS data set, then display the frame's (rows, columns) shape.
# NOTE(review): the path is absolute and user-specific; it will need changing
# to run anywhere else.
df_BRFSS = pd.read_csv("C:/Users/josep/documents/datasets/AIHS_SIM_V4_BRFSS_RAW.csv",nrows=10000)
df_BRFSS.shape

(10000, 427)

In [None]:
# Read up to 31,546 rows of BRFSS_Redux.csv (presumably a reduced BRFSS
# extract -- confirm) and display the frame's shape.
# This rebinds df_BRFSS, replacing any frame loaded under that name before.
df_BRFSS = pd.read_csv("C:/Users/josep/documents/datasets/BRFSS_Redux.csv",nrows=31546)
df_BRFSS.shape

In [11]:
# Save the current df_BRFSS frame to a new CSV file.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; pass index=False if that extra column is not wanted in the file.
df_BRFSS.to_csv("C:/Users/josep/documents/datasets/AIHS_SIM_BRFSS_RAW_Reduced.csv") 

In [5]:
# Preview the first five rows of the BRFSS frame.
df_BRFSS.head()

Unnamed: 0,_STATE,FMONTH,IDATE,IMONTH,IDAY,IYEAR,DISPCODE,SEQNO,_PSU,CTELENUM,...,_PAREC1,_PASTAE1,_LMTACT1,_LMTWRK1,_LMTSCL1,_RFSEAT2,_RFSEAT3,_FLSHOT6,_PNEUMO2,_AIDTST3
0,1,1,b'01292015',b'01',b'29',b'2015',1200,2015000001,2015000001,1.0,...,4,2,1.0,1.0,1.0,1,1,,,1.0
1,1,1,b'01202015',b'01',b'20',b'2015',1100,2015000002,2015000002,1.0,...,2,2,3.0,3.0,4.0,2,2,,,2.0
2,1,1,b'02012015',b'02',b'01',b'2015',1200,2015000003,2015000003,1.0,...,9,9,9.0,9.0,9.0,9,9,9.0,9.0,
3,1,1,b'01142015',b'01',b'14',b'2015',1100,2015000004,2015000004,1.0,...,4,2,1.0,1.0,1.0,1,1,,,9.0
4,1,1,b'01142015',b'01',b'14',b'2015',1100,2015000005,2015000005,1.0,...,4,2,1.0,1.0,1.0,1,1,,,1.0


In [None]:
# Calculate the AIHS health risk stratification
def calc_age_risk(age):
    """Return the AIHS age-risk score for a single age in years.

    Scoring bands:
        age < 40        -> 0.0
        40 <= age < 50  -> 1 + 0.1 for each year over 40
        50 <= age < 60  -> 2 + 0.1 for each year over 50
        60 <= age < 90  -> 3 + 0.1 for each year over 60
        age >= 90       -> 0 (treated as non-actionable)

    Args:
        age: Age in years (int or float).

    Returns:
        The risk score for the band ``age`` falls in, or NaN when ``age`` is
        NaN (no band comparison holds for NaN).
    """
    if age < 40:
        return 0.0
    if age < 50:
        return 1 + ((age - 40) * .1)
    if age < 60:
        return 2 + ((age - 50) * .1)
    if age < 90:
        return 3 + ((age - 60) * .1)
    if age >= 90:
        # The top band is open-ended: it used to stop at 110, which left the
        # result unassigned (UnboundLocalError) for any older age.
        return 0
    # Only reachable when every comparison above is False, i.e. age is NaN.
    return float('nan')

In [None]:
# Interactive scratch cell: prompt for a weight in kilograms, convert it with
# the factor 2.2046 lb per kg, and echo both values.
# NOTE(review): input() blocks an unattended "Run All".
kilo_grams = float(
    input('Enter weight in Kg to Convert into pounds:')
)
pounds = 2.2046 * kilo_grams
print(
    kilo_grams,
    ' Kilograms =',
    pounds,
    ' Pounds',
)

In [None]:
def calc_AIHS_age_risk(row):
    """Set ``row['AIHS Age Risk']`` from ``row['Age']`` and return the row.

    Written for ``DataFrame.apply(..., axis='columns')``, where ``row`` is a
    Series holding one record indexed by column name.

    Scoring bands:
        Age < 40        -> 0.0
        40 <= Age < 50  -> 1 + 0.1 for each year over 40
        50 <= Age < 60  -> 2 + 0.1 for each year over 50
        60 <= Age < 90  -> 3 + 0.1 for each year over 60
        Age >= 90       -> 0 (treated as non-actionable)

    Args:
        row: Mapping-like record with an 'Age' entry; it is updated in place.

    Returns:
        The same ``row`` object, now always carrying 'AIHS Age Risk'
        (NaN when 'Age' is NaN).
    """
    age = row['Age']
    if age < 40:
        risk = 0.0
    elif age < 50:
        risk = 1 + ((age - 40) * .1)
    elif age < 60:
        risk = 2 + ((age - 50) * .1)
    elif age < 90:
        risk = 3 + ((age - 60) * .1)
    elif age >= 90:
        # Open-ended top band: the old upper limit of 110 meant older ages
        # fell through every branch and the entry was never written.
        risk = 0
    else:
        # Only reachable when no comparison holds, i.e. Age is NaN.
        risk = float('nan')

    row['AIHS Age Risk'] = risk
    return row

In [None]:
def calc_height_in_feet(row):
    """Derive 'Simulated Height IN' on ``row`` from 'Simulated Height CM'.

    The centimetre value is multiplied by 0.0328 and rounded to one decimal
    place. ``row`` is updated in place and returned.

    NOTE(review): 0.0328 is the centimetre-to-feet factor, so the stored value
    is decimal feet although the column label ends in "IN" -- confirm which
    unit downstream code expects before relying on this column.
    """
    height_cm = row['Simulated Height CM']
    scaled = height_cm * 0.0328
    row['Simulated Height IN'] = round(scaled, 1)
    return row

In [None]:
def calc_weight_in_pounds(row):
    """Derive 'Simulated Weight LBS' on ``row`` from 'Simulated Weight KG'.

    The kilogram value is multiplied by 2.2046 and rounded to one decimal
    place. ``row`` is updated in place and returned.
    """
    weight_kg = row['Simulated Weight KG']
    weight_lbs = round(weight_kg * 2.2046, 1)
    row['Simulated Weight LBS'] = weight_lbs
    return row

In [None]:
def calc_body_mass_index(row):
    """Set ``row['Simulated BMI']`` from the simulated weight and height.

    BMI is weight in kilograms divided by the square of height in metres.
    The height is stored in centimetres, so it is converted to metres first;
    dividing by centimetres squared (as this function used to) gives a value
    10,000 times too small.

    Args:
        row: Mapping-like record with 'Simulated Weight KG' and
            'Simulated Height CM' entries; it is updated in place.

    Returns:
        The same ``row`` object with 'Simulated BMI' set (not rounded).
    """
    height_m = row['Simulated Height CM'] / 100
    row['Simulated BMI'] = row['Simulated Weight KG'] / (height_m ** 2)
    return row

In [None]:
def calc_ADA_age_risk(row):
    """Set ``row['ADA Age Risk']`` from ``row['Age']`` and return the row.

    Written for ``DataFrame.apply(..., axis='columns')``, where ``row`` is a
    Series holding one record indexed by column name.

    Points by age band:
        Age < 40        -> 0
        40 <= Age < 50  -> 1
        50 <= Age < 60  -> 2
        60 <= Age < 90  -> 3
        Age >= 90       -> 0 (treated as non-actionable)

    Args:
        row: Mapping-like record with an 'Age' entry; it is updated in place.

    Returns:
        The same ``row`` object, now always carrying 'ADA Age Risk'
        (NaN when 'Age' is NaN).
    """
    age = row['Age']
    if age < 40:
        points = 0
    elif age < 50:
        points = 1
    elif age < 60:
        points = 2
    elif age < 90:
        points = 3
    elif age >= 90:
        # Open-ended top band: the old upper limit of 110 meant older ages
        # fell through every branch and the entry was never written.
        points = 0
    else:
        # Only reachable when no comparison holds, i.e. Age is NaN.
        points = float('nan')

    row['ADA Age Risk'] = points
    return row

In [None]:
def calc_ADA_BMI_risk(row):
    """Set ``row['ADA BMI Risk']`` from ``row['Simulated BMI']``.

    Written for ``DataFrame.apply(..., axis='columns')``, where ``row`` is a
    Series holding one record indexed by column name.

    Points by BMI band:
        BMI < 25        -> 0 (underweight and normal both score 0)
        25 <= BMI < 30  -> 1
        30 <= BMI < 40  -> 2
        BMI >= 40       -> 3

    Args:
        row: Mapping-like record with a 'Simulated BMI' entry; it is updated
            in place.

    Returns:
        The same ``row`` object, now always carrying 'ADA BMI Risk'
        (NaN when 'Simulated BMI' is NaN).
    """
    bmi = row['Simulated BMI']
    if bmi < 25:
        points = 0
    elif bmi < 30:
        points = 1
    elif bmi < 40:
        points = 2
    elif bmi >= 40:
        # Open-ended top band: the old upper limit of 60 meant higher BMIs
        # fell through every branch and the entry was never written.
        points = 3
    else:
        # Only reachable when no comparison holds, i.e. the BMI is NaN.
        points = float('nan')

    row['ADA BMI Risk'] = points
    return row

In [None]:
def calc_AIHS_BMI_risk(row):
    """Set ``row['AIHS BMI Risk']`` from ``row['Simulated BMI']``.

    Written for ``DataFrame.apply(..., axis='columns')``, where ``row`` is a
    Series holding one record indexed by column name.

    Score by BMI band:
        BMI < 25        -> 0 (underweight and normal both score 0)
        25 <= BMI < 30  -> 1 + 0.1 * (BMI - 26)
        30 <= BMI < 40  -> 2 + 0.1 * (BMI - 31)
        BMI >= 40       -> 3 + 0.1 * (BMI - 41)

    NOTE(review): each offset (26, 31, 41) sits one unit above the lower edge
    of its band, so a BMI at the edge scores 0.1 below the band's base value
    (e.g. BMI 25 -> 0.9). The age score anchors at the band edge instead;
    confirm which anchoring is intended.

    Args:
        row: Mapping-like record with a 'Simulated BMI' entry; it is updated
            in place.

    Returns:
        The same ``row`` object, now always carrying 'AIHS BMI Risk'
        (NaN when 'Simulated BMI' is NaN).
    """
    bmi = row['Simulated BMI']
    if bmi < 25:
        risk = 0
    elif bmi < 30:
        risk = 1 + ((bmi - 26) * .1)
    elif bmi < 40:
        risk = 2 + ((bmi - 31) * .1)
    elif bmi >= 40:
        # Open-ended top band: the old upper limit of 60 meant higher BMIs
        # fell through every branch and the entry was never written.
        risk = 3 + ((bmi - 41) * .1)
    else:
        # Only reachable when no comparison holds, i.e. the BMI is NaN.
        risk = float('nan')

    row['AIHS BMI Risk'] = risk
    return row

## Read in the core demographic data set

In [None]:
# Load the core demographic data set from dg_BMI_risk.csv into df and preview
# the first five rows. The commented-out line is an alternative read of the
# first 5,000 rows of dg.csv.
#df = pd.read_csv("C:/Users/josep/documents/datasets/dg.csv", nrows=5000)
df = pd.read_csv("C:/Users/josep/documents/datasets/dg_BMI_risk.csv")
df.head(5)

In [None]:
# Preview the first five rows of the demographic frame.
df.head()

In [None]:
# Capture the current column labels of df in `cols` and print them.
cols = df.columns
print(cols)

In [None]:
# Simulate an integer height (155-186 cm) and weight (60-129 kg) for every row
# of df, then derive BMI = kg / m**2 rounded to one decimal place.
# A seeded generator is used so the simulated columns are identical on every
# "Restart & Run All"; change the seed to draw a different sample.
rng = np.random.default_rng(seed=42)
df['Simulated Height CM'] = rng.integers(155, 187, df.shape[0])  # upper bound is exclusive
df['Simulated Weight KG'] = rng.integers(60, 130, df.shape[0])   # upper bound is exclusive
# Height is stored in centimetres, so convert to metres before squaring.
df['Simulated BMI'] = (df['Simulated Weight KG'] / ((df['Simulated Height CM'] / 100) ** 2)).round(1)
df.head()

In [None]:
# Interactive scratch cell: prompt for a height (metres) and weight (kg),
# compute BMI = weight / height**2 and print the category it falls in.
# NOTE(review): input() blocks an unattended "Run All".
height = float(input("Enter height in meters: "))
weight = float(input("Enter weight in kg: "))
bmi = weight/(height**2)

print("Your BMI is: {0} and you are: ".format(bmi), end='')

# Each branch is reached only after every lower band has been ruled out, so
# checking the upper bound alone is enough. The last test stays explicit so
# that a NaN BMI matches no category at all.
if bmi < 16:
    print("severely underweight")
elif bmi < 18.5:
    print("underweight")
elif bmi < 25:
    print("Healthy")
elif bmi < 30:
    print("overweight")
elif bmi >= 30:
    print("severely overweight")

In [None]:
# Write the demographic frame, including the simulated columns, to a new CSV.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; pass index=False if that extra column is not wanted in the file.
df.to_csv("C:/Users/josep/documents/datasets/dg_BMI_risk_III.csv")

In [None]:
# Read the first 5,000 rows of BRFSS_2015.csv and display the frame's shape.
# This rebinds df_BRFSS, replacing any frame loaded under that name before.
# NOTE(review): this file lives under a different root (C:/Data/...) than the
# other data sets read in this notebook.
df_BRFSS = pd.read_csv("C:/Data/brfss cdc/BRFSS_2015.csv",nrows=5000)
df_BRFSS.shape

In [None]:
# Number the BRFSS rows 1..N in their current order.
df_BRFSS["Patient Number"]= np.arange(len(df_BRFSS)) + 1

In [None]:
# Build a string key of the form "Patient_Number_<n>" from the row number.
df_BRFSS["Patient ID"]= "Patient_Number_" + df_BRFSS["Patient Number"].astype(str)

In [None]:
# Preview the first five rows of the BRFSS frame.
df_BRFSS.head()

In [None]:
# Add the age expressed in decades (Age / 10, fractional) and number the
# demographic rows 1..N in their current order; then refresh `cols` with the
# updated column labels and preview the frame.
df["Age Decade"]=df['Age'] / 10 
df["Patient Number"]= np.arange(len(df)) + 1 
cols = df.columns
df.head()

In [None]:
# "Age Dec" is the integer part of "Age Decade" (astype(int) drops the
# fraction). "Patient ID" is a string key of the form "Patient_Number_<n>".
# `cols` is refreshed with the updated column labels before the preview.
df["Age Dec"]=df['Age Decade'].astype(int) 
df["Patient ID"]= "Patient_Number_" + df["Patient Number"].astype(str)
cols = df.columns
df.head()

In [None]:
# Left-join the BRFSS columns onto the demographic rows by 'Patient ID';
# demographic rows with no matching BRFSS key are kept with NaN in the BRFSS
# columns.
# NOTE(review): if both frames carry a 'Patient Number' column, merge keeps
# both and suffixes them _x / _y.
dfm = df.merge(df_BRFSS, how='left', on='Patient ID')

In [None]:
# Display the (rows, columns) shape of the merged frame.
dfm.shape

In [None]:
# Interactive scratch cell: prompt for a whole-number height in centimetres
# and print it in feet, rounded to two decimal places.
# NOTE(review): input() blocks an unattended "Run All".
cm = int(input("Enter the height in centimeters:"))

# 0.0328 feet per centimetre
feet = cm * 0.0328

print("The length in feet", round(feet, 2))


In [None]:
# Preview the first five rows of the merged frame.
dfm.head()

In [None]:
# Preview the first five rows of the demographic frame.
df.head()

In [None]:
# Write the merged demographic + BRFSS frame to CSV.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; pass index=False if that extra column is not wanted in the file.
dfm.to_csv("C:/Users/josep/documents/datasets/patient_demo_behavior.csv")

In [None]:
# Recompute "Age Dec" (integer part of "Age Decade") and the
# "Patient_Number_<n>" key on df, snapshot the column labels into `cols`,
# and preview the frame.
# NOTE(review): `cols` taken here does not include any column added by a
# later cell.
df["Age Dec"]=df['Age Decade'].astype(int) 
df["Patient ID"]= "Patient_Number_" + df["Patient Number"].astype(str)
cols = df.columns
df.head()

In [None]:
# Run calc_height_in_feet on every row (axis='columns' passes one row at a
# time), rebind df to the result and preview it.
df=df.apply(calc_height_in_feet, axis='columns')
df.head()

In [None]:
# Run calc_weight_in_pounds on every row (axis='columns' passes one row at a
# time), rebind df to the result and preview it.
df=df.apply(calc_weight_in_pounds, axis='columns')
df.head()

In [None]:
# Put the columns back in the order captured in `cols`, then append every
# column that was added to df after `cols` was taken. Selecting df[cols] alone
# silently dropped those newer columns.
added_cols = [label for label in df.columns if label not in cols]
df = df[list(cols) + added_cols]
df.head(10)



In [None]:
# Run calc_AIHS_age_risk on every row (axis='columns' passes one row at a
# time), rebind df to the result and preview it.
df=df.apply(calc_AIHS_age_risk, axis='columns')
df.head()


In [None]:
# Run calc_ADA_BMI_risk on every row (axis='columns' passes one row at a
# time), rebind df to the result and preview it.
df=df.apply(calc_ADA_BMI_risk, axis='columns')
df.head()

In [None]:
# Run calc_AIHS_BMI_risk on every row (axis='columns' passes one row at a
# time) and rebind df to the result. The pandas column display limit is then
# removed so wide frames are shown in full, and the first five rows are
# previewed.
df=df.apply(calc_AIHS_BMI_risk, axis='columns')
pd.options.display.max_columns = None
df.head(5)