In [None]:
import pandas as pd
import plotly.express as px

In [None]:
# Display-only setting: passing None removes the limit on how many columns
# pandas shows when rendering a DataFrame.
pd.options.display.max_columns = None

In [None]:
# Read the person-level Stata file. With convert_categoricals=False the
# columns are loaded without data labels; switch it to True if you want to
# keep the labels.
df = pd.read_stata(
    'in/acs_pers_test.dta',
    convert_categoricals=False,
)
df.head()

In [None]:
# Examples of deriving additional fields from existing columns.

# Binary 0/1 flags based on age: 18 and over is an adult, under 18 is a child.
df['adult'] = df['age'].ge(18).astype(int)
df['child'] = df['age'].lt(18).astype(int)

# TODO - come back and deal with missing values or other odd codes

In [None]:
# for more complicated calculations, define a function, then apply it

def is_male_10_17(row):
    """Return 1 if the record is a male aged 10 through 17 inclusive, else 0.

    Parameters
    ----------
    row : mapping-like
        A single record supporting ``row['sex']`` and ``row['age']``
        (for example, a DataFrame row passed in by ``df.apply(..., axis=1)``).

    Returns
    -------
    int
        1 when ``sex`` equals 1 and ``age`` is between 10 and 17 inclusive,
        otherwise 0.
    """
    is_male = row['sex'] == 1  # Adjust sex code as needed
    # Age is only looked at for male records, and both bounds are inclusive.
    return int(is_male and 10 <= row['age'] <= 17)

# Run the function once per row (axis='columns' is the same as axis=1) and
# store the 0/1 result as a new column.
df['male_10_17'] = df.apply(is_male_10_17, axis='columns')

In [None]:
# Example of computing household-level attributes and casting them back to
# the person level: transform('sum') returns one value per person row, equal
# to the total for that person's household (grouped by cbserial).

hh_totals = df.groupby('cbserial')[['adult', 'child']].transform('sum')
df['adults_in_hh'] = hh_totals['adult']
df['kids_in_hh'] = hh_totals['child']

In [None]:
# Example of aggregating person records to one row per household (cbserial).
hh_df = (
    df.groupby('cbserial')
    .agg(
        # Count records, name it hhsize
        hhsize=pd.NamedAgg(column='cbserial', aggfunc='count'),
        # Sum the 0/1 person flags to get head counts per household
        num_adults=pd.NamedAgg(column='adult', aggfunc='sum'),
        num_kids=pd.NamedAgg(column='child', aggfunc='sum'),
        # Take the 'first' value per household; presumably these fields
        # repeat on every person record in a household -- TODO confirm
        rooms=pd.NamedAgg(column='rooms', aggfunc='first'),
        hhincome=pd.NamedAgg(column='hhincome', aggfunc='first'),
    )
    .reset_index()  # turn cbserial back into an ordinary column
)

In [None]:
# Preview the first five household rows (five is also head()'s default).
hh_df.head(n=5)

In [None]:
# Save the household table as a csv file.
# index=False: after reset_index() the index is just a 0..n-1 row counter, so
# writing it would add an unnamed leading column to the file (it comes back
# as "Unnamed: 0" when the csv is read again).
hh_df.to_csv('out/acs_hh.csv', index=False)

In [None]:
# Simple density heatmap of households: household income on the x axis,
# number of rooms on the y axis.
fig = px.density_heatmap(
    data_frame=hh_df,
    x='hhincome',
    y='rooms',
)
fig.show()