# Median Household Income Calculator by Building Age and Density Level

The purpose of this program is to reveal the income level of families and people based on the level of density they live in. 
The program will show variations of median household incomes for select jurisidictions, with the X-axis displaying what year their apartment or house was built in and a Y-Axis displaying what type of building they reside in: single-family, 50 unit apartment etc. This data is collected annually by the Census Bureau as part of the American Community Survey dataset. The latest data is from 2023 and this program will source the 5-year dataset covering years 2019, 2020, 2021, 2022 and 2023. Explanations of how this program works is provided through green text notes and paragraphs. 

In [134]:
import pandas as pd
import numpy as np
import wquantiles
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


In [None]:
# Summon Households in the Berkeley-Albany PUMA zone (the smallest geography for Berkeley)
ca_households = pd.read_csv("ACS_2023/csv_hca/psam_h06.csv")
#PUMAs_of_interest = [111,112,113,123] # Oakland codes
PUMAs_of_interest = [101] # Berkeley, Albany code : 06 - 00101
# You can change the area based on the PUMA codes: https://www2.census.gov/geo/pdfs/reference/puma2020/2020_PUMA_Names.pdf
households_set = ca_households[ca_households['PUMA'].isin(PUMAs_of_interest)]

In [136]:
# SERIALNO : Unique ID for Census ACS respondants, in this case, households. This is just a viewing sample of the large dataset
households_set.head()

Unnamed: 0,RT,SERIALNO,DIVISION,PUMA,REGION,STATE,ADJHSG,ADJINC,WGTP,NP,TYPEHUGQ,ACCESSINET,ACR,AGS,BATH,BDSP,BLD,BROADBND,COMPOTHX,CONP,DIALUP,ELEFP,ELEP,FS,FULFP,FULP,GASFP,GASP,HFL,HISPEED,HOTWAT,INSP,LAPTOP,MHP,MRGI,MRGP,MRGT,MRGX,OTHSVCEX,REFR,RMSP,RNTM,RNTP,RWAT,RWATPR,SATELLITE,SINK,SMARTPHONE,SMP,STOV,TABLET,TEL,TEN,VACS,VALP,VEH,WATFP,WATP,YRBLT,CPLT,FINCP,FPARC,GRNTP,GRPIP,HHL,HHLANP,HHLDRAGEP,HHLDRHISP,HHLDRRAC1P,HHT,HHT2,HINCP,HUGCL,HUPAC,HUPAOC,HUPARC,KIT,LNGI,MULTG,MV,NOC,NPF,NPP,NR,NRC,OCPIP,PARTNER,PLM,PLMPRP,PSF,R18,R60,R65,RESMODE,SMOCP,SMX,SRNT,SVAL,TAXAMT,WIF,WKEXREL,WORKSTAT,FACCESSP,FACRP,FAGSP,FBATHP,FBDSP,FBLDP,FBROADBNDP,FCOMPOTHXP,FCONP,FDIALUPP,FELEP,FFINCP,FFSP,FFULP,FGASP,FGRNTP,FHFLP,FHINCP,FHISPEEDP,FHOTWATP,FINSP,FKITP,FLAPTOPP,FMHP,FMRGIP,FMRGP,FMRGTP,FMRGXP,FMVP,FOTHSVCEXP,FPLMP,FPLMPRP,FREFRP,FRMSP,FRNTMP,FRNTP,FRWATP,FRWATPRP,FSATELLITEP,FSINKP,FSMARTPHONP,FSMOCP,FSMP,FSMXHP,FSMXSP,FSTOVP,FTABLETP,FTAXP,FTELP,FTENP,FVACSP,FVALP,FVEHP,FWATP,FYRBLTP,WGTP1,WGTP2,WGTP3,WGTP4,WGTP5,WGTP6,WGTP7,WGTP8,WGTP9,WGTP10,WGTP11,WGTP12,WGTP13,WGTP14,WGTP15,WGTP16,WGTP17,WGTP18,WGTP19,WGTP20,WGTP21,WGTP22,WGTP23,WGTP24,WGTP25,WGTP26,WGTP27,WGTP28,WGTP29,WGTP30,WGTP31,WGTP32,WGTP33,WGTP34,WGTP35,WGTP36,WGTP37,WGTP38,WGTP39,WGTP40,WGTP41,WGTP42,WGTP43,WGTP44,WGTP45,WGTP46,WGTP47,WGTP48,WGTP49,WGTP50,WGTP51,WGTP52,WGTP53,WGTP54,WGTP55,WGTP56,WGTP57,WGTP58,WGTP59,WGTP60,WGTP61,WGTP62,WGTP63,WGTP64,WGTP65,WGTP66,WGTP67,WGTP68,WGTP69,WGTP70,WGTP71,WGTP72,WGTP73,WGTP74,WGTP75,WGTP76,WGTP77,WGTP78,WGTP79,WGTP80
20,H,2019GQ0000225,9,101,4,6,1195583,1207712,0,1,3,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
35,H,2019GQ0000394,9,101,4,6,1195583,1207712,0,1,3,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
105,H,2019GQ0001190,9,101,4,6,1195583,1207712,0,1,3,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
197,H,2019GQ0002123,9,101,4,6,1195583,1207712,0,1,3,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
237,H,2019GQ0002612,9,101,4,6,1195583,1207712,0,1,3,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [137]:
# HINCP = Household income (past 12 months, use ADJINC to adjust HINCP to constant dollars)
# ADJINC = Adjustment factor for income and earnings dollar amounts (6 implied decimal places)
# Divide ADJINC by 1,000,000 to obtain the inflation adjustment factor and multiply it to the PUMS variable value to adjust it to 2023 dollars
households_set['Adjusted_Income'] = (households_set['HINCP'].astype(float) * households_set['ADJINC'].astype(float)) / 1000000

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  households_set['Adjusted_Income'] = (households_set['HINCP'].astype(float) * households_set['ADJINC'].astype(float)) / 1000000


# Filtering the Dataset

With all the Census respondents prepared in household_set, we'll now filter based on their building age and density type. Building Ages and Density Levels are assigned values numerical values by the Census, and a detailed breakdown of them is located here : https://www2.census.gov/programs-surveys/acs/tech_docs/pums/data_dict/PUMS_Data_Dictionary_2023.pdf

For example, code: "BLD" contains whether a Census samplier lives in a Mobile Home (Code 1) or a Boathouse (Code 10). The same applies for Building Ages under code YRBLT. I will filter and store these Census respondents of their economic data to calculate the median incomes of people who live in single-family homes, duplexes, apartments etc. and by what year they were built.

In [138]:
# Making a column called building type (bldtype) where the home of every household / unit is corresponded with the following strings
conditions = [
    households_set['BLD'] == 2,
    households_set['BLD'] == 3,
    households_set['BLD'].isin([4, 5]),
    households_set['BLD'].isin([6, 7]),
    households_set['BLD'] == 8,
    households_set['BLD'] == 9,
    households_set['BLD'] == 1,
    households_set['BLD'] == 10
]

choices_type = [
    "Single-Family Home",
    "Accessory Dwelling / Rowhouse",
    "Two, Three or Fourplex",
    "5-19 unit bldg",
    "20-49 unit bldg",
    "50+ unit bldg",
    "Mobile home or trailer",
    "Boat/ RV/ Van"
]
households_set["bldtype"] = np.select(conditions, choices_type, default="unknown")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  households_set["bldtype"] = np.select(conditions, choices_type, default="unknown")


In [139]:
# Same thing with building ages; classify them with corresponding values
conditions = [
    households_set['YRBLT'] <= 1950,
    (households_set['YRBLT'] > 1950) & (households_set['YRBLT'] <= 1970),
    (households_set['YRBLT'] > 1970) & (households_set['YRBLT'] <= 1990),
    (households_set['YRBLT'] > 1990) & (households_set['YRBLT'] <= 2009),
    households_set['YRBLT'] > 2009
]
choices_age = [
    "Pre-1950",
    "1951 - 1970",
    "1971 - 1990",
    "1991 - 2009",
    "2010 - 2023"
]
households_set["bldage"] = np.select(conditions, choices_age, default="unknown")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  households_set["bldage"] = np.select(conditions, choices_age, default="unknown")


# Explaining the data:

Note that although these are the values below are per row or ACS response, the numbers below are not the proportion of the population. Each row and respondent to the Census questionares is assigned a weight which determines how many households this respondent is represenative of. This is how the Census estimates housing and population during non-decinneal years by sending questionaires to a sample and assigning them a weight It is not practical to send every citizen every year a very detailed Census survey, but they can be sampled in ways that can offer estimations of the broader public.

# Example:

Suppose that the Census surveys and gets responses back from 1000 single-family residents (SFR) and 100 apartment residents (AR). But the Census knows the 50% of the city is single-family and 50% apartments. The weight for SFH owners is 0.5 (50%) / 1,000 = 0.0005 and the weight for Apt. residents is 0.5 / 100 = 0.005. When we calculate the median household incomes, the individual apartment household response will be assigned as a greater represenative value or "weight" for apartment households overall than the individual SFH owner response will be for SFH owners overall. Calculating medians without weights supposes all numbers (or responses in this case) are equal. The median of incomes in this program will be weighted medians to account for the discrepency in responses

In [140]:
# Number of ACS household respondants living in the array of building density types in Berkeley-Albany

households_set["bldtype"].value_counts()

bldtype
Single-Family Home               1844
unknown                          1202
5-19 unit bldg                    592
Two, Three or Fourplex            581
50+ unit bldg                     281
20-49 unit bldg                   246
Accessory Dwelling / Rowhouse     150
Mobile home or trailer              3
Boat/ RV/ Van                       2
Name: count, dtype: int64

Note that the Census agency could not identify the age of building or density level of approximately 1,202 respondents for Berkeley - Albany. They will not be included in the median census calculations. 

In [141]:
# Number of ACS household respondants living in the array of homes by the year their homes/apartments were built, in Berkeley-Albany

households_set["bldage"].value_counts()

bldage
Pre-1950       2451
unknown        1202
1951 - 1970     589
1971 - 1990     322
2010 - 2023     175
1991 - 2009     162
Name: count, dtype: int64

In [142]:
# Results will contain the final product: Median Household Income by Density and Home Age
results = pd.DataFrame(index=choices_type, columns=choices_age)
# This will show the numbered of (weighted) samples of the above figure
results_nrow = pd.DataFrame(index=choices_type, columns=choices_age)
                            
for unit in choices_type:
    for age in choices_age:
        # Filter out units by age and density level, but only use household incomes that are known
        iter_set = households_set[
            (households_set['bldtype'] == unit) &
            (households_set['bldage'] == age) &
            (~households_set['Adjusted_Income'].isna() & 
            households_set['Adjusted_Income']> 0)
        ]
        # Sum the housing unit weights for this type of household (the weights estimate the represenative proportion of the sample)
        # If weight is under 1, data is missing likely for this cohort for privacy reasons or low sampling. Disregard this response
        if iter_set["WGTP"].sum(skipna=True) < 1:
            results.loc[unit, age] = -1
        # Else, calculate the Household's Income using a represenative weight of the sample response
        else:
            results.loc[unit, age]  = wquantiles.median(iter_set["Adjusted_Income"], iter_set["WGTP"])
        # Sum the weights to see how represenative each housing type/age is
        results_nrow.loc[unit, age] = iter_set["WGTP"].sum(skipna=True)


# Median Household Income of Berkeley & Albany residents by the density of their buildings and the year their buildings were built

Households with $0 income are omitted. Likely families leasing their extra units out to relatives.

In [143]:
# The Results (excludes households reporting $0 income)
# Format the float columns as dollars
results_styled = results.style.format({
    'Pre-1950':  lambda x: '${:,.2f}'.format(x) if x != -1 else 'NA', 
    '1951 - 1970': lambda x: '${:,.2f}'.format(x) if x != -1 else 'NA',  
    '1971 - 1990': lambda x: '${:,.2f}'.format(x) if x  != -1 else 'NA',   
    '1991 - 2009':  lambda x: '${:,.2f}'.format(x) if x != -1 else 'NA', 
    '2010 - 2023':  lambda x: '${:,.2f}'.format(x) if x  != -1 else 'NA',   
})
results_styled



Unnamed: 0,Pre-1950,1951 - 1970,1971 - 1990,1991 - 2009,2010 - 2023
Single-Family Home,"$196,706.28","$151,357.76","$205,034.59","$174,443.00","$254,878.55"
Accessory Dwelling / Rowhouse,"$144,202.61","$147,720.95","$159,825.59","$93,546.08","$182,798.13"
"Two, Three or Fourplex","$116,179.00","$66,420.65","$66,760.03","$73,265.78","$52,064.35"
5-19 unit bldg,"$50,195.03","$76,506.21","$47,877.20","$65,316.77","$61,886.25"
20-49 unit bldg,"$48,215.21","$53,284.34","$67,659.57","$53,524.45","$50,793.79"
50+ unit bldg,"$64,651.62","$62,794.35","$63,234.10","$49,086.43","$69,471.38"
Mobile home or trailer,,,"$13,770.82",,"$20,390.36"
Boat/ RV/ Van,,,"$19,707.80",,


# What the Above Data Says:

The X-axis is the year a Census respondent's home was built and the Y-Axis is the type of building they live in. The median income of households who live in Single-Family Homes built before 1950 is 196,706.28 dollars. The median income of households who live in apartments built in the last 13 years, with 50 units in them or more is 69,471.38 dollars. 

In [144]:
# Weighted Results Sample Size
# This is the Numerical Representation of how many households the income data is informed by
results_nrow.head(len(results_nrow))


Unnamed: 0,Pre-1950,1951 - 1970,1971 - 1990,1991 - 2009,2010 - 2023
Single-Family Home,19974,1657,1167,263,438
Accessory Dwelling / Rowhouse,1197,227,425,80,49
"Two, Three or Fourplex",6229,1846,683,307,180
5-19 unit bldg,3926,3685,1404,542,197
20-49 unit bldg,1001,1512,516,691,438
50+ unit bldg,622,1177,846,474,1422
Mobile home or trailer,0,0,10,0,11
Boat/ RV/ Van,0,0,18,0,0


# Sample Size 

The greater the size of the value, the more useful the data is. Although all the data should be shown for clarity, a weighted value of say 18 RV dwellers may not be an accurate picture. The higher the sample size and weighted value in proportion to the city's population, the more reliable the data. It doesnt mean the median income data is false either just because the sample size is low; it just means you take it with a bigger grain of salt than the higher samples. Some samples are inherently low due to the low proportion of Census respondents sampled in say, an A.D.U. built in the last 13 years. There just not that many of them compared to the thousands in older apartments and single-family homes built before 1950. 

Now that this is explained I will re-run the calculations with various other scenarios. 

# Median Incomes By Building Age and Density Level but with $0 income households included

Only drags the median incomes downwards slightly. Mostly families renting out houses to their kid or in-law that is unemployed or retired w/o income. This is why I exclude it.

In [145]:
# Results will contain the final product: Median Household Income by Density and Age
results = pd.DataFrame(index=choices_type, columns=choices_age)
# This will show the numbered of (weighted) samples of the above figure
results_nrow = pd.DataFrame(index=choices_type, columns=choices_age)
                            
for unit in choices_type:
    for age in choices_age:
        # Filter out units by age and density level, but only use household incomes that are known
        iter_set = households_set[
            (households_set['bldtype'] == unit) &
            (households_set['bldage'] == age) &
            (~households_set['Adjusted_Income'].isna() )
        ]
        # Sum the housing unit weights for this type of household (the weights estimate the represenative proportion of the sample)
        # If weight is under 1, data is missing likely for this cohort for privacy reasons or low sampling. Disregard this response
        if iter_set["WGTP"].sum(skipna=True) < 1:
            results.loc[unit, age] = -1
        # Else, calculate the Household's Income using a represenative weight of the sample response
        else:
            results.loc[unit, age]  = wquantiles.median(iter_set["Adjusted_Income"], iter_set["WGTP"])
        # Sum the weights to see how represenative each housing type/age is
        results_nrow.loc[unit, age] = iter_set["WGTP"].sum(skipna=True)

In [146]:
# The Results (includes households reporting $0 income)
# Format the float columns as dollars
results_styled = results.style.format({
    'Pre-1950':  lambda x: '${:,.2f}'.format(x) if x != -1 else 'NA', 
    '1951 - 1970': lambda x: '${:,.2f}'.format(x) if x != -1 else 'NA',  
    '1971 - 1990': lambda x: '${:,.2f}'.format(x) if x  != -1 else 'NA',   
    '1991 - 2009':  lambda x: '${:,.2f}'.format(x) if x != -1 else 'NA', 
    '2010 - 2023':  lambda x: '${:,.2f}'.format(x) if x  != -1 else 'NA',   
})
results_styled


Unnamed: 0,Pre-1950,1951 - 1970,1971 - 1990,1991 - 2009,2010 - 2023
Single-Family Home,"$193,848.59","$150,040.43","$204,621.34","$174,443.00","$254,878.55"
Accessory Dwelling / Rowhouse,"$144,202.61","$147,720.95","$159,825.59","$93,546.08","$182,798.13"
"Two, Three or Fourplex","$114,987.97","$65,651.19","$66,760.03","$73,265.78","$52,064.35"
5-19 unit bldg,"$49,974.33","$76,149.78","$47,125.86","$61,182.31","$58,553.71"
20-49 unit bldg,"$42,978.20","$49,335.07","$56,765.09","$50,638.19","$45,850.48"
50+ unit bldg,"$64,611.81","$57,697.07","$58,858.51","$39,903.71","$60,326.43"
Mobile home or trailer,$0.00,,"$13,770.82",,"$20,390.36"
Boat/ RV/ Van,,,"$19,707.80",,


In [147]:
# Weighted Results Sample Size
# This is the Numerical Representation of how many households the income data is informed by
results_nrow.head(len(results_nrow))

Unnamed: 0,Pre-1950,1951 - 1970,1971 - 1990,1991 - 2009,2010 - 2023
Single-Family Home,20198,1667,1186,263,438
Accessory Dwelling / Rowhouse,1197,227,425,80,49
"Two, Three or Fourplex",6271,1859,683,307,180
5-19 unit bldg,4003,3713,1432,579,207
20-49 unit bldg,1123,1576,653,726,485
50+ unit bldg,632,1280,878,507,1561
Mobile home or trailer,11,0,10,0,11
Boat/ RV/ Van,0,0,18,0,0


# Median Household Income by Building Density in Berkeley & Albany for All Building Ages

In [148]:
# Building Age 
conditions = [
    ~households_set['YRBLT'].isna()
]
choices_age = [
    "Median Income for All Households"
]
households_set["bldage"] = np.select(conditions, choices_age, default="unknown")
# Building Density 
conditions = [
    households_set['BLD'] == 2,
    households_set['BLD'] == 3,
    households_set['BLD'].isin([4, 5]),
    households_set['BLD'].isin([6, 7]),
    households_set['BLD'] == 8,
    households_set['BLD'] == 9,
    households_set['BLD'] == 1,
    households_set['BLD'] == 10
]

choices_type = [
    "Single-Family Home",
    "Accessory Dwelling / Rowhouse",
    "Two, Three or Fourplex",
    "5-19 unit bldg",
    "20-49 unit bldg",
    "50+ unit bldg",
    "Mobile home or trailer",
    "Boat/ RV/ Van"
]
households_set["bldtype"] = np.select(conditions, choices_type, default="unknown")

# Results will contain the final product: Median Household Income by Density and Age
results = pd.DataFrame(index=choices_type, columns=choices_age)
# This will show the numbered of (weighted) samples of the above figure
results_nrow = pd.DataFrame(index=choices_type, columns=choices_age)
                            
for unit in choices_type:
    for age in choices_age:
        # Filter out units by age and density level, but only use household incomes that are known
        iter_set = households_set[
            (households_set['bldtype'] == unit) &
            (~households_set['Adjusted_Income'].isna() )
        ]
        # Sum the housing unit weights for this type of household (the weights estimate the represenative proportion of the sample)
        # If weight is under 1, data is missing likely for this cohort for privacy reasons or low sampling. Disregard this response
        if iter_set["WGTP"].sum(skipna=True) < 1:
            results.loc[unit, age] = -1
        # Else, calculate the Household's Income using a represenative weight of the sample response
        else:
            results.loc[unit, age]  = wquantiles.median(iter_set["Adjusted_Income"], iter_set["WGTP"])
        # Sum the weights to see how represenative each housing type/age is
        results_nrow.loc[unit, age] = iter_set["WGTP"].sum(skipna=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  households_set["bldage"] = np.select(conditions, choices_age, default="unknown")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  households_set["bldtype"] = np.select(conditions, choices_type, default="unknown")


In [149]:
# The Results (excludes households reporting $0 income)
# Format the float columns as dollars
results_styled = results.style.format({
    'Median Income for All Households':  lambda x: '${:,.2f}'.format(x) if x != -1 else 'NA', 
})
results_styled

Unnamed: 0,Median Income for All Households
Single-Family Home,"$193,233.92"
Accessory Dwelling / Rowhouse,"$144,942.91"
"Two, Three or Fourplex","$97,139.16"
5-19 unit bldg,"$59,689.60"
20-49 unit bldg,"$45,016.58"
50+ unit bldg,"$60,092.47"
Mobile home or trailer,"$13,770.82"
Boat/ RV/ Van,"$19,707.80"


In [150]:
# Weighted Results Sample Size
# This is the Numerical Representation of how many households the income data is informed by
results_nrow.head(len(results_nrow))

Unnamed: 0,Median Income for All Households
Single-Family Home,23752
Accessory Dwelling / Rowhouse,1978
"Two, Three or Fourplex",9300
5-19 unit bldg,9934
20-49 unit bldg,4563
50+ unit bldg,4858
Mobile home or trailer,32
Boat/ RV/ Van,18


# Comparing housing units built before and after Costa-Hawkins (1995).

After 1995, rental housing units built in Berkeley could not have rent control applied to them. This attempts to compare rent controlled and non-rent controlled housing, although nearly all single-family homes are owner-occupieds. 

In [151]:
# Same thing with building ages; classify them with corresponding values
conditions = [
    households_set['YRBLT'] < 1995,
    households_set['YRBLT'] >= 1995,
]
choices_age = [
    "Median Household Income for\nHomes Built Before 1995",
    "Median Household Income for\nHomes Built On or After 1995"
]
households_set["bldage"] = np.select(conditions, choices_age, default="unknown")

# Results will contain the final product: Median Household Income by Density and Home Age
results = pd.DataFrame(index=choices_type, columns=choices_age)
# This will show the numbered of (weighted) samples of the above figure
results_nrow = pd.DataFrame(index=choices_type, columns=choices_age)
                            
for unit in choices_type:
    for age in choices_age:
        # Filter out units by age and density level, but only use household incomes that are known
        iter_set = households_set[
            (households_set['bldtype'] == unit) &
            (households_set['bldage'] == age) &
            (~households_set['Adjusted_Income'].isna() & 
            households_set['Adjusted_Income']> 0)
        ]
        # Sum the housing unit weights for this type of household (the weights estimate the represenative proportion of the sample)
        # If weight is under 1, data is missing likely for this cohort for privacy reasons or low sampling. Disregard this response
        if iter_set["WGTP"].sum(skipna=True) < 1:
            results.loc[unit, age] = -1
        # Else, calculate the Household's Income using a represenative weight of the sample response
        else:
            results.loc[unit, age]  = wquantiles.median(iter_set["Adjusted_Income"], iter_set["WGTP"])
        # Sum the weights to see how represenative each housing type/age is
        results_nrow.loc[unit, age] = iter_set["WGTP"].sum(skipna=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  households_set["bldage"] = np.select(conditions, choices_age, default="unknown")


In [152]:
# The Results (excludes households reporting $0 income)
# Format the float columns as dollars
results_styled = results.style.format({
    'Median Household Income for\nHomes Built Before 1995':  lambda x: '${:,.2f}'.format(x) if x != -1 else 'NA', 
    'Median Household Income for\nHomes Built On or After 1995':  lambda x: '${:,.2f}'.format(x) if x != -1 else 'NA'   
})
results_styled

Unnamed: 0,Median Household Income for Homes Built Before 1995,Median Household Income for Homes Built On or After 1995
Single-Family Home,"$196,016.52","$235,055.74"
Accessory Dwelling / Rowhouse,"$145,730.97","$107,384.85"
"Two, Three or Fourplex","$104,024.42","$67,592.18"
5-19 unit bldg,"$61,202.97","$63,210.12"
20-49 unit bldg,"$55,007.32","$44,798.72"
50+ unit bldg,"$64,392.85","$61,356.08"
Mobile home or trailer,"$13,770.82","$20,390.36"
Boat/ RV/ Van,"$19,707.80",


In [153]:
# Weighted Results Sample Size
# This is the Numerical Representation of how many households the income data is informed by
results_nrow.head(len(results_nrow))

Unnamed: 0,Median Household Income for\nHomes Built Before 1995,Median Household Income for\nHomes Built On or After 1995
Single-Family Home,22798,701
Accessory Dwelling / Rowhouse,1849,129
"Two, Three or Fourplex",8758,487
5-19 unit bldg,9015,739
20-49 unit bldg,3029,1129
50+ unit bldg,2645,1896
Mobile home or trailer,10,11
Boat/ RV/ Van,18,0
