<a href="https://colab.research.google.com/github/Stephen42Kim/IHS/blob/main/Top50MSASpectralClustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# IHS Urban Spectral Clustering On Top 50 MSAs



In [None]:
import requests                   # Make HTTP requests
import json                       # Load US Census datasets as JSON
import chardet                    # Character encoding auto-detection
import numpy as np
import pandas as pd
#from google.colab import files   # Access local directory
from google.colab import drive    # Access Google Drive (in folder /content/drive/My Drive/IHS/)
drive.mount('/content/drive')

Mounted at /content/drive


# Data Preprocessing

## Load PUMA/MSA crosswalk and list of top 50 MSAs by population

In [None]:
# Load the PumaCBSACrosswalk.csv file (use CBSA as MSA)

crosswalkPath = '/content/drive/My Drive/IHS/PumaCBSACrosswalk.csv'             # Single source for the path used by both reads below
with open(crosswalkPath, 'rb') as f:
    result = chardet.detect(f.read())                           # Detect encoding of the file (use readline() if file is large)
pumaMSACrossDF = pd.read_csv(crosswalkPath, encoding=result['encoding'])
pumaMSACrossDF.columns = ['ST', 'PUMA', 'CBSA#','stab','CBSA']  # Format column names
pumaMSACrossDF['ST'] = pumaMSACrossDF['ST'].astype('string')    # Convert 'ST' (State) column to string type
print('\033[1m' + 'PumaCBSACrosswalk.csv')
# Report the shape that was actually loaded instead of a hard-coded count,
# so the message cannot go stale when the crosswalk file changes
print('\033[0m' + '{} Features, {} Entries'.format(pumaMSACrossDF.shape[1], pumaMSACrossDF.shape[0]))
pumaMSACrossDF.head()

[1mPumaCBSACrosswalk.csv
[0m5 Features, 2203 Entries


Unnamed: 0,ST,PUMA,CBSA#,stab,CBSA
0,1,100,22520,AL,"Florence-Muscle Shoals, AL"
1,1,200,26620,AL,"Huntsville, AL"
2,1,301,26620,AL,"Huntsville, AL"
3,1,302,26620,AL,"Huntsville, AL"
4,1,400,22840,AL,"Fort Payne, AL"


In [None]:
# Build the list of the top 50 MSAs by population from the Excel workbook
# (names are taken from the second column, starting at the fourth data row)

rawNames = pd.read_excel('/content/drive/My Drive/IHS/Top50MSAByPop.xlsx', sheet_name='Sheet1').iloc[3:,1]

# Strip the leading '.' and the trailing ' Metro Area' from every name
MSA50 = [name.removeprefix('.').removesuffix(' Metro Area') for name in rawNames]

print('\033[1m' + 'Top50MSAByPop.xlsx')
print('\033[0m' + '50 Entries')
print()
MSA50[:5]

[1mTop50MSAByPop.xlsx
[0m50 Entries



['New York-Newark-Jersey City, NY-NJ-PA',
 'Los Angeles-Long Beach-Anaheim, CA',
 'Chicago-Naperville-Elgin, IL-IN-WI',
 'Dallas-Fort Worth-Arlington, TX',
 'Houston-The Woodlands-Sugar Land, TX']

In [None]:
def _fetchPUMS(url):
  ''' Input: url - full Census PUMS API query
      Return: query result as a pandas dataframe (first JSON row is used as the header)
      Raises: requests.HTTPError if the API answers with an error status
  '''
  # Generous read timeout: the extracts are large, but the call should not hang forever
  response = requests.get(url, timeout=(30, 600))
  response.raise_for_status()                  # Fail loudly instead of trying to parse an error page
  rows = response.json()                       # List of rows; rows[0] holds the column names
  return pd.DataFrame(rows[1:], columns=rows[0])


def _geoKey(values):
  ''' Input: values - pandas Series of ST or PUMA codes (strings or numbers)
      Return: the codes as nullable integers, so that '06', '6' and 6 all compare equal
  '''
  return pd.to_numeric(values.astype(object), errors='coerce').astype('Int64')


def readACS(year, writeCSV=False):
  ''' Input: year - ACS 1-year PUMS vintage to download
             writeCSV - if True, also write the result to
                        /content/drive/My Drive/IHS/acs<year>.csv (default: False)
      Return: Top 50 MSA's for the given year - pandas dataframe
              (housing record + first person of each household + CBSA#, stab, CBSA, Year)
      Raises: requests.HTTPError if either Census API request fails

      Relies on the notebook globals pumaMSACrossDF (PUMA/CBSA crosswalk) and MSA50.
  '''

  # Extract housing information from US Census PUMS dataset of given year
  # From 2021 on, extract 'YRBLT' instead of 'YBL'
  yearBuilt = 'YRBLT' if year >= 2021 else 'YBL'
  baseURL = 'https://api.census.gov/data/' + str(year) + '/acs/acs1/pums'
  hous = _fetchPUMS(baseURL + '?get=SERIALNO,WGTP,GRPIP,RNTP,HINCP,TEN,OCPIP,VACS,' + yearBuilt + ',BLD,ST,PUMA')
  print('Housing file retrieved.')

  # Extract attributes from population files (SPORDER=1 keeps the first person of each household)
  pop = _fetchPUMS(baseURL + '?get=AGEP,SERIALNO,RAC1P,SCHL,HISP,ESR&SPORDER=1')
  print('\nPopulation file retrieved.')

  # Merge housing and population dataframes - Left join on SERIALNO
  print('\nMerging housing and population datasets.')
  df1 = pd.merge(hous, pop, on='SERIALNO', how='left')

  # Merge with PUMA crosswalk file - join on ST and PUMA.
  # The API returns the codes as strings and, for some years (e.g. 2021), zero-padded
  # ('06', '00100'), so a raw string comparison silently drops most rows.
  # Join on normalized integer keys and leave the original ST/PUMA columns untouched.
  print('\nAdding the MSAs using crosswalk file.')
  keyCols = ['_stKey', '_pumaKey']
  crosswalk = pumaMSACrossDF[['CBSA#', 'stab', 'CBSA']].assign(
      _stKey=_geoKey(pumaMSACrossDF['ST']),
      _pumaKey=_geoKey(pumaMSACrossDF['PUMA']))
  crosswalk = crosswalk.dropna(subset=keyCols)  # Missing keys would otherwise match each other in the merge
  df1 = df1.assign(_stKey=_geoKey(df1['ST']), _pumaKey=_geoKey(df1['PUMA']))
  df_merge = pd.merge(df1, crosswalk, how='left', on=keyCols).drop(columns=keyCols)

  #Filtering the dataframe based on the MSAs
  print('\nFiltering required MSAs')
  msa50df = df_merge[df_merge['CBSA'].isin(MSA50)].reset_index(drop=True)

  # Add year attribute (assign returns a new frame, so no SettingWithCopyWarning)
  msa50df = msa50df.assign(Year=year)

  # Optionally write dataframe to drive as csv file
  if writeCSV:
    print('\nWriting into CSV file.')
    msa50df.to_csv('/content/drive/My Drive/IHS/acs'+str(year)+'.csv')
  print('\nDone.')

  return msa50df

In [None]:
# Get 2012 data: download the 2012 ACS 1-year PUMS records for the top 50 MSAs
# through readACS and preview the first rows

acs2012 = readACS(2012)
acs2012.head()

Housing file retrieved.

Population file retrieved.

Merging housing and population datasets.

Adding the MSAs using crosswalk file.

Filtering required MSAs


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  msa50df.loc[:,'Year'] = year



Writing into CSV file.

Done.


Unnamed: 0,SERIALNO,WGTP,GRPIP,RNTP,HINCP,TEN,OCPIP,VACS,YBL,BLD,...,AGEP,RAC1P,SCHL,HISP,ESR,SPORDER,CBSA#,stab,CBSA,Year
0,1,51,0,0,149000,1,23,0,9,3,...,43,1,22,3,1,1,35620.0,NY,"New York-Newark-Jersey City, NY-NJ-PA",2012
1,4,104,0,0,50000,1,33,0,1,2,...,56,1,22,1,1,1,26420.0,TX,"Houston-The Woodlands-Sugar Land, TX",2012
2,7,94,0,0,5000,2,101,0,1,3,...,75,1,13,1,6,1,37980.0,PA,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",2012
3,16,90,0,580,0,3,0,0,5,9,...,57,2,16,1,6,1,19820.0,MI,"Detroit-Warren-Dearborn, MI",2012
4,19,180,0,0,-60000,0,0,4,6,9,...,,,,,,,41860.0,CA,"San Francisco-Oakland-Berkeley, CA",2012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
709231,1493822,19,0,0,133250,1,28,0,5,2,...,38,1,21,1,1,1,33100.0,FL,"Miami-Fort Lauderdale-Pompano Beach, FL",2012
709232,1493831,49,0,0,72000,2,10,0,4,2,...,70,1,22,1,6,1,38300.0,PA,"Pittsburgh, PA",2012
709233,1493834,0,0,0,-60000,0,0,0,0,0,...,18,8,18,3,6,1,35620.0,NY,"New York-Newark-Jersey City, NY-NJ-PA",2012
709234,1493837,102,25,650,40600,3,0,0,13,4,...,25,2,19,1,1,1,12060.0,GA,"Atlanta-Sandy Springs-Alpharetta, GA",2012


In [None]:
# Get 2013 data: download the 2013 ACS 1-year PUMS records for the top 50 MSAs
# through readACS and preview the first rows

acs2013 = readACS(2013)
acs2013.head()

Housing file retrieved.

Population file retrieved.

Merging housing and population datasets.

Adding the MSAs using crosswalk file.

Filtering required MSAs


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  msa50df.loc[:,'Year'] = year



Writing into CSV file.

Done.


Unnamed: 0,SERIALNO,WGTP,GRPIP,RNTP,HINCP,TEN,OCPIP,VACS,YBL,BLD,...,AGEP,RAC1P,SCHL,HISP,ESR,SPORDER,CBSA#,stab,CBSA,Year
0,1,76,0,0,90020,1,29,0,5,2,...,54,2,22,1,1,1,33100.0,FL,"Miami-Fort Lauderdale-Pompano Beach, FL",2013
1,10,155,0,0,30950,2,9,0,3,2,...,78,1,18,1,1,1,15380.0,NY,"Buffalo-Cheektowaga, NY",2013
2,13,77,0,0,80000,1,19,0,7,2,...,35,1,16,1,1,1,12420.0,TX,"Austin-Round Rock-Georgetown, TX",2013
3,16,78,0,0,16000,1,101,0,2,6,...,45,1,18,1,3,1,41940.0,CA,"San Jose-Sunnyvale-Santa Clara, CA",2013
4,26,80,0,0,80000,1,24,0,8,2,...,44,1,21,1,1,1,18140.0,OH,"Columbus, OH",2013


In [None]:
# Get 2014 data: download the 2014 ACS 1-year PUMS records for the top 50 MSAs
# through readACS and preview the first rows

acs2014 = readACS(2014)
acs2014.head()

Housing file retrieved.

Population file retrieved.

Merging housing and population datasets.

Adding the MSAs using crosswalk file.

Filtering required MSAs


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  msa50df.loc[:,'Year'] = year



Writing into CSV file.

Done.


Unnamed: 0,SERIALNO,WGTP,GRPIP,RNTP,HINCP,TEN,OCPIP,VACS,YBL,BLD,...,AGEP,RAC1P,SCHL,HISP,ESR,SPORDER,CBSA#,stab,CBSA,Year
0,4,0,0,0,-60000,0,0,0,0,0,...,18,2,19,1,6,1,37980.0,DE,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",2014
1,10,312,55,1600,40000,3,0,0,7,3,...,47,1,19,1,1,1,12580.0,MD,"Baltimore-Columbia-Towson, MD",2014
2,14,67,0,0,75000,1,38,0,1,2,...,48,1,20,1,3,1,27260.0,FL,"Jacksonville, FL",2014
3,17,40,0,0,355500,1,14,0,8,2,...,42,8,23,19,1,1,31140.0,KY,"Louisville/Jefferson County, KY-IN",2014
4,23,96,0,0,53200,1,40,0,5,2,...,68,1,18,1,1,1,19820.0,MI,"Detroit-Warren-Dearborn, MI",2014


In [None]:
# Get 2015 data: download the 2015 ACS 1-year PUMS records for the top 50 MSAs
# through readACS and preview the first rows

acs2015 = readACS(2015)
acs2015.head()

Housing file retrieved.

Population file retrieved.

Merging housing and population datasets.

Adding the MSAs using crosswalk file.

Filtering required MSAs


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  msa50df.loc[:,'Year'] = year



Writing into CSV file.

Done.


Unnamed: 0,SERIALNO,WGTP,GRPIP,RNTP,HINCP,TEN,OCPIP,VACS,YBL,BLD,...,AGEP,RAC1P,SCHL,HISP,ESR,SPORDER,CBSA#,stab,CBSA,Year
0,1,110,0,0,200750,1,12,0,7,2,...,45.0,1.0,21.0,1.0,1.0,1.0,16980.0,IL,"Chicago-Naperville-Elgin, IL-IN-WI",2015
1,10,77,18,2800,210000,3,0,0,7,3,...,39.0,1.0,22.0,1.0,1.0,1.0,47900.0,MD,"Washington-Arlington-Alexandria, DC-VA-MD-WV",2015
2,13,112,0,0,-60000,0,0,7,8,2,...,,,,,,,37980.0,NJ,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",2015
3,25,121,0,0,39600,2,12,0,1,2,...,18.0,3.0,15.0,1.0,6.0,1.0,33460.0,MN,"Minneapolis-St. Paul-Bloomington, MN-WI",2015
4,28,104,0,0,64400,1,21,0,7,1,...,34.0,1.0,12.0,1.0,1.0,1.0,17460.0,OH,"Cleveland-Elyria, OH",2015


In [None]:
# Get 2016 data: download the 2016 ACS 1-year PUMS records for the top 50 MSAs
# through readACS and preview the first rows

acs2016 = readACS(2016)
acs2016.head()

Housing file retrieved.

Population file retrieved.

Merging housing and population datasets.

Adding the MSAs using crosswalk file.

Filtering required MSAs


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  msa50df.loc[:,'Year'] = year



Writing into CSV file.

Done.


Unnamed: 0,SERIALNO,WGTP,GRPIP,RNTP,HINCP,TEN,OCPIP,VACS,YBL,BLD,...,AGEP,RAC1P,SCHL,HISP,ESR,SPORDER,CBSA#,stab,CBSA,Year
0,4,53,101,430,940,3,0,0,1,9,...,65,2,16,1,6,1,35620.0,NJ,"New York-Newark-Jersey City, NY-NJ-PA",2016
1,7,55,7,10,10800,3,0,0,17,8,...,65,2,15,1,6,1,47260.0,VA,"Virginia Beach-Norfolk-Newport News, VA-NC",2016
2,10,96,0,0,89170,1,41,0,4,2,...,78,1,23,1,2,1,35620.0,NJ,"New York-Newark-Jersey City, NY-NJ-PA",2016
3,16,151,69,1000,21000,3,0,0,7,2,...,36,1,19,1,1,1,45300.0,FL,"Tampa-St. Petersburg-Clearwater, FL",2016
4,22,195,0,0,152900,1,8,0,1,2,...,58,2,19,1,1,1,38300.0,PA,"Pittsburgh, PA",2016


In [None]:
# Get 2017 data: download the 2017 ACS 1-year PUMS records for the top 50 MSAs
# through readACS and preview the first rows

acs2017 = readACS(2017)
acs2017.head()

Housing file retrieved.

Population file retrieved.

Merging housing and population datasets.

Adding the MSAs using crosswalk file.

Filtering required MSAs

Writing into CSV file.

Done.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  msa50df.loc[:,'Year'] = year


Unnamed: 0,SERIALNO,WGTP,GRPIP,RNTP,HINCP,TEN,OCPIP,VACS,YBL,BLD,...,AGEP,RAC1P,SCHL,HISP,ESR,SPORDER,CBSA#,stab,CBSA,Year
0,2017000000010,72,0,0,32800,2,25,0,4,2,...,76.0,1.0,12.0,1.0,6.0,1.0,35620.0,NJ,"New York-Newark-Jersey City, NY-NJ-PA",2017
1,2017000000013,112,0,0,22400,2,19,0,10,2,...,33.0,1.0,17.0,1.0,1.0,1.0,38060.0,AZ,"Phoenix-Mesa-Chandler, AZ",2017
2,2017000000040,96,23,950,58300,3,0,0,4,4,...,53.0,1.0,16.0,1.0,1.0,1.0,16980.0,IL,"Chicago-Naperville-Elgin, IL-IN-WI",2017
3,2017000000046,347,0,0,-60000,0,0,3,20,3,...,,,,,,,35620.0,NY,"New York-Newark-Jersey City, NY-NJ-PA",2017
4,2017000000052,59,43,3300,98000,3,0,0,6,6,...,47.0,1.0,23.0,1.0,1.0,1.0,27260.0,FL,"Jacksonville, FL",2017


In [None]:
# Get 2018 data: download the 2018 ACS 1-year PUMS records for the top 50 MSAs
# through readACS and preview the first rows

acs2018 = readACS(2018)
acs2018.head()

Housing file retrieved.

Population file retrieved.

Merging housing and population datasets.

Adding the MSAs using crosswalk file.

Filtering required MSAs

Writing into CSV file.

Done.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  msa50df.loc[:,'Year'] = year


Unnamed: 0,SERIALNO,WGTP,GRPIP,RNTP,HINCP,TEN,OCPIP,VACS,YBL,BLD,...,AGEP,RAC1P,SCHL,HISP,ESR,SPORDER,CBSA#,stab,CBSA,Year
0,2018GQ0000013,0,0,0,-60000,0,0,0,0,0,...,18,1,14,2,6,1,41740.0,CA,"San Diego-Chula Vista-Carlsbad, CA",2018
1,2018GQ0000016,0,0,0,-60000,0,0,0,0,0,...,69,9,17,1,6,1,31080.0,CA,"Los Angeles-Long Beach-Anaheim, CA",2018
2,2018GQ0000019,0,0,0,-60000,0,0,0,0,0,...,78,1,22,1,6,1,41620.0,UT,"Salt Lake City, UT",2018
3,2018GQ0000031,0,0,0,-60000,0,0,0,0,0,...,19,6,18,1,6,1,39300.0,RI,"Providence-Warwick, RI-MA",2018
4,2018GQ0000037,0,0,0,-60000,0,0,0,0,0,...,82,2,12,1,6,1,17460.0,OH,"Cleveland-Elyria, OH",2018


In [None]:
# Get 2019 data: download the 2019 ACS 1-year PUMS records for the top 50 MSAs
# through readACS and preview the first rows

acs2019 = readACS(2019)
acs2019.head()

Housing file retrieved.

Population file retrieved.

Merging housing and population datasets.

Adding the MSAs using crosswalk file.

Filtering required MSAs

Writing into CSV file.

Done.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  msa50df.loc[:,'Year'] = year


Unnamed: 0,SERIALNO,WGTP,GRPIP,RNTP,HINCP,TEN,OCPIP,VACS,YBL,BLD,...,AGEP,RAC1P,SCHL,HISP,ESR,SPORDER,CBSA#,stab,CBSA,Year
0,2019GQ0000001,0,0,0,-60000,0,0,0,0,0,...,35,1,17,1,6,1,26900.0,IN,"Indianapolis-Carmel-Anderson, IN",2019
1,2019GQ0000007,0,0,0,-60000,0,0,0,0,0,...,21,6,19,1,1,1,26900.0,IN,"Indianapolis-Carmel-Anderson, IN",2019
2,2019GQ0000013,0,0,0,-60000,0,0,0,0,0,...,18,1,16,1,6,1,31080.0,CA,"Los Angeles-Long Beach-Anaheim, CA",2019
3,2019GQ0000016,0,0,0,-60000,0,0,0,0,0,...,33,1,16,1,6,1,33100.0,FL,"Miami-Fort Lauderdale-Pompano Beach, FL",2019
4,2019GQ0000022,0,0,0,-60000,0,0,0,0,0,...,41,1,12,1,6,1,28140.0,MO,"Kansas City, MO-KS",2019


In [None]:
# Get 2021 data: download the 2021 ACS 1-year PUMS records for the top 50 MSAs
# through readACS (which requests YRBLT instead of YBL for this year) and display the frame
# NOTE(review): no 2020 extract is loaded in this notebook - presumably because the
# standard 2020 ACS 1-year PUMS is not available; confirm

acs2021 = readACS(2021)
acs2021

Housing file retrieved.

Population file retrieved.

Merging housing and population datasets.

Adding the MSAs using crosswalk file.

Filtering required MSAs

Writing into CSV file.

Done.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  msa50df.loc[:,'Year'] = year


Unnamed: 0,SERIALNO,WGTP,GRPIP,RNTP,HINCP,TEN,OCPIP,VACS,YRBLT,BLD,...,AGEP,RAC1P,SCHL,HISP,ESR,SPORDER,CBSA#,stab,CBSA,Year
0,2021GQ0000061,0,0,0,-60000,0,0,0,1938,0,...,19,1,18,01,6,1,33340.0,WI,"Milwaukee-Waukesha, WI",2021
1,2021GQ0000101,0,0,0,-60000,0,0,0,1938,0,...,19,1,19,01,6,1,33340.0,WI,"Milwaukee-Waukesha, WI",2021
2,2021GQ0000107,0,0,0,-60000,0,0,0,1938,0,...,59,2,13,01,6,1,47260.0,VA,"Virginia Beach-Norfolk-Newport News, VA-NC",2021
3,2021GQ0000144,0,0,0,-60000,0,0,0,1938,0,...,23,6,16,01,4,1,47260.0,VA,"Virginia Beach-Norfolk-Newport News, VA-NC",2021
4,2021GQ0000165,0,0,0,-60000,0,0,0,1938,0,...,18,6,16,01,6,1,42660.0,WA,"Seattle-Tacoma-Bellevue, WA",2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64794,2021HU1415555,50,0,0,33000,2,11,0,1950,2,...,58,1,16,01,1,1,45300.0,FL,"Tampa-St. Petersburg-Clearwater, FL",2021
64795,2021HU1415599,349,0,0,64000,1,14,0,1939,2,...,80,2,14,01,6,1,33340.0,WI,"Milwaukee-Waukesha, WI",2021
64796,2021HU1415625,55,0,0,12100,1,101,0,1940,2,...,70,1,20,01,6,1,45300.0,FL,"Tampa-St. Petersburg-Clearwater, FL",2021
64797,2021HU1415649,63,0,0,201800,1,21,0,1990,2,...,70,1,22,01,1,1,42660.0,WA,"Seattle-Tacoma-Bellevue, WA",2021


In [None]:
# Merge all acs files into one dataframe with a fresh 0..n-1 index.
# The 2021 extract carries YRBLT instead of YBL (see readACS), so the stacked frame
# holds both columns, each empty for the years that do not provide it.
acsFrames = [acs2012, acs2013, acs2014, acs2015, acs2016, acs2017, acs2018, acs2019, acs2021]
acsTotal = pd.concat(acsFrames, ignore_index=True)

# Confirm the total length: the stacked frame must have exactly as many rows as its parts
totalLength = sum(frame.shape[0] for frame in acsFrames)
print('Total length should be: {}'.format(totalLength))
assert acsTotal.shape[0] == totalLength, \
    'acsTotal has {} rows, expected {}'.format(acsTotal.shape[0], totalLength)

acsTotal

Total length should be: 5892653


Unnamed: 0,SERIALNO,WGTP,GRPIP,RNTP,HINCP,TEN,OCPIP,VACS,YBL,BLD,...,RAC1P,SCHL,HISP,ESR,SPORDER,CBSA#,stab,CBSA,Year,YRBLT
0,1,51,0,0,149000,1,23,0,9,3,...,1,22,3,1,1,35620.0,NY,"New York-Newark-Jersey City, NY-NJ-PA",2012,
1,4,104,0,0,50000,1,33,0,1,2,...,1,22,1,1,1,26420.0,TX,"Houston-The Woodlands-Sugar Land, TX",2012,
2,7,94,0,0,5000,2,101,0,1,3,...,1,13,1,6,1,37980.0,PA,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",2012,
3,16,90,0,580,0,3,0,0,5,9,...,2,16,1,6,1,19820.0,MI,"Detroit-Warren-Dearborn, MI",2012,
4,19,180,0,0,-60000,0,0,4,6,9,...,,,,,,41860.0,CA,"San Francisco-Oakland-Berkeley, CA",2012,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5892648,2021HU1415555,50,0,0,33000,2,11,0,,2,...,1,16,01,1,1,45300.0,FL,"Tampa-St. Petersburg-Clearwater, FL",2021,1950
5892649,2021HU1415599,349,0,0,64000,1,14,0,,2,...,2,14,01,6,1,33340.0,WI,"Milwaukee-Waukesha, WI",2021,1939
5892650,2021HU1415625,55,0,0,12100,1,101,0,,2,...,1,20,01,6,1,45300.0,FL,"Tampa-St. Petersburg-Clearwater, FL",2021,1940
5892651,2021HU1415649,63,0,0,201800,1,21,0,,2,...,1,22,01,1,1,42660.0,WA,"Seattle-Tacoma-Bellevue, WA",2021,1990


In [None]:
# Summary statistics for every column of acsTotal (include='all' covers non-numeric columns too),
# transposed so that each row of the result describes one column
acsTotal.describe(include='all').T