# Enrich Polygons from a Dataset

This example illustrates how to enrich polygons that are in a dataset with variables from CARTO's Data Observatory.

_Note: You'll need [CARTO Account](https://carto.com/signup) credentials to reproduce this example._

In [1]:
# Turn on debug-level logging so that the enrichment calls in later cells print
# the DEBUG timing lines for each internal step (_prepare_data, _upload_data, ...).
from cartoframes.utils import set_log_level
set_log_level('debug')

In [2]:
from cartoframes.auth import set_default_credentials

# Called with no arguments, so no credentials are hard-coded in the notebook.
# NOTE(review): presumably this picks up previously saved CARTO credentials — confirm.
set_default_credentials()

In [3]:
from geopandas import read_file

# Sample polygons hosted at libs.cartocdn.com. The file is fetched over HTTPS
# rather than plain HTTP so the download is encrypted and integrity-protected.
census_track = 'https://libs.cartocdn.com/cartoframes/files/census_track.geojson'
census_track_gdf = read_file(census_track)

# Preview the first rows: OBJECTID, FULLTRACTID, TRACTID and the polygon geometry.
census_track_gdf.head(3)

Unnamed: 0,OBJECTID,FULLTRACTID,TRACTID,geometry
0,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844..."
1,2,51013103000,103000,"POLYGON ((-77.08558 38.82992, -77.08625 38.828..."
2,3,51013102902,102902,"POLYGON ((-77.09520 38.84499, -77.09442 38.844..."


In [4]:
# Keep only the first polygon; every enrichment call below runs against this
# single-row GeoDataFrame. NOTE: this rebinds `census_track_gdf`, so the full
# frame loaded above is no longer reachable under that name.
census_track_gdf = census_track_gdf.head(1)

In [5]:
from cartoframes.data.observatory import Catalog

# List the catalog geographies available for country 'usa' in the
# 'demographics' category; the slugs shown can be passed to `.geography(...)`.
Catalog().country('usa').category('demographics').geographies

[<Geography.get('ags_q17_4739be4f')>,
 <Geography.get('expn_grid_a4075de4')>,
 <Geography.get('mbi_blockgroups_1ab060a')>,
 <Geography.get('mbi_counties_141b61cd')>,
 <Geography.get('mbi_county_subd_e8e6ea23')>,
 <Geography.get('mbi_pc_5_digit_4b1682a6')>,
 <Geography.get('usct_blockgroup_f45b6b49')>,
 <Geography.get('usct_cbsa_6c8b51ef')>,
 <Geography.get('usct_censustract_bc698c5a')>,
 <Geography.get('usct_congression_b6336b2c')>,
 <Geography.get('usct_county_ec40c962')>,
 <Geography.get('usct_county_92f1b5df')>,
 <Geography.get('usct_place_12d6699f')>,
 <Geography.get('usct_puma_b859f0fa')>,
 <Geography.get('usct_schooldistr_515af763')>,
 <Geography.get('usct_schooldistr_da72a4cb')>,
 <Geography.get('usct_schooldistr_287be4f7')>,
 <Geography.get('usct_state_4c8090b5')>,
 <Geography.get('usct_zcta5_75071016')>]

In [6]:
# Narrow the catalog down step by step (country -> category -> geography) and
# collect the datasets published for the census-tract geography.
datasets = (
    Catalog()
    .country('usa')
    .category('demographics')
    .geography('usct_censustract_bc698c5a')
    .datasets
)

# Render the dataset list as a DataFrame so its metadata can be browsed as a table.
datasets.to_dataframe()

Unnamed: 0,id,slug,name,description,country_id,geography_id,geography_name,geography_description,category_id,category_name,provider_id,provider_name,data_source_id,lang,temporal_aggregation,time_coverage,update_frequency,version,is_public_data
0,carto-do-public-data.usa_acs.demographics_soci...,acs_sociodemogr_d4b2cf03,5-yr ACS data at Census Tracts level (2006 - 2...,The American Community Survey (ACS) is an ongo...,usa,carto-do-public-data.usa_carto.geography_usa_c...,,,demographics,,,,,eng,5yrs,"[2006-01-01, 2011-01-01)",,20062010,True
1,carto-do-public-data.usa_acs.demographics_soci...,acs_sociodemogr_9ed5d625,5-yr ACS data at Census Tracts level (2007 - 2...,The American Community Survey (ACS) is an ongo...,usa,carto-do-public-data.usa_carto.geography_usa_c...,,,demographics,,,,,eng,5yrs,"[2007-01-01, 2012-01-01)",,20072011,True
2,carto-do-public-data.usa_acs.demographics_soci...,acs_sociodemogr_858c104e,5-yr ACS data at Census Tracts level (2008 - 2...,The American Community Survey (ACS) is an ongo...,usa,carto-do-public-data.usa_carto.geography_usa_c...,,,demographics,,,,,eng,5yrs,"[2008-01-01, 2013-01-01)",,20082012,True
3,carto-do-public-data.usa_acs.demographics_soci...,acs_sociodemogr_cfeb0968,5-yr ACS data at Census Tracts level (2009 - 2...,The American Community Survey (ACS) is an ongo...,usa,carto-do-public-data.usa_carto.geography_usa_c...,,,demographics,,,,,eng,5yrs,"[2009-01-01, 2014-01-01)",,20092013,True
4,carto-do-public-data.usa_acs.demographics_soci...,acs_sociodemogr_97c32d1f,5-yr ACS data at Census Tracts level (2010 - 2...,The American Community Survey (ACS) is an ongo...,usa,carto-do-public-data.usa_carto.geography_usa_c...,,,demographics,,,,,eng,5yrs,"[2010-01-01, 2015-01-01)",,20102014,True
5,carto-do-public-data.usa_acs.demographics_soci...,acs_sociodemogr_dda43439,5-yr ACS data at Census Tracts level (2011 - 2...,The American Community Survey (ACS) is an ongo...,usa,carto-do-public-data.usa_carto.geography_usa_c...,,,demographics,,,,,eng,5yrs,"[2011-01-01, 2016-01-01)",,20112015,True
6,carto-do-public-data.usa_acs.demographics_soci...,acs_sociodemogr_30d1f53,5-yr ACS data at Census Tracts level (2012 - 2...,The American Community Survey (ACS) is an ongo...,usa,carto-do-public-data.usa_carto.geography_usa_c...,,,demographics,,,,,eng,5yrs,"[2012-01-01, 2017-01-01)",,20122016,True
7,carto-do-public-data.usa_acs.demographics_soci...,acs_sociodemogr_496a0675,5-yr ACS data at Census Tracts level (2013 - 2...,The American Community Survey (ACS) is an ongo...,usa,carto-do-public-data.usa_carto.geography_usa_c...,,,demographics,,,,,eng,5yrs,"[2013-01-01, 2018-01-01)",,20132017,True


In [7]:
from cartoframes.data.observatory import Dataset

# Fetch a single dataset by its slug (one of the slugs in the table above).
dataset = Dataset.get('acs_sociodemogr_d4b2cf03')
# NOTE: despite the `_df` suffix, the next cell iterates this as a collection of
# variable objects (reading .agg_method, .db_type and .slug), not as a DataFrame.
variables_df = dataset.variables

In [8]:
# Print the aggregation method, database type and slug of each variable in the
# dataset. The agg_method is None for STRING variables in the output below.
for variable in variables_df:
    print(variable.agg_method, variable.db_type, variable.slug)

AVG FLOAT median_age_1b299936
None STRING geoidsl_52dffc46
None STRING geoidsc_c260e1d7
SUM FLOAT owner_occupied__a242d69b
SUM FLOAT bachelors_degre_c97d7ec4
SUM FLOAT bachelors_degre_619dbbbf
SUM FLOAT children_ccc14aa2
SUM FLOAT children_in_sin_96bc4ba1
SUM FLOAT employed_inform_cc19ebad
SUM FLOAT employed_manufa_4e5264bc
SUM FLOAT employed_other__a5acf594
SUM FLOAT employed_public_80ba451f
SUM FLOAT employed_retail_3cfcad1f
SUM FLOAT employed_scienc_10f6c661
SUM FLOAT employed_transp_66aedbfd
SUM FLOAT employed_wholes_dfec4891
SUM FLOAT female_female_h_d11712ea
SUM FLOAT four_more_cars_297e8a8a
AVG FLOAT gini_index_e8c30f9a
SUM FLOAT graduate_profes_ce2840a8
SUM FLOAT three_cars_32e541e7
SUM FLOAT pop_25_64_ce8c2ef0
SUM FLOAT pop_determined__486c4212
SUM FLOAT population_1_ye_dcac6ed6
SUM FLOAT population_3_ye_ca9ce3ea
AVG FLOAT poverty_4c2c9ac5
SUM FLOAT sales_office_em_abb972b6
SUM FLOAT some_college_an_23a0fb89
SUM FLOAT pop_15_and_over_1b25d822
SUM FLOAT nonfamily_house_2c17f15c

In [9]:
from cartoframes.data.observatory import Variable

# Look up the three variables used for enrichment by slug. The trailing comment
# on each slug gives that variable's aggregation method and database type.
v1, v2, v3 = [
    Variable.get(slug)
    for slug in (
        'no_car_2207f034',    # SUM, FLOAT
        'poverty_4c2c9ac5',   # AVG, FLOAT
        'geoidsl_52dffc46',   # None, STRING
    )
]

# Later cells pass this list as `variables=` and use v1.id / v2.id as dict keys.
variables = [v1, v2, v3]

In [10]:
from cartoframes.data.observatory import Enrichment

enrichment = Enrichment()

# aggregation=None: no aggregation is applied. As the output of the next cell
# shows, the result has several rows for the single input polygon (one per
# do_geoid), plus the intersected_area, do_area and user_area columns.
enriched_dataset_gdf = enrichment.enrich_polygons(
    census_track_gdf,
    variables=variables,
    aggregation=None
)

2020-03-13 13:18:04,568 - DEBUG - _prepare_data in 0.01 s
2020-03-13 13:18:09,230 - DEBUG - _upload_data in 4.66 s
2020-03-13 13:18:17,120 - DEBUG - _execute_enrichment in 7.89 s
2020-03-13 13:18:17,126 - DEBUG - _enrich in 12.57 s


In [11]:
# Display the un-aggregated enrichment result (at most 20 rows).
enriched_dataset_gdf.head(20)

Unnamed: 0,OBJECTID,FULLTRACTID,TRACTID,geometry,geoidsl,poverty,no_car,intersected_area,do_area,user_area,do_geoid
0,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013102802,55.0,0.0,35.033737,731381.7,681621.693138,51013102802
1,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013102901,68.0,0.0,677561.538702,696129.4,681621.693138,51013102901
2,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51510201000,100.0,14.0,23.771227,781136.1,681621.693138,51510201000
3,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013102902,256.0,124.0,3571.753693,753116.3,681621.693138,51013102902
4,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51510200107,341.0,217.0,161.381823,637726.5,681621.693138,51510200107
5,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013102702,247.0,61.0,0.39007,283408.7,681621.693138,51013102702
6,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013103100,489.0,102.0,267.823804,1908856.0,681621.693138,51013103100


In [22]:
# `Enrichment` was imported by an earlier cell, so it is not imported again here.
enrichment = Enrichment()

# Un-aggregated enrichment with one filter per variable, keyed by variable id.
# Only rows satisfying every filter are kept: in the output below the single
# remaining row has no_car = 124 (> 100) and poverty = 256 (< 300).
enriched_dataset_gdf = enrichment.enrich_polygons(
    census_track_gdf,
    variables=variables,
    aggregation=None,
    filters={v1.id: "> 100", v2.id: "< 300"}
)

enriched_dataset_gdf.head(20)

2020-03-13 13:23:45,199 - DEBUG - _prepare_data in 0.01 s
2020-03-13 13:23:48,893 - DEBUG - _upload_data in 3.69 s
2020-03-13 13:23:55,352 - DEBUG - _execute_enrichment in 6.46 s
2020-03-13 13:23:55,360 - DEBUG - _enrich in 10.17 s


Unnamed: 0,OBJECTID,FULLTRACTID,TRACTID,geometry,geoidsl,poverty,no_car,intersected_area,do_area,user_area,do_geoid
0,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013102902,256.0,124.0,3571.753693,753116.255091,681621.693138,51013102902


In [12]:
# `Enrichment` was imported by an earlier cell, so it is not imported again here.
enrichment = Enrichment()

# `aggregation` is omitted, so the library default applies. The output of the
# next cell has a single row for the input polygon with the numeric columns
# poverty and no_car; the STRING variable geoidsl does not appear.
# NOTE(review): the default looks like each variable's own agg_method — confirm.
enriched_dataset_gdf = enrichment.enrich_polygons(
    census_track_gdf,
    variables=variables
)

2020-03-13 13:18:17,187 - DEBUG - _prepare_data in 0.0 s
2020-03-13 13:18:22,133 - DEBUG - _upload_data in 4.94 s
2020-03-13 13:18:29,719 - DEBUG - _execute_enrichment in 7.59 s
2020-03-13 13:18:29,769 - DEBUG - _enrich in 12.58 s


In [13]:
# Display the result of the default-aggregation enrichment (at most 20 rows).
enriched_dataset_gdf.head(20)

Unnamed: 0,OBJECTID,FULLTRACTID,TRACTID,geometry,poverty,no_car
0,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",222.285714,0.657821


In [14]:
# `Enrichment` was imported by an earlier cell, so it is not imported again here.
enrichment = Enrichment()

# A single aggregation name applied to all variables. With 'ARRAY_AGG' the
# output of the next cell keeps one row for the input polygon and stores a list
# of values in each variable column (including the STRING variable geoidsl).
enriched_dataset_gdf = enrichment.enrich_polygons(
    census_track_gdf,
    variables=variables,
    aggregation='ARRAY_AGG'
)

2020-03-13 13:18:29,843 - DEBUG - _prepare_data in 0.01 s
2020-03-13 13:18:33,361 - DEBUG - _upload_data in 3.52 s
2020-03-13 13:18:41,083 - DEBUG - _execute_enrichment in 7.72 s
2020-03-13 13:18:41,109 - DEBUG - _enrich in 11.28 s


In [15]:
# Display the ARRAY_AGG enrichment result (at most 20 rows).
enriched_dataset_gdf.head(20)

Unnamed: 0,OBJECTID,FULLTRACTID,TRACTID,geometry,geoidsl,poverty,no_car
0,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...","['51013102902', '51013103100', '51510200107', ...","[256.0, 489.0, 341.0, 247.0, 55.0, 68.0, 100.0]","[124.0, 102.0, 217.0, 61.0, 0.0, 0.0, 14.0]"


In [16]:
enrichment = Enrichment()

# Per-variable aggregation, keyed by variable id. A list requests several
# aggregations for the same variable; the output then has one column per
# method, prefixed with the method name (sum_no_car, avg_no_car).
enriched_dataset_gdf = enrichment.enrich_polygons(
    census_track_gdf,
    variables=variables,
    aggregation={
        v1.id: ['SUM', 'AVG'],
        v2.id: 'AVG',
    }
)

enriched_dataset_gdf.head(20)

2020-03-13 13:18:41,154 - DEBUG - _prepare_data in 0.0 s
2020-03-13 13:18:44,561 - DEBUG - _upload_data in 3.41 s
2020-03-13 13:18:50,709 - DEBUG - _execute_enrichment in 6.15 s
2020-03-13 13:18:50,739 - DEBUG - _enrich in 9.59 s


Unnamed: 0,OBJECTID,FULLTRACTID,TRACTID,geometry,poverty,sum_no_car,avg_no_car
0,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",222.285714,0.657821,74.0


In [17]:
enrichment = Enrichment()

# Un-aggregated enrichment with a single filter on v1 (no_car > 0). In the
# output below the rows whose no_car is 0.0 are gone, leaving 5 rows.
enriched_dataset_gdf = enrichment.enrich_polygons(
    census_track_gdf,
    variables=variables,
    aggregation=None,
    filters={v1.id: "> 0"}
)

enriched_dataset_gdf.head(20)

2020-03-13 13:18:50,770 - DEBUG - _prepare_data in 0.0 s
2020-03-13 13:18:54,184 - DEBUG - _upload_data in 3.41 s
2020-03-13 13:19:00,228 - DEBUG - _execute_enrichment in 6.04 s
2020-03-13 13:19:00,237 - DEBUG - _enrich in 9.47 s


Unnamed: 0,OBJECTID,FULLTRACTID,TRACTID,geometry,geoidsl,poverty,no_car,intersected_area,do_area,user_area,do_geoid
0,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013102902,256.0,124.0,3571.753693,753116.3,681621.693138,51013102902
1,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013102702,247.0,61.0,0.39007,283408.7,681621.693138,51013102702
2,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51510201000,100.0,14.0,23.771227,781136.1,681621.693138,51510201000
3,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013103100,489.0,102.0,267.823804,1908856.0,681621.693138,51013103100
4,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51510200107,341.0,217.0,161.381823,637726.5,681621.693138,51510200107


In [18]:
enrichment = Enrichment()

# A list of conditions on the same variable: rows must satisfy all of them
# (0 < no_car < 200). The output below no longer contains the no_car = 217 row.
enriched_dataset_gdf = enrichment.enrich_polygons(
    census_track_gdf,
    variables=variables,
    aggregation=None,
    filters={v1.id: ["> 0", "< 200"]}
)

enriched_dataset_gdf.head(20)

2020-03-13 13:19:00,296 - DEBUG - _prepare_data in 0.0 s
2020-03-13 13:19:05,040 - DEBUG - _upload_data in 4.74 s
2020-03-13 13:19:12,828 - DEBUG - _execute_enrichment in 7.79 s
2020-03-13 13:19:12,852 - DEBUG - _enrich in 12.56 s


Unnamed: 0,OBJECTID,FULLTRACTID,TRACTID,geometry,geoidsl,poverty,no_car,intersected_area,do_area,user_area,do_geoid
0,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013103100,489.0,102.0,267.823804,1908856.0,681621.693138,51013103100
1,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013102702,247.0,61.0,0.39007,283408.7,681621.693138,51013102702
2,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51510201000,100.0,14.0,23.771227,781136.1,681621.693138,51510201000
3,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",51013102902,256.0,124.0,3571.753693,753116.3,681621.693138,51013102902


In [19]:
enrichment = Enrichment()

# Same range filter, but with the default aggregation (no `aggregation` argument).
# The filter is applied before aggregating: the poverty value in the output
# (273.0) is the average over the four rows that pass the filter.
enriched_dataset_gdf = enrichment.enrich_polygons(
    census_track_gdf,
    variables=variables,
    filters={v1.id: ["> 0", "< 200"]}
)

enriched_dataset_gdf.head(20)

2020-03-13 13:19:12,898 - DEBUG - _prepare_data in 0.0 s
2020-03-13 13:19:17,869 - DEBUG - _upload_data in 4.97 s
2020-03-13 13:19:25,423 - DEBUG - _execute_enrichment in 7.55 s
2020-03-13 13:19:25,452 - DEBUG - _enrich in 12.56 s


Unnamed: 0,OBJECTID,FULLTRACTID,TRACTID,geometry,poverty,no_car
0,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",273.0,0.602908


In [20]:
enrichment = Enrichment()

# Combine per-variable aggregation with a range filter on v1. Both dicts are
# keyed by variable id; the output has the columns poverty, sum_no_car and
# avg_no_car computed over the rows that pass the filter.
enriched_dataset_gdf = enrichment.enrich_polygons(
    census_track_gdf,
    variables=variables,
    aggregation={
        v1.id: ['SUM', 'AVG'],
        v2.id: 'AVG',
    },
    filters={
        v1.id: ["> 0", "< 200"],
    }
)

enriched_dataset_gdf.head(20)

2020-03-13 13:19:25,480 - DEBUG - _prepare_data in 0.0 s
2020-03-13 13:19:28,968 - DEBUG - _upload_data in 3.49 s
2020-03-13 13:19:36,482 - DEBUG - _execute_enrichment in 7.51 s
2020-03-13 13:19:36,507 - DEBUG - _enrich in 11.03 s


Unnamed: 0,OBJECTID,FULLTRACTID,TRACTID,geometry,poverty,sum_no_car,avg_no_car
0,1,51013102901,102901,"POLYGON ((-77.09099 38.84516, -77.08957 38.844...",273.0,0.602908,75.25
