In [1]:
# set up for continuous integration
# can be safely ignored by the user
import os

from cities.queries.fips_query import CTFipsQuery
# proper imports
from cities.utils.data_grabber import CTDataGrabberCSV, list_available_features

# Smoke-test mode is enabled only when the `CI` environment variable is
# present, so continuous-integration runs stay fast while ordinary runs use
# the full sample size. (A hard-coded `smoke_test = True` override used to
# follow this line and forced the small sample everywhere.)
smoke_test = "CI" in os.environ
sample_size = 10 if smoke_test else 100

#### How to Use `CTFipsQuery` to Ask Similarity Questions

This notebook demonstrates how to use the `CTFipsQuery` class, which is similar to `FipsQuery` but operates at the census tract level rather than the county level. All functionality remains the same. The key differences are as follows:

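- Census tract FIPS codes are longer than county FIPS codes: they have 11 digits. When a code is passed as an integer, a leading zero is dropped, so some codes (such as the one in the first example below) appear with 10 digits.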
- The `CTFipsQuery` class accepts the `ct_time_period` argument, a string that can be either 'pre_2020' (the default) or 'post_2020'. It indicates the time period for which the data should be retrieved; the distinction is needed because Census tracts are defined differently before and after 2020.
- `GeoName` values are not unique: each one names the county in which the census tract is located, and every county contains multiple census tracts.

For additional guidance, please refer to the `similarity_demo.ipynb` notebook.


In [2]:
# Query a single census tract (FIPS passed as an integer) on the
# "population" outcome, then compare its outcome to other tracts using the
# sample size chosen in the setup cell.
f = CTFipsQuery(
    1003010903,
    "population",
)
f.compare_my_outcome_to_others(
    sample_size=sample_size,
    range_multiplier=10,
)

In this example, note the use of the `post_2020` time period.

In [9]:
# Query against the post-2020 tract definitions, weighting the "urbanicity"
# feature group twice as heavily as "population".
f = CTFipsQuery(
    34037374200,
    "population",
    feature_groups_with_weights=dict(population=1, urbanicity=2),
    ct_time_period="post_2020",
)
f.find_euclidean_kins()
f.plot_weights()

# Show only the first rows of the kins table rather than the whole frame.
display(f.euclidean_kins.head())

Unnamed: 0,GeoFIPS,GeoName,2020,2021,2022,rural_housing_urbanicity,urban_housing_urbanicity,rural_pop_urbanicity,urban_pop_urbanicity,rural_pop_prct_urbanicity,rural_housing_prct_urbanicity,distance to 34037374200,percentile
0,34037374200,"Sussex, NJ (CT)",2217.0,2315.0,2318.0,566.0,475.0,1016.0,1240.0,0.450355,0.543708,0.0,16.34
1,42079217001,"Luzerne, PA (CT)",2388.0,2368.0,2357.0,620.0,603.0,1005.0,1334.0,0.429671,0.50695,0.02395,17.03
2,37081016600,"Guilford, NC (CT)",2163.0,2137.0,2241.0,558.0,505.0,1217.0,1235.0,0.49633,0.524929,0.024335,14.81
3,17091010201,"Kankakee, IL (CT)",2192.0,2273.0,2299.0,420.0,430.0,1055.0,1046.0,0.502142,0.494118,0.029972,15.93
4,12103027309,"Pinellas, FL (CT)",2298.0,2307.0,2357.0,484.0,415.0,1136.0,956.0,0.543021,0.538376,0.031278,17.03


In [10]:
# Draw the kins plot for the query built in the previous cell. Binding the
# return value to `fig` keeps it available for later use and stops the cell
# from echoing it as its last expression.
fig = f.show_kins_plot()

In [6]:
# Query with explicit tuning arguments. `ct_time_period` is not passed, so
# the class default is used.
# NOTE(review): the kins table rendered further down has only 2010-2019
# columns, so the 2003 lower bound of `outcome_comparison_period` seems to
# predate the available tract data — confirm whether (2010, 2019) is meant.
f = CTFipsQuery(
    42021010801,
    "population",
    lag=0,
    top=10,
    time_decay=1.06,  # controls how strongly earlier years are discounted
    outcome_comparison_period=(2003, 2019),
    outcome_percentile_range=(40, 100),
)
f.find_euclidean_kins()
f.plot_weights()

In [7]:
# `f.find_euclidean_kins()` already ran in the previous cell and `f` has not
# been modified since, so the search is not repeated here; this cell only
# inspects the stored result.
# In the rendered table the first row is the queried tract itself
# (distance 0) and the remaining rows are ordered by increasing distance.
f.euclidean_kins

Unnamed: 0,GeoFIPS,GeoName,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,distance to 42021010801,percentile
0,42021010801,"Cambria, PA (CT)",5414.0,5897.0,5653.0,5736.0,5732.0,5635.0,5359.0,5227.0,5186.0,5145.0,0.000000,68.64
0,42021010801,"Cambria, PA (CT)",5414.0,5897.0,5653.0,5736.0,5732.0,5635.0,5359.0,5227.0,5186.0,5145.0,0.000000,68.64
1,17031808702,"Cook, IL (CT)",5445.0,5771.0,5648.0,5647.0,5601.0,5498.0,5370.0,5351.0,5255.0,5225.0,0.014702,69.84
2,21211040301,"Shelby, KY (CT)",5481.0,5815.0,5664.0,5805.0,5530.0,5495.0,5370.0,5238.0,5302.0,5099.0,0.015099,67.91
3,51059482504,"Fairfax, VA (CT)",5544.0,5747.0,5579.0,5698.0,5668.0,5531.0,5282.0,5364.0,5191.0,5045.0,0.015308,67.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72183,48039660602,"Brazoria, TX (CT)",20882.0,24648.0,27406.0,30933.0,31346.0,33655.0,37455.0,40883.0,42978.0,49119.0,1.774765,100.00
72184,12115002712,"Sarasota, FL (CT)",29470.0,31809.0,32883.0,33520.0,34138.0,35527.0,35518.0,36147.0,36185.0,35818.0,1.788631,99.99
72185,48157672900,"Fort Bend, TX (CT)",22129.0,25517.0,29144.0,31930.0,34898.0,38137.0,43074.0,47744.0,51536.0,59947.0,1.789435,100.00
72186,6073018700,"San Diego, CA (CT)",39248.0,36880.0,39143.0,39429.0,39017.0,39454.0,40616.0,40402.0,38932.0,38754.0,1.806544,99.99


In [8]:
# Draw the kins plot for the most recent query (`f` from the cells above).
# Binding the return value to `fig` stops the cell from echoing it as its
# last expression.
fig = f.show_kins_plot()