In [1]:
# set up for continuous integration
# can be safely ignored by the user
import os

from cities.queries.fips_query import MSAFipsQuery
# proper imports
from cities.utils.data_grabber import MSADataGrabber, list_available_features

# Smoke-test mode is active only when the "CI" environment variable is present
# (i.e. under continuous integration). It shrinks the sample so the notebook
# executes quickly; interactive runs use the full sample size.
smoke_test = "CI" in os.environ
sample_size = 10 if smoke_test else 100

#### How to Use `MSAFipsQuery` to Ask Similarity Questions

This notebook demonstrates how to use the `MSAFipsQuery` class, the counterpart of `FipsQuery` that operates at the level of metropolitan statistical areas (MSAs) rather than counties. All functionality remains the same; the only difference is that, for now, far fewer variables are available for metropolitan statistical areas.

For complementary guidance, see the `similarity_demo.ipynb` notebook.

In [5]:
# Build a query for MSA 10180 (listed as "Abilene, TX (MA)" in the tables below).
# The second argument is presumably the outcome variable of interest — confirm
# against the MSAFipsQuery signature.
f = MSAFipsQuery(
    10180,
    "gdp_ma",
)

# sample_size is defined in the setup cell at the top of the notebook.
f.compare_my_outcome_to_others(
    sample_size=sample_size,
    range_multiplier=10,
)

In [7]:
# Same MSA and outcome as before, now with explicit weights for three feature
# groups. NOTE(review): larger weights presumably give a group more influence
# on the distance — confirm against the library documentation.
f = MSAFipsQuery(
    10180,
    "gdp_ma",
    feature_groups_with_weights={
        "gdp_ma": 1,
        "population_ma": 2,
        "ethnic_composition_ma": 3,
    },
)
f.find_euclidean_kins()
f.plot_weights()

# Only the first rows are displayed to keep the output short.
display(f.euclidean_kins.head())

Unnamed: 0,GeoFIPS,GeoName,2017,2018,2019,2020,2021,1993_population_ma,1994_population_ma,1995_population_ma,...,cuban_ethnic_composition_ma,other_hispanic_latino_ethnic_composition_ma,white_ethnic_composition_ma,black_african_american_ethnic_composition_ma,american_indian_alaska_native_ethnic_composition_ma,asian_ethnic_composition_ma,native_hawaiian_other_pacific_islander_ethnic_composition_ma,other_race_races_ethnic_composition_ma,distance to 10180,percentile
0,10180,"Abilene, TX (MA)",6827152.0,7141273.0,7426609.0,7354374.0,7573774.0,152909.0,153779.0,156097.0,...,0.001116,0.009321,0.315565,0.036652,0.00071,0.009498,0.000165,0.015208,0.0,30.73
1,48660,"Wichita Falls, TX (MA)",5924040.0,6051825.0,6267961.0,6150157.0,6170319.0,141403.0,143986.0,147740.0,...,0.000769,0.007348,0.335207,0.044527,0.002361,0.00922,0.000371,0.016133,0.046694,19.79
2,40420,"Rockford, IL (MA)",15826773.0,16253918.0,16161341.0,14490425.0,14904989.0,298582.0,302172.0,305671.0,...,0.00109,0.005922,0.339524,0.055914,0.000416,0.012594,0.000127,0.016439,0.082333,55.21
3,48300,"Wenatchee, WA (MA)",6353998.0,6436263.0,6483516.0,6407321.0,6669335.0,86027.0,88755.0,91676.0,...,0.000198,0.013911,0.32643,0.002266,0.003339,0.003793,0.000627,0.013993,0.084552,23.44
4,46300,"Twin Falls, ID (MA)",4697778.0,4880650.0,4987157.0,5130187.0,5329506.0,73402.0,75368.0,76590.0,...,0.000115,0.010233,0.365399,0.002641,0.001144,0.006413,0.000411,0.016651,0.086016,13.02


In [8]:
# Plot the kins of the query object built in the previous cell; the return
# value is bound to `fig` (presumably a figure object — confirm).
fig = f.show_kins_plot()

In [9]:
# No outcome variable is passed here: the query is built from feature groups
# only (population and industry), each with its own weight.
f = MSAFipsQuery(
    10180,
    feature_groups_with_weights={
        "population_ma": 4,
        "industry_ma": 3,
    },
)
f.find_euclidean_kins()
f.plot_weights()

# Display the whole kins table (the notebook truncates the middle rows).
display(f.euclidean_kins)

Unnamed: 0,GeoFIPS,GeoName,1993_population_ma,1994_population_ma,1995_population_ma,1996_population_ma,1997_population_ma,1998_population_ma,1999_population_ma,2000_population_ma,...,retail_trade_industry_ma,transport_utilities_industry_ma,information_industry_ma,finance_real_estate_industry_ma,prof_sci_mgmt_admin_industry_ma,education_health_industry_ma,arts_entertainment_industry_ma,other_services_industry_ma,public_admin_industry_ma,distance to 10180
0,10180,"Abilene, TX (MA)",152909.0,153779.0,156097.0,156351.0,157405.0,158264.0,159755.0,160288.0,...,0.122735,0.056986,0.010446,0.070761,0.069715,0.285989,0.083910,0.049015,0.062534,0.000000
101,20260,"Duluth, MN-WI (MA)",284052.0,284517.0,282567.0,283317.0,284541.0,284526.0,285719.0,286858.0,...,0.114800,0.059173,0.011567,0.057350,0.070733,0.290105,0.100776,0.044181,0.052343,0.135358
274,38540,"Pocatello, ID (MA)",78268.0,80153.0,81323.0,81721.0,82315.0,82642.0,83150.0,83212.0,...,0.110768,0.062569,0.012368,0.070131,0.071992,0.271196,0.087988,0.048670,0.058023,0.153648
355,46540,"Utica-Rome, NY (MA)",319608.0,318002.0,311447.0,305700.0,302681.0,300619.0,299874.0,299597.0,...,0.118046,0.044986,0.011222,0.073083,0.080698,0.293815,0.085012,0.043259,0.065170,0.155301
56,16060,"Carbondale-Marion, IL (MA)",132350.0,133285.0,133889.0,134097.0,134438.0,134607.0,134280.0,133748.0,...,0.119076,0.043447,0.013003,0.059134,0.058713,0.328818,0.089491,0.048676,0.060082,0.166863
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
365,47900,"Washington-Arlington-Alexandria, DC-VA-MD-WV (MA)",4374165.0,4434176.0,4488385.0,4549151.0,4614434.0,4684826.0,4776083.0,4875923.0,...,0.077121,0.040376,0.024576,0.061319,0.218669,0.194942,0.080111,0.063384,0.130387,1.254338
86,19100,"Dallas-Fort Worth-Arlington, TX (MA)",4284857.0,4388097.0,4496174.0,4622564.0,4765255.0,4912807.0,5054705.0,5187122.0,...,0.111899,0.069830,0.021165,0.091406,0.136251,0.194062,0.085205,0.049145,0.028729,1.281197
68,16980,"Chicago-Naperville-Elgin, IL-IN-WI (MA)",8512911.0,8605735.0,8693383.0,8782253.0,8862719.0,8949190.0,9035654.0,9113234.0,...,0.100905,0.070246,0.018715,0.077272,0.138506,0.224894,0.087793,0.045882,0.033436,1.611454
210,31080,"Los Angeles-Long Beach-Anaheim, CA (MA)",11632798.0,11663207.0,11692693.0,11771038.0,11915815.0,12086776.0,12253223.0,12392704.0,...,0.099650,0.058208,0.037867,0.066327,0.136879,0.210920,0.106094,0.054437,0.034495,1.901200


In [13]:
# Query for MSA 46540 (listed as "Utica-Rome, NY (MA)" in the output below)
# using population as the second positional argument, with the optional
# tuning parameters spelled out explicitly.
# NOTE(review): the meanings suggested below for lag/top and the two outcome_*
# arguments are inferred from their names — confirm against MSAFipsQuery.
f = MSAFipsQuery(
    46540,
    "population_ma",
    lag=0,  # presumably the time shift applied to the outcome — TODO confirm
    top=10,  # presumably how many closest kins to highlight — TODO confirm
    time_decay=1.06,  # this is how you regulate how much you discount the past
    outcome_comparison_period=(2003, 2019),  # presumably (start year, end year)
    outcome_percentile_range=(40, 100),  # presumably (lower, upper) percentile bounds
)
f.find_euclidean_kins()
f.plot_weights()

In [14]:
# NOTE(review): find_euclidean_kins() was already called on this same query
# object in the previous cell, so this second call looks redundant — confirm
# that the method is idempotent and drop it if so.
f.find_euclidean_kins()
# Bare last expression so the notebook renders the kins table.
# NOTE(review): the rendered table lists the query MSA (46540) twice at index 0
# — TODO confirm whether that duplication is intended.
f.euclidean_kins

Unnamed: 0,GeoFIPS,GeoName,1993,1994,1995,1996,1997,1998,1999,2000,...,2014,2015,2016,2017,2018,2019,2020,2021,distance to 46540,percentile
0,46540,"Utica-Rome, NY (MA)",319608.0,318002.0,311447.0,305700.0,302681.0,300619.0,299874.0,299597.0,...,297583.0,295877.0,295057.0,294784.0,294167.0,292926.0,291599.0,290024.0,0.000000,55.21
0,46540,"Utica-Rome, NY (MA)",319608.0,318002.0,311447.0,305700.0,302681.0,300619.0,299874.0,299597.0,...,297583.0,295877.0,295057.0,294784.0,294167.0,292926.0,291599.0,290024.0,0.000000,55.21
1,20260,"Duluth, MN-WI (MA)",284052.0,284517.0,282567.0,283317.0,284541.0,284526.0,285719.0,286858.0,...,291503.0,291279.0,291213.0,291236.0,291834.0,291727.0,291518.0,290822.0,0.001561,55.47
2,28700,"Kingsport-Bristol, TN-VA (MA)",285142.0,286625.0,289762.0,292153.0,294250.0,296333.0,297863.0,298534.0,...,307154.0,306220.0,305989.0,306443.0,306618.0,307208.0,307716.0,308794.0,0.002088,57.03
3,40220,"Roanoke, VA (MA)",276643.0,279176.0,280938.0,282915.0,284593.0,285762.0,287193.0,288699.0,...,313438.0,314023.0,313616.0,314002.0,314339.0,314933.0,315205.0,314712.0,0.002993,57.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
379,26420,"Houston-The Woodlands-Sugar Land, TX (MA)",4075564.0,4153355.0,4227574.0,4314589.0,4407210.0,4513913.0,4622857.0,4717507.0,...,6493835.0,6663596.0,6797324.0,6888064.0,6962207.0,7048589.0,7140749.0,7215837.0,1.132148,98.70
380,19100,"Dallas-Fort Worth-Arlington, TX (MA)",4284857.0,4388097.0,4496174.0,4622564.0,4765255.0,4912807.0,5054705.0,5187122.0,...,6877353.0,7026835.0,7176620.0,7315457.0,7432101.0,7545583.0,7665875.0,7773289.0,1.198064,98.96
381,16980,"Chicago-Naperville-Elgin, IL-IN-WI (MA)",8512911.0,8605735.0,8693383.0,8782253.0,8862719.0,8949190.0,9035654.0,9113234.0,...,9643982.0,9655619.0,9656432.0,9655887.0,9646352.0,9634076.0,9600822.0,9519538.0,1.543098,99.22
382,31080,"Los Angeles-Long Beach-Anaheim, CA (MA)",11632798.0,11663207.0,11692693.0,11771038.0,11915815.0,12086776.0,12253223.0,12392704.0,...,13184705.0,13256931.0,13295906.0,13309287.0,13285814.0,13236839.0,13177752.0,12972847.0,1.805254,99.48


In [15]:
# Plot the kins for the parameterised population query built above; the return
# value is bound to `fig` (presumably a figure object — confirm).
fig = f.show_kins_plot()