In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plotly import graph_objs as go

from cities.utils.data_grabber import DataGrabber
from cities.queries.fips_query import FipsQuery

#### Use case: I just want a rough idea

You want to know where your jurisdiction stands relative to all the others in the country. You also want to know the proportion of locations where the most recent value of the outcome is lower than yours.

In [2]:
# Build a query for FIPS 42001 on the "gdp" outcome, then compare that
# location's outcome against the other locations.
f = FipsQuery(42001, "gdp")
f.compare_my_outcome_to_others(sample_size=100, range_multiplier=10)


#### Use case: similarity in outcome patterns

You want to find the top five jurisdictions with similar GDP change patterns. You value the years closest to the present a bit more, but you also want only the years 2003-2019 to be used for the outcome comparison.

In [3]:
# Query FIPS 42001 on "gdp": keep the top 5 matches, use no lag, and restrict
# the outcome comparison to the years 2003-2019.
f = FipsQuery(
    42001,
    "gdp",
    lag=0,
    top=5,
    time_decay=1.06,
    outcome_comparison_period=(2003, 2019),
    outcome_percentile_range=(40, 100),
)
f.find_euclidean_kins()

# You can inspect the weights resulting from your time_decay setting.
# plot_weights() is called directly instead of being wrapped in display():
# the recorded cell output shows display() printing a bare `None`, i.e. the
# call hands back nothing worth displaying. As the last expression of the
# cell, any figure it might return would still be rendered.
f.plot_weights()

None

In [4]:
# You can find the distances and inspect the resulting dataframe, which
# contains the ranking (a "distance to 42001" column and a "percentile"
# column next to the yearly values).
# NOTE(review): find_euclidean_kins() was already called on this same `f` in
# the previous cell, so this second call looks redundant -- confirm before
# removing it.
f.find_euclidean_kins()
display(f.euclidean_kins)

Unnamed: 0,GeoFIPS,GeoName,2001,2002,2003,2004,2005,2006,2007,2008,...,2014,2015,2016,2017,2018,2019,2020,2021,distance to 42001,percentile
0,42001,"Adams, PA",78.619,84.689,84.475,85.860,89.556,93.508,93.154,95.379,...,100.006,102.509,103.708,108.411,105.390,103.440,97.678,102.664,0.000000,35.14
1,24013,"Carroll, MD",76.152,80.700,82.853,86.013,89.398,95.381,95.463,97.823,...,99.510,101.215,101.568,106.456,104.838,105.452,101.050,105.298,0.039596,41.95
2,28105,"Oktibbeha, MS",83.279,85.143,83.864,85.565,91.160,93.433,93.899,98.855,...,100.858,100.069,106.639,105.926,106.941,107.105,108.225,109.357,0.055532,51.43
4,36103,"Suffolk, NY",77.583,80.337,83.711,87.603,88.109,90.437,90.857,94.202,...,100.099,101.102,102.505,103.283,103.288,105.925,101.523,106.687,0.056833,44.94
5,25027,"Worcester, MA",83.429,84.954,88.210,89.841,90.625,92.234,93.830,96.316,...,101.656,103.801,104.461,104.997,107.212,107.542,104.070,109.597,0.059123,51.98
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3075,48127,"Dimmit, TX",9.381,10.070,10.269,10.101,10.527,13.382,13.424,12.854,...,208.912,253.553,226.448,235.298,230.138,256.745,206.367,159.131,1.150028,96.27
3076,48389,"Reeves, TX",46.003,49.290,44.960,41.682,39.742,41.332,41.009,41.389,...,212.473,334.183,416.768,607.235,843.967,1226.531,1136.483,942.206,1.163298,99.90
3078,48301,"Loving, TX",37.050,40.836,40.690,49.561,81.009,110.338,149.312,114.503,...,186.286,306.333,551.251,802.739,1064.149,1388.130,1453.540,1080.924,1.182778,99.94
3080,48283,"La Salle, TX",6.638,6.679,7.065,6.727,7.541,8.729,8.254,7.900,...,224.270,255.409,208.985,202.036,201.509,219.118,177.489,159.947,1.227202,96.40


In [5]:
# You can plot the few most similar locations.
# The return value is kept in `fig` (presumably a figure object -- confirm).
fig = f.show_kins_plot()

#### Use case: similarity in outcome patterns and some other features

Say you want to include historical population patterns in your similarity ranking. You also want to pay a bit more attention to older data points. And you can now set weights to negative values to indicate that you care about dissimilarity in that feature.

In [6]:
# Query FIPS 1007 on "gdp", this time with explicit feature-group weights.
f = FipsQuery(
    1007,
    outcome_var="gdp",
    # A negative weight signals that you care about dissimilarity in that
    # feature group; a positive weight signals similarity.
    # (Original author's note: with one feature group only, weights 1-4
    # won't make a difference.)
    feature_groups_with_weights={"gdp": -2, "population": 1},
    lag=0,
    top=5,
    time_decay=1.03,
)
f.find_euclidean_kins()

# You can still inspect the resulting weighting.
# plot_weights() is called directly instead of being wrapped in display():
# the recorded cell output shows display() printing a bare `None`. As the last
# expression of the cell, any figure it might return would still be rendered.
f.plot_weights()

None

In [7]:
# You still have access to the distances and the ranking.
# This time the dataframe has more columns: besides the yearly outcome
# values it also carries the feature columns (e.g. "2014_population").
display(f.euclidean_kins)


Unnamed: 0,GeoFIPS,GeoName,2001,2002,2003,2004,2005,2006,2007,2008,...,2014_population,2015_population,2016_population,2017_population,2018_population,2019_population,2020_population,2021_population,distance to 1007,percentile
0,1007,"Bibb, AL",80.443,81.527,85.124,89.317,88.782,89.597,95.308,94.745,...,22586.0,22607.0,22654.0,22606.0,22383.0,22405.0,22223.0,22477.0,0.000000,47.86
1,48109,"Culberson, TX",35.264,37.743,36.255,38.339,40.177,41.247,42.368,53.349,...,2301.0,2275.0,2244.0,2259.0,2212.0,2186.0,2193.0,2193.0,1.947278,99.97
2,48389,"Reeves, TX",46.003,49.290,44.960,41.682,39.742,41.332,41.009,41.389,...,14614.0,14936.0,14484.0,14314.0,14526.0,14847.0,14730.0,14487.0,1.968913,99.90
3,48283,"La Salle, TX",6.638,6.679,7.065,6.727,7.541,8.729,8.254,7.900,...,7115.0,7175.0,7057.0,6916.0,6808.0,6763.0,6642.0,6670.0,2.070731,96.40
4,48255,"Karnes, TX",6.498,6.891,7.331,6.759,6.537,6.290,6.662,6.937,...,14569.0,14976.0,14997.0,14990.0,15028.0,14605.0,14721.0,14754.0,2.076390,98.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3077,6073,"San Diego, CA",79.703,83.170,87.449,92.400,97.137,99.354,100.938,100.267,...,3234658.0,3262566.0,3283586.0,3293575.0,3303463.0,3297959.0,3297252.0,3286069.0,2.924300,80.21
3078,4013,"Maricopa, AZ",77.463,80.415,85.793,90.349,97.615,102.729,105.313,103.632,...,4040171.0,4105747.0,4174844.0,4231511.0,4292576.0,4363816.0,4438342.0,4496588.0,2.950617,87.18
3079,48201,"Harris, TX",73.137,72.696,72.847,79.658,80.626,87.278,94.310,91.961,...,4452976.0,4553991.0,4619635.0,4651955.0,4672445.0,4704042.0,4732491.0,4728030.0,2.952810,51.14
3080,17031,"Cook, IL",95.406,94.886,95.455,97.260,99.315,101.320,101.826,99.238,...,5320233.0,5324961.0,5320293.0,5311621.0,5297956.0,5287099.0,5262741.0,5173146.0,3.019405,52.43


In [8]:
# You can still plot the few top-ranked locations.
# The return value is kept in `fig` (presumably a figure object -- confirm).
fig = f.show_kins_plot()

#### Use case: similarity of outcome with a lag

You care about similarity of outcome variables, but your question now is: which other locations were, 2 years ago, in a place similar to where you are now, when it comes to the outcome variable and the features?


In [9]:
# Same location and outcome as before, but with lag=2: the question is which
# other locations were, two years ago, where this one is now.
f = FipsQuery(42001, "gdp", lag=2, top=5, time_decay=1.06)
f.find_euclidean_kins()

# Inspect the weights. plot_weights() is called directly instead of being
# wrapped in display(): the recorded cell output shows display() printing a
# bare `None`. As the last expression of the cell, any figure it might return
# would still be rendered.
f.plot_weights()

None

In [10]:
# Compute the distances for the lagged query and show the ranking dataframe
# (it includes a "distance to 42001" column and a "percentile" column).
# NOTE(review): find_euclidean_kins() was already called on this same `f` in
# the previous cell, so this second call looks redundant -- confirm before
# removing it.
f.find_euclidean_kins()
display(f.euclidean_kins)

Unnamed: 0,GeoFIPS,GeoName,2001,2002,2003,2004,2005,2006,2007,2008,...,2014,2015,2016,2017,2018,2019,2020,2021,distance to 42001,percentile
0,42001,"Adams, PA",78.619,84.689,84.475,85.860,89.556,93.508,93.154,95.379,...,100.006,102.509,103.708,108.411,105.390,103.440,97.678,102.664,0.000000,35.14
1,20161,"Riley, KS",78.822,79.879,83.156,85.898,86.815,89.029,93.378,97.320,...,102.355,102.888,103.512,102.037,100.206,100.843,100.186,101.947,0.065257,33.32
2,17001,"Adams, IL",79.654,81.654,86.491,90.160,91.589,93.582,93.367,95.001,...,102.400,101.760,104.586,97.057,99.635,98.477,93.510,100.614,0.066060,30.63
3,33019,"Sullivan, NH",87.624,93.209,91.829,96.587,97.861,95.699,97.592,92.842,...,102.845,106.400,101.283,101.850,99.204,99.205,101.346,109.571,0.066776,51.82
4,13171,"Lamar, GA",83.191,81.883,83.438,84.551,93.216,93.727,93.967,98.477,...,99.373,103.592,103.735,104.157,101.036,105.494,110.219,114.258,0.067939,62.52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3077,48389,"Reeves, TX",46.003,49.290,44.960,41.682,39.742,41.332,41.009,41.389,...,212.473,334.183,416.768,607.235,843.967,1226.531,1136.483,942.206,1.221964,99.90
3078,48127,"Dimmit, TX",9.381,10.070,10.269,10.101,10.527,13.382,13.424,12.854,...,208.912,253.553,226.448,235.298,230.138,256.745,206.367,159.131,1.238983,96.27
3079,48301,"Loving, TX",37.050,40.836,40.690,49.561,81.009,110.338,149.312,114.503,...,186.286,306.333,551.251,802.739,1064.149,1388.130,1453.540,1080.924,1.266906,99.94
3080,48283,"La Salle, TX",6.638,6.679,7.065,6.727,7.541,8.729,8.254,7.900,...,224.270,255.409,208.985,202.036,201.509,219.118,177.489,159.947,1.314292,96.40


In [11]:
# Plot the few top-ranked locations for the lagged query.
# The return value is kept in `fig` (presumably a figure object -- confirm).
fig = f.show_kins_plot()

#### Use case: similarity of outcome and other features with a lag

In [12]:
# Query FIPS 20003 on "gdp" with a lag of 3 and the top 10 matches.
f = FipsQuery(
    20003,
    outcome_var="gdp",
    # gdp carries weight 0 and population weight 4.
    # NOTE(review): presumably a zero weight takes gdp out of the distance
    # computation altogether -- confirm against FipsQuery.
    feature_groups_with_weights={"gdp": 0, "population": 4},
    lag=3,
    top=10,
    time_decay=1.03,
)
f.find_euclidean_kins()

# Inspect the weights. plot_weights() is called directly instead of being
# wrapped in display(): the recorded cell output shows display() printing a
# bare `None`. As the last expression of the cell, any figure it might return
# would still be rendered.
f.plot_weights()



None

In [13]:
# If you want the full dataframe with distances, you can still get it by
# uncommenting the two lines below; it is just boring to print it all over
# again.
#f.find_euclidean_kins()
#display(f.euclidean_kins)

# Or you can plot the few top-ranked locations.
# The return value is kept in `fig` (presumably a figure object -- confirm).
fig = f.show_kins_plot()