In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plotly import graph_objs as go

from cities.utils.data_grabber import DataGrabber
from cities.queries.fips_query import FipsQuery

#### Use case: I just want a rough idea

You want to know where your jurisdiction stands relative to all the others in the country. You also want to know the rate 
of locations where the most recent value of the outcome is lower than yours.

In [2]:
# Build a query for FIPS 42001 on the "gdp" outcome and compare its outcome
# with other locations.
# NOTE(review): the exact meaning of sample_size / range_multiplier is defined
# in FipsQuery.compare_my_outcome_to_others -- confirm there before tuning.
f = FipsQuery(42001, "gdp")
f.compare_my_outcome_to_others(
    sample_size=100,
    range_multiplier=10,
)


#### Use case: similarity in outcome patterns

You want to find the top five jurisdictions with similar GDP change patterns. You weight the years closest to the present a bit more.

In [3]:
# Look for the top five locations whose "gdp" pattern over 2003-2019 is most
# similar to FIPS 42001, with no lag and a time-decay factor of 1.06.
f = FipsQuery(
    42001,
    "gdp",
    lag=0,
    top=5,
    time_decay=1.06,
    outcome_comparison_period=(2003, 2019),
)
f.find_euclidean_kins()

# You can inspect the weights resulting from your time_decay setting.
# plot_weights() returns None (the saved output of this cell ends with a stray
# "None"), so call it directly instead of wrapping it in display().
f.plot_weights()

I will survive
Index(['GeoFIPS', 'GeoName', '2001', '2002', '2003', '2004', '2005', '2006',
       '2007', '2008', '2009', '2010', '2011', '2013', '2014', '2015', '2016',
       '2017', '2018', '2019', '2020', '2021'],
      dtype='object')


None

In [5]:
# You can compute the distances and inspect the resulting dataframe, which
# holds the ranking (one row per location, with a "distance to <fips>" column
# as the last column in the saved output).
# NOTE(review): find_euclidean_kins() was already called in the cell that
# built the query; this second call looks redundant -- confirm it is needed.
f.find_euclidean_kins()
display(f.euclidean_kins)

Unnamed: 0,GeoFIPS,GeoName,2001,2002,2003,2004,2005,2006,2007,2008,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,distance to 42001
2235,42001,"Adams, PA",78.619,84.689,84.475,85.860,89.556,93.508,93.154,95.379,...,100.080,100.006,102.509,103.708,108.411,105.390,103.440,97.678,102.664,0.000000
540,15003,"Honolulu, HI",77.518,79.162,82.883,87.189,91.507,93.039,94.927,96.471,...,101.365,102.008,104.781,106.434,108.409,108.672,107.108,96.323,101.427,0.059562
1189,24013,"Carroll, MD",76.152,80.700,82.853,86.013,89.398,95.381,95.463,97.823,...,100.226,99.510,101.215,101.568,106.456,104.838,105.452,101.050,105.298,0.063390
2274,42079,"Luzerne, PA",88.892,88.600,90.395,92.882,92.945,92.749,95.542,96.382,...,100.106,100.575,103.128,104.050,102.777,102.825,104.782,98.374,102.625,0.071654
959,20161,"Riley, KS",78.822,79.879,83.156,85.898,86.815,89.029,93.378,97.320,...,99.925,102.355,102.888,103.512,102.037,100.206,100.843,100.186,101.947,0.073161
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2641,48255,"Karnes, TX",6.498,6.891,7.331,6.759,6.537,6.290,6.662,6.937,...,137.426,170.877,210.901,200.421,221.260,217.623,236.985,204.295,183.520,1.259313
675,17179,"Tazewell, IL",42.031,40.832,41.214,40.862,46.054,51.686,57.682,57.881,...,74.577,72.169,58.943,50.605,40.879,38.782,36.135,33.239,35.584,1.317611
2708,48389,"Reeves, TX",46.003,49.290,44.960,41.682,39.742,41.332,41.009,41.389,...,121.888,212.473,334.183,416.768,607.235,843.967,1226.531,1136.483,942.206,1.329836
2568,48109,"Culberson, TX",35.264,37.743,36.255,38.339,40.177,41.247,42.368,53.349,...,144.271,258.311,404.721,546.092,673.326,712.048,986.320,1238.206,1185.010,1.384473


In [22]:
# You can plot the few most similar locations.
# Relies on `f` from the outcome-similarity query above having already run
# find_euclidean_kins().
f.plot_kins()

#### Use case: similarity in outcome patterns and some other features

Say you want to include historical population patterns in your similarity ranking. You also want to pay a bit more attention to older data points. And you can now set weights to negative values to indicate that you care about dissimilarity in that feature.

In [23]:
# Rank locations against FIPS 1007 using two feature groups at once.
f = FipsQuery(
    1007,
    outcome_var="gdp",
    # Negative weight on "gdp": we care about dissimilarity in that group;
    # "population" keeps a positive weight of 1.
    # (If only one feature group were passed, choosing a weight anywhere in
    # 1-4 would not make a difference.)
    feature_groups_with_weights={"gdp": -2, "population": 1},
    lag=0,
    top=5,
    time_decay=1.03,
)
f.find_euclidean_kins()

# You can still inspect the resulting weighting.
# plot_weights() returns None (the saved output of this cell is a bare
# "None"), so call it directly instead of wrapping it in display().
f.plot_weights()

None

In [31]:
# You still have access to the distances and the ranking; this time the
# dataframe has more columns (the saved output includes *_population columns).
# NOTE(review): the saved output below is headed "distance to 20003", while
# the query built above uses FIPS 1007 -- the output is stale (cells were run
# out of order). Restart the kernel and run all cells before sharing.
display(f.euclidean_kins)


Unnamed: 0,GeoFIPS,GeoName,2001,2002,2003,2004,2005,2006,2007,2008,...,2013_population,2014_population,2015_population,2016_population,2017_population,2018_population,2019_population,2020_population,2021_population,distance to 20003
880,20003,"Anderson, KS",74.590,78.511,76.836,76.589,76.548,75.401,81.577,91.311,...,7829.0,7848.0,7777.0,7770.0,7783.0,7782.0,7813.0,7850.0,7778.0,0.000000
169,5129,"Searcy, AR",90.528,97.992,101.146,104.189,106.461,112.705,106.153,100.018,...,7988.0,7920.0,7830.0,7951.0,7917.0,7886.0,7848.0,7827.0,7880.0,0.002230
900,20043,"Doniphan, KS",96.019,85.880,80.674,100.728,106.243,110.814,132.605,128.517,...,7868.0,7828.0,7776.0,7697.0,7637.0,7613.0,7562.0,7469.0,7471.0,0.003202
469,13181,"Lincoln, GA",105.269,104.599,106.313,102.832,109.490,105.428,118.190,112.823,...,7634.0,7514.0,7558.0,7663.0,7632.0,7667.0,7677.0,7704.0,7749.0,0.003351
847,19135,"Monroe, IA",80.830,78.299,81.980,89.798,90.345,95.037,102.527,114.486,...,7913.0,7874.0,7822.0,7722.0,7651.0,7591.0,7517.0,7582.0,7610.0,0.003700
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,6073,"San Diego, CA",79.703,83.170,87.449,92.400,97.137,99.354,100.938,100.267,...,3199707.0,3234658.0,3262566.0,3283586.0,3293575.0,3303463.0,3297959.0,3297252.0,3286069.0,2.448110
97,4013,"Maricopa, AZ",77.463,80.415,85.793,90.349,97.615,102.729,105.313,103.632,...,3977518.0,4040171.0,4105747.0,4174844.0,4231511.0,4292576.0,4363816.0,4438342.0,4496588.0,2.583485
2614,48201,"Harris, TX",73.137,72.696,72.847,79.658,80.626,87.278,94.310,91.961,...,4350992.0,4452976.0,4553991.0,4619635.0,4651955.0,4672445.0,4704042.0,4732491.0,4728030.0,2.652626
601,17031,"Cook, IL",95.406,94.886,95.455,97.260,99.315,101.320,101.826,99.238,...,5303041.0,5320233.0,5324961.0,5320293.0,5311621.0,5297956.0,5287099.0,5262741.0,5173146.0,2.907444


In [25]:
# You can still plot the few top-ranked locations for the current query `f`.
f.plot_kins()

#### Use case: similarity of outcome with a lag

You care about similarity of outcome variables, but your question now is: which other locations were, two years ago, in a place similar to where I am now, when it comes to the outcome variable and the features?


In [26]:
# Same outcome-only query for FIPS 42001, but with a lag of 2 so that other
# locations are compared as they were two years earlier.
f = FipsQuery(42001, "gdp", lag=2, top=5, time_decay=1.06)
f.find_euclidean_kins()

# plot_weights() returns None (the saved output of this cell is a bare
# "None"), so call it directly instead of wrapping it in display().
f.plot_weights()

None

In [30]:
# Recompute the ranking for the current query and show the full dataframe.
# NOTE(review): the saved output below is headed "distance to 20003" and has
# *_population columns, which does not match the FIPS 42001 gdp-only query
# built in the preceding code cell -- the output is stale (cells were run out
# of order). Restart the kernel and run all cells before sharing.
f.find_euclidean_kins()
display(f.euclidean_kins)

Unnamed: 0,GeoFIPS,GeoName,2001,2002,2003,2004,2005,2006,2007,2008,...,2013_population,2014_population,2015_population,2016_population,2017_population,2018_population,2019_population,2020_population,2021_population,distance to 20003
880,20003,"Anderson, KS",74.590,78.511,76.836,76.589,76.548,75.401,81.577,91.311,...,7829.0,7848.0,7777.0,7770.0,7783.0,7782.0,7813.0,7850.0,7778.0,0.000000
169,5129,"Searcy, AR",90.528,97.992,101.146,104.189,106.461,112.705,106.153,100.018,...,7988.0,7920.0,7830.0,7951.0,7917.0,7886.0,7848.0,7827.0,7880.0,0.002230
900,20043,"Doniphan, KS",96.019,85.880,80.674,100.728,106.243,110.814,132.605,128.517,...,7868.0,7828.0,7776.0,7697.0,7637.0,7613.0,7562.0,7469.0,7471.0,0.003202
469,13181,"Lincoln, GA",105.269,104.599,106.313,102.832,109.490,105.428,118.190,112.823,...,7634.0,7514.0,7558.0,7663.0,7632.0,7667.0,7677.0,7704.0,7749.0,0.003351
847,19135,"Monroe, IA",80.830,78.299,81.980,89.798,90.345,95.037,102.527,114.486,...,7913.0,7874.0,7822.0,7722.0,7651.0,7591.0,7517.0,7582.0,7610.0,0.003700
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,6073,"San Diego, CA",79.703,83.170,87.449,92.400,97.137,99.354,100.938,100.267,...,3199707.0,3234658.0,3262566.0,3283586.0,3293575.0,3303463.0,3297959.0,3297252.0,3286069.0,2.448110
97,4013,"Maricopa, AZ",77.463,80.415,85.793,90.349,97.615,102.729,105.313,103.632,...,3977518.0,4040171.0,4105747.0,4174844.0,4231511.0,4292576.0,4363816.0,4438342.0,4496588.0,2.583485
2614,48201,"Harris, TX",73.137,72.696,72.847,79.658,80.626,87.278,94.310,91.961,...,4350992.0,4452976.0,4553991.0,4619635.0,4651955.0,4672445.0,4704042.0,4732491.0,4728030.0,2.652626
601,17031,"Cook, IL",95.406,94.886,95.455,97.260,99.315,101.320,101.826,99.238,...,5303041.0,5320233.0,5324961.0,5320293.0,5311621.0,5297956.0,5287099.0,5262741.0,5173146.0,2.907444


In [27]:
# Plot the few top-ranked locations for the lagged query.
f.plot_kins()

#### Use case: similarity of outcome and other features with a lag

In [28]:
# Lagged query for FIPS 20003 with two feature groups: "gdp" gets weight 0
# and "population" gets weight 4; the top ten matches are requested.
# NOTE(review): presumably a weight of 0 removes "gdp" from the distance --
# confirm against FipsQuery.
f = FipsQuery(
    20003,
    outcome_var="gdp",
    feature_groups_with_weights={"gdp": 0, "population": 4},
    lag=3,
    top=10,
    time_decay=1.03,
)
f.find_euclidean_kins()

# plot_weights() returns None (the saved output of this cell is a bare
# "None"), so call it directly instead of wrapping it in display().
f.plot_weights()



None

In [29]:
# If you want the full dataframe with distances you can still get it; it is
# left commented out here only to avoid printing the same kind of table again:
#f.find_euclidean_kins()
#display(f.euclidean_kins)

# Or, you can plot the few top-ranked locations:
f.plot_kins()