# City Search Jupyter Notebook
---

In [1]:
import imp
import functools
import numpy as np
import pandas as pd
#
import citysearch
imp.reload(citysearch);

### Initial unindexed data exploration:
---

In [2]:
df = citysearch.DataLoader().to_dataframe()
df.iloc[[0,1,2,-3,-2,-1]]

Unnamed: 0,id,geonameid,name,asciiname,altnames,latitude,longitude,feat_class,feat_code,country_code,cc2,admin1_code,admin2_code,admin3_code,admin4_code,population,elevation,dem,timezone,modified
0,1,3039154,El Tarter,El Tarter,"Ehl Tarter,Эл Тартер",42.579521,1.65362,P,PPL,AD,,2,,,,1052,,1721,Europe/Andorra,2012-11-03
1,2,3039163,Sant Julià de Lòria,Sant Julia de Loria,"San Julia,San Julià,Sant Julia de Loria,Sant J...",42.463718,1.49129,P,PPLA,AD,,6,,,,8022,,921,Europe/Andorra,2013-11-23
2,3,3039604,Pas de la Casa,Pas de la Casa,"Pas de la Kasa,Пас де ла Каса",42.54277,1.73361,P,PPL,AD,,3,,,,2363,2050.0,2106,Europe/Andorra,2008-06-09
142308,142309,895417,Banket,Banket,"Banket,Banket Junction",-17.383329,30.4,P,PPL,ZW,,5,,,,9641,,1277,Africa/Harare,2013-03-12
142309,142310,1085510,Epworth,Epworth,Epworth,-17.889999,31.147499,P,PPLX,ZW,,10,,,,123250,,1508,Africa/Harare,2012-01-19
142310,142311,1106542,Chitungwiza,Chitungwiza,"Chitungviza,Chitungwiza,Citungviza,Gorad Chytu...",-18.012739,31.07555,P,PPL,ZW,,10,,,,340360,,1435,Africa/Harare,2012-01-20


In [3]:
df.shape

(142311, 20)

In [4]:
df.isnull().sum()

id                   0
geonameid            0
name                 0
asciiname            1
altnames         22512
latitude             0
longitude            0
feat_class           0
feat_code            0
country_code        43
cc2             138063
admin1_code         53
admin2_code      46027
admin3_code      90037
admin4_code     121802
population           0
elevation       120913
dem                  0
timezone             0
modified             0
dtype: int64

In [5]:
[(colname, df[colname].str.len().min(), df[colname].str.len().max()) for colname in df.columns if df[colname].dtype == np.object]

[('geonameid', nan, nan),
 ('name', 1, 66),
 ('asciiname', 1.0, 66.0),
 ('altnames', 2.0, 589259.0),
 ('feat_class', 1, 1),
 ('feat_code', 3, 5),
 ('country_code', 2.0, 2.0),
 ('cc2', 2.0, 5.0),
 ('admin1_code', 1.0, 8.0),
 ('admin2_code', 1.0, 47.0),
 ('admin3_code', 1.0, 10.0),
 ('admin4_code', 2.0, 20.0),
 ('population', nan, nan),
 ('elevation', nan, nan),
 ('dem', nan, nan),
 ('timezone', 9, 30),
 ('modified', 10, 10)]

In [6]:
[(colname, df[colname].min(), df[colname].max()) for colname in df.columns if df[colname].dtype == np.int]

[('id', 1, 142311)]

In [7]:
[(colname, df[colname].min(), df[colname].max()) for colname in df.columns if df[colname].dtype == np.float32]

[('latitude', -77.846001, 78.223343), ('longitude', -179.12198, 179.36452)]

In [8]:
altnames = df[['altnames']].copy()
altnames['charlen'] = altnames.altnames.str.len()
altnames['wordlen'] = altnames.altnames.str.count(',') + 1
print(altnames.charlen.sum(), altnames.wordlen.sum())
altnames.sort_values('charlen', ascending = False)[:10]

8012677.0 715190.0


Unnamed: 0,altnames,charlen,wordlen
135461,"ksbridzh,Aksbridz,Aksbridzas,Aksbridzh,Aksbrid...",589259.0,15717.0
134154,"rving,Erving,Irving,Irvingas,abingu,ayrfyngh,a...",196814.0,5364.0
59053,"rvin,Ehrvin,Irbhinn,Irvin,Irvine i Ayrshire,OE...",115552.0,2271.0
57799,"kfijld,awkfyld,Ъкфийлд,اوکفیلد\t50.96948\t0.09...",22976.0,398.0
57597,"stradgunlajs,Ystradgunlais,Ystradgynlais,Ъстра...",13723.0,352.0
137631,"plend,Apland,CCB,Magnolia Villa,North Ontario,...",4713.0,204.0
125053,"sparta,Baris,Gorad ysparta,Hamid,Hamid-Abad,Ha...",4059.0,170.0
73536,"Adonai-jireh,Aelia Capitolina,Al Quds,Al-Kuds,...",2425.0,251.0
120869,"Gorad Petrapaulausk-Kamchacki,Kamchatkataagy P...",1736.0,71.0
137232,"Angelopolis,Cuidad De Los Angelos,Cuidad De lo...",1619.0,113.0


### Quick and dirty search implementation (unindexed, unoptimized):

In [9]:
def city_search(afield, avalue):
    """ Lookup a city by key value pair."""
    return df[df[afield] == avalue]

def geo_dist(lat1, lon1, lat2, lon2):
    """ Exact geo distance in kilometers (haversine). """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    hs1 = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
    hs = 2 * np.arcsin(np.sqrt(hs1))
    km = 6367 * hs
    return km

In [10]:
def proximity_search(afield, avalue, k = 10, dt = None):
    """ Locate k nearest cities to specified city key."""
    # locate city:
    rec = city_search(afield, avalue)
    lat1, lon1 = (rec.latitude.iloc[0], rec.longitude.iloc[0])
    # Filter cities with approximate surface patch based
    # on fixed deviations of angular coordinates:
    if dt is None:
        if k < 10**3:
            dt = 2
        else:
            dt = min(180/6*np.log10(k+1)+1, 180)
    dg = df
    # North Pole patch:
    if lat1 > 80:
        latmin = 80 - 4*dt
        latmax = 90
        dg = dg[(dg.latitude >= latmin) & (dg.latitude <= latmax)]
    # South Pole patch:
    elif lat1 < -80:
        latmin = -90
        latmax = -80 + 4*dt
        dg = dg[(dg.latitude >= latmin) & (dg.latitude <= latmax)]
    # Prime meridian patch:
    elif (lon1 + dt > 180) or (lon1 - dt < -180):
        latmin = lat1 - dt
        latmax = lat1 + dt
        dg = dg[(dg.latitude >= latmin) & (dg.latitude <= latmax)]
        dg = dg[(dg.longitude <= -180 + dt) | (dg.longitude >= 180 - dt)]
    # Regular patch:
    elif lon1 + dt < 180 and lon1 - dt > -180:
        latmin = lat1 - dt
        latmax = lat1 + dt
        lonmin = lon1 - dt
        lonmax = lon1 + dt
        dg = dg[(dg.latitude >= latmin) & (dg.latitude <= latmax)]
        dg = dg[(dg.longitude >= lonmin) & (dg.longitude <= lonmax)]
    # With most cities roughly filtered, apply slow exact distance calculation:
    dg['dist'] = dg.apply(lambda arow: geo_dist(lat1, lon1, arow.latitude, arow.longitude), axis = 1)
    dg =  dg.sort_values('dist')
    rs = dg[:k]
    if len(rs) >= k:
        print('under bounded: ', (k, len(dg)))
        return rs
    else:
        print('over bounded: ', (k, len(rs)))
        return proximity_search(afield, avalue, k = k, dt = 2*dt)

In [11]:
proximity_search('name', 'Daly City', k = 10)

under bounded:  (10, 414)


Unnamed: 0,id,geonameid,name,asciiname,altnames,latitude,longitude,feat_class,feat_code,country_code,...,admin1_code,admin2_code,admin3_code,admin4_code,population,elevation,dem,timezone,modified,dist
136953,136954,5341430,Daly City,Daly City,"Daly City,Dalyurbo,Dejli Siti,Dejli-Siti,Lungs...",37.705769,-122.461922,P,PPL,US,...,CA,81,,,106562,124,123,America/Los_Angeles,2017-03-09,0.000354
136848,136849,5330854,Broadmoor,Broadmoor,,37.6866,-122.48275,P,PPL,US,...,CA,81,,,4176,106,111,America/Los_Angeles,2011-05-14,2.809461
136924,136925,5338703,Colma,Colma,"Colma,Kolma,School House Station,Sutter's Fort...",37.67688,-122.459686,P,PPL,US,...,CA,81,,,1520,37,36,America/Los_Angeles,2017-03-09,3.216301
136847,136848,5330810,Brisbane,Brisbane,"Brisbane,Brisben,Brizbejn,Visitacion City,bris...",37.680771,-122.399971,P,PPL,US,...,CA,81,,,4717,33,38,America/Los_Angeles,2017-03-09,6.114593
137568,137569,5397765,South San Francisco,South San Francisco,"Baden,Juzhen San Francisko,Saus-San-Francisko,...",37.654659,-122.407753,P,PPL,US,...,CA,81,,,67271,4,5,America/Los_Angeles,2017-03-09,7.412851
137490,137491,5391959,San Francisco,San Francisco,"Franciscopolis,Frisco,Gorad San-Francyska,Kapa...",37.774929,-122.419418,P,PPLA2,US,...,CA,75,,,864816,16,28,America/Los_Angeles,2017-03-09,8.544804
137483,137484,5391749,San Bruno,San Bruno,,37.630489,-122.411079,P,PPL,US,...,CA,81,,,43185,5,5,America/Los_Angeles,2017-03-09,9.485704
137369,137370,5380420,Pacifica,Pacifica,"Edgemar,Fairway Park,Pacifica,Pacífica,Pasifik...",37.613831,-122.486923,P,PPL,US,...,CA,81,,,39260,25,25,America/Los_Angeles,2017-03-09,10.450813
137277,137278,5373129,Millbrae,Millbrae,"Milbrej,mi er bu rui,milabre,milbeule,myl bry ...",37.598549,-122.387192,P,PPL,US,...,CA,81,,,22795,10,10,America/Los_Angeles,2017-03-09,13.608368
136856,136857,5331920,Burlingame,Burlingame,"Berlingejm,Berlingem,Burlingejm,barlingema,beo...",37.584099,-122.366081,P,PPL,US,...,CA,81,,,30459,12,12,America/Los_Angeles,2017-03-09,15.934764


In [12]:
city_search('name', 'Dallas')

Unnamed: 0,id,geonameid,name,asciiname,altnames,latitude,longitude,feat_class,feat_code,country_code,cc2,admin1_code,admin2_code,admin3_code,admin4_code,population,elevation,dem,timezone,modified
129731,129732,4190598,Dallas,Dallas,"Dalas,Dallas,dalas,dalas jwrjya,dyalasa,Далас...",33.92371,-84.840767,P,PPLA2,US,,GA,223,,,12870,318,316,America/New_York,2017-03-09
132106,132107,4462896,Dallas,Dallas,"Dalas,Dallas,dalas,dls karwlynay shmaly,Далас...",35.316528,-81.176193,P,PPL,US,,NC,71,,,4622,242,245,America/New_York,2017-03-09
133972,133973,4684888,Dallas,Dallas,"DFW,Dalas,Dalasa,Dalasas,Dallas,Dallas shaary,...",32.783058,-96.806671,P,PPLA2,US,,TX,113,,,1300092,128,139,America/Chicago,2017-03-09
138710,138711,5722064,Dallas,Dallas,"Dalas,Dallas,dalas,dalas awrgn,Далас,Даллас,د...",44.919281,-123.317047,P,PPLA2,US,,OR,53,,,15277,99,101,America/Los_Angeles,2017-03-09


In [13]:
city_search('geonameid', 4684888)

Unnamed: 0,id,geonameid,name,asciiname,altnames,latitude,longitude,feat_class,feat_code,country_code,cc2,admin1_code,admin2_code,admin3_code,admin4_code,population,elevation,dem,timezone,modified
133972,133973,4684888,Dallas,Dallas,"DFW,Dalas,Dalasa,Dalasas,Dallas,Dallas shaary,...",32.783058,-96.806671,P,PPLA2,US,,TX,113,,,1300092,128,139,America/Chicago,2017-03-09


In [14]:
proximity_search('geonameid', 4684888, k = 16)

under bounded:  (16, 206)


Unnamed: 0,id,geonameid,name,asciiname,altnames,latitude,longitude,feat_class,feat_code,country_code,...,admin1_code,admin2_code,admin3_code,admin4_code,population,elevation,dem,timezone,modified,dist
133972,133973,4684888,Dallas,Dallas,"DFW,Dalas,Dalasa,Dalasas,Dallas,Dallas shaary,...",32.783058,-96.806671,P,PPLA2,US,...,TX,113,,,1300092,128,139,America/Chicago,2017-03-09,0.000277
134117,134118,4697616,Highland Park,Highland Park,"Khajlend Park,Хайленд Парк",32.833462,-96.791946,P,PPL,US,...,TX,113,,,9189,161,169,America/Chicago,2017-03-09,5.767539
133936,133937,4682251,Cockrell Hill,Cockrell Hill,,32.73624,-96.886948,P,PPL,US,...,TX,113,,,4316,195,202,America/Chicago,2017-03-09,9.129504
134146,134147,4699608,Hutchins,Hutchins,"Khachins,hachynz tgzas,hatshynz,Хачинс,هاتشين...",32.6493,-96.713051,P,PPL,US,...,TX,113,,,5727,142,146,America/Chicago,2017-03-09,17.24951
133993,133994,4687331,Duncanville,Duncanville,"Dankanvil,danknfyl,danknwyl tgzas,Данканвил,د...",32.651798,-96.90834,P,PPL,US,...,TX,113,,,39826,221,224,America/Chicago,2017-03-09,17.410217
134027,134028,4690198,Farmers Branch,Farmers Branch,"Farmers Branch,farmrz bransh,farmrz brnch tgz...",32.92651,-96.896118,P,PPL,US,...,TX,113,,,32689,141,147,America/Chicago,2017-03-09,17.995761
133825,133826,4672059,Balch Springs,Balch Springs,,32.728741,-96.622772,P,PPL,US,...,TX,113,,,25210,152,159,America/Chicago,2017-03-09,18.215038
134075,134076,4694482,Grand Prairie,Grand Prairie,"Deckman,Grand Prairie,Grand Preri,Grand Preris...",32.74596,-96.99778,P,PPL,US,...,TX,113,,,187809,157,162,America/Chicago,2017-03-09,18.328083
133787,133788,4669828,Addison,Addison,"Adison,adyswn,adyswn tgzas,Адисон,آدیسون، تگز...",32.961788,-96.82917,P,PPL,US,...,TX,113,,,15518,194,196,America/Chicago,2017-03-09,19.972242
133905,133906,4679195,Carrollton,Carrollton,"Carrollton,Karolton,Karoltonas,Karrolton,Kerro...",32.953732,-96.890282,P,PPL,US,...,TX,113,,,133168,161,163,America/Chicago,2017-03-09,20.509071


### How fast are radix tries for memory optimized name lookups?
---

In [15]:
import marisa_trie

In [16]:
keys = df.name.values.tolist()
vals = df.geonameid.values.tolist()
kvs = list(zip(keys, vals))
tkvs = list(zip(keys, [(x,) for x in vals]))

In [17]:
trie = marisa_trie.RecordTrie('<L', tkvs)

In [18]:
'San Juan de Mata' in trie, trie['San Juan de Mata']

(True, [(1689220,)])

In [19]:
%timeit trie['San Juan de Mata']
# 2.4 µs ± 72.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

2.4 µs ± 72.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [20]:
adict = dict(kvs)

In [21]:
'San Juan de Mata' in adict, adict['San Juan de Mata']

(True, 1689220)

In [22]:
%timeit adict['San Juan de Mata']
# 53.9 ns ± 1.74 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)

53.9 ns ± 1.74 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


### Pandas Multi-Index vs Multiple Single Indexers?
---

#### Observation, Pandas isn't memory efficient with multiple single indexes.  (copies data rather than pointing at slices)

In [None]:
df.head(4)

In [None]:
df_latlon = df.set_index(['latitude','longitude'])
df_latlon.head(4)

In [None]:
df_pop = df.set_index(['population'])
df_pop.head(4)

In [None]:
df_geonameid = df.set_index(['geonameid'])
df_geonameid.head(4)

### To database:
---

In [None]:
import sqlalchemy as sa

In [None]:
engine = sa.create_engine('mysql+mysqldb://root:citysearch123456@127.0.0.1:3306/test?charset=utf8', encoding = 'utf8')

In [None]:
df.to_sql('City', engine.connect(), if_exists = 'append', index = False, chunksize = 10**4)

In [None]:
engine = sa.create_engine('mysql+mysqldb://root:user@172.17.0.3:9306/test?charset=utf8', encoding = 'utf8')

In [None]:
df.to_sql('City', engine.connect(), if_exists = 'append', index = False, chunksize = 10**4)

#### SQLAlchemy is screwing up the connection, always better to go direct to the driver...

In [None]:
import logger
import mariadb
imp.reload(mariadb)
from mariadb import SQL

In [None]:
mysql = SQL(user = 'gone', passwd = 'gone')

In [None]:
mysql.fetchall('show tables;')

In [None]:
sphinx = SQL(host = '172.17.0.3', port = 9306)

In [None]:
sphinx.fetchall('show tables')

In [None]:
dfmin = df.name + ',' + df.asciiname + ',' + df.alternatenames
dfmin = pd.DataFrame(dfmin, columns = ['altnames'])
dfmin.to_csv('data/city_altnames.tsv', sep = '\t', index = True, index_label = 'id')
dfmin.head()

In [None]:
import sphinxql
imp.reload(sphinxql)
from sphinxql import SphinxQL

In [None]:
sphinx = SphinxQL(host = '172.17.0.3')

In [None]:
sphinx.fetchall('show tables')

### JSON Cache:
---

In [23]:
df.head(1).to_json(orient = 'records', lines = True)

'{"id":1,"geonameid":3039154,"name":"El Tarter","asciiname":"El Tarter","altnames":"Ehl Tarter,\\u042d\\u043b \\u0422\\u0430\\u0440\\u0442\\u0435\\u0440","latitude":42.5795211792,"longitude":1.6536200047,"feat_class":"P","feat_code":"PPL","country_code":"AD","cc2":null,"admin1_code":"02","admin2_code":null,"admin3_code":null,"admin4_code":null,"population":1052,"elevation":null,"dem":1721,"timezone":"Europe\\/Andorra","modified":"2012-11-03"}'

In [24]:
df1 = df.head(100)

In [25]:
%timeit df1.to_json(orient = 'records', lines = True)

853 µs ± 7.38 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [26]:
df['json'] = df.apply(lambda row: row.to_json(orient = 'records'))

In [None]:
for i in range(10):
    jstr += df.iloc[i].to_json(orient = 'records', lines = True)

### Geo Lookups:
---

In [None]:
geo2 = df.set_index(['latitude','longitude'])[['id']].sort_index(level = (0, 1))

In [None]:
geo2.head(8)

In [None]:
geo2.loc[42.544:42.545]

##### Problem, pandas doesn't do multilevel range slices...

In [None]:
# Try two in memory pandas indexes... (other options on the table: radix tries, judy arrays, lsm trees)
geo = (df.set_index(['longitude'])[['id']], df.set_index(['longitude'])[['id']])

In [None]:
def geo_search(lat, lon, dt = 2):
    id_lonf = geo[1].iloc[lon - dt:lon + dt].id
    id_latf = geo[0].iloc[lat - dt:lat + dt].id
    return set(id_lonf).intersection(id_latf)

#### RTrees...

In [27]:
from rtree import index as rtree

In [28]:
rgeo = rtree.Rtree()

In [31]:
geo = df[['id','longitude','latitude']]
geo.head(4)

Unnamed: 0,id,longitude,latitude
0,1,1.65362,42.579521
1,2,1.49129,42.463718
2,3,1.73361,42.54277
3,4,1.53319,42.556229


In [40]:
rgeo = rtree.Rtree()
for i in range(1,len(geo)):
    rgeo.insert(i, (geo.iloc[i][1:].tolist()))
print(rgeo)

<rtree.index.Index object at 0x7f2963b5d0f0>


In [47]:
SF = (-122.41,37.77)
dt = 0.1
ids = list(rgeo.intersection((SF[0]-dt,SF[1]-dt,SF[0]+dt,SF[1]+dt)))
print(len(ids), ids[:10])

7 [137254, 136924, 136847, 136848, 136953, 137490, 137530]


In [48]:
df.loc[ids]

Unnamed: 0,id,geonameid,name,asciiname,altnames,latitude,longitude,feat_class,feat_code,country_code,...,admin1_code,admin2_code,admin3_code,admin4_code,population,elevation,dem,timezone,modified,json
137254,137255,5370464,Marin City,Marin City,"Marin Siti,Marin-Siti,Марин Сити,Марин-Сити",37.868542,-122.50914,P,PPL,US,...,CA,41,,,2666,7,9,America/Los_Angeles,2011-05-14,
136924,136925,5338703,Colma,Colma,"Colma,Kolma,School House Station,Sutter's Fort...",37.67688,-122.459686,P,PPL,US,...,CA,81,,,1520,37,36,America/Los_Angeles,2017-03-09,
136847,136848,5330810,Brisbane,Brisbane,"Brisbane,Brisben,Brizbejn,Visitacion City,bris...",37.680771,-122.399971,P,PPL,US,...,CA,81,,,4717,33,38,America/Los_Angeles,2017-03-09,
136848,136849,5330854,Broadmoor,Broadmoor,,37.6866,-122.48275,P,PPL,US,...,CA,81,,,4176,106,111,America/Los_Angeles,2011-05-14,
136953,136954,5341430,Daly City,Daly City,"Daly City,Dalyurbo,Dejli Siti,Dejli-Siti,Lungs...",37.705769,-122.461922,P,PPL,US,...,CA,81,,,106562,124,123,America/Los_Angeles,2017-03-09,
137490,137491,5391959,San Francisco,San Francisco,"Franciscopolis,Frisco,Gorad San-Francyska,Kapa...",37.774929,-122.419418,P,PPLA2,US,...,CA,75,,,864816,16,28,America/Los_Angeles,2017-03-09,
137530,137531,5393611,Sausalito,Sausalito,"El Ojo la Agua de Zaucito,JMC,Saucelito,Saucet...",37.859089,-122.485252,P,PPL,US,...,CA,41,,,7156,4,2,America/Los_Angeles,2017-03-09,


In [50]:
%timeit rgeo.intersection((SF[0]-dt,SF[1]-dt,SF[0]+dt,SF[1]+dt))

80 µs ± 835 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [51]:
%timeit df.loc[ids]

615 µs ± 13.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [53]:
df.loc[rgeo.nearest(SF, 10)]

Unnamed: 0,id,geonameid,name,asciiname,altnames,latitude,longitude,feat_class,feat_code,country_code,...,admin1_code,admin2_code,admin3_code,admin4_code,population,elevation,dem,timezone,modified,json
137490,137491,5391959,San Francisco,San Francisco,"Franciscopolis,Frisco,Gorad San-Francyska,Kapa...",37.774929,-122.419418,P,PPLA2,US,...,CA,75,,,864816,16,28,America/Los_Angeles,2017-03-09,
136953,136954,5341430,Daly City,Daly City,"Daly City,Dalyurbo,Dejli Siti,Dejli-Siti,Lungs...",37.705769,-122.461922,P,PPL,US,...,CA,81,,,106562,124,123,America/Los_Angeles,2017-03-09,
136847,136848,5330810,Brisbane,Brisbane,"Brisbane,Brisben,Brizbejn,Visitacion City,bris...",37.680771,-122.399971,P,PPL,US,...,CA,81,,,4717,33,38,America/Los_Angeles,2017-03-09,
136924,136925,5338703,Colma,Colma,"Colma,Kolma,School House Station,Sutter's Fort...",37.67688,-122.459686,P,PPL,US,...,CA,81,,,1520,37,36,America/Los_Angeles,2017-03-09,
136848,136849,5330854,Broadmoor,Broadmoor,,37.6866,-122.48275,P,PPL,US,...,CA,81,,,4176,106,111,America/Los_Angeles,2011-05-14,
137610,137611,5402535,Tiburon,Tiburon,"E-Tiburon,Tiberon,Tiburon,Tiburón,di bo long,t...",37.873539,-122.456642,P,PPL,US,...,CA,41,,,9214,4,2,America/Los_Angeles,2017-03-09,
137568,137569,5397765,South San Francisco,South San Francisco,"Baden,Juzhen San Francisko,Saus-San-Francisko,...",37.654659,-122.407753,P,PPL,US,...,CA,81,,,67271,4,5,America/Los_Angeles,2017-03-09,
136810,136811,5327490,Belvedere,Belvedere,"Belvedere,Belvedir,bei er wei dai lei,belabhed...",37.8727,-122.464417,P,PPL,US,...,CA,41,,,2121,11,15,America/Los_Angeles,2017-03-09,
137530,137531,5393611,Sausalito,Sausalito,"El Ojo la Agua de Zaucito,JMC,Saucelito,Saucet...",37.859089,-122.485252,P,PPL,US,...,CA,41,,,7156,4,2,America/Los_Angeles,2017-03-09,
137019,137020,5346462,Emeryville,Emeryville,"Emerivil,Emerivill,Emerivill',Emeryville,Golde...",37.831322,-122.285248,P,PPL,US,...,CA,1,,,11694,7,10,America/Los_Angeles,2017-03-09,


In [54]:
%timeit rgeo.nearest(SF, 10)

278 µs ± 2.36 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
