# Use trained model to predict worst air pollution locations on the grid

We tried using one month's worth of data with a trained kNN model that simply took in lat, lon, and a time delta. This resulted in predictions that all fell on the same latitude in a way that was not convincing. Next we will try a trained decision tree model.

In [1]:
# libraries

%matplotlib inline

import json
from geopy.distance import distance
import pandas as pd
from time import sleep
import shapely.geometry
import pyproj
import geopandas as gpd
from matplotlib import pyplot as plt
from shapely.geometry import Point
import datetime
from datetime import date, timedelta
from os import path
import pandas as pd
import numpy as np
import statistics
import boto3
import s3fs
import sys
from fastparquet import ParquetFile
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import geopy
from geopy import distance
import gmplot
import math
import time
from collections import defaultdict

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', 500)

In [2]:
sys.path.append("./HistoricalData/")
from getData import get_data
from getData import getNearestNoaaData
from getData import getNearestEpaData

UP_LEFT = (38.008050, -122.536985)    
UP_RIGHT = (38.008050, -122.186437)   
DOWN_RIGHT = (37.701933, -122.186437) 
DOWN_LEFT = (37.701933, -122.536985) 
START_DATE = '2019/09/01' 
END_DATE = '2019/09/02'   
START_HOUR = '00'        
END_HOUR = '24'

In [3]:
# load in the grid from csv file
boxes = pd.read_csv('data/500m_grid.csv')

In [24]:
# load in the data from the sensors in the given timeframe and bounding box
real_sensor_df = get_data(UP_LEFT, UP_RIGHT, DOWN_RIGHT, DOWN_LEFT, START_DATE, END_DATE, START_HOUR, END_HOUR, 'Monthly')

In [26]:
real_sensor_df.head()

Unnamed: 0_level_0,0_3um,0_5um,1_0um,2_5um,5_0um,10_0um,pm1_0,pm10_0,created,pm1_0_atm,pm2_5_atm,pm10_0_atm,uptime,rssi,temperature,humidity,pm2_5_cf_1,device_loc_typ,is_owner,sensor_name,parent_id,lat,lon,thingspeak_primary_id,thingspeak_primary_id_read_key,thingspeak_secondary_id,thingspeak_secondary_id_read_key,a_h,high_reading_flag,hidden,city,county,zipcode,created_at,year,month,day,hour,minute,wban_number,call_sign,call_sign2,interval,call_sign3,zulu_time,report_modifier,wind_data,wind_direction,wind_speed,gusts,gust_speed,variable_winds,variable_wind_info,sys_maint_reqd,agency_name,aqi,category,epa_pm25_unit,epa_pm25_value,full_aqs_code,intl_aqs_code,raw_concentration,site_name,wkday,daytype,timeofday,wind_compass,xy_,x,y,time_space_id
sensor_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1
16939,563.51,162.1,33.51,5.16,1.25,0.0,1.96,4.93,201909010000,1.96,3.99,4.93,59078.0,-74.0,81.0,49.0,3.99,outside,0,#SAFQ11,,37.72244,-122.439302,592280,YE7QMQYRJVM7WIH3,592281,13R50S7WTRLK96SD,,,,San Francisco,San Francisco County,94112,2019/09/01T00:00,2019,9,1,0,0,23234,KSFO,SFO,5-MIN,KSFO,010800Z,,1.0,280,7.0,0.0,,0.0,,0.0,San Francisco Bay Area AQMD,21.0,1.0,UG/M3,5.0,60750005.0,840060800000.0,4.0,San Francisco,0,Weekend,night,West,"(21, 5)",21,5,2612160_21_5
16919,577.63,169.03,28.16,5.97,2.44,0.53,2.13,5.54,201909010000,2.13,4.21,5.54,59080.0,-76.0,79.0,50.0,4.21,outside,0,#SFAQ06,,37.722456,-122.43939,592240,WNXITSKNQC3FGFPI,592241,V1FSSXOJB78Y96LX,,,,San Francisco,San Francisco County,94112,2019/09/01T00:00,2019,9,1,0,0,23234,KSFO,SFO,5-MIN,KSFO,010800Z,,1.0,280,7.0,0.0,,0.0,,0.0,San Francisco Bay Area AQMD,21.0,1.0,UG/M3,5.0,60750005.0,840060800000.0,4.0,San Francisco,0,Weekend,night,West,"(21, 5)",21,5,2612160_21_5
16931,172.1,49.46,9.62,0.22,0.0,0.0,0.0,0.0,201909010000,0.0,0.0,0.0,30702.0,-72.0,102.0,17.0,0.0,outside,0,#SFAQ10,,37.722417,-122.439245,592265,1ENJG2P25X9OVDRN,592267,8QT4N8YSSJ8M2LVD,,,,San Francisco,San Francisco County,94112,2019/09/01T00:00,2019,9,1,0,0,23234,KSFO,SFO,5-MIN,KSFO,010800Z,,1.0,280,7.0,0.0,,0.0,,0.0,San Francisco Bay Area AQMD,21.0,1.0,UG/M3,5.0,60750005.0,840060800000.0,4.0,San Francisco,0,Weekend,night,West,"(21, 5)",21,5,2612160_21_5
19173,708.04,213.76,37.46,2.76,1.23,0.7,2.91,5.96,201909010000,2.91,5.09,5.96,59081.0,-77.0,76.0,54.0,5.09,outside,0,#SFAQ12,,37.722383,-122.439227,627168,WJIUGUOTV9IKH4JK,627169,I7WF6N1EOYSSCY87,,,,San Francisco,San Francisco County,94112,2019/09/01T00:00,2019,9,1,0,0,23234,KSFO,SFO,5-MIN,KSFO,010800Z,,1.0,280,7.0,0.0,,0.0,,0.0,San Francisco Bay Area AQMD,21.0,1.0,UG/M3,5.0,60750005.0,840060800000.0,4.0,San Francisco,0,Weekend,night,West,"(21, 5)",21,5,2612160_21_5
16947,507.34,153.46,30.69,2.51,0.51,0.0,2.03,4.14,201909010000,2.03,3.87,4.14,59080.0,-75.0,79.0,51.0,3.87,outside,0,#SFAQ14,,37.722391,-122.439178,592296,PMG4LJJ4IAVEFN2S,592297,57JK69GVB3UPKFSB,,,,San Francisco,San Francisco County,94112,2019/09/01T00:00,2019,9,1,0,0,23234,KSFO,SFO,5-MIN,KSFO,010800Z,,1.0,280,7.0,0.0,,0.0,,0.0,San Francisco Bay Area AQMD,21.0,1.0,UG/M3,5.0,60750005.0,840060800000.0,4.0,San Francisco,0,Weekend,night,West,"(21, 5)",21,5,2612160_21_5


## Using a trained decision tree model

This model takes as inputs:
epa_value, humidity, NDVI, elevation, temperature, wind_x, and wind_y, where the last two are having taken the wind direction and magnitude and decomposed it into component vectors

In [5]:
# load in the model from a pickle file
#from joblib import dump, load
#model = load('VirtualSensing/models/dtree_model.joblib')

In [5]:
# create a features dataframe
grid_data_df = boxes.copy(deep=True) 
grid_data_df = grid_data_df[grid_data_df.in_water == False]
grid_data_df = grid_data_df.drop(columns = ['min_lat', 'max_lat', 'min_lon', 'max_lon', 'in_water'])
grid_data_df.rename(columns={'center_lat': 'lat', 'center_lon': 'lon'}, inplace=True)

In [7]:
grid_data_df.head()

Unnamed: 0,x,y,lat,lon,ndvi
34,0,34,37.824436,-122.534739,-2000
35,0,35,37.827984,-122.534739,-2000
36,0,36,37.831531,-122.534739,-2000
37,0,37,37.835079,-122.534739,5159
38,0,38,37.838626,-122.534739,7053


### Add wind and EPA data

In [6]:
# for now, test it with a single data point
# eventually, put this in a for loop between start and end time constants
datetimestr = '2019/09/01 00:00'

noaa_df = pd.DataFrame(getNearestNoaaData(grid_data_df.iloc[0].lat, grid_data_df.iloc[0].lon, datetimestr)).T
epa_df = pd.DataFrame(getNearestEpaData(grid_data_df.iloc[0].lat, grid_data_df.iloc[0].lon, datetimestr)).T



In [7]:
# this takes REALLY long... about 45 minutes for one point in time
for row in range(1, len(grid_data_df)): #  cycle through all grid center points
    noaa_df = noaa_df.append(getNearestNoaaData(grid_data_df.iloc[row].lat, grid_data_df.iloc[row].lon, datetimestr))
    epa_df = epa_df.append(getNearestEpaData(grid_data_df.iloc[row].lat, grid_data_df.iloc[row].lon, datetimestr))
    print(row)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060


3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [9]:
data = {'created':list(epa_df.created), 'lat': grid_data_df.lat, 'lon': grid_data_df.lon, 'x': grid_data_df.x, 
        'y': grid_data_df.y, 'epa_pm25_value':list(epa_df.epa_pm25_value), 
        'wind_direction': list(noaa_df.wind_direction), 'wind_speed': list(noaa_df.wind_speed), 'ndvi': grid_data_df.ndvi}
X_data_df = pd.DataFrame(data)

In [10]:
just_in_case = X_data_df.copy(deep=True)

In [12]:
X_data_df = just_in_case.copy(deep=True)

In [13]:
X_data_df.to_csv(path_or_buf='data/late.csv', index=True)

In [11]:
# code from Jake to convert NOAA ASOS wind info to wind vecgtor feature for decision tree model
X_data_df['wind_direction'] = X_data_df['wind_direction'].map(lambda x: x if x is not None else 0)

# fill in no wind times
X_data_df['wind_speed'].fillna(0, inplace = True)
X_data_df['wind_direction'].replace('', 0, inplace = True)

# change VRB to 0
vrb = X_data_df[X_data_df.wind_direction == "VRB"].index
X_data_df.loc[vrb, 'wind_direction'] = 0
X_data_df.loc[vrb, 'wind_speed'] = 0

X_data_df['wind_x'] = X_data_df.apply(lambda x: x.wind_speed * math.cos(math.radians(int(x.wind_direction))), axis = 1)
X_data_df['wind_y'] = X_data_df.apply(lambda x: x.wind_speed * math.sin(math.radians(int(x.wind_direction))), axis = 1)

In [12]:
X_data_df = X_data_df.drop(['wind_direction', 'wind_speed'], axis=1)

In [13]:
X_data_df

Unnamed: 0,created,lat,lon,x,y,epa_pm25_value,ndvi,wind_x,wind_y
34,201909010000,37.824436,-122.534739,0,34,5.1,-2000,1.215537,-6.893654
35,201909010000,37.827984,-122.534739,0,35,5.1,-2000,1.215537,-6.893654
36,201909010000,37.831531,-122.534739,0,36,5.1,-2000,1.215537,-6.893654
37,201909010000,37.835079,-122.534739,0,37,5.1,5159,1.215537,-6.893654
38,201909010000,37.838626,-122.534739,0,38,5.1,7053,1.215537,-6.893654
...,...,...,...,...,...,...,...,...,...
6868,201909010000,37.994537,-122.184396,78,82,8.5,3827,-5.638156,2.052121
6869,201909010000,37.998076,-122.184396,78,83,8.5,3024,-5.638156,2.052121
6870,201909010000,38.001616,-122.184396,78,84,8.5,3110,-5.638156,2.052121
6871,201909010000,38.005155,-122.184396,78,85,8.5,3110,-5.638156,2.052121


### Add elevation data

In [14]:
import rasterio
elev_filepath = './data/grid_elevations.csv'

def makeElevationFile(lat, lon, x, y, filename):
    """
    This function takes as input two same-sized lists of latitudes and longitudes and
    a filepath for dumping the output.
    It outputs a csv file with lats, lons, and elevations at each lat-lon coordinate pair.
    """
    
    # error handling
    if (len(lat) != len(lon)):
        print("Error: The lists latitude and longitude passed to getElevation() differ in length.")    
    if (len(lat) == 0):
        print("Error: The lists latitude and longitude passed to getElevation() are empty.")
    
    elevation = 'data/srtm_12_05.tif'
    coords = []
    
    # create list of coordinate tuples
    for sensor in range(len(lat)):
        coords.append((lon[sensor], lat[sensor]))

    # get elevations off the .tif
    elevations = []
    with rasterio.open(elevation) as src:
        vals = src.sample(coords)
        for val in vals:
            elevations.append(val[0])
    
    # output dataframe with elevations to csv file
    data = {'lat': lat, 'lon': lon, 'x': x, 'y':y, 'elevation': elevations}
    file_df = pd.DataFrame(data)
    file_df.to_csv(path_or_buf=filename, index=True)
    
    return

def getElevations(data_df, filename):
    """
    This function takes as input a dataframe with latitutes and longitudes and 
    the filepath to a csv file with the same latitudes and longitudes and elevations.
    It returns a dataframe that includes the elevations. 
    """
    
    elev_df = pd.read_csv(filename, header='infer')
    elev_df = elev_df[['x','y','elevation']]
    
    data_df = pd.merge(data_df, elev_df)
    print("How many elevations are missing?", data_df.elevation.isna().sum())
    print("Shape of the new dataframe:", data_df.shape)
    
    return(data_df)

In [None]:
# RUN THIS ONLY IF YOU DON'T HAVE CSV FILE ALREADY AND HAVE THE TIF FILE
# put elevations into a csv
makeElevationFile(list(X_data_df.lat), list(X_data_df.lon), list(X_data_df.x), list(X_data_df.y), elev_filepath)

In [15]:
# NEED CSV FILE TO RUN
# get elevations from csv file and merge into dataframe
X_data_df = getElevations(X_data_df, elev_filepath)

How many elevations are missing? 0
Shape of the new dataframe: (4244, 10)


In [19]:
X_data_df

Unnamed: 0,created,lat,lon,x,y,epa_pm25_value,ndvi,wind_x,wind_y,elevation
0,201909010000,37.824436,-122.534739,0,34,5.1,-2000,1.215537,-6.893654,17
1,201909010000,37.827984,-122.534739,0,35,5.1,-2000,1.215537,-6.893654,-32768
2,201909010000,37.831531,-122.534739,0,36,5.1,-2000,1.215537,-6.893654,-32768
3,201909010000,37.835079,-122.534739,0,37,5.1,5159,1.215537,-6.893654,55
4,201909010000,37.838626,-122.534739,0,38,5.1,7053,1.215537,-6.893654,132
...,...,...,...,...,...,...,...,...,...,...
4239,201909010000,37.994537,-122.184396,78,82,8.5,3827,-5.638156,2.052121,177
4240,201909010000,37.998076,-122.184396,78,83,8.5,3024,-5.638156,2.052121,159
4241,201909010000,38.001616,-122.184396,78,84,8.5,3110,-5.638156,2.052121,203
4242,201909010000,38.005155,-122.184396,78,85,8.5,3110,-5.638156,2.052121,243


In [None]:
# DO NOT RUN THIS CELL

# why are some readings -32768
# badreadings_df = X_data_df[X_data_df.elevation == -32768]

# plot top most polluted virtual sensor locations

#gmap3=gmplot.GoogleMapPlotter(badreadings_df.lat.iloc[0], badreadings_df.lon.iloc[0], 10, apikey = "AIzaSyA2TdrwntJVu6IuS_3fOY7WLTLvhl3xntk")
#gmap3.coloricon = "http://www.googlemapsmarkers.com/v1/%s/"
#for sensor in range(len(badreadings_df)):
#    gmap3.marker(badreadings_df.lat.iloc[sensor], badreadings_df.lon.iloc[sensor], color='cornflowerblue', title=sensor)#, title=map_df.pred_PM2_5)
#gmap3.draw("data/badreadings_map.html") 

# inspecting the map, it seems these are readings right on a coastline and the elevation tif thinks it's on water
# given these are all on the edge of the ocean, setting these elevations to 0 would seem to be a reasonable response

In [16]:
# set a few grid points at seashore and read from TIF as ocean to 0
X_data_df.elevation.replace(-32768, 0, inplace=True)

### Add humidity, temperature, and neighbors data

In [17]:
# DO NOT RUN, LOAD THE CSV FILE INSTEAD
# Create csv of virtual sensors, each with a list of real sensors in order of closest to farthest away
getreading_df = pd.read_json(path_or_buf="https://www.purpleair.com/json")

In [None]:
# DO NOT RUN, LOAD THE CSV FILE INSTEAD
PAsample = pd.DataFrame.from_records(getreading_df.results)
PAsample = PAsample[['ID', 'Lat', 'Lon']]
PAsample['coords'] = list(zip(PAsample.Lat, PAsample.Lon))
PAsample.drop_duplicates(subset ="coords", inplace = True) 
PAsample = PAsample[(PAsample.Lat <= UP_LEFT[0]) & (PAsample.Lat >= DOWN_LEFT[0]) & 
                    (PAsample.Lon >= UP_LEFT[1]) & (PAsample.Lon <= UP_RIGHT[1])] # just keep sensors in bounding box
proximity_df = PAsample.copy(deep=True)
proximity_df.rename(columns={'ID':'sensor_id', 'Lat':'lan', 'Lon':'lon'}, inplace=True)
num_PA_sensors = len(PAsample)
empty_col = [100000] * num_PA_sensors

for row in range(len(X_data_df)):
    col_name = (X_data_df.iloc[row].lat, X_data_df.iloc[row].lon)
    col_name = str(col_name)
    proximity_df[col_name] = empty_col

In [None]:
# DO NOT RUN, LOAD IN THE CSV INSTEAD
def calcDistance(origin, destination):
    """
    This helper function inputs two tuples, each containing (lat, lon)
    It returns distance between the two coordinates.
    """

    origin = geopy.point.Point(origin)
    destination = geopy.point.Point(destination)
    distance = geopy.distance.distance(origin, destination).km

    return distance

grid_coords = proximity_df.columns[4:]
counter = 0

for grid_coord in grid_coords:
    counter += 1
    print("Counter", counter, "Grid coord", grid_coord, "Sensor", sensor_coord)
    for sensor_coord in proximity_df.coords:
        distance = calcDistance(tuple(float(s) for s in grid_coord.strip("()").split(",")), sensor_coord)
        proximity_df[grid_coord][proximity_df.coords == sensor_coord] = distance

In [None]:
# DO NOT RUN, JUST LOAD IN THE CSV FILE
# save to csv file
proximity_df.to_csv(path_or_buf="./data/distances.csv", index=True)

In [18]:
# load in csv 
# this is a table of distances between all virtual sensors and actual sensors
distances_df = pd.read_csv("./data/distances.csv")  
distances_df.sensor_id = distances_df.sensor_id.astype(str)

In [19]:
distances_df.head()

Unnamed: 0.1,Unnamed: 0,sensor_id,lan,lon,coords,"(37.824435864921405, -122.5347392117897)","(37.82798364685624, -122.5347392117897)","(37.83153125822582, -122.5347392117897)","(37.83507869902475, -122.5347392117897)","(37.838625969247616, -122.5347392117897)","(37.84217306888904, -122.5347392117897)","(37.84571999794363, -122.5347392117897)","(37.84926675640598, -122.5347392117897)","(37.85281334427072, -122.5347392117897)","(37.85635976153246, -122.5347392117897)","(37.85990600818583, -122.5347392117897)","(37.863452084225436, -122.5347392117897)","(37.86699798964593, -122.5347392117897)","(37.87054372444192, -122.5347392117897)","(37.87408928860804, -122.5347392117897)","(37.87763468213893, -122.5347392117897)","(37.88117990502924, -122.5347392117897)","(37.884724957273576, -122.5347392117897)","(37.88826983886661, -122.5347392117897)","(37.89181454980297, -122.5347392117897)","(37.895359090077335, -122.5347392117897)","(37.89890345968432, -122.5347392117897)","(37.902447658618605, -122.5347392117897)","(37.90599168687483, -122.5347392117897)","(37.90953554444767, -122.5347392117897)","(37.913079231331785, -122.5347392117897)","(37.91662274752183, -122.5347392117897)","(37.92016609301249, -122.5347392117897)","(37.92370926779842, -122.5347392117897)","(37.927252271874316, -122.5347392117897)","(37.93079510523484, -122.5347392117897)","(37.934337767874666, -122.5347392117897)","(37.93788025978849, -122.5347392117897)","(37.94142258097101, -122.5347392117897)","(37.944964731416896, -122.5347392117897)","(37.94850671112083, -122.5347392117897)","(37.952048520077525, -122.5347392117897)","(37.95559015828168, -122.5347392117897)","(37.959131625728, -122.5347392117897)","(37.96267292241117, -122.5347392117897)","(37.966214048325895, -122.5347392117897)","(37.96975500346692, -122.5347392117897)","(37.973295787828924, -122.5347392117897)","(37.976836401406615, -122.5347392117897)","(37.98037684419473, -122.5347392117897)","(37.983917116188, -122.5347392117897)","(37.98745721738111, -122.5347392117897)","(37.99099714776882, -122.5347392117897)","(37.99453690734586, -122.5347392117897)","(37.998076496106954, -122.5347392117897)","(38.00161591404681, -122.5347392117897)","(38.00515516116021, -122.5347392117897)","(38.008694237441865, -122.5347392117897)","(37.817339789377584, -122.53024763536914)","(37.82088791242671, -122.53024763536914)","(37.824435864921405, -122.53024763536914)","(37.82798364685624, -122.53024763536914)","(37.83153125822582, -122.53024763536914)","(37.83507869902475, -122.53024763536914)","(37.838625969247616, -122.53024763536914)","(37.84217306888903, -122.53024763536914)","(37.84571999794363, -122.53024763536914)","(37.84926675640598, -122.53024763536914)","(37.85281334427072, -122.53024763536914)","(37.85635976153246, -122.53024763536914)","(37.859906008185824, -122.53024763536914)","(37.863452084225436, -122.53024763536914)","(37.86699798964593, -122.53024763536914)","(37.87054372444192, -122.53024763536914)","(37.87408928860804, -122.53024763536914)","(37.87763468213894, -122.53024763536914)","(37.88117990502923, -122.53024763536914)","(37.884724957273576, -122.53024763536914)","(37.88826983886661, -122.53024763536914)","(37.89181454980297, -122.53024763536914)","(37.895359090077335, -122.53024763536914)","(37.89890345968432, -122.53024763536914)","(37.902447658618605, -122.53024763536914)","(37.90599168687483, -122.53024763536914)","(37.90953554444767, -122.53024763536914)","(37.913079231331785, -122.53024763536914)","(37.91662274752184, -122.53024763536914)","(37.9201660930125, -122.53024763536914)","(37.923709267798415, -122.53024763536914)","(37.927252271874316, -122.53024763536914)","(37.93079510523484, -122.53024763536914)","(37.934337767874666, -122.53024763536914)","(37.93788025978849, -122.53024763536914)","(37.941422580971, -122.53024763536914)","(37.944964731416896, -122.53024763536914)","(37.94850671112083, -122.53024763536914)","(37.952048520077525, -122.53024763536914)","(37.95559015828168, -122.53024763536914)","(37.959131625728, -122.53024763536914)","(37.96267292241117, -122.53024763536914)","(37.9662140483259, -122.53024763536914)","(37.96975500346692, -122.53024763536914)","(37.973295787828924, -122.53024763536914)","(37.976836401406615, -122.53024763536914)","(37.980376844194744, -122.53024763536914)","(37.983917116188, -122.53024763536914)","(37.98745721738111, -122.53024763536914)","(37.990997147768844, -122.53024763536914)","(37.99453690734586, -122.53024763536914)","(37.99807649610694, -122.53024763536914)","(38.00161591404681, -122.53024763536914)","(38.00515516116021, -122.53024763536914)","(38.008694237441865, -122.53024763536914)","(37.824435864921405, -122.5257560589485)","(37.82798364685624, -122.5257560589485)","(37.83153125822582, -122.5257560589485)","(37.83507869902475, -122.5257560589485)","(37.838625969247616, -122.5257560589485)","(37.84217306888904, -122.5257560589485)","(37.84571999794362, -122.5257560589485)","(37.84926675640598, -122.5257560589485)","(37.85281334427072, -122.5257560589485)","(37.85635976153246, -122.5257560589485)","(37.85990600818583, -122.5257560589485)","(37.863452084225436, -122.5257560589485)","(37.86699798964594, -122.5257560589485)","(37.87054372444192, -122.5257560589485)","(37.87408928860804, -122.5257560589485)","(37.87763468213893, -122.5257560589485)","(37.88117990502924, -122.5257560589485)","(37.884724957273576, -122.5257560589485)","(37.88826983886661, -122.5257560589485)","(37.89181454980299, -122.5257560589485)","(37.89535909007733, -122.5257560589485)","(37.89890345968432, -122.5257560589485)","(37.902447658618605, -122.5257560589485)","(37.90599168687483, -122.5257560589485)","(37.909535544447664, -122.5257560589485)","(37.913079231331785, -122.5257560589485)","(37.91662274752184, -122.5257560589485)","(37.92016609301249, -122.5257560589485)","(37.92370926779843, -122.5257560589485)","(37.927252271874316, -122.5257560589485)","(37.93079510523484, -122.5257560589485)","(37.934337767874666, -122.5257560589485)","(37.93788025978849, -122.5257560589485)","(37.941422580971, -122.5257560589485)","(37.944964731416896, -122.5257560589485)","(37.94850671112083, -122.5257560589485)","(37.952048520077525, -122.5257560589485)","(37.95559015828169, -122.5257560589485)","(37.959131625728, -122.5257560589485)","(37.96267292241117, -122.5257560589485)","(37.966214048325895, -122.5257560589485)","(37.96975500346692, -122.5257560589485)","(37.973295787828924, -122.5257560589485)","(37.976836401406615, -122.5257560589485)","(37.98037684419473, -122.5257560589485)","(37.983917116188, -122.5257560589485)","(37.98745721738111, -122.5257560589485)","(37.990997147768844, -122.5257560589485)","(37.99453690734586, -122.5257560589485)","(37.99807649610694, -122.5257560589485)","(38.00161591404681, -122.5257560589485)","(38.00515516116021, -122.5257560589485)","(38.008694237441865, -122.5257560589485)","(37.82798364685624, -122.52126448252791)","(37.83153125822582, -122.52126448252791)","(37.83507869902475, -122.52126448252791)","(37.838625969247616, -122.52126448252791)","(37.84217306888903, -122.52126448252791)","(37.84571999794363, -122.52126448252791)","(37.84926675640598, -122.52126448252791)","(37.85281334427072, -122.52126448252791)","(37.85635976153246, -122.52126448252791)","(37.859906008185824, -122.52126448252791)","(37.863452084225436, -122.52126448252791)","(37.86699798964593, -122.52126448252791)","(37.87054372444192, -122.52126448252791)","(37.87408928860804, -122.52126448252791)","(37.87763468213894, -122.52126448252791)","(37.88117990502923, -122.52126448252791)","(37.884724957273576, -122.52126448252791)","(37.89181454980297, -122.52126448252791)","(37.895359090077335, -122.52126448252791)","(37.89890345968432, -122.52126448252791)","(37.902447658618605, -122.52126448252791)","(37.90599168687483, -122.52126448252791)","(37.90953554444767, -122.52126448252791)","(37.913079231331785, -122.52126448252791)","(37.91662274752184, -122.52126448252791)","(37.9201660930125, -122.52126448252791)","(37.92370926779842, -122.52126448252791)","(37.927252271874316, -122.52126448252791)","(37.93079510523484, -122.52126448252791)","(37.934337767874666, -122.52126448252791)","(37.9378802597885, -122.52126448252791)","(37.941422580971, -122.52126448252791)","(37.944964731416896, -122.52126448252791)","(37.94850671112083, -122.52126448252791)","(37.952048520077525, -122.52126448252791)","(37.95559015828168, -122.52126448252791)","(37.959131625728006, -122.52126448252791)","(37.96267292241117, -122.52126448252791)","(37.966214048325895, -122.52126448252791)","(37.96975500346692, -122.52126448252791)","(37.97329578782893, -122.52126448252791)","(37.976836401406615, -122.52126448252791)","(37.98037684419473, -122.52126448252791)","(37.983917116188, -122.52126448252791)","(37.98745721738111, -122.52126448252791)","(37.99099714776882, -122.52126448252791)","(37.99453690734586, -122.52126448252791)","(37.998076496106954, -122.52126448252791)","(38.00161591404681, -122.52126448252791)","(38.00515516116021, -122.52126448252791)","(38.008694237441865, -122.52126448252791)","(37.778299180792345, -122.51677290610729)","(37.82798364685624, -122.51677290610729)","(37.83153125822582, -122.51677290610729)","(37.83507869902475, -122.51677290610729)","(37.838625969247616, -122.51677290610729)","(37.84217306888904, -122.51677290610729)","(37.84571999794362, -122.51677290610729)","(37.84926675640598, -122.51677290610729)","(37.85281334427072, -122.51677290610729)","(37.85635976153246, -122.51677290610729)","(37.859906008185824, -122.51677290610729)","(37.863452084225436, -122.51677290610729)","(37.86699798964593, -122.51677290610729)","(37.87054372444192, -122.51677290610729)","(37.87408928860804, -122.51677290610729)","(37.87763468213893, -122.51677290610729)","(37.88117990502924, -122.51677290610729)","(37.88826983886661, -122.51677290610729)","(37.89181454980299, -122.51677290610729)","(37.89535909007733, -122.51677290610729)","(37.89890345968432, -122.51677290610729)","(37.902447658618605, -122.51677290610729)","(37.90599168687483, -122.51677290610729)","(37.90953554444767, -122.51677290610729)","(37.913079231331785, -122.51677290610729)","(37.91662274752184, -122.51677290610729)","(37.9201660930125, -122.51677290610729)","(37.92370926779843, -122.51677290610729)","(37.927252271874316, -122.51677290610729)","(37.93079510523484, -122.51677290610729)","(37.934337767874666, -122.51677290610729)","(37.93788025978849, -122.51677290610729)","(37.94142258097101, -122.51677290610729)",...,"(37.742789816872005, -122.19337940382427)","(37.74634152031077, -122.19337940382427)","(37.74989305330926, -122.19337940382427)","(37.75344441586203, -122.19337940382427)","(37.7569956079636, -122.19337940382427)","(37.760546629608505, -122.19337940382427)","(37.764097480791314, -122.19337940382427)","(37.767648161506536, -122.19337940382427)","(37.771198671748756, -122.19337940382427)","(37.7747490115125, -122.19337940382427)","(37.778299180792345, -122.19337940382427)","(37.781849179582814, -122.19337940382427)","(37.785399007878496, -122.19337940382427)","(37.788948665673935, -122.19337940382427)","(37.79249815296373, -122.19337940382427)","(37.7960474697424, -122.19337940382427)","(37.79959661600455, -122.19337940382427)","(37.80314559174475, -122.19337940382427)","(37.80669439695757, -122.19337940382427)","(37.810243031637604, -122.19337940382427)","(37.813791495779405, -122.19337940382427)","(37.817339789377584, -122.19337940382427)","(37.82088791242671, -122.19337940382427)","(37.824435864921405, -122.19337940382427)","(37.82798364685624, -122.19337940382427)","(37.83153125822582, -122.19337940382427)","(37.83507869902475, -122.19337940382427)","(37.838625969247616, -122.19337940382427)","(37.84217306888904, -122.19337940382427)","(37.84571999794363, -122.19337940382427)","(37.84926675640598, -122.19337940382427)","(37.85281334427072, -122.19337940382427)","(37.85635976153246, -122.19337940382427)","(37.859906008185824, -122.19337940382427)","(37.863452084225436, -122.19337940382427)","(37.86699798964592, -122.19337940382427)","(37.87054372444192, -122.19337940382427)","(37.87408928860804, -122.19337940382427)","(37.87763468213893, -122.19337940382427)","(37.88117990502923, -122.19337940382427)","(37.884724957273576, -122.19337940382427)","(37.88826983886661, -122.19337940382427)","(37.89181454980297, -122.19337940382427)","(37.895359090077335, -122.19337940382427)","(37.89890345968432, -122.19337940382427)","(37.902447658618605, -122.19337940382427)","(37.90599168687483, -122.19337940382427)","(37.90953554444767, -122.19337940382427)","(37.913079231331785, -122.19337940382427)","(37.91662274752184, -122.19337940382427)","(37.92016609301249, -122.19337940382427)","(37.92370926779843, -122.19337940382427)","(37.927252271874316, -122.19337940382427)","(37.93079510523484, -122.19337940382427)","(37.934337767874666, -122.19337940382427)","(37.93788025978849, -122.19337940382427)","(37.941422580971, -122.19337940382427)","(37.944964731416896, -122.19337940382427)","(37.94850671112083, -122.19337940382427)","(37.952048520077525, -122.19337940382427)","(37.95559015828168, -122.19337940382427)","(37.959131625728, -122.19337940382427)","(37.96267292241117, -122.19337940382427)","(37.966214048325895, -122.19337940382427)","(37.96975500346692, -122.19337940382427)","(37.973295787828924, -122.19337940382427)","(37.976836401406615, -122.19337940382427)","(37.98037684419473, -122.19337940382427)","(37.983917116188, -122.19337940382427)","(37.98745721738111, -122.19337940382427)","(37.990997147768844, -122.19337940382427)","(37.99453690734586, -122.19337940382427)","(37.99807649610694, -122.19337940382427)","(38.00161591404681, -122.19337940382427)","(38.00515516116021, -122.19337940382427)","(38.008694237441865, -122.19337940382427)","(37.70370983155435, -122.18888782740368)","(37.70726340947444, -122.18888782740368)","(37.71081681701458, -122.18888782740368)","(37.71437005416927, -122.18888782740368)","(37.71792312093302, -122.18888782740368)","(37.721476017300354, -122.18888782740368)","(37.725028743265746, -122.18888782740368)","(37.72858129882374, -122.18888782740368)","(37.73213368396885, -122.18888782740368)","(37.73568589869557, -122.18888782740368)","(37.739237942998464, -122.18888782740368)","(37.742789816872005, -122.18888782740368)","(37.74634152031077, -122.18888782740368)","(37.74989305330927, -122.18888782740368)","(37.75344441586203, -122.18888782740368)","(37.7569956079636, -122.18888782740368)","(37.760546629608505, -122.18888782740368)","(37.764097480791314, -122.18888782740368)","(37.767648161506536, -122.18888782740368)","(37.771198671748756, -122.18888782740368)","(37.77474901151251, -122.18888782740368)","(37.778299180792345, -122.18888782740368)","(37.781849179582814, -122.18888782740368)","(37.785399007878496, -122.18888782740368)","(37.788948665673935, -122.18888782740368)","(37.79249815296373, -122.18888782740368)","(37.7960474697424, -122.18888782740368)","(37.79959661600455, -122.18888782740368)","(37.80314559174475, -122.18888782740368)","(37.80669439695758, -122.18888782740368)","(37.810243031637604, -122.18888782740368)","(37.813791495779405, -122.18888782740368)","(37.817339789377584, -122.18888782740368)","(37.82088791242671, -122.18888782740368)","(37.824435864921405, -122.18888782740368)","(37.82798364685624, -122.18888782740368)","(37.83153125822582, -122.18888782740368)","(37.83507869902475, -122.18888782740368)","(37.838625969247616, -122.18888782740368)","(37.84217306888904, -122.18888782740368)","(37.84571999794363, -122.18888782740368)","(37.84926675640598, -122.18888782740368)","(37.85281334427072, -122.18888782740368)","(37.85635976153246, -122.18888782740368)","(37.859906008185824, -122.18888782740368)","(37.863452084225436, -122.18888782740368)","(37.86699798964593, -122.18888782740368)","(37.87054372444192, -122.18888782740368)","(37.87408928860804, -122.18888782740368)","(37.87763468213893, -122.18888782740368)","(37.88117990502923, -122.18888782740368)","(37.884724957273576, -122.18888782740368)","(37.88826983886661, -122.18888782740368)","(37.89181454980297, -122.18888782740368)","(37.895359090077335, -122.18888782740368)","(37.89890345968432, -122.18888782740368)","(37.902447658618605, -122.18888782740368)","(37.90599168687483, -122.18888782740368)","(37.90953554444767, -122.18888782740368)","(37.913079231331785, -122.18888782740368)","(37.91662274752184, -122.18888782740368)","(37.92016609301249, -122.18888782740368)","(37.923709267798415, -122.18888782740368)","(37.927252271874316, -122.18888782740368)","(37.93079510523484, -122.18888782740368)","(37.934337767874666, -122.18888782740368)","(37.93788025978849, -122.18888782740368)","(37.941422580971, -122.18888782740368)","(37.944964731416896, -122.18888782740368)","(37.94850671112083, -122.18888782740368)","(37.952048520077525, -122.18888782740368)","(37.95559015828168, -122.18888782740368)","(37.959131625728, -122.18888782740368)","(37.96267292241117, -122.18888782740368)","(37.966214048325895, -122.18888782740368)","(37.96975500346692, -122.18888782740368)","(37.973295787828924, -122.18888782740368)","(37.976836401406615, -122.18888782740368)","(37.98037684419473, -122.18888782740368)","(37.983917116188, -122.18888782740368)","(37.98745721738111, -122.18888782740368)","(37.990997147768844, -122.18888782740368)","(37.99453690734586, -122.18888782740368)","(37.998076496106954, -122.18888782740368)","(38.00161591404681, -122.18888782740368)","(38.00515516116021, -122.18888782740368)","(38.008694237441865, -122.18888782740368)","(37.70370983155435, -122.18439625098308)","(37.70726340947446, -122.18439625098308)","(37.71081681701458, -122.18439625098308)","(37.71437005416927, -122.18439625098308)","(37.71792312093302, -122.18439625098308)","(37.721476017300354, -122.18439625098308)","(37.725028743265746, -122.18439625098308)","(37.72858129882374, -122.18439625098308)","(37.73213368396885, -122.18439625098308)","(37.73568589869557, -122.18439625098308)","(37.739237942998464, -122.18439625098308)","(37.742789816872005, -122.18439625098308)","(37.74634152031077, -122.18439625098308)","(37.74989305330926, -122.18439625098308)","(37.75344441586203, -122.18439625098308)","(37.7569956079636, -122.18439625098308)","(37.760546629608505, -122.18439625098308)","(37.764097480791314, -122.18439625098308)","(37.767648161506536, -122.18439625098308)","(37.771198671748756, -122.18439625098308)","(37.77474901151251, -122.18439625098308)","(37.778299180792345, -122.18439625098308)","(37.781849179582814, -122.18439625098308)","(37.785399007878496, -122.18439625098308)","(37.788948665673935, -122.18439625098308)","(37.79249815296373, -122.18439625098308)","(37.7960474697424, -122.18439625098308)","(37.79959661600455, -122.18439625098308)","(37.80314559174475, -122.18439625098308)","(37.80669439695758, -122.18439625098308)","(37.810243031637604, -122.18439625098308)","(37.813791495779405, -122.18439625098308)","(37.817339789377584, -122.18439625098308)","(37.82088791242671, -122.18439625098308)","(37.824435864921405, -122.18439625098308)","(37.82798364685624, -122.18439625098308)","(37.83153125822582, -122.18439625098308)","(37.83507869902475, -122.18439625098308)","(37.838625969247616, -122.18439625098308)","(37.84217306888904, -122.18439625098308)","(37.84571999794363, -122.18439625098308)","(37.84926675640598, -122.18439625098308)","(37.85281334427072, -122.18439625098308)","(37.85635976153246, -122.18439625098308)","(37.85990600818583, -122.18439625098308)","(37.863452084225436, -122.18439625098308)","(37.86699798964593, -122.18439625098308)","(37.87054372444192, -122.18439625098308)","(37.87408928860805, -122.18439625098308)","(37.87763468213893, -122.18439625098308)","(37.88117990502923, -122.18439625098308)","(37.884724957273576, -122.18439625098308)","(37.88826983886661, -122.18439625098308)","(37.89181454980297, -122.18439625098308)","(37.895359090077335, -122.18439625098308)","(37.89890345968432, -122.18439625098308)","(37.902447658618605, -122.18439625098308)","(37.90599168687483, -122.18439625098308)","(37.90953554444767, -122.18439625098308)","(37.913079231331785, -122.18439625098308)","(37.91662274752184, -122.18439625098308)","(37.9201660930125, -122.18439625098308)","(37.92370926779842, -122.18439625098308)","(37.927252271874316, -122.18439625098308)","(37.93079510523484, -122.18439625098308)","(37.934337767874666, -122.18439625098308)","(37.9378802597885, -122.18439625098308)","(37.941422580971, -122.18439625098308)","(37.944964731416896, -122.18439625098308)","(37.94850671112083, -122.18439625098308)","(37.952048520077525, -122.18439625098308)","(37.95559015828169, -122.18439625098308)","(37.959131625728006, -122.18439625098308)","(37.96267292241117, -122.18439625098308)","(37.966214048325895, -122.18439625098308)","(37.96975500346692, -122.18439625098308)","(37.973295787828924, -122.18439625098308)","(37.976836401406615, -122.18439625098308)","(37.98037684419473, -122.18439625098308)","(37.983917116188, -122.18439625098308)","(37.98745721738111, -122.18439625098308)","(37.990997147768844, -122.18439625098308)","(37.99453690734587, -122.18439625098308)","(37.998076496106954, -122.18439625098308)","(38.00161591404681, -122.18439625098308)","(38.00515516116021, -122.18439625098308)","(38.008694237441865, -122.18439625098308)"
0,10,16939,37.72244,-122.439302,"(37.72244, -122.439302)",14.101673,14.419591,14.741157,15.066136,15.39431,15.725477,16.059452,16.396061,16.735143,17.076551,17.420146,17.7658,18.113395,18.462818,18.813968,19.166747,19.521068,19.876845,20.234002,20.592465,20.952166,21.313042,21.675032,22.038082,22.402137,22.76715,23.133074,23.499865,23.867483,24.235889,24.605046,24.974921,25.345481,25.716696,26.088536,26.460976,26.833988,27.207549,27.581635,27.956225,28.331298,28.706833,29.082814,29.459221,29.836037,30.213248,30.590837,30.968789,31.347092,31.725732,32.104695,32.483971,32.863548,13.234535,13.549952,13.869363,14.192497,14.519105,14.848954,15.181834,15.517547,15.855911,16.19676,16.539938,16.885303,17.232721,17.582069,17.933233,18.286108,18.640595,18.996603,19.354047,19.712847,20.072929,20.434226,20.796671,21.160206,21.524775,21.890323,22.256803,22.624168,22.992374,23.361381,23.731151,24.101647,24.472836,24.844685,25.217165,25.590247,25.963905,26.338113,26.712846,27.088084,27.463803,27.839984,28.216607,28.593655,28.971109,29.348954,29.727174,30.105753,30.484679,30.863937,31.243514,31.623399,32.003579,32.384044,32.764783,13.644577,13.972921,14.304553,14.639248,14.976799,15.317016,15.659723,16.00476,16.351978,16.701238,17.052415,17.405391,17.760058,18.116314,18.474068,18.833232,19.193725,19.555475,19.91841,20.282466,20.647583,21.013705,21.380778,21.748754,22.117586,22.487232,22.857651,23.228804,23.600658,23.973177,24.34633,24.720089,25.094424,25.46931,25.844722,26.220636,26.597031,26.973885,27.351178,27.728892,28.10701,28.485513,28.864387,29.243616,29.623186,30.003082,30.383293,30.763806,31.144608,31.525689,31.907038,32.288646,32.670501,13.761222,14.097845,14.437342,14.779515,15.124179,15.471168,15.820327,16.171513,16.524597,16.879458,17.235984,17.594074,17.953632,18.314572,18.67681,19.040274,19.404891,20.137333,20.505042,20.87367,21.24317,21.613495,21.984603,22.356454,22.729011,23.102237,23.476101,23.850572,24.22562,24.601218,24.977341,25.353963,25.731064,26.10862,26.486612,26.86502,27.243826,27.623013,28.002565,28.382467,28.762703,29.14326,29.524124,29.905284,30.286727,30.668442,31.050419,31.432647,31.815116,32.197816,32.580741,9.222423,13.557769,13.89933,14.243569,14.590296,14.939335,15.290528,15.643728,15.998799,16.35562,16.714077,17.074065,17.435488,17.798258,18.162292,18.527516,18.893858,19.629644,19.998971,20.369184,20.740234,21.112076,21.484667,21.857968,22.231943,22.606558,22.981779,23.357577,23.733923,24.110791,24.488156,24.865994,25.244283,...,21.795518,21.839376,21.890243,21.94807,22.012801,22.084375,22.162724,22.247776,22.339452,22.437672,22.542347,22.653389,22.770702,22.89419,23.023752,23.159285,23.300684,23.447842,23.600651,23.758999,23.922778,24.091874,24.266175,24.44557,24.629947,24.819192,25.013196,25.211847,25.415035,25.622652,25.83459,26.050742,26.271004,26.495273,26.723446,26.955425,27.191109,27.430404,27.673215,27.919449,28.169016,28.421827,28.677795,28.936836,29.198868,29.46381,29.731583,30.002112,30.27532,30.551136,30.829489,31.11031,31.393532,31.679091,31.966922,32.256964,32.549158,32.843445,33.13977,33.438076,33.738311,34.040424,34.344363,34.650081,34.95753,35.266664,35.577439,35.889811,36.203738,36.51918,36.836097,37.154451,37.474205,37.795322,38.117768,38.441508,22.177554,22.143539,22.116507,22.096484,22.083487,22.077528,22.078611,22.086734,22.10189,22.124061,22.153227,22.189358,22.23242,22.282372,22.339165,22.402748,22.473062,22.550042,22.63362,22.723721,22.820269,22.923179,23.032367,23.147742,23.269211,23.396679,23.530046,23.669213,23.814077,23.964533,24.120476,24.281799,24.448395,24.620156,24.796973,24.978739,25.165346,25.356685,25.552649,25.753133,25.95803,26.167235,26.380647,26.598161,26.819679,27.0451,27.274328,27.507265,27.743818,27.983895,28.227404,28.474258,28.724368,28.97765,29.23402,29.493398,29.755703,30.020859,30.28879,30.559422,30.832683,31.108504,31.386815,31.667552,31.950648,32.236042,32.523672,32.813478,33.105403,33.399391,33.695387,33.993337,34.293191,34.594898,34.89841,35.203678,35.510658,35.819304,36.129573,36.441424,36.754815,37.069706,37.38606,37.703839,38.023006,38.343526,38.665365,22.571879,22.53844,22.511864,22.492174,22.479387,22.473514,22.474559,22.482521,22.497391,22.519154,22.54779,22.583272,22.625565,22.674632,22.730427,22.7929,22.861995,22.937652,23.019804,23.108382,23.203311,23.304512,23.411903,23.525399,23.64491,23.770345,23.90161,24.038609,24.181242,24.32941,24.483012,24.641945,24.806105,24.975389,25.149693,25.328911,25.51294,25.701675,25.895012,26.092849,26.295083,26.501612,26.712337,26.927157,27.145974,27.368693,27.595217,27.825453,28.059309,28.296694,28.53752,28.781699,29.029145,29.279777,29.53351,29.790266,30.049967,30.312535,30.577896,30.845978,31.116709,31.390021,31.665845,31.944116,32.22477,32.507744,32.792978,33.080412,33.36999,33.661655,33.955352,34.251029,34.548634,34.848117,35.14943,35.452525,35.757357,36.06388,36.372052,36.681829,36.993172,37.306041,37.620396,37.936201,38.253418,38.572013,38.89195
1,12,16919,37.722456,-122.43939,"(37.722456, -122.43939)",14.095625,14.413628,14.735277,15.060337,15.388589,15.719833,16.053881,16.390562,16.729714,17.07119,17.414851,17.760569,18.108225,18.457709,18.808917,19.161753,19.516129,19.87196,20.229168,20.587681,20.947432,21.308355,21.670392,22.033486,22.397585,22.762641,23.128606,23.495437,23.863094,24.231538,24.600732,24.970643,25.341238,25.712487,26.084361,26.456833,26.829877,27.203469,27.577585,27.952204,28.327306,28.702869,29.078877,29.455311,29.832154,30.209389,30.587003,30.96498,31.343307,31.72197,32.100956,32.480254,32.859853,13.228428,13.543936,13.863435,14.186655,14.513346,14.843277,15.176235,15.512024,15.850463,16.191383,16.534631,16.880063,17.227546,17.576957,17.928183,18.281117,18.635662,18.991726,19.349223,19.708076,20.068209,20.429555,20.792049,21.155631,21.520244,21.885836,22.252359,22.619765,22.988012,23.357058,23.726865,24.097399,24.468623,24.840508,25.213022,25.586138,25.959828,26.334067,26.708831,27.084098,27.459847,27.836056,28.212707,28.589782,28.967263,29.345134,29.723379,30.101983,30.480933,30.860214,31.239814,31.619721,31.999924,32.38041,32.76117,13.638776,13.967207,14.298923,14.633699,14.971328,15.311621,15.654403,15.999511,16.346797,16.696124,17.047366,17.400404,17.755131,18.111446,18.469256,18.828475,19.189022,19.550822,19.913808,20.277912,20.643076,21.009243,21.37636,21.744379,22.113253,22.482939,22.853397,23.224588,23.596479,23.969034,24.342222,24.716015,25.090383,25.465301,25.840745,26.216689,26.593114,26.969996,27.347318,27.72506,28.103204,28.481734,28.860633,29.239887,29.619481,29.999401,30.379635,30.76017,31.140995,31.522098,31.903468,32.285096,32.666971,13.755643,14.092349,14.431928,14.774179,15.118919,15.465981,15.81521,16.166465,16.519614,16.874538,17.231126,17.589275,17.948891,18.309886,18.672178,19.035693,19.400361,20.132899,20.500653,20.869325,21.238868,21.609235,21.980383,22.352273,22.724867,23.098131,23.472031,23.846536,24.221618,24.597249,24.973403,25.350057,25.727188,26.104773,26.482794,26.861229,27.240063,27.619277,27.998855,28.378781,28.759042,29.139623,29.520511,29.901693,30.283159,30.664896,31.046893,31.429142,31.811631,32.194352,32.577295,9.215488,13.552332,13.893977,14.238296,14.585101,14.934215,15.28548,15.638749,15.993888,16.350773,16.709292,17.06934,17.430821,17.793647,18.157736,18.523012,18.889405,19.625286,19.994659,20.364916,20.736009,21.107892,21.480523,21.853864,22.227877,22.602527,22.977784,23.353616,23.729996,24.106897,24.484293,24.862162,25.240481,...,21.803047,21.846857,21.897675,21.95545,22.020128,22.091646,22.169937,22.254929,22.346545,22.444702,22.549313,22.66029,22.777536,22.900956,23.030449,23.165913,23.307241,23.454328,23.607065,23.765341,23.929046,24.098069,24.272297,24.451618,24.63592,24.825092,25.019022,25.217598,25.420713,25.628256,25.840121,26.0562,26.27639,26.500586,26.728688,26.960594,27.196208,27.435433,27.678174,27.924339,28.173837,28.426581,28.682482,28.941457,29.203423,29.4683,29.736009,30.006474,30.27962,30.555374,30.833665,31.114426,31.397589,31.683088,31.970861,32.260846,32.552984,32.847215,33.143485,33.441737,33.741918,34.043978,34.347866,34.653532,34.960931,35.270015,35.58074,35.893064,36.206944,36.522339,36.83921,37.157518,37.477227,37.798299,38.120701,38.444398,22.185444,22.151408,22.124354,22.104306,22.091281,22.085292,22.086343,22.094432,22.10955,22.131681,22.160805,22.196892,22.239908,22.289811,22.346554,22.410084,22.480343,22.557267,22.640787,22.730829,22.827315,22.930164,23.039287,23.154597,23.276,23.403401,23.5367,23.675798,23.820592,23.970977,24.126849,24.288101,24.454625,24.626314,24.803059,24.984752,25.171286,25.362552,25.558444,25.758855,25.963679,26.172813,26.386152,26.603596,26.825042,27.050392,27.279549,27.512417,27.748901,27.988909,28.23235,28.479135,28.729178,28.982394,29.238698,29.498011,29.760252,30.025344,30.293212,30.563782,30.836982,31.112741,31.390993,31.67167,31.954708,32.240043,32.527616,32.817366,33.109236,33.403169,33.69911,33.997008,34.296809,34.598464,34.901924,35.207142,35.514072,35.822669,36.13289,36.444692,36.758036,37.072882,37.38919,37.706923,38.026046,38.346522,38.668318,22.579767,22.546308,22.519709,22.499995,22.487181,22.481278,22.482292,22.490219,22.505053,22.526777,22.555372,22.59081,22.633058,22.682078,22.737823,22.800245,22.869287,22.944888,23.026984,23.115504,23.210373,23.311513,23.418842,23.532274,23.651721,23.777091,23.908289,24.04522,24.187785,24.335884,24.489416,24.648279,24.812369,24.981582,25.155815,25.334962,25.518919,25.707583,25.900849,26.098615,26.300777,26.507235,26.717889,26.932638,27.151386,27.374034,27.600489,27.830656,28.064443,28.301761,28.542519,28.786631,29.034011,29.284576,29.538245,29.794936,30.054573,30.317078,30.582377,30.850397,31.121067,31.394318,31.670082,31.948294,32.22889,32.511806,32.796983,33.084362,33.373884,33.665494,33.959137,34.254761,34.552313,34.851745,35.153006,35.456051,35.760833,36.067307,36.37543,36.68516,36.996456,37.309278,37.623587,37.939347,38.256519,38.57507,38.894964
2,14,16931,37.722417,-122.439245,"(37.722417, -122.439245)",14.106717,14.424594,14.746119,15.071058,15.399192,15.73032,16.064256,16.400828,16.739874,17.081246,17.424806,17.770426,18.117987,18.467377,18.818495,19.171244,19.525534,19.881282,20.23841,20.596845,20.956519,21.317368,21.679332,22.042356,22.406387,22.771376,23.137276,23.504045,23.87164,24.240024,24.60916,24.979014,25.349554,25.720749,26.09257,26.46499,26.837984,27.211527,27.585595,27.960168,28.335224,28.710743,29.086707,29.463098,29.8399,30.217095,30.594669,30.972608,31.350896,31.729522,32.108472,32.487735,32.867298,13.239609,13.554981,13.874349,14.19744,14.524006,14.853814,15.186653,15.522326,15.860652,16.201463,16.544605,16.889933,17.237316,17.586629,17.937761,18.290603,18.645058,19.001035,19.358449,19.71722,20.077274,20.438542,20.800961,21.16447,21.529012,21.894536,22.260991,22.628332,22.996516,23.3655,23.735248,24.105723,24.47689,24.84872,25.22118,25.594243,25.967881,26.34207,26.716786,27.092006,27.467708,27.843872,28.220479,28.597511,28.974949,29.352779,29.730984,30.109549,30.48846,30.867703,31.247267,31.627138,32.007306,32.387758,32.768484,13.649499,13.977799,14.309388,14.644041,14.981551,15.321728,15.664397,16.009396,16.356576,16.7058,17.056942,17.409884,17.764517,18.120741,18.478463,18.837597,19.198061,19.559781,19.922688,20.286717,20.651808,21.017903,21.384951,21.752903,22.121711,22.491334,22.86173,23.232862,23.604693,23.977191,24.350325,24.724063,25.098379,25.473247,25.84864,26.224536,26.600913,26.97775,27.355027,27.732725,28.110826,28.489314,28.868173,29.247387,29.626942,30.006824,30.387021,30.76752,31.148309,31.529378,31.910714,32.292309,32.674152,13.766031,14.10261,14.442065,14.784196,15.128821,15.47577,15.824891,16.17604,16.529088,16.883913,17.240406,17.598463,17.957989,18.318897,18.681106,19.04454,19.409129,20.141517,20.509199,20.877802,21.247277,21.617578,21.988663,22.360491,22.733025,23.106231,23.480074,23.854524,24.229552,24.605131,24.981235,25.357839,25.734922,26.11246,26.490435,26.868827,27.247617,27.626788,28.006325,28.386211,28.766433,29.146975,29.527826,29.908972,30.290402,30.672104,31.054068,31.436283,31.818739,32.201428,32.584341,9.227859,13.562503,13.90402,14.248216,14.594902,14.943901,15.295054,15.648216,16.003251,16.360036,16.718458,17.078412,17.439803,17.802541,18.166545,18.531739,18.898052,19.633783,20.003084,20.373271,20.744296,21.116114,21.488682,21.861961,22.235914,22.610506,22.985707,23.361484,23.737811,24.11466,24.492006,24.869826,25.248098,...,21.790789,21.834702,21.885626,21.943512,22.008302,22.079937,22.158347,22.243461,22.3352,22.433483,22.538223,22.64933,22.766709,22.890262,23.01989,23.155489,23.296955,23.444179,23.597054,23.75547,23.919314,24.088476,24.262844,24.442305,24.626747,24.816058,25.010126,25.208841,25.412093,25.619773,25.831774,26.047988,26.268312,26.492642,26.720875,26.952913,27.188657,27.42801,27.670878,27.917169,28.166792,28.419658,28.675681,28.934776,29.196861,29.461855,29.72968,30.000259,30.273517,30.549383,30.827784,31.108653,31.391922,31.677527,31.965404,32.255491,32.54773,32.84206,33.138427,33.436776,33.737053,34.039206,34.343186,34.648943,34.956431,35.265603,35.576415,35.888824,36.202788,36.518266,36.835218,37.153607,37.473395,37.794546,38.117024,38.440797,22.172315,22.138337,22.111345,22.091363,22.078408,22.072494,22.073623,22.081794,22.096999,22.119221,22.148439,22.184623,22.22774,22.277747,22.334598,22.398239,22.468611,22.545652,22.62929,22.719454,22.816064,22.919037,23.028289,23.143728,23.265261,23.392793,23.526226,23.665458,23.810387,23.960908,24.116916,24.278305,24.444966,24.616792,24.793674,24.975505,25.162175,25.353578,25.549606,25.750152,25.955112,26.164379,26.377852,26.595428,26.817006,27.042487,27.271773,27.504769,27.74138,27.981513,28.225079,28.471988,28.722153,28.975489,29.231913,29.491343,29.753701,30.018908,30.28689,30.557571,30.830882,31.106751,31.38511,31.665893,31.949036,32.234475,32.52215,32.812001,33.103969,33.398,33.694038,33.99203,34.291925,34.593672,34.897223,35.20253,35.509548,35.818232,36.128539,36.440426,36.753853,37.06878,37.385168,37.702981,38.022182,38.342735,38.664607,22.566643,22.533241,22.506704,22.487054,22.474309,22.46848,22.469571,22.477579,22.492497,22.514311,22.542998,22.578531,22.620878,22.67,22.725851,22.788381,22.857534,22.933249,23.015461,23.104099,23.199089,23.300352,23.407806,23.521364,23.640939,23.766438,23.897766,24.034829,24.177526,24.325758,24.479424,24.638421,24.802646,24.971994,25.146361,25.325643,25.509735,25.698533,25.891933,26.089832,26.292127,26.498718,26.709503,26.924383,27.14326,27.366037,27.59262,27.822914,28.056827,28.294269,28.53515,28.779384,29.026886,29.277571,29.531357,29.788166,30.047918,30.310537,30.575949,30.84408,31.114861,31.38822,31.664092,31.94241,32.22311,32.50613,32.791408,33.078887,33.368508,33.660215,33.953955,34.249673,34.547319,34.846843,35.148195,35.45133,35.7562,36.062761,36.370969,36.680784,36.992163,37.305067,37.619457,37.935296,38.252547,38.571175,38.891146
3,16,19173,37.722383,-122.439227,"(37.722383, -122.439227)",14.110693,14.428586,14.750124,15.075075,15.40322,15.734358,16.068303,16.404881,16.743934,17.085312,17.428877,17.774501,18.122066,18.47146,18.822581,19.175331,19.529623,19.885373,20.242502,20.600938,20.960612,21.321461,21.683426,22.04645,22.410481,22.775469,23.141369,23.508136,23.875731,24.244114,24.613249,24.983101,25.35364,25.724834,26.096653,26.469072,26.842065,27.215606,27.589673,27.964244,28.339298,28.714816,29.090778,29.467168,29.843967,30.221161,30.598733,30.97667,31.354957,31.733581,32.112529,32.49179,32.871352,13.243573,13.558963,13.878346,14.201451,14.528029,14.857848,15.190696,15.526378,15.864711,16.205528,16.548675,16.894008,17.241395,17.590712,17.941846,18.294691,18.649148,19.005126,19.362541,19.721313,20.081367,20.442636,20.805055,21.168563,21.533105,21.898629,22.265083,22.632423,23.000606,23.369589,23.739336,24.109809,24.480976,24.852803,25.225262,25.598323,25.97196,26.346148,26.720862,27.09608,27.47178,27.847942,28.224548,28.601577,28.979014,29.356842,29.735045,30.113608,30.492517,30.871759,31.251321,31.631191,32.011356,32.391806,32.772531,13.653516,13.981828,14.313428,14.64809,14.985608,15.325792,15.668466,16.01347,16.360655,16.709883,17.061027,17.413972,17.768607,18.124833,18.482556,18.84169,19.202155,19.563875,19.926782,20.290811,20.655901,21.021996,21.389043,21.756993,22.1258,22.495422,22.865817,23.236947,23.608777,23.981274,24.354405,24.728142,25.102457,25.477322,25.852714,26.228608,26.604983,26.981818,27.359093,27.736789,28.114888,28.493374,28.872231,29.251443,29.630996,30.010877,30.391072,30.771569,31.152356,31.533423,31.914757,32.29635,32.678191,13.770076,14.106664,14.446127,14.788264,15.132894,15.479848,15.828973,16.180125,16.533176,16.888003,17.244497,17.602555,17.962082,18.322991,18.6852,19.048633,19.413222,20.145608,20.51329,20.881892,21.251365,21.621665,21.992748,22.364575,22.737108,23.110311,23.484152,23.858601,24.233627,24.609204,24.985306,25.361908,25.738989,26.116526,26.494498,26.872888,27.251676,27.630845,28.01038,28.390264,28.770484,29.151024,29.531873,29.913017,30.294445,30.676145,31.058107,31.44032,31.822774,32.205462,32.588372,9.231571,13.566563,13.908087,14.252289,14.598979,14.947983,15.29914,15.652304,16.007341,16.364127,16.72255,17.082505,17.443896,17.806635,18.170638,18.535832,18.902144,19.637873,20.007173,20.377359,20.748382,21.120198,21.492764,21.866041,22.239992,22.614583,22.989781,23.365557,23.741881,24.118728,24.496072,24.87389,25.252159,...,21.789607,21.833591,21.884586,21.942542,22.007403,22.079108,22.157588,22.242772,22.334581,22.432933,22.537742,22.648916,22.766363,22.889983,23.019677,23.155342,23.296872,23.444161,23.597099,23.755577,23.919483,24.088706,24.263133,24.442653,24.627152,24.81652,25.010644,25.209414,25.41272,25.620453,25.832506,26.048771,26.269145,26.493524,26.721805,26.95389,27.18968,27.429079,27.671991,27.918326,28.167991,28.420899,28.676962,28.936098,29.198222,29.463254,29.731116,30.001732,30.275026,30.550926,30.829362,31.110264,31.393566,31.679203,31.967111,32.257229,32.549498,32.843858,33.140254,33.43863,33.738935,34.041115,34.345121,34.650904,34.958417,35.267614,35.57845,35.890883,36.20487,36.52037,36.837345,37.155756,37.475565,37.796736,38.119235,38.443028,22.170387,22.136473,22.109546,22.08963,22.076742,22.070894,22.072091,22.08033,22.095603,22.117894,22.147181,22.183435,22.226621,22.276697,22.333617,22.397328,22.467769,22.544879,22.628586,22.718818,22.815496,22.918537,23.027855,23.14336,23.26496,23.392557,23.526055,23.66535,23.810342,23.960926,24.116996,24.278445,24.445166,24.617051,24.793992,24.97588,25.162606,25.354065,25.550147,25.750747,25.955759,26.165078,26.378602,26.596227,26.817854,27.043383,27.272716,27.505758,27.742414,27.982592,28.226201,28.473152,28.723359,28.976736,29.233199,29.492669,29.755065,30.02031,30.288328,30.559045,30.832391,31.108294,31.386687,31.667503,31.950678,32.236149,32.523855,32.813736,33.105734,33.399794,33.69586,33.99388,34.293801,34.595575,34.899152,35.204485,35.511528,35.820236,36.130566,36.442477,36.755927,37.070876,37.387287,37.705121,38.024343,38.344917,38.666809,22.564721,22.531382,22.504909,22.485324,22.472644,22.466881,22.468038,22.476113,22.491099,22.512979,22.541734,22.577336,22.619751,22.668941,22.72486,22.787458,22.856679,22.932462,23.014741,23.103447,23.198504,23.299833,23.407353,23.520977,23.640616,23.766179,23.897572,24.034697,24.177456,24.32575,24.479477,24.638534,24.802818,24.972224,25.146649,25.325988,25.510136,25.698988,25.892443,26.090395,26.292742,26.499384,26.710219,26.925149,27.144075,27.3669,27.593529,27.823869,28.057827,28.295313,28.536238,28.780515,29.028057,29.278783,29.53261,29.789457,30.049248,30.311904,30.577353,30.84552,31.116336,31.38973,31.665636,31.943987,32.224719,32.50777,32.79308,33.080589,33.37024,33.661976,33.955744,34.251491,34.549164,34.848714,35.150093,35.453253,35.758148,36.064734,36.372967,36.682805,36.994207,37.307133,37.621546,37.937406,38.254679,38.573328,38.893319
4,18,19199,37.722405,-122.439139,"(37.722405, -122.439139)",14.113358,14.431125,14.752543,15.077377,15.405411,15.736441,16.070283,16.406762,16.745719,17.087005,17.430481,17.77602,18.123502,18.472817,18.823861,19.176538,19.530759,19.88644,20.243502,20.601874,20.961486,21.322275,21.684182,22.04715,22.411126,22.776062,23.14191,23.508629,23.876175,24.244512,24.613601,24.983411,25.353906,25.725059,26.096839,26.469219,26.842173,27.215677,27.589708,27.964245,28.339265,28.714749,29.09068,29.467038,29.843807,30.22097,30.598514,30.976422,31.354681,31.733278,32.1122,32.491435,32.870972,13.246326,13.561581,13.880834,14.203815,14.530273,14.859977,15.192716,15.528291,15.866523,16.207243,16.550296,16.895538,17.242838,17.592072,17.943126,18.295893,18.650276,19.006183,19.363528,19.722233,20.082223,20.443429,20.805788,21.169238,21.533724,21.899193,22.265595,22.632885,23.001018,23.369953,23.739654,24.110082,24.481205,24.852991,25.225408,25.59843,25.972028,26.346178,26.720856,27.096038,27.471704,27.847833,28.224406,28.601404,28.97881,29.356607,29.734781,30.113315,30.492197,30.871411,31.250947,31.63079,32.01093,32.391356,32.772056,13.655821,13.98401,14.315492,14.650041,14.98745,15.32753,15.670105,16.015014,16.362106,16.711247,17.062307,17.41517,17.769727,18.125877,18.483528,18.842593,19.20299,19.564646,19.927491,20.291459,20.656491,21.02253,21.389523,21.757421,22.126177,22.49575,22.866097,23.237181,23.608967,23.981421,24.35451,24.728207,25.102482,25.477309,25.852664,26.228522,26.604862,26.981663,27.358905,27.736569,28.114637,28.493092,28.871919,29.251103,29.630628,30.010481,30.390649,30.77112,31.151882,31.532924,31.914234,32.295803,32.677622,13.772069,14.108541,14.447892,14.789924,15.134452,15.481309,15.830341,16.181404,16.534369,16.889114,17.24553,17.603512,17.962966,18.323805,18.685946,19.049315,19.413841,20.146109,20.513734,20.882282,21.251704,21.621954,21.992988,22.364768,22.737255,23.110414,23.484213,23.85862,24.233606,24.609144,24.985209,25.361775,25.73882,26.116322,26.494261,26.872618,27.251375,27.630514,28.010018,28.389873,28.770065,29.150578,29.531399,29.912517,30.29392,30.675595,31.057533,31.439722,31.822154,32.204818,32.587707,9.235672,13.568359,13.90977,14.253864,14.600451,14.949355,15.300418,15.653491,16.008442,16.365146,16.723489,17.083368,17.444686,17.807354,18.17129,18.536419,18.902669,19.63828,20.007524,20.377656,20.748628,21.120394,21.492912,21.866142,22.240048,22.614596,22.989752,23.365487,23.741772,24.11858,24.495887,24.873669,25.251904,...,21.781635,21.825591,21.876561,21.934495,21.999337,22.071024,22.14949,22.234662,22.326462,22.424807,22.529611,22.640783,22.75823,22.881852,23.011551,23.147222,23.288761,23.43606,23.58901,23.747501,23.911423,24.080663,24.255108,24.434648,24.619169,24.808559,25.002707,25.201502,25.404834,25.612593,25.824673,26.040968,26.26137,26.485779,26.714091,26.946207,27.182029,27.421459,27.664404,27.910771,28.16047,28.413411,28.669508,28.928677,29.190835,29.455902,29.723798,29.994448,30.267777,30.543711,30.822181,31.103118,31.386454,31.672125,31.960067,32.250219,32.542521,32.836915,33.133344,33.431754,33.732091,34.034304,34.338343,34.644158,34.951703,35.260932,35.5718,35.884264,36.198282,36.513814,36.830819,37.14926,37.469099,37.7903,38.112829,38.43665,22.162887,22.128919,22.101939,22.081972,22.069037,22.063144,22.064298,22.072497,22.087732,22.109988,22.139242,22.175466,22.218624,22.268676,22.325574,22.389264,22.459689,22.536783,22.620478,22.7107,22.807371,22.910407,23.019722,23.135227,23.256828,23.384429,23.517932,23.657235,23.802237,23.952831,24.108914,24.270378,24.437115,24.609017,24.785977,24.967885,25.154633,25.346114,25.54222,25.742845,25.947882,26.157228,26.370779,26.588433,26.810088,27.035647,27.26501,27.498083,27.73477,27.974979,28.21862,28.465604,28.715843,28.969253,29.22575,29.485253,29.747682,30.01296,30.281012,30.551763,30.825142,31.101079,31.379506,31.660356,31.943564,32.229069,32.516808,32.806722,33.098753,33.392846,33.688945,33.986997,34.286951,34.588757,34.892366,35.197731,35.504805,35.813545,36.123907,36.435848,36.749329,37.064308,37.380749,37.698613,38.017865,38.338468,38.660389,22.557215,22.523823,22.497299,22.477665,22.464938,22.45913,22.460245,22.468281,22.483229,22.505075,22.533798,22.56937,22.611758,22.660922,22.716819,22.779397,22.848601,22.924369,23.006636,23.095332,23.19038,23.291704,23.39922,23.512843,23.632483,23.758049,23.889446,24.026578,24.169347,24.317651,24.47139,24.63046,24.794759,24.964182,25.138624,25.317982,25.50215,25.691025,25.884502,26.082478,26.28485,26.491517,26.702379,26.917336,27.13629,27.359143,27.585802,27.816172,28.05016,28.287676,28.528632,28.77294,29.020515,29.271273,29.525132,29.782012,30.041835,30.304524,30.570006,30.838206,31.109055,31.382482,31.658421,31.936805,32.21757,32.500655,32.785997,33.073539,33.363223,33.654992,33.948792,34.244571,34.542276,34.841858,35.143269,35.44646,35.751386,36.058003,36.366267,36.676136,36.987568,37.300525,37.614967,37.930858,38.24816,38.566838,38.886858


In [20]:
# grab readings from k-nearest sensors, average them, and add them to dataframe

start = time.time()

k = 3 # take the average of the closest k neighbors
nfeatures = 24 # number of neighbor features
ndict = {}
for x in range(nfeatures):
    ndict['neighbor_'+str(x)] = []

point_in_time = X_data_df.created[0] # for now, just do a single point in time. could convert to for loop thru times

humidity = []
temperature = []

counter = 0

for grid_coord in distances_df.columns[5:]:
    temp_df = distances_df[['sensor_id', grid_coord]]
    temp_df.sort_values(by=[grid_coord], axis=0, ascending=True, inplace=True)
    kNN_IDs = list(temp_df.sensor_id[0:k])
    avg_humid = 0
    valid_humid = 0
    avg_temp = 0
    valid_temp = 0
    for neighbor in kNN_IDs:
        try:
            avg_humid += real_sensor_df[(real_sensor_df.created == point_in_time) & (real_sensor_df.sensor_id == neighbor)].humidity.iloc[0]
            valid_humid += 1
        except:
            pass
    
        try:
            avg_temp += real_sensor_df[(real_sensor_df.created == point_in_time) & (real_sensor_df.sensor_id == neighbor)].temperature.iloc[0]
            valid_temp += 1
        except:
            pass
        
    if valid_temp:
        temperature.append(avg_temp / valid_temp)
    else:
        temperature.append(np.nan)

    if valid_humid:
        humidity.append(avg_humid / valid_humid)
    else:
        humidity.append(np.nan)

    #neighbor_features = list(temp_df.iloc[0:nfeatures].sensor_id) ############# should it be .iloc[0:nfeatures]??
    #n_count = 0

    #for neighbor in neighbor_features:
    #    try:
    #        ndict['neighbor_'+str(n_count)].append(real_sensor_df[(real_sensor_df.created == point_in_time) & (real_sensor_df.sensor_id == neighbor)]['2_5um'].iloc[0]) 
    #    except:
    #        ndict['neighbor_'+str(n_count)].append(np.nan)
    #    n_count += 1
    
    counter += 1
    print("Grid num", counter)
    
end = time.time()
print("Time", end - start)
print("Done.")

   

Grid num 1
Grid num 2
Grid num 3
Grid num 4
Grid num 5
Grid num 6
Grid num 7
Grid num 8
Grid num 9
Grid num 10
Grid num 11
Grid num 12
Grid num 13
Grid num 14
Grid num 15
Grid num 16
Grid num 17
Grid num 18
Grid num 19
Grid num 20
Grid num 21
Grid num 22
Grid num 23
Grid num 24
Grid num 25
Grid num 26
Grid num 27
Grid num 28
Grid num 29
Grid num 30
Grid num 31
Grid num 32
Grid num 33
Grid num 34
Grid num 35
Grid num 36
Grid num 37
Grid num 38
Grid num 39
Grid num 40
Grid num 41
Grid num 42
Grid num 43
Grid num 44
Grid num 45
Grid num 46
Grid num 47
Grid num 48
Grid num 49
Grid num 50
Grid num 51
Grid num 52
Grid num 53
Grid num 54
Grid num 55
Grid num 56
Grid num 57
Grid num 58
Grid num 59
Grid num 60
Grid num 61
Grid num 62
Grid num 63
Grid num 64
Grid num 65
Grid num 66
Grid num 67
Grid num 68
Grid num 69
Grid num 70
Grid num 71
Grid num 72
Grid num 73
Grid num 74
Grid num 75
Grid num 76
Grid num 77
Grid num 78
Grid num 79
Grid num 80
Grid num 81
Grid num 82
Grid num 83
Grid num 84
G

Grid num 641
Grid num 642
Grid num 643
Grid num 644
Grid num 645
Grid num 646
Grid num 647
Grid num 648
Grid num 649
Grid num 650
Grid num 651
Grid num 652
Grid num 653
Grid num 654
Grid num 655
Grid num 656
Grid num 657
Grid num 658
Grid num 659
Grid num 660
Grid num 661
Grid num 662
Grid num 663
Grid num 664
Grid num 665
Grid num 666
Grid num 667
Grid num 668
Grid num 669
Grid num 670
Grid num 671
Grid num 672
Grid num 673
Grid num 674
Grid num 675
Grid num 676
Grid num 677
Grid num 678
Grid num 679
Grid num 680
Grid num 681
Grid num 682
Grid num 683
Grid num 684
Grid num 685
Grid num 686
Grid num 687
Grid num 688
Grid num 689
Grid num 690
Grid num 691
Grid num 692
Grid num 693
Grid num 694
Grid num 695
Grid num 696
Grid num 697
Grid num 698
Grid num 699
Grid num 700
Grid num 701
Grid num 702
Grid num 703
Grid num 704
Grid num 705
Grid num 706
Grid num 707
Grid num 708
Grid num 709
Grid num 710
Grid num 711
Grid num 712
Grid num 713
Grid num 714
Grid num 715
Grid num 716
Grid num 717

Grid num 1252
Grid num 1253
Grid num 1254
Grid num 1255
Grid num 1256
Grid num 1257
Grid num 1258
Grid num 1259
Grid num 1260
Grid num 1261
Grid num 1262
Grid num 1263
Grid num 1264
Grid num 1265
Grid num 1266
Grid num 1267
Grid num 1268
Grid num 1269
Grid num 1270
Grid num 1271
Grid num 1272
Grid num 1273
Grid num 1274
Grid num 1275
Grid num 1276
Grid num 1277
Grid num 1278
Grid num 1279
Grid num 1280
Grid num 1281
Grid num 1282
Grid num 1283
Grid num 1284
Grid num 1285
Grid num 1286
Grid num 1287
Grid num 1288
Grid num 1289
Grid num 1290
Grid num 1291
Grid num 1292
Grid num 1293
Grid num 1294
Grid num 1295
Grid num 1296
Grid num 1297
Grid num 1298
Grid num 1299
Grid num 1300
Grid num 1301
Grid num 1302
Grid num 1303
Grid num 1304
Grid num 1305
Grid num 1306
Grid num 1307
Grid num 1308
Grid num 1309
Grid num 1310
Grid num 1311
Grid num 1312
Grid num 1313
Grid num 1314
Grid num 1315
Grid num 1316
Grid num 1317
Grid num 1318
Grid num 1319
Grid num 1320
Grid num 1321
Grid num 1322
Grid n

Grid num 1839
Grid num 1840
Grid num 1841
Grid num 1842
Grid num 1843
Grid num 1844
Grid num 1845
Grid num 1846
Grid num 1847
Grid num 1848
Grid num 1849
Grid num 1850
Grid num 1851
Grid num 1852
Grid num 1853
Grid num 1854
Grid num 1855
Grid num 1856
Grid num 1857
Grid num 1858
Grid num 1859
Grid num 1860
Grid num 1861
Grid num 1862
Grid num 1863
Grid num 1864
Grid num 1865
Grid num 1866
Grid num 1867
Grid num 1868
Grid num 1869
Grid num 1870
Grid num 1871
Grid num 1872
Grid num 1873
Grid num 1874
Grid num 1875
Grid num 1876
Grid num 1877
Grid num 1878
Grid num 1879
Grid num 1880
Grid num 1881
Grid num 1882
Grid num 1883
Grid num 1884
Grid num 1885
Grid num 1886
Grid num 1887
Grid num 1888
Grid num 1889
Grid num 1890
Grid num 1891
Grid num 1892
Grid num 1893
Grid num 1894
Grid num 1895
Grid num 1896
Grid num 1897
Grid num 1898
Grid num 1899
Grid num 1900
Grid num 1901
Grid num 1902
Grid num 1903
Grid num 1904
Grid num 1905
Grid num 1906
Grid num 1907
Grid num 1908
Grid num 1909
Grid n

Grid num 2425
Grid num 2426
Grid num 2427
Grid num 2428
Grid num 2429
Grid num 2430
Grid num 2431
Grid num 2432
Grid num 2433
Grid num 2434
Grid num 2435
Grid num 2436
Grid num 2437
Grid num 2438
Grid num 2439
Grid num 2440
Grid num 2441
Grid num 2442
Grid num 2443
Grid num 2444
Grid num 2445
Grid num 2446
Grid num 2447
Grid num 2448
Grid num 2449
Grid num 2450
Grid num 2451
Grid num 2452
Grid num 2453
Grid num 2454
Grid num 2455
Grid num 2456
Grid num 2457
Grid num 2458
Grid num 2459
Grid num 2460
Grid num 2461
Grid num 2462
Grid num 2463
Grid num 2464
Grid num 2465
Grid num 2466
Grid num 2467
Grid num 2468
Grid num 2469
Grid num 2470
Grid num 2471
Grid num 2472
Grid num 2473
Grid num 2474
Grid num 2475
Grid num 2476
Grid num 2477
Grid num 2478
Grid num 2479
Grid num 2480
Grid num 2481
Grid num 2482
Grid num 2483
Grid num 2484
Grid num 2485
Grid num 2486
Grid num 2487
Grid num 2488
Grid num 2489
Grid num 2490
Grid num 2491
Grid num 2492
Grid num 2493
Grid num 2494
Grid num 2495
Grid n

Grid num 3011
Grid num 3012
Grid num 3013
Grid num 3014
Grid num 3015
Grid num 3016
Grid num 3017
Grid num 3018
Grid num 3019
Grid num 3020
Grid num 3021
Grid num 3022
Grid num 3023
Grid num 3024
Grid num 3025
Grid num 3026
Grid num 3027
Grid num 3028
Grid num 3029
Grid num 3030
Grid num 3031
Grid num 3032
Grid num 3033
Grid num 3034
Grid num 3035
Grid num 3036
Grid num 3037
Grid num 3038
Grid num 3039
Grid num 3040
Grid num 3041
Grid num 3042
Grid num 3043
Grid num 3044
Grid num 3045
Grid num 3046
Grid num 3047
Grid num 3048
Grid num 3049
Grid num 3050
Grid num 3051
Grid num 3052
Grid num 3053
Grid num 3054
Grid num 3055
Grid num 3056
Grid num 3057
Grid num 3058
Grid num 3059
Grid num 3060
Grid num 3061
Grid num 3062
Grid num 3063
Grid num 3064
Grid num 3065
Grid num 3066
Grid num 3067
Grid num 3068
Grid num 3069
Grid num 3070
Grid num 3071
Grid num 3072
Grid num 3073
Grid num 3074
Grid num 3075
Grid num 3076
Grid num 3077
Grid num 3078
Grid num 3079
Grid num 3080
Grid num 3081
Grid n

Grid num 3597
Grid num 3598
Grid num 3599
Grid num 3600
Grid num 3601
Grid num 3602
Grid num 3603
Grid num 3604
Grid num 3605
Grid num 3606
Grid num 3607
Grid num 3608
Grid num 3609
Grid num 3610
Grid num 3611
Grid num 3612
Grid num 3613
Grid num 3614
Grid num 3615
Grid num 3616
Grid num 3617
Grid num 3618
Grid num 3619
Grid num 3620
Grid num 3621
Grid num 3622
Grid num 3623
Grid num 3624
Grid num 3625
Grid num 3626
Grid num 3627
Grid num 3628
Grid num 3629
Grid num 3630
Grid num 3631
Grid num 3632
Grid num 3633
Grid num 3634
Grid num 3635
Grid num 3636
Grid num 3637
Grid num 3638
Grid num 3639
Grid num 3640
Grid num 3641
Grid num 3642
Grid num 3643
Grid num 3644
Grid num 3645
Grid num 3646
Grid num 3647
Grid num 3648
Grid num 3649
Grid num 3650
Grid num 3651
Grid num 3652
Grid num 3653
Grid num 3654
Grid num 3655
Grid num 3656
Grid num 3657
Grid num 3658
Grid num 3659
Grid num 3660
Grid num 3661
Grid num 3662
Grid num 3663
Grid num 3664
Grid num 3665
Grid num 3666
Grid num 3667
Grid n

Grid num 4184
Grid num 4185
Grid num 4186
Grid num 4187
Grid num 4188
Grid num 4189
Grid num 4190
Grid num 4191
Grid num 4192
Grid num 4193
Grid num 4194
Grid num 4195
Grid num 4196
Grid num 4197
Grid num 4198
Grid num 4199
Grid num 4200
Grid num 4201
Grid num 4202
Grid num 4203
Grid num 4204
Grid num 4205
Grid num 4206
Grid num 4207
Grid num 4208
Grid num 4209
Grid num 4210
Grid num 4211
Grid num 4212
Grid num 4213
Grid num 4214
Grid num 4215
Grid num 4216
Grid num 4217
Grid num 4218
Grid num 4219
Grid num 4220
Grid num 4221
Grid num 4222
Grid num 4223
Grid num 4224
Grid num 4225
Grid num 4226
Grid num 4227
Grid num 4228
Grid num 4229
Grid num 4230
Grid num 4231
Grid num 4232
Grid num 4233
Grid num 4234
Grid num 4235
Grid num 4236
Grid num 4237
Grid num 4238
Grid num 4239
Grid num 4240
Grid num 4241
Grid num 4242
Grid num 4243
Grid num 4244
Time 841.6724088191986
Done.


In [21]:
X_data_df['temperature'] = temperature
X_data_df['humidity'] = humidity

In [22]:
for col in ['epa_pm25_value', 'temperature', 'humidity']:
    v = X_data_df[col].mean()
    X_data_df[col].fillna(v, inplace = True)

In [25]:
# get neighbors, using jake's code

def get_coords(line):
    box = boxes[(boxes.min_lat < line.lat) & (boxes.max_lat > line.lat) & 
                (boxes.min_lon < line.lon) & (boxes.max_lon > line.lon)]
    assert box.shape[0] == 1
    return box.iloc[0, 4], box.iloc[0, 5] # x,y

#sensor_locs = real_sensor_df[['sensor_id', 'lat', 'lon']].drop_duplicates() -- shouldn't be dups from getData(), no?
real_sensor_df['xy_'] = real_sensor_df.apply(get_coords, axis = 1)

real_sensor_df = real_sensor_df.drop_duplicates(subset=['sensor_id']) \
                         .assign(x = lambda d: real_sensor_df['xy_'].map(lambda f:f[0]),
                                 y = lambda d: real_sensor_df['xy_'].map(lambda f:f[1])) \
                         .set_index('sensor_id')

def time_space(line):
    """ takes x, y, created at
    
    returns string of form TTTT_x_y
    
    Where TTTT is unix timestamp divided by 600 (so an increment of 1 TTTT
    is equivalent to 10 minutes, or how often we get readings)
    
    """
    
    ts_ = int(datetime.datetime.strptime(line.created_at, "%Y/%m/%dT%H:%M").timestamp() / 600)
    return f"{ts_}_{line.x}_{line.y}"

real_sensor_df['time_space_id'] = real_sensor_df.apply(time_space, axis = 1)

# create a key value mapping of the from time_space_id:(row number in X_train)
neighbor_lookup = defaultdict(list)
real_sensor_df.apply(lambda x: neighbor_lookup[x.time_space_id].append(x.name), axis = 1)

def get_neighbors_space_time(line, train_df, delta = 0):
    """
    Inputs: single observation, a training dataframe, and a time delta
    Outputs: vector of length 24 corresponding to surrounding neighbor observations
    """
    if delta > 0:
        raise Exception("Cannot see the future")
    
    t, x, y = line.time_space_id.split("_")
    t = int(t)
    x = int(x)
    y = int(y)
    neighbors = np.zeros((24))
    
    c = 0
    for i in range(-2,3):
        for j in range(-2,3):
            if i == 0 and j == 0 : continue
            n = neighbor_lookup[f"{t + delta}_{x+i}_{y+j}"] # get rows in train_df for that particular time-block
            
            if n:
                neighbors[c] = train_df.loc[n, '2_5um'].mean()
            c += 1
    
    return neighbors

X_neighbors = X_data_df.apply(lambda x: get_neighbors_space_time(x, real_sensor_df, delta = 0), axis =1)

X_neighbors = np.array(X_neighbors.to_list())

X_data_df = np.concatenate((X_data_df, X_neighbors), axis = 1)


AttributeError: ("'Series' object has no attribute 'time_space_id'", 'occurred at index 0')

In [27]:

X_neighbors = real_sensor_df.apply(lambda x: get_neighbors_space_time(x, real_sensor_df, delta = 0), axis =1)

X_neighbors = np.array(X_neighbors.to_list())

X_data_df = np.concatenate((X_data_df, X_neighbors), axis = 1)

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 4244 and the array at index 1 has size 185

In [30]:
X_neighbors.shape

(185, 24)

In [24]:
#for n in range(nfeatures):
#    X_data_df['neighbor_'+str(n)] = ndict['neighbor_'+str(n)] 

In [None]:
X_data_df

In [None]:
X_data_df.to_csv(path_or_buf="./data/xfile_rf.csv", index=True)

In [30]:
X_data_df = pd.read_csv("./data/xfile_rf.csv")

In [31]:
X_data_df

Unnamed: 0.1,Unnamed: 0,created,lat,lon,x,y,epa_pm25_value,ndvi,wind_x,wind_y,elevation,neighbor_0,neighbor_1,neighbor_2,neighbor_3,neighbor_4,neighbor_5,neighbor_6,neighbor_7,neighbor_8,neighbor_9,neighbor_10,neighbor_11,neighbor_12,neighbor_13,neighbor_14,neighbor_15,neighbor_16,neighbor_17,neighbor_18,neighbor_19,neighbor_20,neighbor_21,neighbor_22,neighbor_23,temperature,humidity
0,0,201909010000,37.824436,-122.534739,0,34,5.1,-2000,1.215537,-6.893654,17,4.41,1.45,,,3.72,2.24,22.97,,,3.14,,,2.91,2.97,4.30,,,,,,,,,,80.500000,49.000000
1,1,201909010000,37.827984,-122.534739,0,35,5.1,-2000,1.215537,-6.893654,0,4.41,1.45,,,3.72,3.14,2.24,22.97,,,,,2.91,,2.97,4.30,,,,,,,,,80.500000,49.000000
2,2,201909010000,37.831531,-122.534739,0,36,5.1,-2000,1.215537,-6.893654,0,4.41,1.45,,,3.72,3.14,2.24,,22.97,,,,2.91,,,2.97,,4.30,,,,,,5.42,80.500000,49.000000
3,3,201909010000,37.835079,-122.534739,0,37,5.1,5159,1.215537,-6.893654,55,4.41,1.45,,,3.72,3.14,,2.24,,22.97,,,,,2.91,,,,2.97,5.42,4.30,,,,80.500000,49.000000
4,4,201909010000,37.838626,-122.534739,0,38,5.1,7053,1.215537,-6.893654,132,4.41,1.45,3.72,,,3.14,,,,2.24,,,22.97,,,5.42,2.91,,,,2.97,,2.62,4.30,84.000000,45.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4239,4239,201909010000,37.994537,-122.184396,78,82,8.5,3827,-5.638156,2.052121,177,0.00,1.93,1.63,1.67,5.50,3.34,,,,3.93,,,,,3.75,1.36,,,,,,,0.00,1.06,97.333333,24.333333
4240,4240,201909010000,37.998076,-122.184396,78,83,8.5,3024,-5.638156,2.052121,159,0.00,1.63,1.93,1.67,5.50,3.34,,,,3.93,,,,,3.75,,1.36,,,,,3.77,,0.00,97.333333,24.333333
4241,4241,201909010000,38.001616,-122.184396,78,84,8.5,3110,-5.638156,2.052121,203,0.00,1.63,1.93,1.67,5.50,3.34,,,,3.93,,,,,,1.36,3.75,,,3.77,,,,2.73,97.333333,24.333333
4242,4242,201909010000,38.005155,-122.184396,78,85,8.5,3110,-5.638156,2.052121,243,0.00,1.63,1.93,1.67,5.50,3.34,,,,3.93,,,,,3.77,1.36,,,3.75,,,,,2.73,97.333333,24.333333


## Using a trained Random Forest model

In [36]:
# load in the model from a pickle file

import s3fs
import boto3
#s3 = s3fs.S3FileSystem()
#myopen = s3.open
#s3_resource = boto3.resource('s3')
#s3_resource.Object('capstone-air-pollution', 'model_stuff/201901103_RF.joblib')

from joblib import dump, load
s3 = boto3.resource('s3')
model = load(s3.Bucket("capstone-air-pollution").Object("model_stuff/201901103_RF.joblib").get()['Body'].read())
y_pred = model.predict(X_data_df)

MemoryError: 

In [35]:
#from boto.s3.connection import S3Connection
#from joblib import dump, load
#import os

#s3 = s3fs.S3FileSystem()
#myopen = s3.open
#s3_resource = boto3.resource('s3')
#s3_resource.Object('capstone-air-pollution', 'model_stuff/201901103_RF.joblib')
#local_file = '/tmp/201901103_RF.joblib'
#s3_resource.get_contents_to_filename(local_file)
#model = load(local_file)
#os.remove(local_file)

AttributeError: 's3.ServiceResource' object has no attribute 'get_contents_to_filename'

In [None]:
#model2 = load(model)

In [None]:
# make predictions
y_pred = model.predict(X_data_df)

## Using a trained kNN model 

In [None]:
# load in the model from a pickle file
from joblib import dump, load
model = load('VirtualSensing/models/kNN_model.joblib') 

In [None]:
# create a features dataframe
X_data_df = boxes.copy(deep=True) 
X_data_df = X_data_df[X_data_df.in_water == False]
X_data_df = X_data_df.drop(columns = ['min_lat', 'max_lat', 'min_lon', 'max_lon', 'x','y','in_water'])
X_data_df.rename(columns={'center_lat': 'lat', 'center_lon': 'lon'}, inplace=True)



In [None]:
# add column for time_delta for kNN model
lats_to_add = list(X_data_df.lat)
lons_to_add = list(X_data_df.lon)
all_sensors = len(X_data_df)
max_time = (pd.Timestamp('2019-09-30 23:50:00') - pd.Timestamp('2019-09-01 00:00:00')) / np.timedelta64(1, 'm')
lat = [lats_to_add]
lon = [lons_to_add]

In [None]:
current_time = 0
times = [[current_time] * all_sensors]
while (current_time <= max_time):
    current_time += 10
    time = [current_time] * all_sensors
    times.append(time)
    lat.append(lats_to_add[:])
    lon.append(lons_to_add[:])


In [None]:
flat_lat = [item for sublist in lat for item in sublist]
flat_lon = [item for sublist in lon for item in sublist]
flat_times = [item for sublist in times for item in sublist]

In [None]:
# make the dataframe 
data = {'lat': flat_lat, 'lon': flat_lon, 'time_delta': flat_times}
X_df = pd.DataFrame(data)

In [None]:
X_df.head()

In [None]:
X_df.describe()

In [None]:
# make predictions
y_pred = model.predict(X_df)

In [None]:
X_df['pred_PM2_5'] = y_pred

In [None]:
X_df['avg'] = y_pred

In [None]:
boxes['avg_PM2_5'] = [0] * len(boxes)

In [None]:
for sensor in range(len(boxes)):
    boxes.avg_PM2_5.iloc[sensor] = X_df[(X_df.lat == boxes.center_lat.iloc[sensor]) & 
         (X_df.lon == boxes.center_lon.iloc[sensor])].pred_PM2_5.mean()

In [None]:
# create a dataframe for mapping
map_df = boxes[boxes.in_water == False]
map_df = map_df.sort_values(by='avg_PM2_5', ascending=False)

In [None]:
%matplotlib inline

from geopy.distance import distance
import pandas as pd
from time import sleep
import shapely.geometry
import pyproj
import geopandas as gpd
from matplotlib import pyplot as plt
from shapely.geometry import Point

# libraries
import datetime
from datetime import date, timedelta
from os import path
import pandas as pd
import numpy as np
import statistics
import boto3
import s3fs
import sys
from fastparquet import ParquetFile
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import geopy
from geopy import distance

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', 500)

In [None]:
def makeGrid():
    # Set up projections
    p_ll = pyproj.Proj(init='epsg:4283') # grid in lat/lon
    p_mt = pyproj.Proj(init='epsg:3857') # metric; same as EPSG:900913

    # Create corners of rectangle to be transformed to a grid
    MIN_LAT = 37.701933
    MAX_LAT = 38.008050
    MIN_LON = -122.536985
    MAX_LON = -122.186437
    sw = shapely.geometry.Point((MIN_LON, MIN_LAT))
    ne = shapely.geometry.Point((MAX_LON,MAX_LAT))

    stepsize = 500 # 0.5 km grid step size

    # Project corners to target projection
    s = pyproj.transform(p_ll, p_mt, sw.x, sw.y) # Transform NW point to 3857
    e = pyproj.transform(p_ll, p_mt, ne.x, ne.y) # .. same for SE

    # Iterate over 2D area
    boxes = []
    min_lon = s[0]
    x = 0
    while min_lon < e[0]:
        max_lon = min_lon + stepsize
        min_lat = s[1]
        y = 0
    
        while min_lat < e[1]:
            max_lat = min_lat + stepsize
            b_left = shapely.geometry.Point(pyproj.transform(p_mt, p_ll, min_lon, min_lat))
            t_right = shapely.geometry.Point(pyproj.transform(p_mt, p_ll, max_lon, max_lat))
        
            bound_box = {'min_lat':b_left.y, 'max_lat':t_right.y, 'min_lon':b_left.x, 'max_lon':t_right.x, 'x': x, 'y':y}
        
            boxes.append(bound_box)
            min_lat = max_lat
            y += 1
        min_lon = max_lon
        x += 1
        
    boxes = pd.DataFrame(boxes)

    # find the center of each box
    boxes['center_lat'] = (boxes.min_lat + boxes.max_lat)/2
    boxes['center_lon'] = (boxes.min_lon + boxes.max_lon)/2
    
    base = gpd.read_file("bayarea.json")
    
    # map every box to whether it overlaps with the bay as defined by the shapefile
    boxes['in_water'] = [bay_and_ocean.contains(pt) for pt in boxes_as_points]

    # convert lat/lon to Point objects
    boxes_as_points = boxes.apply(lambda line: Point(line.center_lon, line.center_lat), axis = 1)
    
    return(boxes)

In [None]:
# save grid to csv file
boxes[['min_lat', 'max_lat', 'min_lon', 'max_lon', 'x', 'y', 'center_lat',
       'center_lon', 'in_water']].to_csv("500m_grid.csv", index = False)

In [None]:
boxes.head()

In [None]:
# create feature data at each center point of the grid
created, lat, lon, wind_data, wind_direction, wind_speed, gusts, gust_speed, variable_winds, epa_pm25_value, wkday, 
temperature, humidity, elevation, hour, month, timeofday_afternoon, timeofday_evening, timeofday_morning, timeofday_night,
daytype_Weekday, daytype_Weekend, compass_ERROR, compass_East, compass_Missing, compass_No wind, compass_North, 
compass_South, compass_West




In [None]:
# constants
sys.path.append("./HistoricalData/")
from getData import get_data

UP_LEFT = (38.008050, -122.536985)    
UP_RIGHT = (38.008050, -122.186437)   
DOWN_RIGHT = (37.701933, -122.186437) 
DOWN_LEFT = (37.701933, -122.536985)  
START_DATE = '2018/09/10' 
END_DATE = '2019/09/10'   
START_HOUR = '0'        
END_HOUR = '24'   

In [None]:
# load data into dataframe
data_df = get_data(UP_LEFT, UP_RIGHT, DOWN_RIGHT, DOWN_LEFT, START_DATE, END_DATE, START_HOUR, END_HOUR, 'Monthly')

In [None]:
# add elevation data
elev_df = pd.read_csv('VirtualSensing/sensor_elevations.csv', header='infer', float_precision='high')
elev_df = elev_df.drop(columns='resolution')
data_df = pd.merge(data_df, elev_df)
print("How many elevations are missing?", data_df.elevation.isna().sum())
print("Shape of the new dataframe:", data_df.shape)

In [None]:
# winnow down the features
columns_to_keep = ['created', 'lat', 'lon', 'wind_data', 'wind_direction', 'wind_speed', 'gusts', 'gust_speed', 
                   'variable_winds', 'variable_wind_info', 'epa_pm25_value', 'wkday', 
                   'daytype', 'timeofday', 'wind_compass', 'temperature', 'humidity', 'elevation', 'hour', 'month']
X_data_df = data_df[columns_to_keep]
y_data_df = data_df['2_5um']

In [None]:
X_data_df.wind_data = X_data_df.wind_data.astype(bool)
X_data_df.variable_winds = X_data_df.variable_winds.astype(bool)
X_data_df.gusts = X_data_df.gusts.astype(bool)
X_data_df.daytype = X_data_df.daytype.astype(str)
X_data_df.daytype = X_data_df.daytype.astype('category')
X_data_df.timeofday = X_data_df.timeofday.astype(str)
X_data_df.timeofday = X_data_df.timeofday.astype('category')
X_data_df.wind_compass = X_data_df.wind_compass.astype(str)
X_data_df.wind_compass = X_data_df.wind_compass.astype('category')
X_data_df.wkday = pd.to_numeric(X_data_df.wkday)
X_data_df.wkday = X_data_df.wkday.astype('category')
X_data_df.hour = X_data_df.hour.astype(int)
X_data_df.month = X_data_df.month.astype(int)

In [None]:
# handle variable winds missing values 
vrb_wind_range_readings = 0
mid_ranges = list()

for row in range(len(X_data_df)):
    if X_data_df.variable_winds.iloc[row]:
        info = X_data_df.variable_wind_info.iloc[row]
        if info:
            vrb_wind_range_readings += 1
            first, second = info.split('V')
            mid_range = int((int(first) + int(second)) / 2)
            if (X_data_df.wind_direction.iloc[row] == 'VRB'):
                X_data_df.wind_direction.lloc[row] = mid_range
            mid_ranges.append(mid_range)

replacement = statistics.mode(mid_ranges)
X_data_df = X_data_df.replace('VRB', replacement) # give variable wind the most frequent midpoint variable range
X_data_df = X_data_df.drop(columns = ['variable_wind_info'])

In [None]:
# handle missing wind values with means 
wind_direction_obs = pd.to_numeric(X_data_df.wind_direction.dropna())
wind_direction_avg = wind_direction_obs.mean()
wind_direction_avg = int(wind_direction_avg)
X_data_df.wind_direction = X_data_df.wind_direction.replace(np.nan, wind_direction_avg) # give missing wind direction the mean
X_data_df.wind_direction = X_data_df.wind_direction.replace('', wind_direction_avg)
X_data_df.wind_direction = X_data_df.wind_direction.astype(int)
wind_speed_obs = pd.to_numeric(X_data_df.wind_speed.dropna())
wind_speed_avg = wind_speed_obs.mean()
X_data_df.wind_speed = X_data_df.wind_speed.replace(np.nan, wind_speed_avg) # give missing wind speed the mean
X_data_df.wind_speed = X_data_df.wind_speed.replace('', wind_speed_avg)
X_data_df.gust_speed = X_data_df.gust_speed.replace(np.nan, 0)
X_data_df.gust_speed = X_data_df.gust_speed.replace('', 0)
X_data_df.epa_pm25_value = X_data_df.epa_pm25_value.replace(np.nan, X_data_df.epa_pm25_value.mean())
X_data_df.temperature = X_data_df.temperature.replace(np.nan, X_data_df.temperature.mean())
X_data_df.humidity = X_data_df.humidity.replace(np.nan, X_data_df.humidity.mean())

In [None]:
# one hot encode the categoricals
X_data_df = pd.concat([X_data_df,pd.get_dummies(X_data_df['timeofday'], prefix='timeofday')],axis=1)
X_data_df = pd.concat([X_data_df,pd.get_dummies(X_data_df['daytype'], prefix='daytype')], axis=1)
X_data_df = pd.concat([X_data_df,pd.get_dummies(X_data_df['wind_compass'], prefix='compass')], axis=1)

In [None]:
# remove the original columns that got one hot encoded, type the one hots as bools
X_data_df = X_data_df.drop(columns=['timeofday','daytype','wind_compass'])
X_data_df.timeofday_afternoon = X_data_df.timeofday_afternoon.astype(bool)
X_data_df.timeofday_evening = X_data_df.timeofday_evening.astype(bool)
X_data_df.timeofday_morning = X_data_df.timeofday_morning.astype(bool)
X_data_df.timeofday_night = X_data_df.timeofday_night.astype(bool)
X_data_df.daytype_Weekday = X_data_df.daytype_Weekday.astype(bool)
X_data_df.daytype_Weekend = X_data_df.daytype_Weekend.astype(bool)
X_data_df.compass_ERROR = X_data_df.compass_ERROR.astype(bool)
X_data_df.compass_East = X_data_df.compass_East.astype(bool)
X_data_df.compass_Missing = X_data_df.compass_Missing.astype(bool)
X_data_df['compass_No wind'] = X_data_df['compass_No wind'].astype(bool)
X_data_df.compass_North = X_data_df.compass_North.astype(bool)
X_data_df.compass_South = X_data_df.compass_South.astype(bool)
X_data_df.compass_West = X_data_df.compass_West.astype(bool)

In [None]:
# confirm no NAs at this point
for col in X_data_df.columns:
    testy = X_data_df[col]
    print(col, testy.isna().sum())