# Classifying tanks by whether or not tanks lie on floodplains (using spatial joins), and merging with National Risk Index Data

### Import statements

In [1]:
import pandas as pd
import geopandas as gpd



### Reading AST Data

In [2]:
# Load the aboveground storage tank (AST) point data and preview the first three rows.
tanks_path = '/hpc/group/codeplus22-vis/celine_data/ast_master.shp'
df_tanks = gpd.read_file(tanks_path)
df_tanks.head(3)

Unnamed: 0,state,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,county,geometry
0,New York,closed_roof_tank,39.6,40.625572,-73.745231,-8209282.0,4957270.0,36059,POINT (-73.74523 40.62557)
1,New York,closed_roof_tank,19.8,40.624761,-73.74442,-8209191.0,4957151.0,36059,POINT (-73.74442 40.62476)
2,New York,closed_roof_tank,12.6,40.626086,-73.746257,-8209396.0,4957345.0,36059,POINT (-73.74626 40.62609)


### Using ```.sjoin()``` to classify tanks by whether or not they are on a floodplain

#### Reading floodplain data from the Federal Emergency Management Agency (FEMA)
Then filtering for only the column we need (geometry) to find which tanks lie on a floodplain.

In [4]:
# Read the FEMA NFHL geodatabase into a GeoDataFrame.
# NOTE(review): no `layer=` argument is passed, so the reader's default layer is used.
# The displayed columns (BFE_LN_ID, ELEV, V_DATUM) look like base-flood-elevation
# lines -- confirm this is the layer intended to represent floodplains.
floodplains_path = '/hpc/group/codeplus22-vis/celine_data/source_files/nat_hazard_data/NFHL/NFHL_Key_Layers.gdb'
df_floodplains = gpd.read_file(floodplains_path)
df_floodplains

Unnamed: 0,DFIRM_ID,VERSION_ID,BFE_LN_ID,ELEV,LEN_UNIT,V_DATUM,SOURCE_CIT,GFID,Shape_Length,geometry
0,01001C,1.1.1.0,01001C_835,209.0,Feet,NAVD88,01001C_STUDY1,20140910,0.000081,"MULTILINESTRING ((-86.60878 32.46751, -86.6088..."
1,01001C,1.1.1.0,01001C_693,207.0,Feet,NAVD88,01001C_STUDY1,20140910,0.003936,"MULTILINESTRING ((-86.49836 32.47248, -86.4983..."
2,01001C,1.1.1.0,01001C_680,209.0,Feet,NAVD88,01001C_STUDY1,20140910,0.003361,"MULTILINESTRING ((-86.49785 32.47265, -86.4972..."
3,01001C,1.1.1.0,01001C_726,307.0,Feet,NAVD88,01001C_STUDY1,20140910,0.000544,"MULTILINESTRING ((-86.44600 32.48021, -86.4460..."
4,01001C,1.1.1.0,01001C_600,159.0,Feet,NAVD88,01001C_STUDY1,20140910,0.012341,"MULTILINESTRING ((-86.56813 32.44133, -86.5765..."
...,...,...,...,...,...,...,...,...,...,...
1573946,780000,1.1.1.0,780000_200,40.0,Feet,LOCAL TIDAL DATUM,780000_FIS1,c4d2dc2f-44fb-4578-8aaf-b595d5af4abb,0.000297,"MULTILINESTRING ((-64.83159 17.69516, -64.8313..."
1573947,780000,1.1.1.0,780000_183,46.0,Feet,LOCAL TIDAL DATUM,780000_FIS1,c4d2dc2f-44fb-4578-8aaf-b595d5af4abb,0.000908,"MULTILINESTRING ((-64.71085 17.74271, -64.7101..."
1573948,780000,1.1.1.0,780000_142,54.0,Feet,LOCAL TIDAL DATUM,780000_FIS1,c4d2dc2f-44fb-4578-8aaf-b595d5af4abb,0.000563,"MULTILINESTRING ((-64.82895 17.69961, -64.8285..."
1573949,780000,1.1.1.0,780000_260,20.0,Feet,LOCAL TIDAL DATUM,780000_FIS1,c4d2dc2f-44fb-4578-8aaf-b595d5af4abb,0.000938,"MULTILINESTRING ((-64.87432 17.71066, -64.8739..."


In [5]:
# Keep only the geometry column; the attribute columns are not used by the spatial join below.
# Note: this rebinds `df_floodplains`, so the full attribute table is no longer available after this cell.
df_floodplains = df_floodplains[['geometry']]
df_floodplains

Unnamed: 0,geometry
0,"MULTILINESTRING ((-86.60878 32.46751, -86.6088..."
1,"MULTILINESTRING ((-86.49836 32.47248, -86.4983..."
2,"MULTILINESTRING ((-86.49785 32.47265, -86.4972..."
3,"MULTILINESTRING ((-86.44600 32.48021, -86.4460..."
4,"MULTILINESTRING ((-86.56813 32.44133, -86.5765..."
...,...
1573946,"MULTILINESTRING ((-64.83159 17.69516, -64.8313..."
1573947,"MULTILINESTRING ((-64.71085 17.74271, -64.7101..."
1573948,"MULTILINESTRING ((-64.82895 17.69961, -64.8285..."
1573949,"MULTILINESTRING ((-64.87432 17.71066, -64.8739..."


#### Using the GeoPandas buffer function
This way, tanks within 200 meters of either side of a floodplain line will be marked as near a floodplain. The floodplain data is given as LineString or MultiLineString geometries, so buffering it provides a more generalized understanding of the tanks that are near areas of flooding risk, not only the ones directly in the line of risk.

In order to buffer the geometries by 200 meters, it is necessary to convert the coordinate system of the dataframe to EPSG 3857, as the unit of measurement for this coordinate system is the meter. The final coordinate system conversion sets the floodplain dataframe to the same coordinate system as the tank dataframe. This consistency is key in the next few steps. 

In [6]:
%%time
df_floodplains = df_floodplains.to_crs("EPSG:3857")
df_floodplains = df_floodplains.buffer(200)
df_floodplains= df_floodplains.to_crs("EPSG:4326")
df_floodplains

CPU times: user 1min 7s, sys: 3.08 s, total: 1min 10s
Wall time: 1min 10s


0          POLYGON ((-86.60927 32.46605, -86.60944 32.466...
1          POLYGON ((-86.49688 32.47334, -86.49637 32.472...
2          POLYGON ((-86.49868 32.47109, -86.49926 32.471...
3          POLYGON ((-86.44780 32.48028, -86.44783 32.480...
4          POLYGON ((-86.57656 32.43987, -86.57658 32.439...
                                 ...                        
1573946    POLYGON ((-64.83182 17.69687, -64.83177 17.696...
1573947    POLYGON ((-64.70898 17.74343, -64.70885 17.743...
1573948    POLYGON ((-64.82800 17.70113, -64.82789 17.701...
1573949    POLYGON ((-64.87515 17.71228, -64.87514 17.712...
1573950    POLYGON ((-64.88165 18.32657, -64.88154 18.326...
Length: 1573951, dtype: geometry

The buffer function turns the GeoDataFrame into a GeoSeries, but to find which tanks lie on or near floodplains, it is necessary to convert this GeoSeries back into a GeoDataFrame.

In [7]:
# Wrap the buffered GeoSeries in a GeoDataFrame for the spatial join.
# Passing it through `geometry=` creates the column under the name 'geometry' and
# registers it as the active geometry (carrying over the series' CRS and index).
# This avoids relying on the unnamed series landing in a column called 0 and on an
# in-place rename happening to match the default geometry column name.
gdf_floodplains = gpd.GeoDataFrame(geometry=df_floodplains)
gdf_floodplains

Unnamed: 0,geometry
0,"POLYGON ((-86.60927 32.46605, -86.60944 32.466..."
1,"POLYGON ((-86.49688 32.47334, -86.49637 32.472..."
2,"POLYGON ((-86.49868 32.47109, -86.49926 32.471..."
3,"POLYGON ((-86.44780 32.48028, -86.44783 32.480..."
4,"POLYGON ((-86.57656 32.43987, -86.57658 32.439..."
...,...
1573946,"POLYGON ((-64.83182 17.69687, -64.83177 17.696..."
1573947,"POLYGON ((-64.70898 17.74343, -64.70885 17.743..."
1573948,"POLYGON ((-64.82800 17.70113, -64.82789 17.701..."
1573949,"POLYGON ((-64.87515 17.71228, -64.87514 17.712..."


#### Finding the tanks that lie on/near floodplains using the sjoin() function

The GeoPandas ```.sjoin()``` function performs a spatial join of two GeoDataFrames. In this case, the predicate parameter input 'intersects' means that the output is a new GeoDataFrame containing only the rows in one GeoDataFrame whose geometries intersect those in the other GeoDataFrame. In other words, the function outputs a GeoDataFrame containing only the tanks that lie within 200 meters of a floodplain.

In [8]:
%%time
df_intersect = gpd.sjoin(df_tanks, gdf_floodplains, how='inner', predicate='intersects')
df_intersect.head()

CPU times: user 1.98 s, sys: 23.9 ms, total: 2 s
Wall time: 2.01 s


Unnamed: 0,state,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,county,geometry,index_right
172,Massachusetts,closed_roof_tank,38.4,42.174907,-72.459486,-8066153.0,5187216.0,25013,POINT (-72.45949 42.17491),504383
223,Minnesota,closed_roof_tank,56.4,44.460883,-95.789712,-10663260.0,5537044.0,27083,POINT (-95.78971 44.46088),542577
296,Minnesota,closed_roof_tank,11.4,44.460841,-95.791086,-10663410.0,5537038.0,27083,POINT (-95.79109 44.46084),542577
297,Minnesota,closed_roof_tank,10.8,44.460674,-95.791176,-10663420.0,5537012.0,27083,POINT (-95.79118 44.46067),542577
299,Minnesota,narrow_closed_roof_tank,7.2,44.460757,-95.791131,-10663420.0,5537024.0,27083,POINT (-95.79113 44.46076),542577


Drop tanks with the same latitude and longitude, which are therefore duplicates:

In [9]:
# Keep only the first row for each (latitude, longitude) pair; a tank that intersects
# several buffered geometries appears once per match in the join output.
# NOTE(review): distinct tanks that share identical coordinates would also collapse to
# a single row here -- confirm that cannot happen in the source data.
coordinate_columns = ['lat_t_4326', 'lon_t_4326']
is_repeat = df_intersect.duplicated(subset=coordinate_columns)
df_intersect = df_intersect[~is_repeat]
df_intersect.head()

Unnamed: 0,state,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,county,geometry,index_right
172,Massachusetts,closed_roof_tank,38.4,42.174907,-72.459486,-8066153.0,5187216.0,25013,POINT (-72.45949 42.17491),504383
223,Minnesota,closed_roof_tank,56.4,44.460883,-95.789712,-10663260.0,5537044.0,27083,POINT (-95.78971 44.46088),542577
296,Minnesota,closed_roof_tank,11.4,44.460841,-95.791086,-10663410.0,5537038.0,27083,POINT (-95.79109 44.46084),542577
297,Minnesota,closed_roof_tank,10.8,44.460674,-95.791176,-10663420.0,5537012.0,27083,POINT (-95.79118 44.46067),542577
299,Minnesota,narrow_closed_roof_tank,7.2,44.460757,-95.791131,-10663420.0,5537024.0,27083,POINT (-95.79113 44.46076),542577


#### Using ```df_intersect``` to classify the tanks in the original dataset

In [10]:
# Index labels of the tanks that matched at least one buffered floodplain geometry.
idx = list(df_intersect.index.to_numpy())

In [11]:
%%time
df_tanks['on_floodplain'] = 0

for num in idx:
    df_tanks['on_floodplain'].iloc[num] = 1
    
df_tanks

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


CPU times: user 567 ms, sys: 871 µs, total: 568 ms
Wall time: 568 ms


Unnamed: 0,state,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,county,geometry,on_floodplain
0,New York,closed_roof_tank,39.6,40.625572,-73.745231,-8.209282e+06,4.957270e+06,36059,POINT (-73.74523 40.62557),0
1,New York,closed_roof_tank,19.8,40.624761,-73.744420,-8.209191e+06,4.957151e+06,36059,POINT (-73.74442 40.62476),0
2,New York,closed_roof_tank,12.6,40.626086,-73.746257,-8.209396e+06,4.957345e+06,36059,POINT (-73.74626 40.62609),0
3,New York,closed_roof_tank,30.6,40.625786,-73.746203,-8.209390e+06,4.957301e+06,36059,POINT (-73.74620 40.62579),0
4,New York,closed_roof_tank,24.0,40.625781,-73.745813,-8.209346e+06,4.957300e+06,36059,POINT (-73.74581 40.62578),0
...,...,...,...,...,...,...,...,...,...,...
98164,Colorado,narrow_closed_roof_tank,5.4,39.777431,-104.920718,-1.167972e+07,4.833652e+06,08031,POINT (-104.92072 39.77743),0
98165,Colorado,narrow_closed_roof_tank,4.8,39.777301,-104.920631,-1.167971e+07,4.833633e+06,08031,POINT (-104.92063 39.77730),0
98166,Colorado,narrow_closed_roof_tank,3.6,39.777701,-104.920609,-1.167971e+07,4.833691e+06,08031,POINT (-104.92061 39.77770),0
98167,Colorado,narrow_closed_roof_tank,4.8,39.776628,-104.920617,-1.167971e+07,4.833535e+06,08031,POINT (-104.92062 39.77663),0


Now our original tanks dataframe, ```df_tanks```, has a column indicating whether or not each tank is near a floodplain.

### Reading National Risk Index Data, taken from FEMA
We want to classify each tank by its risk from a variety of natural hazards. To do this, we read in NRI data, filter for only the columns we want, as stipulated by our researcher, and rename them for standardization purposes.

In [12]:
# Load the county-level National Risk Index geodatabase and preview the first rows.
nri_path = '/hpc/group/codeplus22-vis/celine_data/source_files/nat_hazard_data/NRI_Data/NRI_GDB_Counties/NRI_GDB_Counties.gdb'
df_nri = gpd.read_file(nri_path)
df_nri.head()

Unnamed: 0,NRI_ID,STATE,STATEABBRV,STATEFIPS,COUNTY,COUNTYTYPE,COUNTYFIPS,STCOFIPS,POPULATION,BUILDVALUE,...,WNTW_EALA,WNTW_EALT,WNTW_EALS,WNTW_EALR,WNTW_RISKS,WNTW_RISKR,NRI_VER,Shape_Length,Shape_Area,geometry
0,C21115,Kentucky,KY,21,Johnson,County,115,21115,23356,1924008000.0,...,4.235939,47363.199731,19.448529,Relatively Moderate,14.131237,Relatively Low,November 2021,190441.334565,1098944000.0,"MULTIPOLYGON (((-9196369.959 4562386.043, -919..."
1,C21117,Kentucky,KY,21,Kenton,County,117,21117,159720,18773380000.0,...,44.606252,64259.532691,21.530408,Relatively Moderate,12.47004,Relatively Low,November 2021,140730.907028,704249200.0,"MULTIPOLYGON (((-9407183.321 4735315.123, -940..."
2,C21119,Kentucky,KY,21,Knott,County,119,21119,16346,1170376000.0,...,0.023091,30809.75462,16.851393,Relatively Low,14.46627,Relatively Low,November 2021,211206.226178,1448900000.0,"MULTIPOLYGON (((-9233790.126 4509476.801, -923..."
3,C21121,Kentucky,KY,21,Knox,County,121,21121,31883,2135773000.0,...,0.082573,61427.308851,21.209328,Relatively Moderate,19.585915,Relatively Moderate,November 2021,237214.255701,1572984000.0,"MULTIPOLYGON (((-9305143.376 4432946.710, -930..."
4,C21123,Kentucky,KY,21,Larue,County,123,21123,14193,1221343000.0,...,246.668438,12870.385216,12.597091,Relatively Low,7.715952,Very Low,November 2021,226736.66586,1088060000.0,"MULTIPOLYGON (((-9520186.985 4516660.323, -952..."


In [13]:
# Map each NRI column we keep to its standardized lowercase name. The key order fixes
# the column order of the result, and `STCOFIPS` becomes the `county` join key.
nri_column_names = {
    'STCOFIPS': 'county',
    'ERQK_RISKS': 'erqk_risks',
    'SWND_RISKS': 'swnd_risks',
    'HRCN_RISKS': 'hrcn_risks',
    'TRND_RISKS': 'trnd_risks',
    'CFLD_RISKS': 'cfld_risks',
    'RFLD_RISKS': 'rfld_risks',
}
# Select and rename in one chained expression (no in-place mutation of the selection).
df_nri = df_nri[list(nri_column_names)].rename(columns=nri_column_names)
df_nri

Unnamed: 0,county,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks
0,21115,1.953248,10.756017,1.668058,9.136885,,14.575572
1,21117,3.346640,19.688303,1.875497,24.280149,,8.279166
2,21119,2.281739,12.431024,1.129109,10.174559,,8.755275
3,21121,4.385020,17.589118,1.962140,19.273345,,14.443835
4,21123,2.042402,11.899304,2.473315,9.216597,,4.055177
...,...,...,...,...,...,...,...
3137,56037,2.070342,2.848189,,2.191509,,3.318171
3138,56039,4.292420,3.143585,,6.133900,,2.734316
3139,56041,3.206560,4.959357,,4.118598,,3.201339
3140,56043,3.156933,6.009518,,8.577072,,4.954794


Then, we merge this ```df_nri``` dataframe with our ```df_tanks``` dataframe, based on the ```county``` column. Therefore, each tank is associated with the risk from each natural hazard based on the county in which it is located.

### Merging AST and NRI data using pandas' ```.merge()```

In [14]:
# Left join: every tank row is kept and gains the hazard scores of its county; tanks
# whose county has no NRI row get NaN in the hazard columns.
df_tank_risks = pd.merge(df_tanks, df_nri, how='left', on='county')
df_tank_risks.head()

Unnamed: 0,state,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,county,geometry,on_floodplain,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks
0,New York,closed_roof_tank,39.6,40.625572,-73.745231,-8209282.0,4957270.0,36059,POINT (-73.74523 40.62557),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784
1,New York,closed_roof_tank,19.8,40.624761,-73.74442,-8209191.0,4957151.0,36059,POINT (-73.74442 40.62476),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784
2,New York,closed_roof_tank,12.6,40.626086,-73.746257,-8209396.0,4957345.0,36059,POINT (-73.74626 40.62609),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784
3,New York,closed_roof_tank,30.6,40.625786,-73.746203,-8209390.0,4957301.0,36059,POINT (-73.74620 40.62579),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784
4,New York,closed_roof_tank,24.0,40.625781,-73.745813,-8209346.0,4957300.0,36059,POINT (-73.74581 40.62578),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784


### Calculating average risk and handling NaN values
We noticed the NRI data had a significant number of NaN values, indicating that there is no information for that cell. Dropping all the rows with NaN values would eliminate two-thirds of our data. However, we noticed that NaN values were generally entered for counties that had little to no risk for that specific natural hazard; counties in the center of the US had NaN values for the coastal flooding risk, for example. Therefore, after discussion with our researcher, we decided to calculate the average risk using ```0``` as the value for all NaN values, but then fill all the NaN values with ```-1```, an implausible number, to indicate in our visualizations that these values were unavailable.

To do this, we made a copy of the original ```df_tank_risks```, which had all the tank information, with natural hazard risks associated with each tank. Then, we filled the NaN values of that copy, ```df_tank_risks_calc```, with the value ```0```. This is the dataframe we used to calculate the average risk for each tank, by adding all the risk indices and dividing the sum by the number of natural hazards (6). We also calculated ```adj_risk```, which is the average risk for the tank, adjusted for whether or not that tank lies near a floodplain. For this column, we added five points to the ```avg_risk``` if the tank was near a floodplain, using the ```on_floodplain``` column.

We also dropped all columns other than ```avg_risk``` and ```adj_risk```, because we then merge this dataframe with the original ```df_tank_risks``` dataframe so that for each tank, we have the risk index for each individual natural hazard, along with these computed risks.

In [18]:
# Hazard score columns that feed the average; the divisor is derived from this list
# so the two cannot drift apart.
risk_columns = ['erqk_risks', 'swnd_risks', 'hrcn_risks',
                'trnd_risks', 'cfld_risks', 'rfld_risks']
# Points added to the average when a tank is flagged as near a floodplain.
floodplain_penalty = 5

# Treat missing hazard scores as 0 for the average only. The fill is restricted to the
# hazard columns, so `df_tank_risks` itself (and its non-hazard columns such as
# geometry) is never touched and keeps its NaNs for the later -1 fill.
avg_risk = df_tank_risks[risk_columns].fillna(0).sum(axis=1) / len(risk_columns)

# Two-column frame aligned on df_tank_risks' index, ready to merge back by index.
df_tank_risks_calc = pd.DataFrame({
    'avg_risk': avg_risk,
    'adj_risk': avg_risk + floodplain_penalty * df_tank_risks['on_floodplain'],
})
df_tank_risks_calc

Unnamed: 0,avg_risk,adj_risk
0,10.050825,10.050825
1,10.050825,10.050825
2,10.050825,10.050825
3,10.050825,10.050825
4,10.050825,10.050825
...,...,...
98164,12.051158,12.051158
98165,12.051158,12.051158
98166,12.051158,12.051158
98167,12.051158,12.051158


In [20]:
# Attach the computed avg_risk / adj_risk columns to the per-tank hazard table by
# matching rows on their index labels.
tank_risks_merged = df_tank_risks.merge(
    df_tank_risks_calc,
    left_index=True,
    right_index=True,
)
tank_risks_merged

Unnamed: 0,state,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,county,geometry,on_floodplain,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks,avg_risk,adj_risk
0,New York,closed_roof_tank,39.6,40.625572,-73.745231,-8.209282e+06,4.957270e+06,36059,POINT (-73.74523 40.62557),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825
1,New York,closed_roof_tank,19.8,40.624761,-73.744420,-8.209191e+06,4.957151e+06,36059,POINT (-73.74442 40.62476),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825
2,New York,closed_roof_tank,12.6,40.626086,-73.746257,-8.209396e+06,4.957345e+06,36059,POINT (-73.74626 40.62609),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825
3,New York,closed_roof_tank,30.6,40.625786,-73.746203,-8.209390e+06,4.957301e+06,36059,POINT (-73.74620 40.62579),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825
4,New York,closed_roof_tank,24.0,40.625781,-73.745813,-8.209346e+06,4.957300e+06,36059,POINT (-73.74581 40.62578),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98164,Colorado,narrow_closed_roof_tank,5.4,39.777431,-104.920718,-1.167972e+07,4.833652e+06,08031,POINT (-104.92072 39.77743),0,7.743007,12.625942,,45.758161,,6.179840,12.051158,12.051158
98165,Colorado,narrow_closed_roof_tank,4.8,39.777301,-104.920631,-1.167971e+07,4.833633e+06,08031,POINT (-104.92063 39.77730),0,7.743007,12.625942,,45.758161,,6.179840,12.051158,12.051158
98166,Colorado,narrow_closed_roof_tank,3.6,39.777701,-104.920609,-1.167971e+07,4.833691e+06,08031,POINT (-104.92061 39.77770),0,7.743007,12.625942,,45.758161,,6.179840,12.051158,12.051158
98167,Colorado,narrow_closed_roof_tank,4.8,39.776628,-104.920617,-1.167971e+07,4.833535e+06,08031,POINT (-104.92062 39.77663),0,7.743007,12.625942,,45.758161,,6.179840,12.051158,12.051158


Finally, we fill in the NaN values for the risk data with -1, as discussed previously, and save this dataframe as a shapefile.

In [21]:
# Replace missing hazard scores with the sentinel -1, in the hazard columns only;
# avg_risk / adj_risk were already computed and are not affected.
hazard_columns = ["erqk_risks", "swnd_risks", "hrcn_risks",
                  "trnd_risks", "cfld_risks", "rfld_risks"]
values = dict.fromkeys(hazard_columns, -1)
tank_risks_merged = tank_risks_merged.fillna(value=values)
tank_risks_merged

Unnamed: 0,state,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,county,geometry,on_floodplain,erqk_risks,swnd_risks,hrcn_risks,trnd_risks,cfld_risks,rfld_risks,avg_risk,adj_risk
0,New York,closed_roof_tank,39.6,40.625572,-73.745231,-8.209282e+06,4.957270e+06,36059,POINT (-73.74523 40.62557),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825
1,New York,closed_roof_tank,19.8,40.624761,-73.744420,-8.209191e+06,4.957151e+06,36059,POINT (-73.74442 40.62476),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825
2,New York,closed_roof_tank,12.6,40.626086,-73.746257,-8.209396e+06,4.957345e+06,36059,POINT (-73.74626 40.62609),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825
3,New York,closed_roof_tank,30.6,40.625786,-73.746203,-8.209390e+06,4.957301e+06,36059,POINT (-73.74620 40.62579),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825
4,New York,closed_roof_tank,24.0,40.625781,-73.745813,-8.209346e+06,4.957300e+06,36059,POINT (-73.74581 40.62578),0,6.887656,14.447002,4.095282,13.081208,6.959016,14.834784,10.050825,10.050825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98164,Colorado,narrow_closed_roof_tank,5.4,39.777431,-104.920718,-1.167972e+07,4.833652e+06,08031,POINT (-104.92072 39.77743),0,7.743007,12.625942,-1.000000,45.758161,-1.000000,6.179840,12.051158,12.051158
98165,Colorado,narrow_closed_roof_tank,4.8,39.777301,-104.920631,-1.167971e+07,4.833633e+06,08031,POINT (-104.92063 39.77730),0,7.743007,12.625942,-1.000000,45.758161,-1.000000,6.179840,12.051158,12.051158
98166,Colorado,narrow_closed_roof_tank,3.6,39.777701,-104.920609,-1.167971e+07,4.833691e+06,08031,POINT (-104.92061 39.77770),0,7.743007,12.625942,-1.000000,45.758161,-1.000000,6.179840,12.051158,12.051158
98167,Colorado,narrow_closed_roof_tank,4.8,39.776628,-104.920617,-1.167971e+07,4.833535e+06,08031,POINT (-104.92062 39.77663),0,7.743007,12.625942,-1.000000,45.758161,-1.000000,6.179840,12.051158,12.051158


In [26]:
# Write the final per-tank risk table to disk as a shapefile.
# NOTE(review): shapefile field names are limited to 10 characters, so
# `on_floodplain` will likely be truncated on write -- confirm downstream readers
# expect the shortened name.
output_path = '/hpc/group/codeplus22-vis/celine_data/tanks_risk_score.shp'
tank_risks_merged.to_file(output_path)