# Classifying tanks by county

### Importing packages and libraries

In [1]:
import warnings
# Silence all Python warnings for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so it also hides any warnings that
# pandas / geopandas raise in later cells -- consider narrowing it.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import geopandas as gpd

### Reading AST Data
This dataset also includes additional attributes, such as whether or not each tank is on a fault line or in a flood plain.

In [3]:
# Load the tank shapefile into a GeoDataFrame (one row per tank polygon).
df_tanks = gpd.read_file('/hpc/group/codeplus22-vis/infousa_copy/ast_master.shp')

In [4]:
# Display the loaded tanks (pandas truncates the middle rows in the rendered table).
df_tanks

Unnamed: 0,state,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,geometry
0,New York,closed_roof_tank,39.6,40.625572,-73.745231,-8.209282e+06,4.957270e+06,"POLYGON ((-73.74547 40.62575, -73.74500 40.625..."
1,New York,closed_roof_tank,19.8,40.624761,-73.744420,-8.209191e+06,4.957151e+06,"POLYGON ((-73.74465 40.62485, -73.74419 40.624..."
2,New York,closed_roof_tank,12.6,40.626086,-73.746257,-8.209396e+06,4.957345e+06,"POLYGON ((-73.74633 40.62615, -73.74618 40.626..."
3,New York,closed_roof_tank,30.6,40.625786,-73.746203,-8.209390e+06,4.957301e+06,"POLYGON ((-73.74639 40.62593, -73.74601 40.625..."
4,New York,closed_roof_tank,24.0,40.625781,-73.745813,-8.209346e+06,4.957300e+06,"POLYGON ((-73.74595 40.62590, -73.74567 40.625..."
...,...,...,...,...,...,...,...,...
98164,Colorado,narrow_closed_roof_tank,5.4,39.777431,-104.920718,-1.167972e+07,4.833652e+06,"POLYGON ((-104.92075 39.77746, -104.92069 39.7..."
98165,Colorado,narrow_closed_roof_tank,4.8,39.777301,-104.920631,-1.167971e+07,4.833633e+06,"POLYGON ((-104.92066 39.77732, -104.92060 39.7..."
98166,Colorado,narrow_closed_roof_tank,3.6,39.777701,-104.920609,-1.167971e+07,4.833691e+06,"POLYGON ((-104.92064 39.77772, -104.92058 39.7..."
98167,Colorado,narrow_closed_roof_tank,4.8,39.776628,-104.920617,-1.167971e+07,4.833535e+06,"POLYGON ((-104.92065 39.77665, -104.92059 39.7..."


### Reading in a county shapefile
We use it to determine which county each tank lies in. Filter out counties in states or territories where there are no tanks (Alaska, Hawaii, Puerto Rico, Virgin Islands, American Samoa, Guam, Northern Mariana Islands).

In [5]:
# Load the US county boundary shapefile into a GeoDataFrame (one row per county).
df_counties = gpd.read_file('/hpc/group/codeplus22-vis/county_shp_files/us_counties.shp')

In [6]:
# State FIPS codes to drop because they have no tanks: Alaska, Hawaii,
# Puerto Rico, Virgin Islands, American Samoa, Guam, Northern Mariana Islands.
fips_without_tanks = ['02', '15', '72', '78', '60', '66', '69']

# Keep every county whose state code is not in the exclusion list.
in_excluded_state = df_counties['STATEFP'].isin(fips_without_tanks)
df_counties = df_counties[~in_excluded_state]
df_counties.head()

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,21,7,516850,0500000US21007,21007,Ballard,6,639387454,69473325,"POLYGON ((-89.18137 37.04630, -89.17938 37.053..."
1,21,17,516855,0500000US21017,21017,Bourbon,6,750439351,4829777,"POLYGON ((-84.44266 38.28324, -84.44114 38.283..."
2,21,31,516862,0500000US21031,21031,Butler,6,1103571974,13943044,"POLYGON ((-86.94486 37.07341, -86.94346 37.074..."
3,21,65,516879,0500000US21065,21065,Estill,6,655509930,6516335,"POLYGON ((-84.12662 37.64540, -84.12483 37.646..."
4,21,69,516881,0500000US21069,21069,Fleming,6,902727151,7182793,"POLYGON ((-83.98428 38.44549, -83.98246 38.450..."


### Iterating through each county and finding the tanks in that county
Then add a `county` column to the tanks dataset that holds the `GEOID` of the county each tank falls in.

In [7]:
%%time

df_tanks['county'] = ''

for i in range(0, len(df_counties)):
    county = df_counties.iloc[i] ## finding county
    frame = county.to_frame() ## making county to a dataframe
    row = gpd.GeoDataFrame(frame.T) ## transforming pandas df to geopandas df
    df_intersect = gpd.sjoin(df_tanks, row, how='inner', predicate='intersects') ## finding tanks in that county
    idx = list(df_intersect.index.values) ## finding indices of those tanks
    for num in idx: ## looping over those indices 
        df_tanks['county'].iloc[idx] = row.iloc[0]['GEOID']

df_tanks

CPU times: user 3min 32s, sys: 0 ns, total: 3min 32s
Wall time: 3min 34s


Unnamed: 0,state,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,geometry,county
0,New York,closed_roof_tank,39.6,40.625572,-73.745231,-8.209282e+06,4.957270e+06,"POLYGON ((-73.74547 40.62575, -73.74500 40.625...",36059
1,New York,closed_roof_tank,19.8,40.624761,-73.744420,-8.209191e+06,4.957151e+06,"POLYGON ((-73.74465 40.62485, -73.74419 40.624...",36059
2,New York,closed_roof_tank,12.6,40.626086,-73.746257,-8.209396e+06,4.957345e+06,"POLYGON ((-73.74633 40.62615, -73.74618 40.626...",36059
3,New York,closed_roof_tank,30.6,40.625786,-73.746203,-8.209390e+06,4.957301e+06,"POLYGON ((-73.74639 40.62593, -73.74601 40.625...",36059
4,New York,closed_roof_tank,24.0,40.625781,-73.745813,-8.209346e+06,4.957300e+06,"POLYGON ((-73.74595 40.62590, -73.74567 40.625...",36059
...,...,...,...,...,...,...,...,...,...
98164,Colorado,narrow_closed_roof_tank,5.4,39.777431,-104.920718,-1.167972e+07,4.833652e+06,"POLYGON ((-104.92075 39.77746, -104.92069 39.7...",08031
98165,Colorado,narrow_closed_roof_tank,4.8,39.777301,-104.920631,-1.167971e+07,4.833633e+06,"POLYGON ((-104.92066 39.77732, -104.92060 39.7...",08031
98166,Colorado,narrow_closed_roof_tank,3.6,39.777701,-104.920609,-1.167971e+07,4.833691e+06,"POLYGON ((-104.92064 39.77772, -104.92058 39.7...",08031
98167,Colorado,narrow_closed_roof_tank,4.8,39.776628,-104.920617,-1.167971e+07,4.833535e+06,"POLYGON ((-104.92065 39.77665, -104.92059 39.7...",08031


### Export as shapefile

In [9]:
# Alternative target kept for reference (would write to a separate file instead):
# df_tanks.to_file('/hpc/group/codeplus22-vis/infousa_copy/tanks_fips.shp')
# Write the tanks, now including the `county` column, to the same path the data
# was originally read from, replacing that file's contents.
df_tanks.to_file('/hpc/group/codeplus22-vis/infousa_copy/ast_master.shp')

In [10]:
# Alternative source kept for reference (matches the alternative export target):
# df = gpd.read_file('/hpc/group/codeplus22-vis/infousa_copy/tanks_fips.shp')
# Read the exported shapefile back in to check that the `county` column was
# saved; this rebinds df_tanks to the freshly loaded frame.
df_tanks = gpd.read_file('/hpc/group/codeplus22-vis/infousa_copy/ast_master.shp')
df_tanks

Unnamed: 0,state,tank_type,diameter,lat_t_4326,lon_t_4326,lat_t_3857,lon_t_3857,county,geometry
0,New York,closed_roof_tank,39.6,40.625572,-73.745231,-8.209282e+06,4.957270e+06,36059,"POLYGON ((-73.74547 40.62575, -73.74500 40.625..."
1,New York,closed_roof_tank,19.8,40.624761,-73.744420,-8.209191e+06,4.957151e+06,36059,"POLYGON ((-73.74465 40.62485, -73.74419 40.624..."
2,New York,closed_roof_tank,12.6,40.626086,-73.746257,-8.209396e+06,4.957345e+06,36059,"POLYGON ((-73.74633 40.62615, -73.74618 40.626..."
3,New York,closed_roof_tank,30.6,40.625786,-73.746203,-8.209390e+06,4.957301e+06,36059,"POLYGON ((-73.74639 40.62593, -73.74601 40.625..."
4,New York,closed_roof_tank,24.0,40.625781,-73.745813,-8.209346e+06,4.957300e+06,36059,"POLYGON ((-73.74595 40.62590, -73.74567 40.625..."
...,...,...,...,...,...,...,...,...,...
98164,Colorado,narrow_closed_roof_tank,5.4,39.777431,-104.920718,-1.167972e+07,4.833652e+06,08031,"POLYGON ((-104.92075 39.77746, -104.92069 39.7..."
98165,Colorado,narrow_closed_roof_tank,4.8,39.777301,-104.920631,-1.167971e+07,4.833633e+06,08031,"POLYGON ((-104.92066 39.77732, -104.92060 39.7..."
98166,Colorado,narrow_closed_roof_tank,3.6,39.777701,-104.920609,-1.167971e+07,4.833691e+06,08031,"POLYGON ((-104.92064 39.77772, -104.92058 39.7..."
98167,Colorado,narrow_closed_roof_tank,4.8,39.776628,-104.920617,-1.167971e+07,4.833535e+06,08031,"POLYGON ((-104.92065 39.77665, -104.92059 39.7..."
