In [6]:
import pandas as pd
import os
import sys
import boto3
import io
import geopandas as gpd

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_gpkg_from_directory, upload_csv_aws, pull_csv_from_directory
from scripts.utils.write_metadata import append_metadata

In [3]:
# pull csv from aws
bucket_name = 'ca-climate-index'
aws_dir = '2b_reproject/governance/emergency_response/usgs/'

pull_gpkg_from_directory(bucket_name, aws_dir)

Saved GeoPackage as 'governance_usgs_fire_stations.gpkg' locally


In [35]:
fire_station_data = gpd.read_file('governance_usgs_fire_stations.gpkg')
fire_station_data.columns

Index(['USCB_STATEFP', 'USCB_COUNTYFP', 'USCB_TRACTCE', 'USCB_GEOID',
       'USCB_NAME', 'USCB_NAMELSAD', 'USCB_MTFCC', 'USCB_FUNCSTAT',
       'USCB_ALAND', 'USCB_AWATER', 'USCB_INTPTLAT', 'USCB_INTPTLON',
       'geometry'],
      dtype='object')

In [36]:
fire_station_data = fire_station_data.rename(columns={'USCB_COUNTYFP':'countyfp'})
fire_station_data


Unnamed: 0,USCB_STATEFP,countyfp,USCB_TRACTCE,USCB_GEOID,USCB_NAME,USCB_NAMELSAD,USCB_MTFCC,USCB_FUNCSTAT,USCB_ALAND,USCB_AWATER,USCB_INTPTLAT,USCB_INTPTLON,geometry
0,06,037,401902,06037401902,4019.02,Census Tract 4019.02,G5020,S,2656563,3536,+34.1011641,-117.7249135,POINT (-117.72358 34.09620)
1,06,037,400205,06037400205,4002.05,Census Tract 4002.05,G5020,S,23546199,341777,+34.1521556,-117.7176795,POINT (-117.70773 34.13628)
2,06,037,980013,06037980013,9800.13,Census Tract 9800.13,G5020,S,5308102,0,+33.9164970,-118.3871461,POINT (-118.38491 33.92406)
3,06,037,980013,06037980013,9800.13,Census Tract 9800.13,G5020,S,5308102,0,+33.9164970,-118.3871461,POINT (-118.37953 33.92064)
4,06,115,040901,06115040901,409.01,Census Tract 409.01,G5020,S,263666616,4473094,+39.1852102,-121.3585368,POINT (-121.28916 39.20226)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145,06,071,010806,06071010806,108.06,Census Tract 108.06,G5020,S,12526996,0,+34.2587545,-117.3129594,POINT (-117.30146 34.24676)
3146,06,071,006603,06071006603,66.03,Census Tract 66.03,G5020,S,1829958,0,+34.0821080,-117.3367255,POINT (-117.33472 34.08379)
3147,06,037,102107,06037102107,1021.07,Census Tract 1021.07,G5020,S,12908558,13753,+34.2409052,-118.3395988,POINT (-118.36711 34.23625)
3148,06,037,101110,06037101110,1011.10,Census Tract 1011.10,G5020,S,1142401,0,+34.2594737,-118.2929869,POINT (-118.30154 34.25842)


In [37]:
fire_station_data_county_count = fire_station_data[fire_station_data['countyfp'] == '037']
fire_station_data_county_count

Unnamed: 0,USCB_STATEFP,countyfp,USCB_TRACTCE,USCB_GEOID,USCB_NAME,USCB_NAMELSAD,USCB_MTFCC,USCB_FUNCSTAT,USCB_ALAND,USCB_AWATER,USCB_INTPTLAT,USCB_INTPTLON,geometry
0,06,037,401902,06037401902,4019.02,Census Tract 4019.02,G5020,S,2656563,3536,+34.1011641,-117.7249135,POINT (-117.72358 34.09620)
1,06,037,400205,06037400205,4002.05,Census Tract 4002.05,G5020,S,23546199,341777,+34.1521556,-117.7176795,POINT (-117.70773 34.13628)
2,06,037,980013,06037980013,9800.13,Census Tract 9800.13,G5020,S,5308102,0,+33.9164970,-118.3871461,POINT (-118.38491 33.92406)
3,06,037,980013,06037980013,9800.13,Census Tract 9800.13,G5020,S,5308102,0,+33.9164970,-118.3871461,POINT (-118.37953 33.92064)
135,06,037,261104,06037261104,2611.04,Census Tract 2611.04,G5020,S,4446940,151199,+34.1126013,-118.4072679,POINT (-118.40628 34.12698)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3139,06,037,930400,06037930400,9304,Census Tract 9304,G5020,S,1489696833,4805908,+34.3144808,-117.9304341,POINT (-118.36110 34.29468)
3140,06,037,103300,06037103300,1033,Census Tract 1033,G5020,S,6141955,0,+34.2573786,-118.3554785,POINT (-118.33687 34.26400)
3147,06,037,102107,06037102107,1021.07,Census Tract 1021.07,G5020,S,12908558,13753,+34.2409052,-118.3395988,POINT (-118.36711 34.23625)
3148,06,037,101110,06037101110,1011.10,Census Tract 1011.10,G5020,S,1142401,0,+34.2594737,-118.2929869,POINT (-118.30154 34.25842)


In [38]:
filtered_fire_station_data = fire_station_data.drop_duplicates(subset=['USCB_GEOID', 'USCB_INTPTLAT', 'USCB_INTPTLON'])
filtered_fire_station_data

Unnamed: 0,USCB_STATEFP,countyfp,USCB_TRACTCE,USCB_GEOID,USCB_NAME,USCB_NAMELSAD,USCB_MTFCC,USCB_FUNCSTAT,USCB_ALAND,USCB_AWATER,USCB_INTPTLAT,USCB_INTPTLON,geometry
0,06,037,401902,06037401902,4019.02,Census Tract 4019.02,G5020,S,2656563,3536,+34.1011641,-117.7249135,POINT (-117.72358 34.09620)
1,06,037,400205,06037400205,4002.05,Census Tract 4002.05,G5020,S,23546199,341777,+34.1521556,-117.7176795,POINT (-117.70773 34.13628)
2,06,037,980013,06037980013,9800.13,Census Tract 9800.13,G5020,S,5308102,0,+33.9164970,-118.3871461,POINT (-118.38491 33.92406)
4,06,115,040901,06115040901,409.01,Census Tract 409.01,G5020,S,263666616,4473094,+39.1852102,-121.3585368,POINT (-121.28916 39.20226)
6,06,013,385100,06013385100,3851,Census Tract 3851,G5020,S,2390590,0,+37.9242312,-122.2957499,POINT (-122.29975 37.92984)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145,06,071,010806,06071010806,108.06,Census Tract 108.06,G5020,S,12526996,0,+34.2587545,-117.3129594,POINT (-117.30146 34.24676)
3146,06,071,006603,06071006603,66.03,Census Tract 66.03,G5020,S,1829958,0,+34.0821080,-117.3367255,POINT (-117.33472 34.08379)
3147,06,037,102107,06037102107,1021.07,Census Tract 1021.07,G5020,S,12908558,13753,+34.2409052,-118.3395988,POINT (-118.36711 34.23625)
3148,06,037,101110,06037101110,1011.10,Census Tract 1011.10,G5020,S,1142401,0,+34.2594737,-118.2929869,POINT (-118.30154 34.25842)


In [39]:
county_count_fire_stations = filtered_fire_station_data['countyfp'].value_counts().reset_index()
county_count_fire_stations = county_count_fire_stations.rename(columns={'count':'num_fire_stations'})
county_count_fire_stations.head(50)

Unnamed: 0,countyfp,num_fire_stations
0,37,376
1,73,165
2,71,147
3,59,136
4,65,128
5,1,92
6,85,85
7,67,80
8,13,64
9,29,61


In [31]:
# read in CA census tiger file
ca_tract_county = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county)
ca_tract_county = ca_tract_county.drop(columns={'field_1', 'geometry'})
ca_tract_county.columns = ca_tract_county.columns.str.lower()
ca_tract_county = ca_tract_county.applymap(lambda s: s.lower() if type(s) == str else s)

ca_tract_county

Unnamed: 0,tract,countyfp,county
0,06085504321,085,santa clara
1,06085504410,085,santa clara
2,06085507003,085,santa clara
3,06085507004,085,santa clara
4,06085502204,085,santa clara
...,...,...,...
9124,06059001303,059,orange
9125,06059001304,059,orange
9126,06059001401,059,orange
9127,06013367200,013,contra costa


In [40]:
fire_stations_merged = pd.merge(ca_tract_county, county_count_fire_stations, how='left', on='countyfp')
fire_stations_merged

Unnamed: 0,tract,countyfp,county,num_fire_stations
0,06085504321,085,santa clara,85
1,06085504410,085,santa clara,85
2,06085507003,085,santa clara,85
3,06085507004,085,santa clara,85
4,06085502204,085,santa clara,85
...,...,...,...,...
9124,06059001303,059,orange,136
9125,06059001304,059,orange,136
9126,06059001401,059,orange,136
9127,06013367200,013,contra costa,64


In [7]:
# pull csv from aws
bucket_name = 'ca-climate-index'
aws_dir = '2a_subset/governance/emergency_response/usgs/fire_stations/'

pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

Saved DataFrame as 'fire_station_subset.csv'


In [10]:
csv_fire_station_data = pd.read_csv('fire_station_subset.csv')
csv_fire_station_data.columns

Index(['X', 'Y', 'OBJECTID', 'PERMANENT_IDENTIFIER', 'SOURCE_FEATUREID',
       'SOURCE_DATASETID', 'SOURCE_DATADESC', 'SOURCE_ORIGINATOR',
       'DATA_SECURITY', 'DISTRIBUTION_POLICY', 'LOADDATE', 'FTYPE', 'FCODE',
       'NAME', 'ISLANDMARK', 'POINTLOCATIONTYPE', 'ADMINTYPE',
       'ADDRESSBUILDINGNAME', 'ADDRESS', 'CITY', 'STATE', 'ZIPCODE', 'GNIS_ID',
       'FOOT_ID', 'COMPLEX_ID', 'GLOBALID'],
      dtype='object')

In [12]:
csv_fire_station_data

Unnamed: 0,X,Y,OBJECTID,PERMANENT_IDENTIFIER,SOURCE_FEATUREID,SOURCE_DATASETID,SOURCE_DATADESC,SOURCE_ORIGINATOR,DATA_SECURITY,DISTRIBUTION_POLICY,...,ADMINTYPE,ADDRESSBUILDINGNAME,ADDRESS,CITY,STATE,ZIPCODE,GNIS_ID,FOOT_ID,COMPLEX_ID,GLOBALID
0,-117.723579,34.096203,2645,{F07E224D-D5D6-44DD-B2CA-5ADB414673DD},10522666,a9fde8f0-4665-4fb0-a41e-c142025bd57e,740 TNMC Update 10/18/2016 09:22:28,U.S. Geological Survey,5,E4,...,,,606 West Bonita Avenue,Claremont,CA,91711,2105685.0,,,{9209D351-33A2-4234-9E5D-DB6939BA1D24}
1,-117.707733,34.136280,2646,{8211277F-8D3D-4BAE-AF8C-3E6EC787C430},9999999999,a9fde8f0-4665-4fb0-a41e-c142025bd57e,740 TNMC Update 10/18/2016 09:22:28,U.S. Geological Survey,5,E4,...,,,3710 North Mills Avenue,Claremont,CA,91711-1440,2105672.0,,,{C0860E81-14B5-40B2-A4D1-AAFC53059A90}
2,-118.384911,33.924062,2649,{6E74FE27-F8B3-4CDB-8609-40356C1E1159},,{CC44F99C-6658-4584-B606-926E498D5C82},TNMC VWE DB Build - Vol Update,U.S. Geological Survey,5,E4,...,,,2261 Mariposa Avenue,El Segundo,CA,90245,,,,{A356E3A2-6D23-4F1E-920C-658F9AFC0E82}
3,-121.289161,39.202265,2651,{03F027BF-F279-4EEE-8BAE-73767C55D895},,{CC44F99C-6658-4584-B606-926E498D5C82},TNMC VWE DB Build - Vol Update,U.S. Geological Survey,5,E4,...,,,8839 Highway 20,Smartsville,CA,95977,,,,{11AE10FD-ECE8-49AA-A3EE-F26C01059D5D}
4,-122.299750,37.929839,2687,{E6AB51D2-311B-4005-9112-04B59E133890},9999999999,{CC44F99C-6658-4584-B606-926E498D5C82},TNMC VWE DB Build - Vol Update,U.S. Geological Survey,5,E4,...,,,1520 Arlington Boulevard,El Cerrito,CA,94530-2003,,,,{027167F7-8E9A-46F9-B6D0-3FE6151CB152}
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145,-117.334718,34.083786,52136,{B6E78F54-C52C-4320-83FE-CAEF1BAEF0BD},10522085,e38b9ff6-cb74-4d3f-b3dd-cdc062dc814d,740 TNMC Update 1/31/2020,U.S. Geological Survey,5,E4,...,,,1151 North Rancho Avenue,Colton,CA,92324,2665182.0,,,{208C29EC-3E68-4087-935B-A09993A52984}
3146,-118.367110,34.236254,52151,{22ED7401-EF7B-4590-94D1-35F1187EE01B},10523938,d414bec8-c36c-4234-bbc1-b6a5d28d3fbe,740 TNMC Update 2/4/2020,U.S. Geological Survey,5,E4,...,,,9224 Sunland Boulevard,Sun Valley,CA,91352,2105570.0,,,{9B16CED4-9B73-4D02-9DF6-D356E368E2CD}
3147,-118.301541,34.258416,52157,{B11F77D5-26CF-4DAE-B09F-EC9134D7B959},10523935,d414bec8-c36c-4234-bbc1-b6a5d28d3fbe,740 TNMC Update 2/4/2020,U.S. Geological Survey,5,E4,...,,,7777 Foothill Boulevard,Tujunga,CA,91042,2105568.0,,,{0616F770-F34F-4AB9-97D7-D90B52D03E68}
3148,-122.549821,39.378371,52176,{1F4D318D-0048-46A3-AF35-D2482B7B9623},10507610,6b5b75bb-767a-47be-a636-c752e4c5f902,740 TNMC Update 1/23/2020,U.S. Geological Survey,5,E4,...,,,5122 E Park Rd,Stonyford,CA,95979,2622246.0,,,{43E6D81B-555B-4DDA-ADF9-2E74DDDB8AC2}


In [None]:
filtered_csv_fire_station_data = csv_fire_station_data[['USCB_COUNTYFP', 'USCB_GEOID', 'County','geometry']]

# Establish columns to check for duplicates
columns_to_check = ['CECPlantID']

# Find duplicate rows based on specified columns
duplicate_mask = filtered_power_plants.duplicated(subset=columns_to_check, keep='first')

# Filter rows based on condition on the numeric column
cleaned_power_plants = filtered_power_plants[~(duplicate_mask)]

cleaned_power_plants

variable

In [2]:
governance_usgs_fire_stations

NameError: name 'governance_usgs_fire_stations' is not defined