# Data Creation for BLX DB
#### October 2021

Combine crop data, parcel data, GSA data, and Bulletin 118 data to assign APNs, basins, and GSAs to crop polygons.

In [1]:
import fiona
import os
import geopandas as gpd
import pandas as pd

In [2]:
# Print the working directory; every data path used below is relative to it.
!pwd

/home/watermaster/Projects/BLX/GIS/Dream_DB


In [3]:
# List the layers in the 2018 crop-mapping geodatabase to find the layer name to load.
fiona.listlayers('./raw_data/i15_crop_mapping_2018_gdb/i15_crop_mapping_2018.gdb')

['i15_Crop_Mapping_2018']

In [4]:
# List the layers in the statewide parcel geodatabase to find the layer name to load.
fiona.listlayers('./raw_data/Parcels_CA_2014.gdb/')

['CA_PARCELS_STATEWIDE_INFO', 'CA_PARCELS_STATEWIDE']

In [5]:
# List the layers exposed by the county-boundary file to find the layer name to load.
fiona.listlayers('./raw_data/ca-county-boundaries/CA_Counties/CA_Counties_TIGER2016.dbf')

['CA_Counties_TIGER2016']

In [6]:
# List the layers in the Bulletin 118 geodatabase to find the layer name to load.
fiona.listlayers('./raw_data/B118_2018_GISdata/Geodatabase/B118_v6-1.gdb')

['i08_B118_v6_1']

In [7]:
# List the layers exposed by the submitted-GSA file to find the layer name to load.
fiona.listlayers('./raw_data/submittedgsa/GSA_Master.dbf')

['GSA_Master']

## Load county data to use as a mask for restricting the other datasets to an individual county.

In [8]:
# Load the California county boundaries and reproject them to EPSG:3310
# (NAD83 / California Albers), the CRS used for every layer in this notebook.
#
# The source is a shapefile component (.dbf), not a file geodatabase, so no
# 'FileGDB' driver is named here: a driver restriction that cannot open the
# file is either ignored or makes the read fail, depending on the I/O backend.
# Leaving it out lets the format be detected from the file itself.
gdf_counties = gpd.read_file(
    './raw_data/ca-county-boundaries/CA_Counties/CA_Counties_TIGER2016.dbf',
    layer='CA_Counties_TIGER2016',
).to_crs(3310)

In [9]:
# Keep only the county row(s) whose NAME is 'Kern'; the resulting frame is
# passed as the spatial mask to the reads that follow.
gdf_kern_county = gdf_counties.loc[gdf_counties['NAME'].eq('Kern')]

In [10]:
# gdf_kern_county.crs
# gdf_kern_county.plot()

## Load in crop data

In [11]:
# Read the 2018 crop-mapping layer, filtered by the Kern County mask at read
# time, and reproject the result to EPSG:3310 in the same expression.
gdf_crop = gpd.read_file(
    './raw_data/i15_crop_mapping_2018_gdb/i15_crop_mapping_2018.gdb',
    mask=gdf_kern_county,
    layer='i15_Crop_Mapping_2018',
    driver='FileGDB',
).to_crs(3310)

In [12]:
# gdf_crop.crs
# gdf_crop.plot()
# gdf_crop.columns

## Load in parcel data

In [13]:
# Read the statewide parcel polygons, filtered by the Kern County mask at
# read time, and reproject the result to EPSG:3310 in the same expression.
gdf_apn = gpd.read_file(
    './raw_data/Parcels_CA_2014.gdb/',
    mask=gdf_kern_county,
    layer='CA_PARCELS_STATEWIDE',
    driver='FileGDB',
).to_crs(3310)

In [14]:
# gdf_apn.crs
# gdf_apn.plot()
# gdf_crop.sindex.valid_query_predicates
# gdf_apn.sindex.valid_query_predicates
# gdf_apn.columns

### __gdf_combo__
(spatial intersection join of __gdf_crop__ with __gdf_apn__: each crop polygon is paired with every parcel it intersects)  

In [19]:
# Pair each crop polygon with every parcel it intersects. The result keeps
# the crop geometry and carries the parcel attributes alongside it.
gdf_combo = gpd.sjoin(
    left_df=gdf_crop,
    right_df=gdf_apn,
    how='inner',
    op='intersects',
)

In [20]:
# gdf_combo.columns
# gdf_combo.loc[gdf_combo['UniqueID'] == '1509614'].T#.plot() # test crop row

### __gdf_over_max__ 
(selection of the maximum spatial overlap of __gdf_crop__ with __gdf_apn__;   
in practice, each __gdf_crop__ row (identified by `UniqueID`) is associated with the __gdf_apn__ row (identified by `PARNO`) with which it has the largest spatial overlap)

In [21]:
# Intersect crop polygons with parcels: one row per (crop, parcel) overlap
# piece, with the area of that piece measured in the projected CRS.
gdf_over = gpd.overlay(gdf_crop, gdf_apn, how='intersection')
gdf_over['area_overlap'] = gdf_over.area

# For every crop polygon (UniqueID), keep only the piece with the largest
# overlap area, reduced to the columns needed to join back on.
gdf_over_max = gdf_over.loc[
    gdf_over.groupby('UniqueID')['area_overlap'].idxmax()
][['UniqueID', 'PARNO', 'area_overlap']]

### Merging
__gdf_combo__ with __gdf_over_max__ 

In [22]:
# Keep only the crop/parcel pairs that are the maximum-overlap pair for their
# crop polygon; the inner merge also brings in the area_overlap column.
gdf_combo_max_area = gdf_combo.merge(
    gdf_over_max,
    how='inner',
    on=['UniqueID', 'PARNO'],
)

In [23]:
# gdf_combo_max_area.loc[gdf_combo_max_area['UniqueID'] == '1509614'] # test crop row

In [24]:
# Distinct CROPTYP2 codes present after the merge (these are the codes that
# get mapped to descriptions in the metadata step).
gdf_combo_max_area['CROPTYP2'].unique()

array(['X', 'T9', 'D14', 'P1', 'V', 'D1', 'F11', 'T6', 'T10', 'T30',
       'T18', 'F16', 'YP', 'P6', 'G6', 'D10', 'C7', 'U', 'G2', 'C', 'D12',
       'T19', 'D5', 'D15', 'F1', 'T31', 'T15', 'T21', 'P3', 'F10', 'T4',
       'D3', 'T27', 'F2', 'T16', 'D13', 'D16', 'C6', 'C5', 'C4', 'T20'],
      dtype=object)

## Add metadata

In [26]:
# Build a {crop code: description} lookup from the 'formatted' sheet.
# The sheet is read without a header row and its two columns are named
# 'key' and 'value'; transposing the key-indexed frame yields a single
# record whose dict form is the lookup.
meta_data_dict = (
    pd.read_excel(
        './crop_metadata.xlsx',
        sheet_name='formatted',
        header=None,
        names=['key', 'value'],
    )
    .set_index('key')
    .T
    .to_dict('records')[0]
)

  """Entry point for launching an IPython kernel.


In [27]:
# Spot-check one entry of the lookup to confirm it loaded as code -> description.
meta_data_dict['G']

'Grain and hay crops'

In [28]:
# Translate each CROPTYP2 code into its description from the metadata lookup.
# Codes that are absent from the lookup become NaN rather than raising.
gdf_combo_max_area['crop2018'] = gdf_combo_max_area['CROPTYP2'].map(meta_data_dict)

In [117]:
# gdf_combo_max_area.head().T

## Load in GSA data

In [15]:
# Load the submitted GSA boundaries, filtered by the Kern County mask at read
# time, and reproject them to EPSG:3310 (NAD83 / California Albers).
#
# The 'submittedgsa' directory holds the GSA_Master shapefile, not a file
# geodatabase, so no 'FileGDB' driver is named here: a driver restriction
# that cannot open the data is either ignored or makes the read fail,
# depending on the I/O backend. The format is detected automatically.
gdf_gsa = gpd.read_file(
    './raw_data/submittedgsa/',
    layer='GSA_Master',
    mask=gdf_kern_county,
).to_crs(3310)

In [16]:
# Inspect the attribute columns available on the GSA layer.
gdf_gsa.columns

Index(['GSA_ID', 'DWR_GSA_ID', 'GSA_Name', 'Basin', 'Local_ID', 'Posted_DT',
       'GSA_URL', 'POC_Name', 'POC_Phone', 'POC_Email', '90_Days', 'geometry'],
      dtype='object')

## Load in Bulletin 118 basin data

In [17]:
# Read the Bulletin 118 basin polygons, filtered by the Kern County mask at
# read time, and reproject the result to EPSG:3310 in the same expression.
gdf_118 = gpd.read_file(
    './raw_data/B118_2018_GISdata/Geodatabase/B118_v6-1.gdb',
    mask=gdf_kern_county,
    layer='i08_B118_v6_1',
    driver='FileGDB',
).to_crs(3310)

In [39]:
# Inspect the attribute columns available on the Bulletin 118 layer.
gdf_118.columns

Index(['Basin_Number', 'Basin_Subbasin_Number', 'Basin_Name',
       'Basin_Subbasin_Name', 'Region_Office', 'Date_Record_Last_Edited',
       'Record_Edited_By', 'Comments', 'Date_Data_Applies_To', 'GlobalID',
       'SHAPE_Length', 'SHAPE_Area', 'geometry'],
      dtype='object')

In [63]:
# Attribute-only view of the basins (no geometry column is selected); it is
# merged onto the GSA frame by Basin_Subbasin_Number further down.
basins_df = gdf_118[['Basin_Subbasin_Number', 'Basin_Name','Basin_Subbasin_Name', 'Region_Office']]

In [81]:
# Derive the merge key for the Bulletin 118 attributes: the GSA 'Basin' value
# with any trailing " (Exclusive)" marker removed.
#
# The pattern is a regular expression (escaped parentheses), so regex=True is
# passed explicitly. pandas 2.0 switched the default of Series.str.replace to
# literal matching, under which the escaped pattern would never match and the
# marker would be left in place, silently dropping those rows from the merge.
# A raw string avoids Python's invalid-escape warning for "\(".
gdf_gsa['Basin_Subbasin_Number'] = gdf_gsa['Basin'].str.replace(r' \(Exclusive\)', '', regex=True)

  """Entry point for launching an IPython kernel.


In [94]:
# Attach basin names and region office to each GSA via the shared subbasin
# number. This is an inner merge, so GSAs with no matching basin row drop out.
gdf_SGMA_basins = gdf_gsa.merge(
    basins_df,
    how='inner',
    on='Basin_Subbasin_Number',
)

In [118]:
# gdf_SGMA_basins.T#['Basin'].str.replace('\(Exclusive\)','')

In [104]:
# Spatially join the crop/parcel frame to the GSA/basin polygons. The
# 'index_right' column left over from the earlier spatial join is dropped
# first so that this join can write its own.
gdf_combo_SGMA = gpd.sjoin(
    left_df=gdf_combo_max_area.drop(columns='index_right'),
    right_df=gdf_SGMA_basins,
    how='inner',
    op='intersects',
)

In [105]:
# Confirm which columns came through the crop / parcel / GSA / basin joins.
gdf_combo_SGMA.columns

Index(['UniqueID', 'DWR_REVISE', 'SYMB_CLASS', 'MULTIUSE', 'CLASS1',
       'SUBCLASS1', 'SPECOND1', 'IRR_TYP1PA', 'IRR_TYP1PB', 'PCNT1', 'CLASS2',
       'SUBCLASS2', 'SPECOND2', 'IRR_TYP2PA', 'IRR_TYP2PB', 'PCNT2', 'CLASS3',
       'SUBCLASS3', 'SPECOND3', 'IRR_TYP3PA', 'IRR_TYP3PB', 'PCNT3', 'CLASS4',
       'SUBCLASS4', 'SPECOND4', 'IRR_TYP4PA', 'IRR_TYP4PB', 'PCNT4', 'UCF_ATT',
       'CROPTYP1', 'ADOY1', 'CROPTYP2', 'ADOY2', 'CROPTYP3', 'ADOY3',
       'CROPTYP4', 'ADOY4', 'REGION', 'ACRES', 'COUNTY', 'LIQ_REPORT',
       'Shape_Length_left', 'Shape_Area_left', 'geometry', 'PARNO', 'County',
       'ADDRESS', 'CITY', 'ZIP', 'Shape_Length_right', 'Shape_Area_right',
       'area_overlap', 'crop2018', 'index_right', 'GSA_ID', 'DWR_GSA_ID',
       'GSA_Name', 'Basin', 'Local_ID', 'Posted_DT', 'GSA_URL', 'POC_Name',
       'POC_Phone', 'POC_Email', '90_Days', 'Basin_Subbasin_Number',
       'Basin_Name', 'Basin_Subbasin_Name', 'Region_Office'],
      dtype='object')

In [116]:
# Show the identifying columns of the final frame, transposed so that each
# record reads as a column.
gdf_combo_SGMA[[
    'UniqueID',
    'geometry',
    'PARNO',
    'County',
    'GSA_ID',
    'DWR_GSA_ID',
    'GSA_Name',
    'Basin_Subbasin_Number',
    'crop2018',
    'REGION',
    'ACRES',
]].transpose()

Unnamed: 0,2,3,45,46,73,74,75,76,77,78,...,8656,8658,8659,8660,9189,10523,20679,20680,20681,20717
UniqueID,1512576,1515669,1518246,1500069,1500381,1501087,1509510,1512938,1514175,1502893,...,1503174,1502860,1502859,1502858,1518674,1520264,6000904,6000906,6000907,5419704
geometry,(POLYGON Z ((196394.2325934503 -254149.6547569...,(POLYGON Z ((196356.2806880391 -254184.5322515...,(POLYGON Z ((196143.7768861336 -254686.9234781...,(POLYGON Z ((196143.7768861336 -254686.9234781...,(POLYGON Z ((200030.5898800215 -266994.2565698...,(POLYGON Z ((200237.6536688873 -267123.1117529...,(POLYGON Z ((199849.9552417842 -267217.4845524...,(POLYGON Z ((200245.9009661526 -267045.1894938...,(POLYGON Z ((200049.5893713578 -267210.3829931...,(POLYGON Z ((200427.6294562493 -266962.4169548...,...,(POLYGON Z ((104978.2933755068 -352412.2874137...,(POLYGON Z ((105292.029056878 -352178.77142398...,(POLYGON Z ((105249.58296029 -352197.004352957...,(POLYGON Z ((105239.7813719653 -352194.4602828...,(POLYGON Z ((104084.9982403273 -353016.3007215...,(POLYGON Z ((104344.0350112696 -353310.7596693...,(POLYGON Z ((102349.8458877693 -353802.3695916...,(POLYGON Z ((103306.4487640464 -351737.8218081...,(POLYGON Z ((102379.1370728991 -350225.5724244...,(POLYGON Z ((48328.00548791623 -246826.3523456...
PARNO,35236004,35236002,35236015,35236015,34110134,34110134,34110134,34110134,34110134,34110134,...,25518218,25518218,25518218,25518218,25529024,25518217,25562048,25518216,25539001,330030006
County,Kern,Kern,Kern,Kern,Kern,Kern,Kern,Kern,Kern,Kern,...,Kern,Kern,Kern,Kern,Kern,Kern,Kern,Kern,Kern,Tulare
GSA_ID,35,35,35,35,35,35,35,35,35,35,...,400,400,400,400,400,400,400,400,400,490
DWR_GSA_ID,145,145,145,145,145,145,145,145,145,145,...,361,361,361,361,361,361,361,361,361,379
GSA_Name,Indian Wells Valley Groundwater Authority GSA,Indian Wells Valley Groundwater Authority GSA,Indian Wells Valley Groundwater Authority GSA,Indian Wells Valley Groundwater Authority GSA,Indian Wells Valley Groundwater Authority GSA,Indian Wells Valley Groundwater Authority GSA,Indian Wells Valley Groundwater Authority GSA,Indian Wells Valley Groundwater Authority GSA,Indian Wells Valley Groundwater Authority GSA,Indian Wells Valley Groundwater Authority GSA,...,Castac Basin GSA,Castac Basin GSA,Castac Basin GSA,Castac Basin GSA,Castac Basin GSA,Castac Basin GSA,Castac Basin GSA,Castac Basin GSA,Castac Basin GSA,Tri-County Water Authority GSA - Tule
Basin_Subbasin_Number,6-054,6-054,6-054,6-054,6-054,6-054,6-054,6-054,6-054,6-054,...,5-029,5-029,5-029,5-029,5-029,5-029,5-029,5-029,5-029,5-022.13
crop2018,Deciduous fruits and nuts; Pistachios,Deciduous fruits and nuts; Pistachios,Deciduous fruits and nuts; Pistachios,Deciduous fruits and nuts; Pistachios,Deciduous fruits and nuts; Pistachios,Deciduous fruits and nuts; Pistachios,Young Perennial; None,Deciduous fruits and nuts; Pistachios,Deciduous fruits and nuts; Pistachios,Deciduous fruits and nuts; Pistachios,...,Vineyards,Vineyards,Vineyards,Vineyards,Not cropped or unclassified; None,Not cropped or unclassified; None,"Urban-residential, commercial, and industrial,...","Urban-residential, commercial, and industrial,...","Urban-residential, commercial, and industrial,...",Grain and hay crops; Wheat
REGION,SRO,SRO,SRO,SRO,SRO,SRO,SRO,SRO,SRO,SRO,...,SCRO,SCRO,SCRO,SCRO,SCRO,SCRO,SCRO,SCRO,SCRO,SCRO


In [114]:
# Distinct parcel numbers (PARNO) present in the final joined frame.
gdf_combo_SGMA['PARNO'].unique()

array(['35236004', '35236002', '35236015', ..., '25518216', '25539001',
       '330030006'], dtype=object)