## This notebook reads the opentET dataset and generates a corresponding parquet file

In [11]:
from simpledbf import Dbf5
import geopandas as gpd
import numpy as np

# make a centroid-based geojson output for each field in the list
fieldOptions = {
        #'HUC8_code': 'HUC - 8 or HUC - 12 number',
        #'HUC12_name': 'HUC - 8 or HUC - 12 name',
        #'ACRES_[YEAR]': 'Total irrigated acreage for the given year',
        #'ET_v_[YEAR]': 'Actual ET volume for the given year in acre - feet',
        #'ETc_v_[YEAR]': 'Potential crop ET volume for the given year in acre - feet',
        #'ETo_v_[YEAR]': 'Bias - corrected gridMET grass reference ET volume for the given year in acre - feet',
        #'PPT_v_[YEAR]': 'Total gridMET precipitation volume for the given year in acre - feet',
        #'EFF_v_[YEAR]': 'Adjusted effective precipitation(Prz) volume for the given year in acre - feet',
        #'NIWR_v_[YEAR]': 'Net irrigation water requirement volume for the given year in acre - feet',
        #'CU_v_[YEAR]': 'Adjusted irrigation consumptive use(actual ET less adjusted Prz) volume for the given year in acre - feet',
        #'AW_v_[YEAR]': 'Applied water volume for the given year in acre - feet',
        #'ET_r_[YEAR]': 'Actual ET area - weighted average rate for the given year in feet',
        #'ETc_r_[YEAR]': 'Potential crop ET area - weighted average rate for the given year in feet',
        #'ETo_r_[YEAR]': 'Bias - corrected gridMET grass reference ET area - weighted average rate for the given year in feet',
        #'PPT_r_[YEAR]': 'Total gridMET precipitation area - weighted average rate for the given year in feet',
        #'EFF_r_[YEAR]': 'Adjusted effective precipitation(Prz) area - weighted average rate for the given year in feet',
        #'NIWR_r_[YEAR]': 'Net irrigation water requirement area - weighted average rate for the given year in feet',
        #'CU_r_[YEAR]': 'Adjusted irrigation consumptive use(actual ET less adjusted Prz) area - weighted average rate for the given year in feet',
        #'AW_r_[YEAR]': 'Applied water area - weighted average rate for the given year in acre - feet',
        'ET_v': 'Long - term average of the individual annual actual ET volumes in acre - feet',
        'ETc_v': 'Long - term average of the individual annual potential crop ET volumes in acre - feet',
        'ETo_v': 'Long - term average of the individual annual bias - corrected gridMET grass reference ET volumes in acre - feet',
        'PPT_v': 'Long - term average of the individual annual gridMET precipitation volumes in acre - feet',
        'EFF_v': 'Long - term average of the individual annual adjusted Prz volumes in acre - feet',
        'NIWR_v': 'Long - term average of the individual annual net irrigation water requirement volumes in acre - feet',
        'CUirr_v': 'Long - term average of the individual annual adjusted irrigation consumptive use volumes in acre - feet',
        'AW_v': 'Long - term average of the individual annual applied water volumes in acre - feet',
        'ET_r': 'Long - term average of the individual annual actual ET area - weighted average rates in feet',
        'ETc_r': 'Long - term average of the individual annual potential crop ET area - weighted average rates in feet',
        'ETo_r': 'Long - term average of the individual annual bias - corrected gridMET grass reference ET area - weighted average rates in feet',
        'PPT_r': 'Long - term average of the individual annual gridMET precipitation area - weighted average rates in feet',
        'EFF_r': 'Long - term average of the individual annual adjusted Prz area - weighted average rates in feet',
        'NIWR_r': 'Long - term average of the individual annual net irrigation water requirement area - weighted average rates in feet',
        'CUirr_r': 'Long - term average of the individual annual adjusted irrigation consumptive use area - weighted average rates in feet',
        'AW_r': 'Long - term average of the individual annual applied water area - weighted average rates in feet',
        'areaacres': 'HUC - 8 or HUC - 12 geometry area in acres',
        'areasqkm': 'HUC - 8 or HUC - 12 geometry area in square kilometers'
    }

src = 'd:/Data/Spatial/OregonCropWaterUse/or_openet_huc8_irrigated_all.shp'
out0 = 'd:/Data/Spatial/OregonCropWaterUse/or_openet_huc8_irrigated_all.parquet'
out1 = 'd:/Data/Spatial/OregonCropWaterUse/or_openet_huc8_irrigated_all.geojson'
out2 = 'd:/Data/Spatial/OregonCropWaterUse/or_openet_huc8_irrigated_geom.geojson'

print(f"Reading {src}")
gdf = gpd.read_file(src)

print(f"Writing {out0}")
#df = gdf.drop('geometry', axis=1)
#df.to_parquet(out0)


print(f"Writing {out1}")
fields = [f for f in fieldOptions]
print(fields)

# concatenate lists
fields = ['geometry'] + fields
_gdf = gdf[fields]
_gdf['Index'] = np.arange(0,gdf.shape[0], dtype=np.int32)
_gdf.to_file(out1, driver="GeoJSON")


print(f"Writing {out2}")
_gdf = gdf[['geometry']]
_gdf['Index'] = np.arange(0,gdf.shape[0], dtype=np.int32)
_gdf.to_file(out2, driver="GeoJSON")


for field in fieldOptions:


    _gdf = gdf[['geometry', field]]
    path = f'd:/Data/Spatial/OregonCropWaterUse/or_openet_huc8_irrigated_all_{field}.geojson'
    _gdf.to_file(path, driver="GeoJSON")


    print(f"Writing {field} json")
    values = gdf[field].values
    # write numpy array to disk as a JSON object
    with open(f'd:/Data/Spatial/OregonCropWaterUse/or_openet_huc8_irrigated_all_{field}.json', 'w') as f:
        f.write('[')
        for i in range(len(values)):
            if i > 0:
                f.write(',')
            f.write(f'{float(values[i]):.4f}')
        f.write(']')

print('all done')


Reading d:/Data/Spatial/OregonCropWaterUse/or_openet_huc8_irrigated_all.shp
Writing d:/Data/Spatial/OregonCropWaterUse/or_openet_huc8_irrigated_all.parquet
Writing d:/Data/Spatial/OregonCropWaterUse/or_openet_huc8_irrigated_all.geojson
['ET_v', 'ETc_v', 'ETo_v', 'PPT_v', 'EFF_v', 'NIWR_v', 'CUirr_v', 'AW_v', 'ET_r', 'ETc_r', 'ETo_r', 'PPT_r', 'EFF_r', 'NIWR_r', 'CUirr_r', 'AW_r', 'areaacres', 'areasqkm']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Writing d:/Data/Spatial/OregonCropWaterUse/or_openet_huc8_irrigated_geom.geojson


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Writing ET_v json
Writing ETc_v json
Writing ETo_v json
Writing PPT_v json
Writing EFF_v json
Writing NIWR_v json
Writing CUirr_v json
Writing AW_v json
Writing ET_r json
Writing ETc_r json
Writing ETo_r json
Writing PPT_r json
Writing EFF_r json
Writing NIWR_r json
Writing CUirr_r json
Writing AW_r json
Writing areaacres json
Writing areasqkm json
all done
