In [1]:
# file paths
import os
from pathlib import Path

# visualization
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import box

# PDG packages
import pdgstaging

In [2]:
# Root of the sample dataset; each UTM-zone subdirectory contains one lake_change.gpkg.
base_dir = Path('/home/pdg/data/nitze_lake_change/data_sample_2022-09-09')
subdirs = ['32607', '32608', '32609']
filename = 'lake_change.gpkg'
# Collect every .gpkg file named `filename` anywhere below base_dir. rglob() searches
# recursively (same as glob() with '**/' in front of the pattern), so any subdirectory
# chain may sit between base_dir and the file -- which means the `subdirs` list above
# is probably not needed. as_posix() gives each path as a forward-slash string.
data_paths = [gpkg_file.as_posix() for gpkg_file in base_dir.rglob(filename)]
data_paths

['/home/pdg/data/nitze_lake_change/data_sample_2022-09-09/32609/05_Lake_Dataset_Raster_02_final/lake_change.gpkg',
 '/home/pdg/data/nitze_lake_change/data_sample_2022-09-09/32608/05_Lake_Dataset_Raster_02_final/lake_change.gpkg',
 '/home/pdg/data/nitze_lake_change/data_sample_2022-09-09/32607/05_Lake_Dataset_Raster_02_final/lake_change.gpkg']

In [3]:
# Troubleshoot with a single file: take the first path found by the glob above.
# NOTE(review): the listing above is not in sorted order, so which file ends up
# at index 0 may depend on the filesystem -- confirm before relying on it.
path = data_paths[0]
path

'/home/pdg/data/nitze_lake_change/data_sample_2022-09-09/32609/05_Lake_Dataset_Raster_02_final/lake_change.gpkg'

In [4]:
# following approach in stage() here: https://github.com/PermafrostDiscoveryGateway/viz-staging/blob/30400f5db65b7f590916b17daa9d58e4e0761784/pdgstaging/TileStager.py#L115

# Read the selected GeoPackage into a GeoDataFrame and display it.
gdf = gpd.read_file(path)
gdf

Unnamed: 0,label_id,id,Area_start_ha,Area_end_ha,NetChange_ha,NetChange_perc,GrossIncrease_ha,GrossIncrease_perc,GrossDecrease_ha,GrossDecrease_perc,StableWater_ha,Perimeter_meter,Orientation_degree,Solidity_ratio,Eccentricity_ratio,ChangeRateNet_myr-1,ChangeRateGrowth_myr-1,geometry
0,2,2,16.952401,7.7319,-9.220501,-54.390533,1.8675,11.016139,11.088001,143.405911,5.8644,2374.446968,30.683739,0.538462,0.662637,-1.941610,0.393249,"POLYGON ((544920.000 8415090.000, 544950.000 8..."
1,7,7,1.383300,1.2132,-0.170100,-12.296683,0.0585,4.229017,0.228600,18.842731,1.1547,343.492424,34.838432,0.866667,0.776685,-0.247604,0.085155,"POLYGON ((612900.000 7991640.000, 612960.000 7..."
2,11,11,4.681800,4.4802,-0.201600,-4.306037,0.1368,2.921954,0.338400,7.553235,4.3434,946.690476,129.759564,0.843750,0.880804,-0.106476,0.072252,"POLYGON ((615990.000 7991490.000, 616050.000 7..."
3,28,28,1.615500,1.5318,-0.083700,-5.181059,0.0423,2.618385,0.126000,8.225617,1.4895,422.132034,159.305943,0.857143,0.758043,-0.099140,0.050103,"POLYGON ((613830.000 7990410.000, 613890.000 7..."
4,29,29,2.032200,1.9710,-0.061200,-3.011515,0.0594,2.922941,0.120600,6.118722,1.9116,469.705627,25.624186,0.956522,0.327027,-0.065147,0.063231,"POLYGON ((613950.000 7990290.000, 614010.000 7..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90870,198086,198086,0.383400,1.4004,1.017000,265.258241,1.0215,266.431950,0.004500,0.321337,0.3789,337.279221,12.773606,0.800000,0.788271,1.507653,1.514324,"POLYGON ((409860.000 6651600.000, 409890.000 6..."
90871,198088,198088,1.549800,1.5084,-0.041400,-2.671314,0.1026,6.620209,0.144000,9.546541,1.4058,367.279221,171.869898,0.937500,0.716115,-0.056360,0.139676,"POLYGON ((505890.000 6651600.000, 505920.000 6..."
90872,198089,198089,1.381500,1.2042,-0.177300,-12.833877,0.0954,6.905537,0.272700,22.645741,1.1088,379.705627,163.963000,0.882353,0.838945,-0.233470,0.125624,"POLYGON ((584100.000 6651600.000, 584220.000 6..."
90873,198098,198098,1.863900,1.7406,-0.123300,-6.615162,0.1620,8.691453,0.285300,16.390900,1.5786,415.918831,162.347805,0.833333,0.910687,-0.148226,0.194750,"POLYGON ((584370.000 6651570.000, 584400.000 6..."


In [7]:
# subset gdf to single row (1 polygon) in case want to test this later
# .loc[[0]] selects the row whose index label is 0; passing a list keeps the
# result a one-row GeoDataFrame instead of collapsing it to a Series.
gdf_single_polygon = gdf.loc[[0]]
gdf_single_polygon

Unnamed: 0,label_id,id,Area_start_ha,Area_end_ha,NetChange_ha,NetChange_perc,GrossIncrease_ha,GrossIncrease_perc,GrossDecrease_ha,GrossDecrease_perc,StableWater_ha,Perimeter_meter,Orientation_degree,Solidity_ratio,Eccentricity_ratio,ChangeRateNet_myr-1,ChangeRateGrowth_myr-1,geometry
0,2,2,16.952401,7.7319,-9.220501,-54.390533,1.8675,11.016139,11.088001,143.405911,5.8644,2374.446968,30.683739,0.538462,0.662637,-1.94161,0.393249,"POLYGON ((544920.000 8415090.000, 544950.000 8..."


In [8]:
# filter out any geometries that are not polygons
# .copy() makes the filtered result an independent frame: a later cell assigns to
# gdf['geometry'], and assigning into a boolean-mask slice can trigger pandas'
# SettingWithCopyWarning whenever the filter actually drops rows.
gdf = gdf[gdf.geometry.geom_type == 'Polygon'].copy()
len(gdf) # looks like they all were polygons originally

90875

In [9]:
# Simplification tolerance, hard-coded here to the value specified in the config.
# NOTE(review): presumably interpreted in the units of the GeoDataFrame's CRS
# at the time simplify() is called -- confirm against the geopandas docs.
tolerance = 1e-4

In [10]:
# Simplify every geometry in the gdf to the chosen tolerance.
# This mirrors the custom function simplify_geoms() defined here: https://github.com/PermafrostDiscoveryGateway/viz-staging/blob/30400f5db65b7f590916b17daa9d58e4e0761784/pdgstaging/TileStager.py#L216
# Documentation for geopandas simplify: https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.simplify.html
simplified_geoms = gdf['geometry'].simplify(tolerance)
gdf['geometry'] = simplified_geoms

In [11]:
# check what the crs already is set to
gdf.crs

<Derived Projected CRS: EPSG:32609>
Name: WGS 84 / UTM zone 9N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 132°W and 126°W, northern hemisphere between equator and 84°N, onshore and offshore. Canada - British Columbia (BC); NorthW Territories (NWT); Yukon. United States (USA) - Alaska (AK).
- bounds: (-132.0, 0.0, -126.0, 84.0)
Coordinate Operation:
- name: UTM zone 9N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [12]:
# Execute the same steps as the custom function set_crs() that is defined here: https://github.com/PermafrostDiscoveryGateway/viz-staging/blob/30400f5db65b7f590916b17daa9d58e4e0761784/pdgstaging/TileStager.py#L180

# CRS the input file is stored in (matches the gdf.crs reported above).
# Open question: in the custom function at the link above, the input crs is pulled
# from the config using get('input_crs'), but it is unclear how it gets a value,
# because none of the configs at hand have an 'input_crs' key. Maybe this is where
# the error is -- unless get() is able to retrieve the crs from the TMS, which can
# be found with the key `tms_id`.
input_crs = 'EPSG:32609'
# Target CRS. This is a guess at the desired crs; the exact value might not matter
# as long as it differs from input_crs, since the goal is only to test whether the
# reprojection succeeds in general.
output_crs = 'EPSG:4326'

# The gdf crs MUST be set before it can be transformed with to_crs(), so do that first.
# allow_override=True replaces the crs label without transforming any coordinates, so
# re-running this cell after the reprojection below would mislabel the data -- re-run
# from the read_file cell instead.
gdf.set_crs(input_crs, inplace = True, allow_override = True)
# Actually transform the data. `crs` and `epsg` are alternative ways of naming the
# target, and `epsg` expects an integer code (e.g. 4326), so pass only `crs`.
gdf.to_crs(output_crs, inplace = True)
gdf.crs


<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [None]:
# maybe need to use `set_crs()` again after to_crs(), even though the above metadata does show it is changed to the new crs...
# gdf = gdf.set_crs(output_crs)
# gdf.crs

## Questions:

1. Could the error be because we do not `set_crs()` AGAIN after running `set_crs()` initially and then `to_crs()`? Printing the gdf's crs after `to_crs()` did indeed reflect the new crs, but maybe that is not truly the metadata that needs to be changed by `set_crs()`.

2. Could the error be in the to_crs() part, where Robyn's code is:\
`# Re-project the geoms`\
`if output_crs:`\
    `gdf.to_crs(output_crs, inplace=True)`

When maybe it should more explicitly define all 3 arguments like this:\
`if output_crs:`\
    `gdf.to_crs(crs = output_crs, epsg = output_crs, inplace = True)`

3. How is the custom class reading in the `input_crs` from the config? My config does not include that as a key, but maybe somehow it is deriving it from the TMS? But if that was the case, wouldn't it want to pull from the config using the key `"tms_id"` ?

Note to self: figure out what the line `self.grid = self.make_tms_grid(gdf)` does, but it might not be relevant to the gdf object (?) for the purposes of this troubleshooting.

In [None]:
# NOTE(review): add_properties() looks like a TileStager method (this notebook follows
# TileStager.stage()), not a GeoDataFrame method -- confirm. If so, this call would need
# a TileStager instance rather than being invoked on `gdf` itself.
gdf = gdf.add_properties(path)