In [1]:
from sdm.utils import set_project_wd
# Presumably switches the working directory to the project root so that the relative
# `data/...` paths used later resolve (the call reports the resulting directory) —
# TODO confirm against sdm.utils.
set_project_wd()

Current Working Directory: /Users/work/Data Science/sheffield-bats


In [2]:
from pathlib import Path
import geopandas as gpd
import json
import pandas as pd
from pipelines.GenerateAppData.main import points_to_grid_squares
import rioxarray as rxr

In [3]:
# Directory holding the prediction rasters (relative to the working directory).
predictions_dir = Path("data/sdm_predictions")

# glob() gives no ordering guarantee, so sort to make the choice of the
# "first" raster below reproducible between runs and machines.
predictions_files = sorted(predictions_dir.glob("*.tif"))
if not predictions_files:
    # Fail with a clear message instead of an IndexError on predictions_files[0].
    raise FileNotFoundError(f"No .tif prediction rasters found in {predictions_dir}")

# Only the first raster is opened; its bounds are what the record clipping relies on.
predictions = rxr.open_rasterio(predictions_files[0])
raster_bounds = predictions.rio.bounds()
predictions


In [4]:
# Read the processed bat records from parquet into a GeoDataFrame and preview them.
input_file = Path("data/processed/sybg-bats.parquet")
bats: gpd.GeoDataFrame = gpd.read_parquet(input_file)

bats.head()

Unnamed: 0,grid_reference,species_raw,activity_type,source_data,date,latin_name,common_name,genus,x,y,accuracy,geometry,grid_square_geom
0,NZ115084,Common pipistrelle,Roost,"{""Recorder"":""BCT\/NE"",""Date"":1274054400000,""Gr...",2010-05-17 00:00:00,Pipistrellus pipistrellus,Common Pipistrelle,Pipistrellus,411550.0,508450.0,100.0,POINT (411550.000 508450.000),"POLYGON ((411600.000 508400.000, 411600.000 50..."
1,NZ14640021,Pipistrellus sp.,Unknown,"{""Recorder"":""Giles Manners"",""Date"":14043456000...",2014-07-03 00:00:00,Pipistrellus sp.,Unidentified Pipistrelle,Pipistrellus,414645.0,500215.0,10.0,POINT (414645.000 500215.000),"POLYGON ((414650.000 500210.000, 414650.000 50..."
2,NZ20291106,Soprano pipistrelle,Unknown,"{""Recorder"":""Natural England Volunteer Bat Roo...",2013-11-28 00:00:00,Pipistrellus pygmaeus,Soprano Pipistrelle,Pipistrellus,420295.0,511065.0,10.0,POINT (420295.000 511065.000),"POLYGON ((420300.000 511060.000, 420300.000 51..."
3,NZ170014,Unidentified bat species,Roost,"{""Recorder"":""BCT\/NE"",""Date"":1283212800000,""Gr...",2010-08-31 00:00:00,Unknown,Unidentified Bat,Unknown,417050.0,501450.0,100.0,POINT (417050.000 501450.000),"POLYGON ((417100.000 501400.000, 417100.000 50..."
4,NZ185116,Unidentified bat species,Roost,"{""Recorder"":""BCT\/NE"",""Date"":1242777600000,""Gr...",2009-05-20 00:00:00,Unknown,Unidentified Bat,Unknown,418550.0,511650.0,100.0,POINT (418550.000 511650.000),"POLYGON ((418600.000 511600.000, 418600.000 51..."


In [5]:
# Interactive map of the Myotis nattereri roost records held in `bats`.
(
    bats
    .query("latin_name == 'Myotis nattereri' and activity_type == 'Roost'")
    .explore()
)

In [43]:

# The clip against the raster bounds further down works on raw coordinates, so the
# records have to be in British National Grid (EPSG:27700) before going any further.
assert (
    bats.crs == 27700
), "The bats dataframe must be in British National Grid to allow records to be filtered by raster bounds"

# Thresholds used in this cell (units presumably metres, as for the grid coordinates —
# TODO confirm).
MAX_ACCURACY = 100  # records with a larger `accuracy` value are discarded
GRID_SIZE = 250  # side length of the grid squares the points are snapped to

bats = bats[bats.accuracy <= MAX_ACCURACY]

# Expand the JSON stored in `source_data` into one column per key. Building the frame
# in a single call (instead of `.apply(pd.Series)` row by row) is much faster, and
# passing `index=bats.index` keeps the rows aligned with `bats` for the join below.
source_data = pd.DataFrame(
    bats.source_data.map(json.loads).tolist(),
    index=bats.index,
)

bats_gdf_full = bats.join(source_data)
bats_gdf_full = bats_gdf_full[
    [
        "grid_reference",
        "species_raw",
        "activity_type",
        "source_data",
        "date",
        "latin_name",
        "common_name",
        "genus",
        "x",
        "y",
        "accuracy",
        "Recorder",
        "Notes",
        "Evidence",
        "Source",
        "row_id",
        "geometry",
    ]
]

# Drop the records that are outside the raster bounds.
# The slice below treats raster_bounds as (min x, min y, max x, max y).
min_x, min_y, max_x, max_y = raster_bounds
# `.copy()` makes the clipped frame independent of the frames it was sliced from, so the
# column assignment below cannot trigger pandas' SettingWithCopyWarning.
bats_gdf_full = bats_gdf_full.cx[min_x:max_x, min_y:max_y].copy()

# NOTE: an earlier version called `bats_gdf_full.reset_index(drop=True, inplace=True)`
# at this point; that was identified as the cause of an index mismatch, so the
# original index is deliberately kept.

# Replace each record's geometry with the result of snapping it to the grid.
bats_gdf_full["geometry"] = points_to_grid_squares(
    bats_gdf_full.geometry,
    grid_size=GRID_SIZE,
    origin=(0, 0),
)

In [40]:
# Interactive map of the Myotis nattereri roost records held in `bats_gdf_full`.
(
    bats_gdf_full
    .query("latin_name == 'Myotis nattereri' and activity_type == 'Roost'")
    .explore()
)

In [None]:


# Count the records for every (geometry, latin_name, activity_type) combination.
group_keys = ["geometry", "latin_name", "activity_type"]
record_counts = bats_gdf_full.groupby(group_keys).size()

# The group-by result is a plain pandas object, so rebuild the GeoDataFrame from it
# with the geometry column and CRS set again.
bats_gdf_full = gpd.GeoDataFrame(
    record_counts.reset_index(name="count"),
    geometry="geometry",
    crs=27700,
)