# Reading OccurrenceCubeWERBirds data

The following script produces spatial grid datasets from a GBIF data collection in which each record carries an EEA reference grid cell ID as an attribute.
https://www.gbif.org/

The script uses the downloaded occurrence CSV file, where the geospatial reference is provided by a text code for the EEA reference grid cell identifiers (e.g. 1kmE5432N4321 or 250mE1025N22000), to produce a raster (tif) and a vector grid.






THE FOLLOWING CSV FILE SHOWS THE OCCURRENCE OF BIRDS

The source data is a 
[CSV file](https://github.com/FAIRiCUBE/data-requests/blob/main/encoding-examples/datacube_nl_farmland_birds_1.csv)  with the following columns:
Year, EEA Grid Cell, TaxonID, Count, Uncertainty

In [None]:
## read the csv file:
import pandas as pd
import re
import geopandas as gpd
import re
from shapely.geometry import Point
from shapely.geometry import Polygon
import rasterio
from rasterio.features import rasterize
from rasterio import Affine
# Raw-content URL of the occurrence CSV. The GitHub "blob" page
# (https://github.com/FAIRiCUBE/data-requests/blob/main/encoding-examples/datacube_nl_farmland_birds_1.csv)
# serves HTML, so the raw.githubusercontent.com address is the one to read.
csv_url = (
    "https://raw.githubusercontent.com/FAIRiCUBE/data-requests/main/"
    "encoding-examples/datacube_nl_farmland_birds_1.csv"
)
df = pd.read_csv(csv_url)

# Rename the five columns (year, EEA grid cell, taxon ID, count, uncertainty)
# to the names used by the rest of the notebook.
# NOTE(review): read_csv treats the first CSV row as a header - confirm the
# file really has one, otherwise the first record is lost here.
df.columns = ["year", "gridnum_10m", "species_id", "Count", "Uncertainty"]

# Last expression of the cell: renders the DataFrame in the notebook.
df


The next code extracts the coordinates from the EEA grid ID:

In [None]:

# Extract the EAST coordinate
def extract_east_number(text):
    """Return the easting number embedded in an EEA grid cell ID.

    The ID is scanned for runs of digits; the second run is returned as an
    int. For an ID such as ``1kmE5432N4321`` the first run is the cell-size
    prefix (``1``) and the second run is the number following ``E``.

    Returns ``None`` when fewer than two digit runs are present.
    """
    digit_runs = re.findall(r'\d+', text)
    return int(digit_runs[1]) if len(digit_runs) >= 2 else None

# Extract the NORTH coordinate
def extract_north_number(text):
    """Return the northing number embedded in an EEA grid cell ID.

    The ID is scanned for runs of digits; the third run is returned as an
    int. For an ID such as ``1kmE5432N4321`` the runs are the cell-size
    prefix (``1``), the number after ``E`` and the number after ``N``.

    Returns ``None`` when fewer than three digit runs are present.
    """
    numbers = re.findall(r'\d+', text)
    # Index 2 is read below, so at least three runs are required; a guard of
    # two would let an ID without a northing raise IndexError.
    if len(numbers) >= 3:
        return int(numbers[2])  # third digit run = northing
    return None

# Parse every grid cell ID into its easting / northing numbers.
df['east_number'] = df['gridnum_10m'].apply(extract_east_number)
df['north_number'] = df['gridnum_10m'].apply(extract_north_number)

# Scale the grid numbers by the 10 m cell size to obtain coordinates in
# metres (column names label them as EPSG:3035).
df['east_coordinate_epsg3035_meter'] = 10 * df['east_number']
df['north_coordinate_epsg3035_meter'] = 10 * df['north_number']



In [None]:
def create_square_polygon(row):
    """Build the 10 x 10 square polygon of one grid cell.

    ``row`` must provide ``east_coordinate_epsg3035_meter`` and
    ``north_coordinate_epsg3035_meter``; that point is used as the first
    corner and the square extends 10 units towards increasing east and north.

    Returns a shapely ``Polygon`` whose ring is explicitly closed.
    """
    cell_size = 10
    west = row['east_coordinate_epsg3035_meter']
    south = row['north_coordinate_epsg3035_meter']
    east = west + cell_size
    north = south + cell_size

    # Ring order: start corner -> up -> right -> down -> back to start.
    ring = [
        (west, south),
        (west, north),
        (east, north),
        (east, south),
        (west, south),
    ]
    return Polygon(ring)

# Build one square cell geometry per record (row-wise apply).
df['square_polygon'] = df.apply(create_square_polygon, axis=1)

# Wrap the table as a GeoDataFrame with 'square_polygon' as the active
# geometry column, declaring the projection (EPSG:3035) at construction time.
gdf_vector_grid = gpd.GeoDataFrame(
    df,
    geometry='square_polygon',
    crs="EPSG:3035",
)

The following cell produces raster files (tif format) in the EPSG:3035 projection from the species table:

--> please set your output folder path & grid size:

In [None]:

# Export one shapefile and one GeoTIFF per species found in the table.
import os

# INPUT
unique_species_ids = df['species_id'].unique().tolist()
output_raster_tif_folder = r"data"
##################################################################

# START
resolution = 10  # pixel size of the output raster

# Create the output folder up front so the writers below do not fail on a
# missing directory.
os.makedirs(output_raster_tif_folder, exist_ok=True)

# All species share the same raster frame (extent of the complete grid), so
# bounds, shape and transform are computed once instead of once per species.
xmin, ymin, xmax, ymax = gdf_vector_grid.total_bounds
rows = int((ymax - ymin) / resolution)
cols = int((xmax - xmin) / resolution)

# North-up affine transform: (xmin, ymax) is the upper-left corner and the
# negative y step makes row indices grow southwards.
transform = Affine(resolution, 0, xmin, 0, -resolution, ymax)

for selected_species in unique_species_ids:
    print(selected_species)

    # Records of the current species only; reused for both outputs.
    gdf_filtered = gdf_vector_grid[gdf_vector_grid.species_id == selected_species]

    # --- vector output --------------------------------------------------
    # os.path.join picks the platform's separator; a literal "\" only works
    # on Windows and "\s" is an invalid escape sequence.
    output_shapefile = os.path.join(
        output_raster_tif_folder,
        "species_" + str(selected_species) + "_vector_" + str(resolution) + "m.shp",
    )
    print(output_shapefile)
    # The active geometry column of the GeoDataFrame is what gets written, so
    # no geometry keyword is passed (extra keywords go to the I/O engine).
    gdf_filtered.to_file(output_shapefile, index=False)

    # --- raster output --------------------------------------------------
    output_raster_tif = os.path.join(
        output_raster_tif_folder,
        "species_" + str(selected_species) + "_raster_" + str(resolution) + "m.tif",
    )
    print(output_raster_tif)

    # Burn each cell polygon into the grid; the burned value is the species
    # ID, background pixels are 0.
    # NOTE(review): with all_touched=True, squares that lie exactly on pixel
    # edges may also mark neighbouring pixels - confirm this is intended.
    rasterized = rasterize(
        [(geom, value)
         for geom, value in zip(gdf_filtered['square_polygon'], gdf_filtered['species_id'])],
        out_shape=(rows, cols),
        fill=0,
        transform=transform,
        all_touched=True,
        dtype='float64'
    )

    # Write the array as a single-band GeoTIFF in the grid's CRS.
    with rasterio.open(output_raster_tif, 'w', driver='GTiff',
                       height=rows, width=cols, count=1, dtype='float64',
                       crs=gdf_vector_grid.crs, transform=transform) as dst:
        dst.write(rasterized, indexes=1)

    print("Raster file created successfully.")

print("done")

Show the results:

In [None]:
import matplotlib.pyplot as plt

# Re-open the raster written last by the export loop and read its first band.
raster_path = output_raster_tif
with rasterio.open(raster_path) as src:
    raster_array = src.read(1)

# Plot the band with a diverging colormap plus a colorbar legend.
plt.imshow(raster_array, cmap='coolwarm')
plt.colorbar()
plt.title("show the last written raster")
plt.show()