# Reading OccurrenceCubeWERBirds data

In [20]:

import pandas as pd
import re

# geometry:
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.wkt import loads
import geodatasets

import matplotlib.pyplot as plt



## Alternative source (disabled): the same table hosted on GitHub.
## NOTE(review): this is a GitHub "blob" page URL; pd.read_csv would presumably
## need the raw-file URL instead -- confirm before re-enabling.
##file_path = 'https://github.com/FAIRiCUBE/data-requests/blob/main/encoding-examples/datacube_nl_farmland_birds_1.csv'
# Full occurrence table. This value is overwritten by the next assignment,
# so it is only kept here as an easy switch.
file_path =r"N:\C2205_FAIRiCUBE\f02_data\d100_species_distribution\data\datacube_nl_farmland_birds_1.csv"
# Subset file: this is the path that is actually read below.
file_path =r"N:\C2205_FAIRiCUBE\f02_data\d100_species_distribution\data\datacube_nl_farmland_birds_1_sub.csv"
df = pd.read_csv(file_path)
# Replace the header positionally; this requires the CSV to have exactly five
# columns. The meaning of 'B' and 'C' is not documented here -- TODO confirm.
df.columns = ['year', 'gridnum_10m', 'species_id','B','C']

# Extract the EAST coordinate
def extract_east_number(text):
    """Return the easting index encoded in a grid-cell code.

    The code is scanned for runs of digits and the second run is returned
    as an int. For a code such as '10mE401855N329204' the runs are
    '10', '401855' and '329204', so the result is 401855.

    Returns None when the text contains fewer than two digit runs.
    """
    digit_runs = re.findall(r'\d+', text)
    if len(digit_runs) < 2:
        return None
    # Index 1 is the second run; index 0 is the leading run (e.g. the '10' of '10m').
    return int(digit_runs[1])

# Extract the NORTH coordinate
def extract_north_number(text):
    """Return the northing index encoded in a grid-cell code.

    The code is scanned for runs of digits and the third run is returned
    as an int. For a code such as '10mE401855N329204' the runs are
    '10', '401855' and '329204', so the result is 329204.

    Returns None when the text contains fewer than three digit runs.
    """
    digit_runs = re.findall(r'\d+', text)
    # The northing is the THIRD run (index 2), so at least three runs are
    # required; guarding on two would raise IndexError for a code that has
    # an easting but no northing.
    if len(digit_runs) < 3:
        return None
    return int(digit_runs[2])

# Decode the easting / northing cell indices from every grid-cell code.
df['east_number'] = df['gridnum_10m'].apply(extract_east_number)
df['north_number'] = df['gridnum_10m'].apply(extract_north_number)

# The indices count 10 m cells, so multiplying by 10 yields metres
# (coordinates are labelled EPSG:3035 by the column names).
df['east_coordinate_epsg3035_meter'] = df['east_number'].mul(10)
df['north_coordinate_epsg3035_meter'] = df['north_number'].mul(10)


print(df)



   year        gridnum_10m  species_id  B       C  east_number  north_number  \
0  2018  10mE401855N329204     2480242  1  5000.0       401855        329204   
1  2018  10mE401856N328589     2490266  1  5000.0       401856        328589   

   east_coordinate_epsg3035_meter  north_coordinate_epsg3035_meter  
0                         4018550                          3292040  
1                         4018560                          3285890  


The following cell produces raster files (GeoTIFF format, one per species) in the EPSG:3035 projection from the species table:

In [26]:

import math
import re

import pandas as pd

# geometry:
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.wkt import loads
import geodatasets
import matplotlib.pyplot as plt
import rasterio
from rasterio import features
from rasterio.features import rasterize
from rasterio.transform import from_bounds
from rasterio import Affine

################################################################## INPUT
# User-tunable settings for the rasterisation run, followed by a first read
# of the table to collect the species ids that will be processed.

# Example of a single species id (disabled; all species are processed instead).
#selected_species = 2494686

grid_size = 100 ## cell size of the output raster, in metres
# Edge length of the square drawn for each occurrence; same value as grid_size.
side_length = grid_size

# Folder that receives one GeoTIFF per species.
output_raster_tif_folder = r"N:\C2205_FAIRiCUBE\f02_data\d100_species_distribution\data"


## Alternative source (disabled): the same table hosted on GitHub.
##file_path = 'https://github.com/FAIRiCUBE/data-requests/blob/main/encoding-examples/datacube_nl_farmland_birds_1.csv'
# Full occurrence table read by this cell.
file_path =r"N:\C2205_FAIRiCUBE\f02_data\d100_species_distribution\data\datacube_nl_farmland_birds_1.csv"

df = pd.read_csv(file_path)
# Replace the header positionally; this requires the CSV to have exactly five
# columns. The meaning of 'B' and 'C' is not documented here -- TODO confirm.
df.columns = ['year', 'gridnum_10m', 'species_id','B','C']
# Distinct species ids in order of first appearance, as plain Python values.
unique_species_ids = df['species_id'].unique().tolist()
print(unique_species_ids)


##################################################################

## START

def extract_east_number(text):
    """Return the easting index (second run of digits) of a grid-cell code.

    For '10mE401855N329204' the digit runs are '10', '401855', '329204',
    so the result is 401855. Returns None if fewer than two runs exist.
    """
    digit_runs = re.findall(r'\d+', text)
    return int(digit_runs[1]) if len(digit_runs) >= 2 else None


def extract_north_number(text):
    """Return the northing index (third run of digits) of a grid-cell code.

    For '10mE401855N329204' the result is 329204. Returns None if fewer
    than three runs exist (the guard must match the index that is read).
    """
    digit_runs = re.findall(r'\d+', text)
    return int(digit_runs[2]) if len(digit_runs) >= 3 else None


def create_square_polygon(row, side_length):
    """Build an axis-aligned square whose lower-left corner is the row's point.

    Parameters:
        row: a row with a 'geometry' entry exposing ``.x`` and ``.y``.
        side_length: edge length of the square, in the units of the point.

    Returns:
        A shapely Polygon running lower-left -> upper-left -> upper-right
        -> lower-right -> lower-left.
    """
    x_min = row['geometry'].x
    y_min = row['geometry'].y
    x_max = x_min + side_length
    y_max = y_min + side_length
    return Polygon([(x_min, y_min), (x_min, y_max), (x_max, y_max), (x_max, y_min), (x_min, y_min)])


# Read and decode the table ONCE. The per-species work below only needs a
# filtered view of it, so re-reading and re-parsing inside the loop is wasted.
occurrences = pd.read_csv(file_path)
occurrences.columns = ['year', 'gridnum_10m', 'species_id','B','C']

occurrences['east_number'] = occurrences['gridnum_10m'].apply(extract_east_number)
occurrences['north_number'] = occurrences['gridnum_10m'].apply(extract_north_number)

# The indices count 10 m cells, so multiplying by 10 yields metres.
occurrences['east_coordinate_epsg3035_meter'] = occurrences['east_number'] * 10
occurrences['north_coordinate_epsg3035_meter'] = occurrences['north_number'] * 10

## START

for selected_species in unique_species_ids:
    print(selected_species)

    # Independent copy of this species' rows, so adding geometry columns
    # never writes into (or warns about) the shared table.
    df = occurrences[occurrences['species_id'] == selected_species].copy()

    # Point at the lower-left corner of every occurrence cell.
    geometry = [Point(xy) for xy in zip(df['east_coordinate_epsg3035_meter'], df['north_coordinate_epsg3035_meter'])]
    gdf = gpd.GeoDataFrame(df, geometry=geometry)

    # One square per occurrence. The edge length comes from the INPUT section
    # (side_length) rather than being re-hard-coded here.
    # NOTE(review): squares start at 10 m-resolution corners, so the raster
    # origin is not necessarily aligned to a regular side_length grid -- confirm
    # this is intended.
    gdf['square_polygon'] = gdf.apply(create_square_polygon, args=(side_length,), axis=1)

    # Make the squares the active geometry and declare the projection.
    gdf_vector_grid = gpd.GeoDataFrame(gdf, geometry='square_polygon')
    gdf_vector_grid.crs = "EPSG:3035"

    ## map the data:
    #gdf_vector_grid.explore()

    ## vector to raster:##############################################################
    # Raster extent = bounding box of the squares (the active geometry).
    xmin, ymin, xmax, ymax = gdf_vector_grid.total_bounds
    resolution = side_length

    # Round UP: truncating would drop the partial row/column at the southern
    # and eastern edge whenever the extent is not a multiple of the resolution.
    rows = math.ceil((ymax - ymin) / resolution)
    cols = math.ceil((xmax - xmin) / resolution)

    # North-up transform anchored at the upper-left corner of the extent.
    transform = Affine(resolution, 0, xmin, 0, -resolution, ymax)

    # Raw f-string keeps the backslash literal (a plain "\s" is an invalid escape).
    output_raster_tif = rf"{output_raster_tif_folder}\species_{selected_species}_raster_{side_length}m.tif"

    # Burn the SQUARES (active geometry), each with its species id. Indexing
    # the column named 'geometry' would select the corner points instead.
    shapes = list(zip(gdf_vector_grid.geometry, gdf_vector_grid['species_id']))

    with rasterio.open(output_raster_tif, 'w', driver='GTiff',
                       height=rows, width=cols, count=1, dtype='float64',
                       crs=gdf_vector_grid.crs, transform=transform) as dst:
        rasterized = rasterize(
            shapes,
            out_shape=(rows, cols),
            fill=0,
            transform=dst.transform,
            all_touched=True,
            dtype='float64'
        )

        # Write the rasterized data to the raster file
        dst.write(rasterized, indexes=1)

    print("Raster file created successfully.")

print("done")

[2480242, 2490266, 2494686, 2481819, 2481714, 9515886, 9809229, 9616058, 9701857, 2481685, 8077224, 8332393, 2492943, 5231198, 7788295, 2490774, 8250742, 2495708, 7634625, 2474156, 2481792, 2493220, 2491534, 2482513, 2497266]
2480242
Raster file created successfully.
2490266
Raster file created successfully.
2494686
Raster file created successfully.
2481819
Raster file created successfully.
2481714
Raster file created successfully.
9515886
Raster file created successfully.
9809229
Raster file created successfully.
9616058
Raster file created successfully.
9701857
Raster file created successfully.
2481685
Raster file created successfully.
8077224
Raster file created successfully.
8332393
Raster file created successfully.
2492943
Raster file created successfully.
5231198
Raster file created successfully.
7788295
Raster file created successfully.
2490774
Raster file created successfully.
8250742
Raster file created successfully.
2495708
Raster file created successfully.
7634625
Raster fil