# LUCAS 2022 data preprocessing to create EO4BK Nomenclature crop classes
## Table of contents  

1. [Load data](#1-load-data)
2. [Reduce and Split](#2-reduce-and-split)
3. [Create Class for data with a low detail level](#3-create-class-for-data-with-a-low-detail-level)
4. [Create Class for data with a high detail level](#4-create-class-for-data-with-a-high-detail-level)
5. [Create Function to merge low detail level and high detail level](#5-create-function-to-merge-low-detail-level-and-high-detail-level)
6. [Create final EO4BKLUCAS dataset](#6-create-final-eo4bklucas-dataset)
7. [Save EO4BKLUCAS dataset](#7-save-eo4bklucas-dataset)


In [1]:
import numpy as np
import geopandas as gpd
import pandas as pd

# 1. Load data 

In [2]:
lcs2022 = gpd.read_file('/net/projects/EO4BK/WP1/P1-EO4BK/data/LUCAS_Copernicus/l2022_survey_cop_radpoly_attr.gpkg')

# Define the data types of the columns that the following cells filter on.
# Numeric columns are parsed as numbers ...
for numeric_column in ('survey_lc1_perc', 'survey_calc_dist'):
    lcs2022[numeric_column] = pd.to_numeric(lcs2022[numeric_column])

# ... and the land cover / land use / NUTS columns are cast to strings so the
# `.str` accessor and string comparisons can be used on them.
for text_column in (
    'survey_lc1',
    'survey_lu1',
    'survey_lc2',
    'survey_lu2',
    'surveycprnlc',
    'nuts0',
    'nuts1',
    'nuts2',
    'nuts3',
):
    lcs2022[text_column] = lcs2022[text_column].astype(str)

# 2. Reduce and Split 

From lcs2022, only records with land use U111 'Agriculture (excluding fallow land and kitchen gardens)' are used. This subset is further divided into the LUCAS Copernicus module (lcscpncs) and the LUCAS theoretical points (lcstheo). 
Check whether lcstheo and lcscpncs agree spatially, i.e. whether the theoretical point lies within the Copernicus polygon (Yes/No). If "No": ld_data. If "Yes": check whether the land cover (LC) of lcstheo and lcscpncs agrees (Yes/No). If "No": remove the data; if "Yes": hd_data. 

In [3]:
# Keep only records whose primary or secondary land use (or the `lu1_code`
# column) is U111.
is_u111 = (
    (lcs2022['survey_lu1'].str.slice(0, 4) == 'U111')
    | (lcs2022['survey_lu2'].str.slice(0, 4) == 'U111')
    | (lcs2022['lu1_code'] == 'U111')
)
lcs2022_u111 = lcs2022[is_u111]

# Build the LUCAS theoretical points from the point coordinates.
# The theoretical points are in EPSG:4326, while the polygons are in EPSG:3035
# https://ec.europa.eu/eurostat/documents/205002/13686460/C1-LUCAS-2022.pdf
lcstheo = gpd.GeoSeries(
    gpd.points_from_xy(
        lcs2022_u111['point_long'],
        lcs2022_u111['point_lat'],
        crs="EPSG:4326",
    )
)
# Reproject the points into the CRS of the polygons before comparing them.
lcstheo = lcstheo.to_crs(lcs2022_u111.crs)

# Row-wise test (align=False -> compared by position, not by index) whether
# each theoretical point lies within the polygon of the same record.
# The spatial predicate is evaluated once and reused for both subsets.
spatially_agree_boolean = lcstheo.within(lcs2022_u111.geometry, align=False)
spatially_agree_boolean.index = lcs2022_u111.index

# Spatially agrees == No -> low detail data
lcs2022_ld_data = lcs2022_u111[~spatially_agree_boolean]

# Spatially agrees == Yes -> candidates for high detail data
spatially_agree = lcs2022_u111[spatially_agree_boolean]

# For the spatially agreeing records, check whether the survey land cover
# (first three characters of lc1 or lc2) equals the Copernicus land cover.
lc1_agrees = spatially_agree['survey_lc1'].str.slice(0, 3) == spatially_agree['surveycprnlc']
lc2_agrees = spatially_agree['survey_lc2'].str.slice(0, 3) == spatially_agree['surveycprnlc']
lc_agrees = lc1_agrees | lc2_agrees

## LC agrees == No (neither lc1 nor lc2 matches)
error_data = spatially_agree[~lc_agrees]

## LC agrees == Yes
lcs2022_hd_data = spatially_agree[lc_agrees]



# 3. Create Class for data with a low detail level


In [4]:
class lowdetail:
    """Select low-detail records of one EO4BK crop class.

    Records are selected by comparing the ``surveycprnlc`` column of
    ``ld_data`` with the land cover codes given as numbered keyword
    arguments.

    Parameters
    ----------
    ld_data : GeoDataFrame
        Low-detail subset of the LUCAS data. Must provide ``surveycprnlc``
        and the columns read in :meth:`create_ld_gdf`.
    eo4bkclass : str
        Label written to the ``lc_eo4bk`` column of the result.
    **kwargs
        ``lc1``, ``lc2``, ... : land cover codes belonging to the class.
        Any number of ``lc<N>`` keywords is accepted; ``None`` values and
        other keywords are ignored.

    Attributes
    ----------
    class_list : list
        The land cover codes, ordered by their keyword number.
    ld_class : DataFrame
        Rows of ``ld_data`` matching one of the codes.
    ld_gdf : GeoDataFrame
        Result of :meth:`create_ld_gdf` at construction time.
    """

    def __init__(self, ld_data, eo4bkclass, **kwargs):
        self.ld_data = ld_data
        self.eo4bkclass = eo4bkclass

        # Collect every numbered lc<N> keyword instead of a fixed lc1..lc3, so
        # that classes made of more than three codes are not silently truncated.
        self.class_list = self._collect_codes(kwargs)

        self.ld_class = self.filter_subset()
        self.ld_gdf = self.create_ld_gdf()

    @staticmethod
    def _collect_codes(kwargs, prefix='lc'):
        """Return the non-None values of ``<prefix><N>`` keywords, ordered by N."""
        numbered = []
        for key, value in kwargs.items():
            suffix = key[len(prefix):]
            if key.startswith(prefix) and suffix.isdecimal() and value is not None:
                numbered.append((int(suffix), value))
        return [code for _, code in sorted(numbered, key=lambda item: item[0])]

    def filter_subset(self):
        """Return the rows of ``ld_data`` whose ``surveycprnlc`` is in ``class_list``.

        Rows are grouped by code in ``class_list`` order and the index is
        reset. Without any code an empty selection with the columns of
        ``ld_data`` is returned.
        """
        if not self.class_list:
            return self.ld_data.iloc[0:0].reset_index(drop=True)

        per_code = [
            self.ld_data[self.ld_data['surveycprnlc'] == code]
            for code in self.class_list
        ]
        return pd.concat(per_code, ignore_index=True)

    def create_ld_gdf(self):
        '''
        Creates a GeoDataFrame from the selected LC class, with just those attributes
        of the LUCAS dataset that are collected in the COPERNICUS module.

        The result uses the CRS of ``ld_data`` and carries the EO4BK label in
        ``lc_eo4bk``.
        '''
        selected = self.ld_class

        columns = {
            'point_id': selected['point_id'].astype(str),
            'survey_date': pd.to_datetime(selected['survey_date']),
            'survey_year': selected['survey_year'],
            'nuts0': selected['nuts0'].astype(str),
            'nuts1': selected['nuts1'].astype(str),
            'nuts2': selected['nuts2'].astype(str),
            'nuts3': selected['nuts3'].astype(str),
            'poly_area_sqm': selected['poly_area_sqm'].astype(float).round(2),
            'lc3': selected['surveycprnlc'],
            # scalar, broadcast to every row
            'lc_eo4bk': self.eo4bkclass,
            'geometry': selected.geometry,
        }

        return gpd.GeoDataFrame(columns, geometry='geometry', crs=self.ld_data.crs)



# 4. Create Class for data with a high detail level

In [10]:
class highdetail:
    """Select high-detail records of one EO4BK crop class.

    A class is described either by level-3 land cover codes (compared with
    the first three characters of ``survey_lc1`` / ``survey_lc2``) or by
    level-4 codes (compared with the first four characters of
    ``survey_lc1_spec`` / ``survey_lc2_spec``).

    Parameters
    ----------
    hd_data : GeoDataFrame
        High-detail subset of the LUCAS data.
    eo4bkclass : str
        Label written to the ``lc_eo4bk`` column by :meth:`create_hd_gdf`.
    **kwargs
        ``lc1``, ``lc2``, ... : level-3 codes of the class.
        ``lcspec1``, ``lcspec2``, ... : level-4 codes of the class.
        ``lc2dbl`` : level-3 code of the partner crop for double cropping.
        ``None`` values and other keywords are ignored.

    Attributes
    ----------
    class_list, class_spec_list : list
        Level-3 / level-4 codes, ordered by their keyword number.
    hd_class, hd_spec_class : DataFrame
        Rows of ``hd_data`` related to the level-3 / level-4 codes.
    hd_sglcrp : DataFrame
        Single-cropping rows (see :meth:`single_cropping`).
    hd_dblcrp : DataFrame
        Double-cropping rows (see :meth:`double_cropping`).
    """

    # Value of the secondary land cover columns when there is no second crop.
    _NOT_RELEVANT = '8 - Not relevant'

    def __init__(self, hd_data, eo4bkclass, **kwargs):
        self.hd_data = hd_data
        self.eo4bkclass = eo4bkclass

        self.class_list = self._collect_codes(kwargs, 'lc')
        self.class_spec_list = self._collect_codes(kwargs, 'lcspec')

        self.hd_class = self.filter_level3_subset()
        self.hd_spec_class = self.filter_level4_subset()
        self.hd_sglcrp = self.single_cropping()
        self.hd_dblcrp = self.double_cropping(lc2dbl=kwargs.get('lc2dbl'))

    @staticmethod
    def _collect_codes(kwargs, prefix):
        """Return the non-None values of ``<prefix><N>`` keywords, ordered by N.

        Only keys that are exactly the prefix followed by digits are used, so
        ``lcspec1`` or ``lc2dbl`` are not picked up for the prefix ``lc``.
        """
        numbered = []
        for key, value in kwargs.items():
            suffix = key[len(prefix):]
            if key.startswith(prefix) and suffix.isdecimal() and value is not None:
                numbered.append((int(suffix), value))
        return [code for _, code in sorted(numbered, key=lambda item: item[0])]

    def _empty_selection(self):
        """Return a zero-row selection that keeps the columns of ``hd_data``."""
        return self.hd_data.iloc[0:0].reset_index(drop=True)

    def _filter_subset(self, lc1_col, lc2_col, codes, width):
        """Select the rows of ``hd_data`` related to ``codes``.

        A row is kept when
        * the secondary column starts with one of the codes (and the primary
          column is not null), or
        * the primary column starts with one of the codes and the secondary
          column is '8 - Not relevant'.

        Rows whose primary AND secondary column both belong to the class are
        covered by the first rule. Everything is evaluated in one mask so a
        row is returned once, even if it matches several codes.
        """
        if not codes:
            return self._empty_selection()

        lc1 = self.hd_data[lc1_col]
        lc2 = self.hd_data[lc2_col]
        lc1_in_class = lc1.str.slice(0, width).isin(codes)
        lc2_in_class = lc2.str.slice(0, width).isin(codes)

        mask = (lc1.notnull() & lc2_in_class) | (lc1_in_class & (lc2 == self._NOT_RELEVANT))
        return self.hd_data[mask].reset_index(drop=True)

    def filter_level3_subset(self):
        """Return the rows of ``hd_data`` related to the level-3 codes in ``class_list``."""
        return self._filter_subset('survey_lc1', 'survey_lc2', self.class_list, 3)

    def filter_level4_subset(self):
        """Return the rows of ``hd_data`` related to the level-4 codes in ``class_spec_list``."""
        return self._filter_subset('survey_lc1_spec', 'survey_lc2_spec', self.class_spec_list, 4)

    def _single_cropping_rows(self, frame, lc1_col, lc2_col, codes, width):
        """Rows of ``frame`` whose primary code is in ``codes`` and that have no second crop.

        The result is grouped by code in the order of ``codes``.
        """
        lc1_code = frame[lc1_col].str.slice(0, width)
        no_second_crop = frame[lc2_col] == self._NOT_RELEVANT
        per_code = [frame[(lc1_code == code) & no_second_crop] for code in codes]
        return pd.concat(per_code, ignore_index=True)

    def single_cropping(self):
        """Return the single-cropping rows of the class.

        Level-3 codes take precedence: the level-4 codes are only used when
        ``class_list`` is empty. Without any code an empty selection is
        returned.
        """
        if self.class_list:
            return self._single_cropping_rows(
                self.hd_class, 'survey_lc1', 'survey_lc2', self.class_list, 3)
        if self.class_spec_list:
            return self._single_cropping_rows(
                self.hd_spec_class, 'survey_lc1_spec', 'survey_lc2_spec', self.class_spec_list, 4)
        return self._empty_selection()

    def double_cropping(self, lc2dbl):
        """Return the rows where the class crop is combined with ``lc2dbl``.

        Both orders are accepted (class crop as lc1 and partner as lc2, or
        the other way round). Only the FIRST code of ``class_list`` (or of
        ``class_spec_list`` when ``class_list`` is empty) is used.
        NOTE(review): confirm that ignoring the remaining codes is intended.

        Parameters
        ----------
        lc2dbl : str or None
            Level-3 code of the partner crop. ``None`` yields an empty
            selection.
        """
        if self.class_list:
            frame = self.hd_class
        elif self.class_spec_list:
            frame = self.hd_spec_class
        else:
            return self._empty_selection()

        if lc2dbl is None:
            return frame.iloc[0:0]
        partner = str(lc2dbl)

        lc1_is_partner = frame['survey_lc1'].str.slice(0, 3) == partner
        lc2_is_partner = frame['survey_lc2'].str.slice(0, 3) == partner

        if self.class_list:
            crop = self.class_list[0]
            lc1_is_crop = frame['survey_lc1'].str.slice(0, 3) == crop
            lc2_is_crop = frame['survey_lc2'].str.slice(0, 3) == crop
        else:
            crop = self.class_spec_list[0]
            lc1_is_crop = frame['survey_lc1_spec'].str.slice(0, 4) == crop
            # The level-4 code lives in the *_spec column; the level-3 column
            # can never equal a four-character spec code.
            lc2_is_crop = frame['survey_lc2_spec'].str.slice(0, 4) == crop

        return frame[(lc1_is_crop & lc2_is_partner) | (lc1_is_partner & lc2_is_crop)]

    def create_hd_gdf(self, input_data):
        """Create the output GeoDataFrame for ``input_data``.

        Parameters
        ----------
        input_data : GeoDataFrame
            Selection to export, e.g. ``hd_sglcrp`` or ``hd_dblcrp``.

        Returns
        -------
        GeoDataFrame
            Identification, NUTS, land cover, water management
            (``survey_wm*``) and selected ``survey_inspire_*`` /
            ``survey_lm_*`` columns plus the EO4BK label, in the CRS of
            ``hd_data``.

        Notes
        -----
        Deliberately not exported: survey_trees_secondary,
        survey_homplot_fills_extwin, survey_parcel_area_ha,
        survey_inspire_arable, survey_inspire_plcc1..7,
        survey_inspire_{percon,wetcon,orgcon,checon,intcon,frecon,frecon1,
        salcon,percon1}, survey_lm_{stone_walls,hedge,grass_margins,
        grass_hedge}, survey_grazing, survey_lc_lu_special_remark.
        """
        data = input_data

        columns = {
            'point_id': data['point_id'].astype(str),
            'survey_date': pd.to_datetime(data['survey_date']),
            'survey_year': data['survey_year'],
            'nuts0': data['nuts0'].astype(str),
            'nuts1': data['nuts1'].astype(str),
            'nuts2': data['nuts2'].astype(str),
            'nuts3': data['nuts3'].astype(str),
            'poly_area_sqm': data['poly_area_sqm'].astype(float).round(2),
            'lc1': data['survey_lc1'],
            'lc2': data['survey_lc2'],
            'lc3': data['surveycprnlc'],
            'lc1_spec': data['survey_lc1_spec'],
            'lc2_spec': data['survey_lc2_spec'],
            # scalar, broadcast to every row
            'lc_eo4bk': self.eo4bkclass,
        }

        # Attributes copied unchanged under their original name.
        for name in (
            'survey_wm',
            'survey_wm_type',
            'survey_wm_source',
            'survey_wm_delivery',
            'survey_wm_reclaim_signs',
            'survey_inspire_unvegetated',
            'survey_lm_stand_veget',
            'survey_lm_by_veget',
            'survey_lm_crop_resid',
            'survey_lm_crop_resid_perc',
        ):
            columns[name] = data[name]

        columns['geometry'] = data.geometry

        return gpd.GeoDataFrame(columns, geometry='geometry', crs=self.hd_data.crs)

        

# 5. Create Function to merge low detail level and high detail level

In [6]:
def merge_gdfs(ld_gdf, hd_gdf):
    """Stack the low-detail frame on top of the high-detail frame.

    The rows of ``ld_gdf`` come first, followed by the rows of ``hd_gdf``.
    The result gets a fresh 0..n-1 index, and columns that exist in only one
    of the inputs are kept without sorting the column order.
    """
    frames = (ld_gdf, hd_gdf)
    return pd.concat(frames, sort=False, ignore_index=True)


# 6. Create final EO4BKLUCAS dataset

In [32]:
# Dictionary collecting one GeoDataFrame per crop and detail level.
# Keys follow the pattern '<name>_ld' / '<name>_hd' (no surrounding
# whitespace), because the save cell looks the layers up by exactly these keys.
# The high-detail frames contain the single-cropping selection (hd_sglcrp).
# A low-detail and a high-detail frame can be combined with
# merge_gdfs(gdf_dict['<name>_ld'], gdf_dict['<name>_hd']).
gdf_dict = {}


#### Cereals

## Wheat

wheat_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Wheat', lc1 = 'B11', lc2 = 'B12')
wheat_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Wheat', lc1 = 'B11', lc2 = 'B12')
gdf_dict['wheat_ld'] = wheat_ld_class.create_ld_gdf()
gdf_dict['wheat_hd'] = wheat_hd_class.create_hd_gdf(wheat_hd_class.hd_sglcrp)

## Barley

barley_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Barley', lc1 = 'B13')
barley_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Barley', lc1 = 'B13')
gdf_dict['barley_ld'] = barley_ld_class.create_ld_gdf()
gdf_dict['barley_hd'] = barley_hd_class.create_hd_gdf(barley_hd_class.hd_sglcrp)

## Oats

oats_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Oats', lc1 = 'B15')
oats_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Oats', lc1 = 'B15')
gdf_dict['oats_ld'] = oats_ld_class.create_ld_gdf()
gdf_dict['oats_hd'] = oats_hd_class.create_hd_gdf(oats_hd_class.hd_sglcrp)

## Maize

maize_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Maize', lc1 = 'B16')
maize_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Maize', lc1 = 'B16')
gdf_dict['maize_ld'] = maize_ld_class.create_ld_gdf()
gdf_dict['maize_hd'] = maize_hd_class.create_hd_gdf(maize_hd_class.hd_sglcrp)

## Rice

rice_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Rice', lc1 = 'B17')
rice_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Rice', lc1 = 'B17')
gdf_dict['rice_ld'] = rice_ld_class.create_ld_gdf()
gdf_dict['rice_hd'] = rice_hd_class.create_hd_gdf(rice_hd_class.hd_sglcrp)

## Flax (level-4 code, high detail only)

flax_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Flax', lcspec1 = 'B35a')
gdf_dict['flax_hd'] = flax_hd_class.create_hd_gdf(flax_hd_class.hd_sglcrp)

## Other Cereals

othercereals_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Other_cereals', lc1 = 'B19')
othercereals_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Other_cereals', lc1 = 'B19')
gdf_dict['othercereals_ld'] = othercereals_ld_class.create_ld_gdf()
gdf_dict['othercereals_hd'] = othercereals_hd_class.create_hd_gdf(othercereals_hd_class.hd_sglcrp)

#### Root Crops

## Potatoes

potatoes_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Potatoes', lc1 = 'B21')
potatoes_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Potatoes', lc1 = 'B21')
gdf_dict['potatoes_ld'] = potatoes_ld_class.create_ld_gdf()
gdf_dict['potatoes_hd'] = potatoes_hd_class.create_hd_gdf(potatoes_hd_class.hd_sglcrp)

## Sugar beets

sugarbeetsld_class = lowdetail(lcs2022_ld_data, eo4bkclass='sugar_beets', lc1 = 'B22')
sugarbeetshd_class = highdetail(lcs2022_hd_data, eo4bkclass='sugar_beets', lc1 = 'B22')
gdf_dict['sugarbeets_ld'] = sugarbeetsld_class.create_ld_gdf()
gdf_dict['sugarbeets_hd'] = sugarbeetshd_class.create_hd_gdf(sugarbeetshd_class.hd_sglcrp)

## Other Root Crops

otherrootcrops_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Other_root_crops', lc1 = 'B23')
otherrootcrops_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Other_root_crops', lc1 = 'B23')
gdf_dict['otherrootcrops_ld'] = otherrootcrops_ld_class.create_ld_gdf()
gdf_dict['otherrootcrops_hd'] = otherrootcrops_hd_class.create_hd_gdf(otherrootcrops_hd_class.hd_sglcrp)


#### Oil seed

## Sunflower

sunflower_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Sunflower', lc1 = 'B31')
sunflower_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Sunflower', lc1 = 'B31')
gdf_dict['sunflower_ld'] = sunflower_ld_class.create_ld_gdf()
gdf_dict['sunflower_hd'] = sunflower_hd_class.create_hd_gdf(sunflower_hd_class.hd_sglcrp)

## Rapeseed

rapeseed_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Rapeseed', lc1 = 'B32')
rapeseed_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Rapeseed', lc1 = 'B32')
gdf_dict['rapeseed_ld'] = rapeseed_ld_class.create_ld_gdf()
gdf_dict['rapeseed_hd'] = rapeseed_hd_class.create_hd_gdf(rapeseed_hd_class.hd_sglcrp)

## Soybean

soybean_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Soybean', lc1 = 'B33')
soybean_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Soybean', lc1 = 'B33')
gdf_dict['soybean_ld'] = soybean_ld_class.create_ld_gdf()
gdf_dict['soybean_hd'] = soybean_hd_class.create_hd_gdf(soybean_hd_class.hd_sglcrp)

#### Other Crops

## Cotton

cotton_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Cotton', lc1 = 'B34')
cotton_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Cotton', lc1 = 'B34')
gdf_dict['cotton_ld'] = cotton_ld_class.create_ld_gdf()
gdf_dict['cotton_hd'] = cotton_hd_class.create_hd_gdf(cotton_hd_class.hd_sglcrp)

## Sugarcane (level-4 code, high detail only)

sugarcane_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Sugarcane', lcspec1 = 'B37e')
gdf_dict['sugarcane_hd'] = sugarcane_hd_class.create_hd_gdf(sugarcane_hd_class.hd_sglcrp)

#### Permanent Crops

## Coffee (level-4 code, high detail only)

coffee_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Coffee', lcspec1 = 'B84c')
gdf_dict['coffee_hd'] = coffee_hd_class.create_hd_gdf(coffee_hd_class.hd_sglcrp)

## Fruit and Nut Orchards

fruitandnut_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Fruit_and_nut', lc1 = 'B71', lc2 = 'B72', lc3 = 'B73', lc4 = 'B74', lc5 = 'B75', lc6 = 'B76', lc7 = 'B77')
fruitandnut_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Fruit_and_nut', lc1 = 'B71', lc2 = 'B72', lc3 = 'B73', lc4 = 'B74', lc5 = 'B75', lc6 = 'B76', lc7 = 'B77')
gdf_dict['fruitandnut_ld'] = fruitandnut_ld_class.create_ld_gdf()
gdf_dict['fruitandnut_hd'] = fruitandnut_hd_class.create_hd_gdf(fruitandnut_hd_class.hd_sglcrp)

## Grapes

grapes_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Grapes', lc1 = 'B82')
grapes_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Grapes', lc1 = 'B82')
gdf_dict['grapes_ld'] = grapes_ld_class.create_ld_gdf()
gdf_dict['grapes_hd'] = grapes_hd_class.create_hd_gdf(grapes_hd_class.hd_sglcrp)


## Fodder Crops

foddercrops_ld_class = lowdetail(lcs2022_ld_data, eo4bkclass='Fodder_crops', lc1 = 'B53', lc2 = 'B54')
foddercrops_hd_class = highdetail(lcs2022_hd_data, eo4bkclass='Fodder_crops', lc1 = 'B53', lc2 = 'B54')
gdf_dict['foddercrops_ld'] = foddercrops_ld_class.create_ld_gdf()
gdf_dict['foddercrops_hd'] = foddercrops_hd_class.create_hd_gdf(foddercrops_hd_class.hd_sglcrp)


# 7. Save EO4BKLUCAS dataset

In [35]:
eo4bk_class_outputpath = '/net/projects/EO4BK/WP1/P1-EO4BK/data/eo4bkclasses'

# Crop names whose frames are looked up in gdf_dict as '<name>_hd' and
# '<name>_ld'. Every crop stored in gdf_dict has to be listed here, otherwise
# it is not written ('coffee' was previously missing).
names_list = ['wheat', 'barley', 'oats', 'maize', 'rice', 'flax', 'othercereals', 'potatoes',
              'sugarbeets', 'otherrootcrops', 'sunflower', 'rapeseed', 'soybean', 'cotton', 'sugarcane',
              'coffee', 'fruitandnut', 'grapes', 'foddercrops']

# One GeoPackage per crop, with the high-detail and low-detail frames stored
# as separate layers ('hd_data' / 'ld_data'). Crops that only have one of the
# two frames get only that layer.
for name in names_list:
    output_file = f"{eo4bk_class_outputpath}/{name}_eo4bk.gpkg"

    hd_gdf = gdf_dict.get(f"{name}_hd")
    ld_gdf = gdf_dict.get(f"{name}_ld")

    if hd_gdf is not None:
        hd_gdf.to_file(output_file, driver='GPKG', layer='hd_data')

    if ld_gdf is not None:
        ld_gdf.to_file(output_file, driver='GPKG', layer='ld_data')




Given a GeoSeries 's', you can use '~s.is_empty & s.notna()' to get back the old behaviour.

  has_z_arr = geometry[geometry.notna() & (~geometry.is_empty)].has_z
Given a GeoSeries 's', you can use '~s.is_empty & s.notna()' to get back the old behaviour.

  has_z_arr = geometry[geometry.notna() & (~geometry.is_empty)].has_z
