# LUCAS 2018/2022 data preprocessing to create EO4BK Nomenclature crop classes

This version (D2.1_v01) uses the new nomenclature proposed in the D1.1 document dated 05/11/2024 to create the LUCAS reference dataset. The previous version, D2.1_v00, used the nomenclature from the proposal.

## Table of contents

1. [Load data](#1-load-data)
2. [Reduce and Split](#2-reduce-and-split)
3. [Create Class for data with a low detail level](#3-create-class-for-data-with-a-low-detail-level)
4. [Create Class for data with a high detail level](#4-create-class-for-data-with-a-high-detail-level)
5. [Create Function to merge low detail level and high detail level](#5-create-function-to-merge-low-detail-level-and-high-detail-level)
6. [Create final EO4BKLUCAS dataset](#6-create-final-eo4bklucas-dataset)
7. [Save EO4BKLUCAS dataset](#7-save-eo4bklucas-dataset)


In [1]:
import numpy as np
import geopandas as gpd
import pandas as pd
import os
from dotenv import load_dotenv

# 1. Load data 

In [2]:
# Read the .env file and resolve the LUCAS D2.1_v01 working directory.
load_dotenv()
LUCAS = os.getenv('LUCAS_D21_V01')

# LUCAS COPERNICUS 2022 polygons, restricted to polygons of at least 100 sqm.
lcs2022 = gpd.read_file(f'{LUCAS}/lucas_input_data/l2022_survey_cop_radpoly_attr.gpkg')
lcs2022 = lcs2022[lcs2022['poly_area_sqm'] >= 100]

# LUCAS COPERNICUS 2018 polygons, reprojected to the CRS of the 2022 data.
lcs2018_hd_data = gpd.read_file(f'{LUCAS}/lucas_input_data/LUCAS_2018_U111_TRUE.gpkg')
lcs2018_hd_data = lcs2018_hd_data.to_crs(crs = lcs2022.crs)

# `.area` is expressed in the units of the CRS; apply the same 100 sqm threshold as for 2022.
lcs2018_hd_data['poly_area_sqm'] = lcs2018_hd_data.area
lcs2018_hd_data = lcs2018_hd_data[lcs2018_hd_data['poly_area_sqm'] >= 100]

# Define the data types of the 2022 attributes where needed.
for numeric_column in ('survey_lc1_perc', 'survey_calc_dist'):
    lcs2022[numeric_column] = pd.to_numeric(lcs2022[numeric_column])

for text_column in ('survey_lc1', 'survey_lu1', 'survey_lc2', 'survey_lu2',
                    'surveycprnlc', 'nuts0', 'nuts1', 'nuts2', 'nuts3'):
    lcs2022[text_column] = lcs2022[text_column].astype(str)

# Store the survey date as a 'YYYY-MM-DD' string.
lcs2022['survey_date'] = pd.to_datetime(lcs2022['survey_date']).apply(lambda x: x.strftime('%Y-%m-%d'))


# 2. Reduce and Split 

From lcs2022, only data with U111 'Agriculture (excluding fallow land and kitchen gardens)' == True is used. This subset is further divided into the LUCAS Copernicus module (lcscpncs) and the LUCAS theoretical points (lcstheo). 
Check whether lcstheo and lcscpncs agree spatially (Yes/No). If "No": ld_data. If "Yes": check whether the LC of lcstheo and lcscpncs agrees (Yes/No). If "No": remove the data; if "Yes": hd_data. 

In [3]:
# Keep only records whose primary or secondary land use is U111.
is_u111 = (
    (lcs2022['survey_lu1'].str.slice(0,4) == 'U111')
    | (lcs2022['survey_lu2'].str.slice(0,4) == 'U111')
    | (lcs2022['lu1_code'] == 'U111')
)
lcs2022_u111 = lcs2022[is_u111]

# The LUCAS theoretical points are given in EPSG:4326, while the polygons are in EPSG:3035
# (https://ec.europa.eu/eurostat/documents/205002/13686460/C1-LUCAS-2022.pdf),
# so build the points in EPSG:4326 and reproject them to the CRS of the polygons.
lcstheo = gpd.GeoSeries(
    gpd.points_from_xy(lcs2022_u111['point_long'], lcs2022_u111['point_lat'], crs = "EPSG:4326")
)
lcstheo = lcstheo.to_crs(lcs2022_u111.crs)

# Row-wise test (align = False): does the theoretical point lie within its own polygon?
# The GeoSeries carries a fresh positional index, so the result is re-labelled with the
# index of lcs2022_u111 before it is used as a mask. The predicate is evaluated once
# and reused for both subsets.
spatially_agree_boolean = lcstheo.within(lcs2022_u111.geometry, align = False)
spatially_agree_boolean.index = lcs2022_u111.index

# Spatially agrees == No -> low detail data
lcs2022_ld_data = lcs2022_u111[~spatially_agree_boolean]

# Spatially agrees == Yes -> candidates for high detail data
spatially_agree = lcs2022_u111[spatially_agree_boolean]

# For the spatially agreeing records, check whether the LC of lcstheo (survey_lc1 / survey_lc2)
# matches the LC of the Copernicus module (surveycprnlc).
lc_agree_boolean = (
    (spatially_agree['survey_lc1'].str.slice(0,3) == spatially_agree['surveycprnlc'])
    | (spatially_agree['survey_lc2'].str.slice(0,3) == spatially_agree['surveycprnlc'])
)

## LC agrees == No (neither survey_lc1 nor survey_lc2 matches)
error_data = spatially_agree[~lc_agree_boolean]

## LC agrees == Yes
lcs2022_hd_data = spatially_agree[lc_agree_boolean]



## Rename LUCAS 2018 to fit LUCAS 2022

For LUCAS 2018, the attributes 'survey_wm_reclaim_signs', 'survey_inspire_unvegetated', 'survey_lm_stand_veget', 'survey_lm_by_veget', 'survey_lm_crop_resid' and 'survey_lm_crop_resid_perc' are missing.

In [4]:


# Lower-case all column names, then map the 2018 names onto the 2022 naming scheme.
lcs2018_hd_data = lcs2018_hd_data.rename(columns=str.lower)
lcs2018_hd_data = lcs2018_hd_data.rename(columns={'year':'survey_year',
                                                  'lc1':'survey_lc1',
                                                  'lc2':'survey_lc2',
                                                  'lc1_spec':'survey_lc1_spec',
                                                  'lc2_spec':'survey_lc2_spec',
                                                  'cprn_lc':'surveycprnlc',
                                                  'wm':'survey_wm',
                                                  'wm_source':'survey_wm_source',
                                                  'wm_type':'survey_wm_type',
                                                  'wm_delivery':'survey_wm_delivery',
                                                  'crop_residues':'survey_lm_crop_resid',
                                                  'lc_lu_special_remark':'survey_lc_lu_special_remark'})

# Reduce the survey date to its 'YYYY-MM-DD' representation.
lcs2018_hd_data['survey_date'] = pd.to_datetime(lcs2018_hd_data['survey_date'])
lcs2018_hd_data['survey_date'] = lcs2018_hd_data['survey_date'].apply(lambda x: x.strftime('%Y-%m-%d'))

# In LUCAS 2018 '8 - Not relevant' is written as '8'; harmonize those fields.
for column in ('survey_lc2', 'survey_lc1_spec', 'survey_lc2_spec'):
    lcs2018_hd_data[column] = np.where(
        lcs2018_hd_data[column] == '8',
        '8 - Not relevant',
        lcs2018_hd_data[column]
    )

# In LUCAS 2018 the last letter of the specific codes is upper case, whereas it is lower case
# in LUCAS 2022. `str.capitalize` keeps the first character upper case and lower-cases the rest;
# the '8 - Not relevant' marker is left untouched.
for column in ('survey_lc1_spec', 'survey_lc2_spec'):
    lcs2018_hd_data[column] = np.where(
        lcs2018_hd_data[column] != '8 - Not relevant',
        lcs2018_hd_data[column].str.capitalize(),
        lcs2018_hd_data[column]
    )

# Convert the date-only strings back to datetime values.
# (This statement was indented at top level, which is an IndentationError.)
lcs2018_hd_data['survey_date'] = pd.to_datetime(lcs2018_hd_data['survey_date'])


# 3 Create Class for data with a low detail level


In [5]:
class lowdetail:
    """Low-detail subset of a LUCAS dataset for one EO4BK class.

    Rows of ``ld_data`` are selected by their ``surveycprnlc`` code and exposed
    as a GeoDataFrame that carries only the attributes listed in
    ``create_ld_gdf``.

    Parameters
    ----------
    ld_data : GeoDataFrame
        Source records; must provide ``surveycprnlc``, ``point_id``,
        ``survey_date``, ``survey_year``, ``nuts0``..``nuts3``,
        ``poly_area_sqm`` and a geometry column.
    eo4bkclass : str
        EO4BK class name written to the ``lc_eo4bk_<year>`` column.
    year : str
        Suffix used in the year-specific column names. The list default is kept
        for backward compatibility only; it is interpolated verbatim into the
        column names, so callers should pass a single year such as ``'2022'``.
    **kwargs
        ``lc1``, ``lc2``, ... ``lcN``: the ``surveycprnlc`` codes that make up
        the class. Every numbered ``lcN`` keyword is used; ``None`` values are
        skipped.

    Attributes
    ----------
    class_list : list
        The codes collected from ``**kwargs``, ordered by their number.
    ld_class : DataFrame
        Rows of ``ld_data`` matching any code in ``class_list``.
    ld_gdf : GeoDataFrame
        Result of ``create_ld_gdf()`` at construction time.
    """

    def __init__(self, ld_data, eo4bkclass,year = ['2018','2022'], **kwargs):

        self.ld_data = ld_data
        self.eo4bkclass = eo4bkclass
        self.year = year

        # Previously only lc1..lc3 were read, so codes passed as lc4..lc7
        # (e.g. for Fodder_crops or Fruit_and_nut) were silently dropped.
        self.class_list = self._collect_class_codes(kwargs)

        self.ld_class = self.filter_subset()
        self.ld_gdf = self.create_ld_gdf()

    @staticmethod
    def _collect_class_codes(kwargs):
        """Return the non-None values of all ``lc<number>`` keywords, ordered by number."""
        numbered = [
            (int(key[2:]), value)
            for key, value in kwargs.items()
            if key.startswith('lc') and key[2:].isdecimal() and value is not None
        ]
        return [value for _, value in sorted(numbered, key=lambda pair: pair[0])]

    def filter_subset(self):
        """Return the rows of ``ld_data`` whose ``surveycprnlc`` equals one of the class codes.

        The rows are grouped per code in the order of ``class_list`` and the
        index is renumbered. An empty ``DataFrame`` is returned when no code
        was given.
        """
        matches = [
            self.ld_data[self.ld_data['surveycprnlc'] == code]
            for code in self.class_list
        ]
        if not matches:
            return pd.DataFrame()
        return pd.concat(matches, ignore_index= True)

    def create_ld_gdf(self):
        """Build the low-detail GeoDataFrame for this class from ``ld_class``.

        Only the attributes that are collected in the COPERNICUS module are
        kept; ``lc_eo4bk_<year>`` is filled with ``eo4bkclass``.
        """
        year = self.year
        source = self.ld_class

        # The empty frame fixes the column order and the CRS; the first column
        # assignment below gives it the index of the source rows.
        columns = ['point_id',
                   f'survey_date_{year}',
                   f'survey_year_{year}',
                   'nuts0',
                   'nuts1',
                   'nuts2',
                   'nuts3',
                   'poly_area_sqm',
                   f'lc3_{year}',
                   f'lc_eo4bk_{year}',
                   'geometry']
        ld_gdf = gpd.GeoDataFrame({column: [] for column in columns},
                                  crs = self.ld_data.crs)

        ld_gdf['point_id']              = source['point_id'].astype(str)
        ld_gdf[f'survey_date_{year}']   = pd.to_datetime(source['survey_date'])
        ld_gdf[f'survey_year_{year}']   = source['survey_year']
        ld_gdf['nuts0']                 = source['nuts0'].astype(str)
        ld_gdf['nuts1']                 = source['nuts1'].astype(str)
        ld_gdf['nuts2']                 = source['nuts2'].astype(str)
        ld_gdf['nuts3']                 = source['nuts3'].astype(str)
        ld_gdf['poly_area_sqm']         = round(source['poly_area_sqm'].astype(float), 2)
        ld_gdf[f'lc3_{year}']           = source['surveycprnlc']
        ld_gdf[f'lc_eo4bk_{year}']      = self.eo4bkclass
        ld_gdf['geometry']              = source.geometry

        return ld_gdf



# 4. Create Class for data with a high detail level

In [7]:
class highdetail:
    """High-detail subset of a LUCAS dataset for one EO4BK class.

    Rows of ``hd_data`` are selected either by their 3-character land cover
    codes (``survey_lc1`` / ``survey_lc2``) or by their 4-character specific
    codes (``survey_lc1_spec`` / ``survey_lc2_spec``).

    Parameters
    ----------
    hd_data : GeoDataFrame
        Source records with the ``survey_*`` columns read by this class.
    eo4bkclass : str
        EO4BK class name written to the ``lc_eo4bk_<year>`` column.
    year : str
        Suffix used in the year-specific column names. The list default is kept
        for backward compatibility only; it is interpolated verbatim into the
        column names, so callers should pass a single year such as ``'2022'``.
    **kwargs
        ``lc1``..``lc7``: 3-character codes of the class.
        ``lcspec1``, ``lcspec2``: 4-character specific codes of the class.
        ``lc2dbl``: 3-character code of the partner crop for double cropping.

    Attributes
    ----------
    class_list, class_spec_list : list
        The non-None level-3 and level-4 codes.
    hd_class, hd_spec_class : DataFrame
        Rows selected by ``filter_level3_subset`` / ``filter_level4_subset``.
    hd_sglcrp : DataFrame or None
        Result of ``single_cropping()``.
    hd_dblcrp : DataFrame or None
        Result of ``double_cropping(lc2dbl)``.
    """

    def __init__(self, hd_data, eo4bkclass, year = ['2018','2022'], **kwargs):
        self.hd_data = hd_data
        self.eo4bkclass = eo4bkclass
        self.year = year

        level3_codes = [kwargs.get(f'lc{number}') for number in range(1, 8)]
        level4_codes = [kwargs.get('lcspec1'), kwargs.get('lcspec2')]
        lc2dbl = kwargs.get('lc2dbl')

        self.class_list = [lc for lc in level3_codes if lc is not None]
        self.class_spec_list = [lc for lc in level4_codes if lc is not None]

        self.hd_class = self.filter_level3_subset()
        self.hd_spec_class = self.filter_level4_subset()
        self.hd_sglcrp = self.single_cropping()
        self.hd_dblcrp = self.double_cropping(lc2dbl = lc2dbl)

    def _select_by_codes(self, lc1_column, lc2_column, code_length, codes):
        """Select every row of ``hd_data`` in which one of ``codes`` takes part.

        A row is kept when
        * its second land cover starts with one of the codes (and the first
          land cover is not null), or
        * its first land cover starts with one of the codes and the second
          land cover is '8 - Not relevant'.

        Each row is returned once, in the order of ``hd_data``, with a
        renumbered index. The earlier per-code loop appended a row once per
        matching code, and its inner loop only ever compared against the last
        code in the list.
        """
        if not codes:
            return pd.DataFrame()

        lc1 = self.hd_data[lc1_column]
        lc2 = self.hd_data[lc2_column]

        is_second_crop = lc1.notnull() & lc2.str.slice(0, code_length).isin(codes)
        is_single_crop = lc1.str.slice(0, code_length).isin(codes) & (lc2 == '8 - Not relevant')

        return self.hd_data[is_second_crop | is_single_crop].reset_index(drop=True)

    def filter_level3_subset(self):
        """Return the rows of ``hd_data`` that involve one of the codes in ``class_list``."""
        return self._select_by_codes('survey_lc1', 'survey_lc2', 3, self.class_list)

    def filter_level4_subset(self):
        """Return the rows of ``hd_data`` that involve one of the codes in ``class_spec_list``."""
        return self._select_by_codes('survey_lc1_spec', 'survey_lc2_spec', 4, self.class_spec_list)

    @staticmethod
    def _single_crop_rows(frame, lc1_column, lc2_column, code_length, codes):
        """Rows of ``frame`` whose first land cover is one of ``codes`` and whose second is not relevant.

        The rows are grouped per code in the order of ``codes``.
        """
        lc1_code = frame[lc1_column].str.slice(0, code_length)
        no_second_crop = frame[lc2_column] == '8 - Not relevant'
        parts = [frame[(lc1_code == code) & no_second_crop] for code in codes]
        return pd.concat(parts, ignore_index=True)

    def single_cropping(self):
        """Return the single-cropping rows of the class.

        The level-3 codes take precedence; the level-4 codes are only used when
        no level-3 code was given. Returns ``None`` when neither was given.
        """
        if self.class_list:
            return self._single_crop_rows(
                self.hd_class, 'survey_lc1', 'survey_lc2', 3, self.class_list)
        if self.class_spec_list:
            return self._single_crop_rows(
                self.hd_spec_class, 'survey_lc1_spec', 'survey_lc2_spec', 4, self.class_spec_list)
        return None

    def double_cropping(self, lc2dbl):
        """Return the rows in which the first class code is combined with ``lc2dbl``.

        Both orders are accepted (class code as first and ``lc2dbl`` as second
        land cover, or the other way round). Only the first entry of
        ``class_list`` (or of ``class_spec_list`` when no level-3 code was
        given) is considered. Returns ``None`` when neither list has entries.
        """
        # str(None) == 'None' never equals a 3-character slice, so the
        # selection is empty when no partner crop was given.
        lc2dbl = str(lc2dbl)

        if self.class_list:
            frame = self.hd_class
            main = self.class_list[0]
            lc1_code = frame['survey_lc1'].str.slice(0,3)
            lc2_code = frame['survey_lc2'].str.slice(0,3)
            main_first = (lc1_code == main) & (lc2_code == lc2dbl)
            main_second = (lc1_code == lc2dbl) & (lc2_code == main)
            return frame[main_first | main_second]

        if self.class_spec_list:
            frame = self.hd_spec_class
            main = self.class_spec_list[0]
            main_first = ((frame['survey_lc1_spec'].str.slice(0,4) == main)
                          & (frame['survey_lc2'].str.slice(0,3) == lc2dbl))
            # The 4-character class code has to be looked up in the specific
            # column; comparing it with a slice of 'survey_lc2' could never match.
            main_second = ((frame['survey_lc1'].str.slice(0,3) == lc2dbl)
                           & (frame['survey_lc2_spec'].str.slice(0,4) == main))
            return frame[main_first | main_second]

        return None

    def create_hd_gdf(self, input_data):
        """Build the high-detail GeoDataFrame for this class from ``input_data``.

        Parameters
        ----------
        input_data : GeoDataFrame
            Rows to export, e.g. ``hd_sglcrp`` or ``hd_dblcrp``.

        Returns
        -------
        GeoDataFrame
            The selected attributes with year-suffixed column names and
            ``lc_eo4bk_<year>`` set to ``eo4bkclass``. Attributes that exist
            only in some surveys are appended as ``<name>_only_<year>`` when
            the column is present in ``input_data``.
        """
        data = input_data
        year = self.year

        # The empty frame fixes the column order and the CRS; the first column
        # assignment below gives it the index of the input rows.
        columns = ['point_id',
                   f'survey_date_{year}',
                   f'survey_year_{year}',
                   'nuts0',
                   'nuts1',
                   'nuts2',
                   'nuts3',
                   'poly_area_sqm',
                   f'lc1_{year}',
                   f'lc2_{year}',
                   f'lc3_{year}',
                   f'lc1_spec_{year}',
                   f'lc2_spec_{year}',
                   f'lc_eo4bk_{year}',
                   f'survey_wm_{year}',
                   f'survey_wm_type_{year}',
                   f'survey_wm_source_{year}',
                   f'survey_wm_delivery_{year}',
                   f'survey_lm_crop_resid_{year}',
                   f'survey_lc_lu_special_remark_{year}',
                   'geometry']
        hd_gdf = gpd.GeoDataFrame({column: [] for column in columns}, crs=self.hd_data.crs)

        hd_gdf['point_id']                           = data['point_id'].astype(str)
        hd_gdf[f'survey_date_{year}']                = pd.to_datetime(data['survey_date'])
        hd_gdf[f'survey_year_{year}']                = data['survey_year']
        hd_gdf['nuts0']                              = data['nuts0'].astype(str)
        hd_gdf['nuts1']                              = data['nuts1'].astype(str)
        hd_gdf['nuts2']                              = data['nuts2'].astype(str)
        hd_gdf['nuts3']                              = data['nuts3'].astype(str)
        hd_gdf['poly_area_sqm']                      = round(data['poly_area_sqm'].astype(float), 2)
        hd_gdf[f'lc1_{year}']                        = data['survey_lc1']
        hd_gdf[f'lc2_{year}']                        = data['survey_lc2']
        hd_gdf[f'lc3_{year}']                        = data['surveycprnlc']
        hd_gdf[f'lc1_spec_{year}']                   = data['survey_lc1_spec']
        hd_gdf[f'lc2_spec_{year}']                   = data['survey_lc2_spec']
        hd_gdf[f'lc_eo4bk_{year}']                   = self.eo4bkclass
        hd_gdf[f'survey_wm_{year}']                  = data['survey_wm']
        hd_gdf[f'survey_wm_type_{year}']             = data['survey_wm_type']
        hd_gdf[f'survey_wm_source_{year}']           = data['survey_wm_source']
        hd_gdf[f'survey_wm_delivery_{year}']         = data['survey_wm_delivery']

        # Attributes that are not available in every survey. Each column is
        # checked on its own, so one missing column no longer suppresses the
        # columns that follow it.
        optional_columns = ('survey_wm_reclaim_signs',
                            'survey_inspire_unvegetated',
                            'survey_lm_stand_veget',
                            'survey_lm_by_veget',
                            'survey_lm_crop_resid_perc')
        for source_column in optional_columns:
            if source_column in data.columns:
                hd_gdf[f'{source_column}_only_{year}'] = data[source_column]

        hd_gdf[f'survey_lc_lu_special_remark_{year}'] = data['survey_lc_lu_special_remark']
        hd_gdf[f'survey_lm_crop_resid_{year}']        = data['survey_lm_crop_resid']
        hd_gdf['geometry']                            = data.geometry

        return hd_gdf

        

# 5. Create Function to merge low detail level and high detail level

In [8]:
def merge_gdfs(ld_gdf, hd_gdf):
    """Stack the low-detail frame on top of the high-detail frame.

    The rows of ``ld_gdf`` come first, the index is renumbered from 0 and the
    columns are not sorted.
    """
    frames = (ld_gdf, hd_gdf)
    return pd.concat(frames, ignore_index = True, sort = False)


# 6. Create final EO4BKLUCAS dataset

In [None]:
# Class dictionary after the changes made in the nomenclature according to the D1.1 document
# from 05.11.2024. Keys are the EO4BK class names; 'lcN' entries are level-3 LUCAS codes and
# 'lcspecN' entries are level-4 (specific) LUCAS codes.
classdic = {
    "Wheat": {'lc1': 'B11', 'lc2': 'B12'},
    "Barley": {'lc1': 'B13'},
    "Oats": {'lc1': 'B15'},
    "Maize": {'lc1': 'B16'},
    "Rice": {'lc1': 'B17'},
    "Flax": {'lcspec1': 'B35a'},
    "Other_cereals": {'lc1': 'B19', 'lc2': 'B18'},
    "Potatoes": {'lc1': 'B21'},
    "Sugar_beet": {'lc1': 'B22'},
    "Other_root_crops": {'lc1': 'B23'},
    "Sunflower": {'lc1': 'B31'},
    "Rapeseed": {'lc1': 'B32'},
    "Soybean": {'lc1': 'B33'},
    "Cotton": {'lc1': 'B34'},
    "Sugarcane": {'lcspec1': 'B37e'},
    "Coffee": {'lcspec1': 'B84c'},
    "Grapes": {'lc1': 'B82'},
    "Fodder_crops": {'lc1': 'B53', 'lc2': 'B54', 'lc3': 'B51', 'lc4': 'B52'},
    "Other_permanent_crop": {'lc1': 'B36', 'lc2': 'B84'},
    "Other_single_crops": {'lc1': 'B42', 'lc2': 'B44', 'lc3': 'B45'},
    "Protein_crops": {'lc1': 'B41'},
    "Olive_groves": {'lc1': 'B81'},
    "Other_grassland": {'lc1': 'E20', 'lc2': 'B55'},
}

# Result containers: '<class>_hd' / '<class>_ld' -> GeoDataFrame
gdf_dict18 = {}
gdf_dict22 = {}

# 2018: only high-detail data, single cropping.
for crop, lcs in classdic.items():
    hd_class_function = highdetail(lcs2018_hd_data, eo4bkclass=crop, year='2018', **lcs)
    gdf_dict18[f'{crop}_hd'] = hd_class_function.create_hd_gdf(hd_class_function.hd_sglcrp)

# 2022: low-detail and high-detail data.
# Flax, Sugarcane and Coffee do not have a low-detail class.
classes_without_ld = ('Flax', 'Sugarcane', 'Coffee')
for crop, lcs in classdic.items():
    if crop not in classes_without_ld:
        ld_class_function = lowdetail(lcs2022_ld_data, eo4bkclass=crop, year='2022', **lcs)
        gdf_dict22[f'{crop}_ld'] = ld_class_function.create_ld_gdf()

    hd_class_function = highdetail(lcs2022_hd_data, eo4bkclass=crop, year='2022', **lcs)
    gdf_dict22[f'{crop}_hd'] = hd_class_function.create_hd_gdf(hd_class_function.hd_sglcrp)

# Fruit and nut orchards are handled separately from classdic: lc1..lc7 = B71..B77
fruitandnut_codes = {f'lc{number}': f'B7{number}' for number in range(1, 8)}

## Fruit and Nut Orchards 2018
fruitandnut_hd_class18 = highdetail(lcs2018_hd_data, eo4bkclass='Fruit_and_nut', year='2018', **fruitandnut_codes)
gdf_dict18['Fruit_and_nut_hd'] = fruitandnut_hd_class18.create_hd_gdf(fruitandnut_hd_class18.hd_sglcrp)

## Fruit and Nut Orchards 2022
fruitandnut_ld_class22 = lowdetail(lcs2022_ld_data, eo4bkclass='Fruit_and_nut', year='2022', **fruitandnut_codes)
fruitandnut_hd_class22 = highdetail(lcs2022_hd_data, eo4bkclass='Fruit_and_nut', year='2022', **fruitandnut_codes)
gdf_dict22['Fruit_and_nut_ld'] = fruitandnut_ld_class22.create_ld_gdf()
gdf_dict22['Fruit_and_nut_hd'] = fruitandnut_hd_class22.create_hd_gdf(fruitandnut_hd_class22.hd_sglcrp)

# Register the class name so that it is part of classdic's keys.
classdic['Fruit_and_nut'] = []



# Mask out conflicting LUCAS Classes
This is done only for the HD data. Consequently, the LD and HD datasets have different Other Cereals, Other Root Crops and Fodder Crops classes.

In [None]:
# ---------------------------------------------------------------- LUCAS 2018
# Subset the relevant B19 class.
# For the moment B19 includes B19a and B19c, which are now exclusive classes (Sorghum, Millet).
lc18_b19 = gdf_dict18['Other_cereals_hd'][gdf_dict18['Other_cereals_hd']['lc1_2018']=='B19']

# Split B19 into Sorghum (B19a), Millet (B19c) and the rest.
# `.copy()` because the class label of the Sorghum/Millet subsets is overwritten below.
lc18_b19_without_sorghum_millet = lc18_b19[~lc18_b19['lc1_spec_2018'].isin(['B19a', 'B19c'])]
lc18_sorghum = lc18_b19[lc18_b19['lc1_spec_2018'] =='B19a'].copy()
lc18_millet = lc18_b19[lc18_b19['lc1_spec_2018'] =='B19c'].copy()

# Get the complete Other Cereals class
lc18_other_cereals = gdf_dict18['Other_cereals_hd']

# Clean Other Cereals from Sorghum and Millet. Only those points are removed, so the
# other codes of the class (B18) stay in Other Cereals; selecting by the ids of
# lc18_b19_without_sorghum_millet dropped them as well.
lc18_sorghum_millet_ids = pd.concat([lc18_sorghum['point_id'], lc18_millet['point_id']])
lc18_other_cereals_final = lc18_other_cereals[~lc18_other_cereals['point_id'].isin(lc18_sorghum_millet_ids)]

# Same for B23 Other Root Crops: B23a, B23b and B23h are fodder crops.
lc18_b23 = gdf_dict18['Other_root_crops_hd'][gdf_dict18['Other_root_crops_hd']['lc1_2018']=='B23']
lc18_is_fodder = lc18_b23['lc1_spec_2018'].isin(['B23a', 'B23b', 'B23h'])
lc18_b23_without_fodder_crops = lc18_b23[~lc18_is_fodder]

# Subset the fodder crops to join the EO4BK class Fodder_crops_hd
b23_fodder_crops = lc18_b23[lc18_is_fodder]
fodder18_crops = pd.concat([gdf_dict18['Fodder_crops_hd'], b23_fodder_crops])

# Rename the lc_eo4bk_2018 classes
fodder18_crops['lc_eo4bk_2018'] = 'Fodder_crops'
lc18_sorghum['lc_eo4bk_2018'] = 'Sorghum'
lc18_millet['lc_eo4bk_2018'] = 'Millet'

# ---------------------------------------------------------------- LUCAS 2022
# Same procedure; in 2022 the codes carry their full label.
lc22_b19 = gdf_dict22['Other_cereals_hd'][gdf_dict22['Other_cereals_hd']['lc1_2022']=='B19 - Other cereals']

# Split B19 into Sorghum (B19a), Millet (B19c) and the rest.
lc22_b19_without_sorghum_millet = lc22_b19[~lc22_b19['lc1_spec_2022'].isin([
    'B19a - Sorghum (Sorghum bicolor)',
    'B19c - Common, golden or proso millet (Panicum miliaceum L.)'])]
lc22_sorghum = lc22_b19[lc22_b19['lc1_spec_2022'] =='B19a - Sorghum (Sorghum bicolor)'].copy()
lc22_millet = lc22_b19[lc22_b19['lc1_spec_2022'] =='B19c - Common, golden or proso millet (Panicum miliaceum L.)'].copy()

# Get the complete Other Cereals class
lc22_other_cereals = gdf_dict22['Other_cereals_hd']

# Clean Other Cereals from Sorghum and Millet (all other points of the class are kept).
lc22_sorghum_millet_ids = pd.concat([lc22_sorghum['point_id'], lc22_millet['point_id']])
lc22_other_cereals_final = lc22_other_cereals[~lc22_other_cereals['point_id'].isin(lc22_sorghum_millet_ids)]

# Same for B23 Other Root Crops
lc22_b23 = gdf_dict22['Other_root_crops_hd'][gdf_dict22['Other_root_crops_hd']['lc1_2022']=='B23 - Other root crops']
lc22_is_fodder = lc22_b23['lc1_spec_2022'].isin([
    'B23a - Fodder beet (roots of Beta vulgaris)',
    'B23b - Fodder kale (Brassica oleracea L.)',
    'B23h - Fodder parsnips (Pastinaca sativa L.)'])
lc22_b23_without_fodder_crops = lc22_b23[~lc22_is_fodder]

# Subset the fodder crops to join the EO4BK class Fodder_crops_hd
b23_fodder_crops = lc22_b23[lc22_is_fodder]
fodder22_crops = pd.concat([gdf_dict22['Fodder_crops_hd'], b23_fodder_crops])

# Rename the lc_eo4bk_2022 classes
fodder22_crops['lc_eo4bk_2022'] = 'Fodder_crops'
lc22_sorghum['lc_eo4bk_2022'] = 'Sorghum'
lc22_millet['lc_eo4bk_2022'] = 'Millet'

In [None]:
# Update the result dictionaries with the masked classes:
# - Fodder Crops now include B23a, B23b and B23h
# - Other Cereals are replaced by the version cleaned from B19a Sorghum and B19c Millet
# - Other Root Crops no longer contain B23a, B23b and B23h
# - Millet and Sorghum are inserted as new entries
gdf_dict18.update({
    'Fodder_crops_hd': fodder18_crops,
    'Other_cereals_hd': lc18_other_cereals_final,
    'Other_root_crops_hd': lc18_b23_without_fodder_crops,
    'Millet_hd': lc18_millet,
    'Sorghum_hd': lc18_sorghum,
})

gdf_dict22.update({
    'Fodder_crops_hd': fodder22_crops,
    'Other_cereals_hd': lc22_other_cereals_final,
    'Other_root_crops_hd': lc22_b23_without_fodder_crops,
    'Millet_hd': lc22_millet,
    'Sorghum_hd': lc22_sorghum,
})

# 7. Save EO4BKLUCAS dataset

In [None]:
# All class names of the dictionary plus the two classes that were split off from Other Cereals.
names_list = [*classdic, 'Millet', 'Sorghum']

# 2022: one GeoPackage per class; the high-detail layer is written first, then the
# low-detail layer. Classes without an entry in gdf_dict22 are skipped.
for name in names_list:
    layers_2022 = (
        ('hd_data', gdf_dict22.get(f"{name}_hd")),
        ('ld_data', gdf_dict22.get(f"{name}_ld")),
    )
    for layer_name, layer_gdf in layers_2022:
        if layer_gdf is None:
            continue
        layer_gdf.to_file(f"{LUCAS}/2022/{name}_2022_eo4bk.gpkg", driver='GPKG', layer=layer_name)

# 2018: only a high-detail layer is written.
for name in names_list:
    hd_gdf = gdf_dict18.get(f"{name}_hd")
    if hd_gdf is None:
        continue
    hd_gdf.to_file(f"{LUCAS}/2018/{name}_2018_eo4bk.gpkg", driver = 'GPKG', layer = 'hd_data')
