## Seasonality Understanding

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None)

df_zc = pd.read_csv("../data/labels_hierarchy.csv")
df_seasonality = pd.read_csv("../data/labels_hierarchy_seasonality.csv")

# make a set of the lnf codes in both df (the value in the colum is a list of ints)
set_zc = set([int(x) for x in df_zc["lnf_codes"].apply(eval).sum()])
set_seasonality = set([int(x) for x in df_seasonality["lnf_codes"].apply(eval).sum()])

print(f"Number of unique lnf codes in ZC: {len(set_zc)}")
print(f"Number of unique lnf codes in Seasonality: {len(set_seasonality)}")

# get the intersection of the two sets
intersection = set_zc.intersection(set_seasonality)

print(f"Number of common lnf codes in ZC and Seasonality: {len(intersection)}")

Number of unique lnf codes in ZC: 80
Number of unique lnf codes in Seasonality: 138
Number of common lnf codes in ZC and Seasonality: 79


In [2]:
def get_label_slug_by_lnf_code(df, lnf_code):
    df['lnf_codes_list'] = df["lnf_codes"].apply(eval)
    df["lnf_codes_list"] = df["lnf_codes_list"].apply(lambda x: [int(i) for i in x])
    return df[df["lnf_codes_list"].apply(lambda x: int(lnf_code) in x)]['label_slug'].values[0]

df_lnf_lookup = pd.read_csv("../archive/input/LNF_Katalog_Nutzungsart.csv")

**Which LNF Codes are new in Seasonality?**

In [3]:
# get the ones new in seasonality
new_in_seasonality = set_seasonality - set_zc
print(f"Number of new lnf codes in Seasonality: {len(new_in_seasonality)}")

Number of new lnf codes in Seasonality: 59


In [4]:
df_new_in_seasonality = pd.DataFrame(columns=["lnf_code", "label_slug", "nutzung_de"])
rows = []

for lnf_code in new_in_seasonality:
    label_slug = get_label_slug_by_lnf_code(df_seasonality, lnf_code)
    nutzung_de = df_lnf_lookup[df_lnf_lookup["LNF_Code"] == lnf_code]["Nutzung_DE"].values[0]
    rows.append({"lnf_code": lnf_code, "label_slug": label_slug, "nutzung_de": nutzung_de})

df_new_in_seasonality = pd.concat([df_new_in_seasonality, pd.DataFrame(rows)], ignore_index=True)
df_new_in_seasonality

Unnamed: 0,lnf_code,label_slug,nutzung_de
0,520,Vegetation_AnnualCrop_DryRice,Trockenreis
1,650,Vegetation_Permaculture_Grassland,"Übrige Dauerwiesen, beitragsberechtigt aggregiert"
2,529,Vegetation_AnnualCrop_WetRice,Nassreis
3,660,Vegetation_Permaculture_Grassland,"Übrige Dauerweiden, beitragsberechtigt aggregiert"
4,921,Vegetation_Permaculture_Orchards,Hochstamm-Feldobstbäume (Punkte oder Flächen)
5,922,Vegetation_Permaculture_Orchards,Nussbäume (Punkte oder Flächen)
6,923,Vegetation_Permaculture_Chestnuts,Edelkastanienbäume
7,540,Vegetation_AnnualCrop_Chickpeas,Kichererbsen
8,797,Vegetation_Permaculture_Grassland,"Übrige Flächen mit Dauerkulturen, beitragsberechtigt"
9,798,Vegetation_Permaculture_Grassland,"Übrige Flächen mit Dauerkulturen, nicht beitragsberechtigt"


**Which LNF Codes are missing in Seasonality?**

In [5]:
# the ones missing in seasonality
missing_in_seasonality = set_zc - set_seasonality
print(f"Number of lnf codes missing in Seasonality: {len(missing_in_seasonality)}")

Number of lnf codes missing in Seasonality: 1


In [6]:
df_missing_in_seasonality = pd.DataFrame(columns=["lnf_code", "label_slug_zc", "nutzung_de"])
rows = []

for lnf_code in missing_in_seasonality:
    label_slug = get_label_slug_by_lnf_code(df_zc, lnf_code)
    nutzung_de = df_lnf_lookup[df_lnf_lookup["LNF_Code"] == lnf_code]["Nutzung_DE"].values[0]
    rows.append({"lnf_code": lnf_code, "label_slug_zc": label_slug, "nutzung_de": nutzung_de})

df_missing_in_seasonality = pd.concat([df_missing_in_seasonality, pd.DataFrame(rows)], ignore_index=True)

df_missing_in_seasonality

Unnamed: 0,lnf_code,label_slug_zc,nutzung_de
0,901,Vegetation_Forest_Forest_Forest,Wald
