## Seasonality Understanding

In [13]:
import pandas as pd
pd.set_option('display.max_colwidth', None)

df_zc = pd.read_csv("../data/labels_hierarchy.csv")
df_seasonality = pd.read_csv("../data/labels_hierarchy_seasonality.csv")
df_seasonality_reduced = pd.read_csv("../data/labels_hierarchy_seasonality_reduced.csv")

# make a set of the lnf codes in both df (the value in the colum is a list of ints)
set_zc = set([int(x) for x in df_zc["lnf_codes"].apply(eval).sum()])
set_seasonality = set([int(x) for x in df_seasonality["lnf_codes"].apply(eval).sum()])
set_seasonality_reduced = set([int(x) for x in df_seasonality_reduced["lnf_codes"].apply(eval).sum()])

print(f"Number of unique lnf codes in ZC: {len(set_zc)}")
print(f"Number of unique lnf codes in Seasonality: {len(set_seasonality)}")
print(f"Number of unique lnf codes in Seasonality reduced: {len(set_seasonality_reduced)}")

# get the intersection of the two sets
intersection = set_zc.intersection(set_seasonality)
intersection_reduced = set_zc.intersection(set_seasonality_reduced)

print(f"Number of common lnf codes in ZC and Seasonality: {len(intersection)}")
print(f"Number of common lnf codes in ZC and Seasonality reduced: {len(intersection_reduced)}")

Number of unique lnf codes in ZC: 80
Number of unique lnf codes in Seasonality: 133
Number of unique lnf codes in Seasonality reduced: 104
Number of common lnf codes in ZC and Seasonality: 79
Number of common lnf codes in ZC and Seasonality reduced: 64


In [8]:
def get_label_slug_by_lnf_code(df, lnf_code):
    df['lnf_codes_list'] = df["lnf_codes"].apply(eval)
    df["lnf_codes_list"] = df["lnf_codes_list"].apply(lambda x: [int(i) for i in x])
    return df[df["lnf_codes_list"].apply(lambda x: int(lnf_code) in x)]['label_slug'].values[0]

df_lnf_lookup = pd.read_csv("../archive/input/LNF_Katalog_Nutzungsart.csv")

**Which LNF Codes are new in Seasonality?**

In [9]:
# get the ones new in seasonality
new_in_seasonality = set_seasonality - set_zc
print(f"Number of new lnf codes in Seasonality: {len(new_in_seasonality)}")

Number of new lnf codes in Seasonality: 54


In [10]:
df_new_in_seasonality = pd.DataFrame(columns=["lnf_code", "label_slug", "nutzung_de"])
rows = []

for lnf_code in new_in_seasonality:
    label_slug = get_label_slug_by_lnf_code(df_seasonality, lnf_code)
    nutzung_de = df_lnf_lookup[df_lnf_lookup["LNF_Code"] == lnf_code]["Nutzung_DE"].values[0]
    rows.append({"lnf_code": lnf_code, "label_slug": label_slug, "nutzung_de": nutzung_de})

df_new_in_seasonality = pd.concat([df_new_in_seasonality, pd.DataFrame(rows)], ignore_index=True)
df_new_in_seasonality

Unnamed: 0,lnf_code,label_slug,nutzung_de
0,650,Vegetation_Permaculture_Grassland,"Übrige Dauerwiesen, beitragsberechtigt aggregiert"
1,660,Vegetation_Permaculture_Grassland,"Übrige Dauerweiden, beitragsberechtigt aggregiert"
2,921,Vegetation_Permaculture_Orchards,Hochstamm-Feldobstbäume (Punkte oder Flächen)
3,922,Vegetation_Permaculture_Orchards,Nussbäume (Punkte oder Flächen)
4,923,Vegetation_Permaculture_Chestnuts,Edelkastanienbäume
5,924,Vegetation_Permaculture_Orchards,Einheimische standortgerechte Einzelbäume und Alleen (Punkte oder Flächen)
6,797,Vegetation_Permaculture_Grassland,"Übrige Flächen mit Dauerkulturen, beitragsberechtigt"
7,798,Vegetation_Permaculture_Grassland,"Übrige Flächen mit Dauerkulturen, nicht beitragsberechtigt"
8,927,Vegetation_Permaculture_Orchards,Andere Bäume (regionsspezifische Biodiversitätsförderfläche)
9,933,Vegetation_Permaculture_Grassland,Gemeinschaftsweiden


**Which LNF Codes are missing in Seasonality?**

In [11]:
# the ones missing in seasonality
missing_in_seasonality = set_zc - set_seasonality
print(f"Number of lnf codes missing in Seasonality: {len(missing_in_seasonality)}")

Number of lnf codes missing in Seasonality: 1


In [12]:
df_missing_in_seasonality = pd.DataFrame(columns=["lnf_code", "label_slug_zc", "nutzung_de"])
rows = []

for lnf_code in missing_in_seasonality:
    label_slug = get_label_slug_by_lnf_code(df_zc, lnf_code)
    nutzung_de = df_lnf_lookup[df_lnf_lookup["LNF_Code"] == lnf_code]["Nutzung_DE"].values[0]
    rows.append({"lnf_code": lnf_code, "label_slug_zc": label_slug, "nutzung_de": nutzung_de})

df_missing_in_seasonality = pd.concat([df_missing_in_seasonality, pd.DataFrame(rows)], ignore_index=True)

df_missing_in_seasonality

Unnamed: 0,lnf_code,label_slug_zc,nutzung_de
0,901,Vegetation_Forest_Forest_Forest,Wald


# Cross Check with Original ZueriCrop labels.csv

In [12]:
import pandas as pd
df_zc_original = pd.read_csv("../archive/zueri-crop/labels.csv")

# drop whre 4th_tier_ENG is NaN
df_zc_original = df_zc_original.dropna(subset=["4th_tier_ENG"])

# unique values in 1st_tier
df_zc_original["1st_tier"].unique()

# Drop where 1st_tier is not Vegetation
df_zc_original = df_zc_original[df_zc_original["1st_tier"] == "Vegetation"]

df_zc_original

# Set of LNF_code
set_zc_original = set(df_zc_original["LNF_code"].values)

# count of unique LNF_code
print(f"Number of unique lnf codes in ZC original: {len(set_zc_original)}")

Number of unique lnf codes in ZC original: 80


Our adaption of the labels.csv is correct, 80 LNF Codes are considered in the original ZueriCrop dataset.