Metric for importance assesment
-Zonation 2
-Zonation 3
-Zonation 6

Source data variable description
https://www.metsakeskus.fi/sites/default/files/document/mvjaete-koodisto-ja-tietokantakuvaus_0.xlsx


The Finnish Forest Center forest stand classes
Luokat Suomen Metsäkeskuksen metsävarakuvioista
fertilityclass	1	Lehto, letto ja lehtomainen suo (ja ruohoturvekangas)
fertilityclass	2	Lehtomainen kangas, vastaava suo ja ruohoturvekangas
fertilityclass	3	Tuore kangas, vastaava suo ja mustikkaturvekangas
fertilityclass	4	Kuivahko kangas, vastaava suo ja puolukkaturvekangas
fertilityclass	5	Kuiva kangas, vastaava suo ja varputurvekangas
fertilityclass	6	Karukkokangas, vastaava suo (ja jäkäläturvekangas)
fertilityclass	7	Kalliomaa ja hietikko
fertilityclass	8	Lakimetsä ja tunturi

treestand
697	type	1	Inventointi-tyyppi
697	type	2	Laskenta-tyyppi.
697	type	3	Ennuste-tyyppi
(type 3 jätetty pois ja otettu korkein arvo luokasta 1 ja 2)

join standid to treestandsummary by treestandsummaryid
join meanage from treestandsummary by (added) standid

Ageclasses
-40 young (age_class = 1)
-40-140 middle (age_class = 2)
-over 140 old (age_class = 3)

Region of interest
-3 maakuntaa. Pirkanmaa, Kainuu ja Uusimaa
downloaded 12.2.2024 from https://avoin.metsakeskus.fi/aineistot/Metsavarakuviot/Maakunta/

Thresholds
-Zonation-arvo > zonation.percentile(0.7) best 30 %
-Zonation-arvo > zonation.percentile(0.9) best 10 %

# Zonation1

## Protection distribution

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import dask_geopandas as dask_gpd
from scipy.spatial import cKDTree
from rasterstats import zonal_stats
import rasterio
from scipy import stats
import fiona

#to see numbers as non scientific
np.set_printoptions(suppress=True)

filenameZonation = r"YuorPathToFileWithZonationValues"

filename = r"YourPath\MV_Uusimaa.gpkg"

#stand polygons
stand = dask_gpd.read_file(filenameZonation, npartitions=2)
stand = stand.compute()
print("stand",len(stand))
#standids to add age etc
treestand = dask_gpd.read_file(filename, layer='treestand', npartitions=2)
treestand = treestand.compute()
condition = treestand.type.isin([2,1])
treestand = treestand[condition]
print("treestand",len(treestand))
#wanted data to add
treestandsummary = dask_gpd.read_file(filename,layer='treestandsummary', npartitions=2)
treestandsummary = treestandsummary.compute()
print("treestandsummary",len(treestandsummary))
#wanted data to add
specialfeature = dask_gpd.read_file(filename,layer='specialfeature', npartitions=2)
specialfeature = specialfeature.compute()
print("specialfeature",len(treestandsummary))
columns = ['standid', 'featurecode']
featurecode = specialfeature[columns]
featurecode = featurecode.groupby('standid').min()
featurecode.reset_index(inplace=True)
 

#add standid to treestandsummaries and join to stand polygons based on that
joined_layer = treestandsummary.merge(treestand, left_on='treestandid', right_on='treestandid', how='left')
joined_layer = stand.merge(joined_layer, left_on='standid', right_on='standid', how='left')
len(joined_layer)

#select the columns to add+join key
columns = ['standid', 'meanage']
age = joined_layer[columns]

#one polygon includes many rows of subtable. add aggregation method, here its max
#select the larger one of inventointi- and laskenta-tyyppi
max_age = age.groupby('standid').max()
max_age.reset_index(inplace=True)
#print(max_age)

#add the desired column
stand_age = stand.merge(max_age, left_on='standid', right_on='standid', how='left')
print("number of empty ages",stand_age.meanage.isna().sum())
stand_age = stand_age.merge(featurecode, left_on='standid', right_on='standid', how='left')
print(len(stand_age))

#divide into age classes
conditions = [
    (stand_age['meanage'] < 40),
    (stand_age['meanage'] >= 40) & (stand_age['meanage'] <= 140),
    (stand_age['meanage'] > 140)
]
choices = [1,2,3]
stand_age['age_class'] = np.select(conditions, choices, default=0) #0 when nodata


out_file = r"YourPath"
combined = dask_gpd.read_parquet(out_file, npartitions=2)
combined = combined.compute()

joined = gpd.sjoin(stand_age, combined)
# Count the number of points in each polygon. use an unique id column to group
print(f"groupingvariable (standid) is unique {stand_age.standid.is_unique}")
counts = joined.groupby('standid').size()

# Convert the counts Series to a DataFrame
counts_df = counts.reset_index()
counts_df.columns = ['standid', 'count']

# Merge the counts with the original GeoDataFrame
stand_age = pd.merge(stand_age, counts_df, on='standid', how='left')

# Fill NaN values with 0 (assuming no overlap means a count of 0)
stand_age['protected'] = stand_age['count'].fillna(0)

#divide into protected classes
stand_age['protection_class']  = np.where(stand_age['protected']  > 0, 1, 0)


COI = [ 'maingroup', 'subgroup', 'fertilityclass',
       'soiltype', 'drainagestate', 'ditchingyear', 'thinningyear',
       'developmentclass', 'standquality', 'maintreespecies', 'area',
       'areadecrease', 'creationtime', 'updatetime', 'Zonation_mean', 'Zonation_median',
        'Zonation_min', 'Zonation_max',
        'meanage', 'protection_class','age_class']

print("number of Nulls")
print(stand_age[COI].isna().sum())

COI_continuous_sum = ['fertilityclass','area']
condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.9)) & (stand_age.protection_class == 1)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result = grouped.sum()
result = result.rename(columns={'area': 'Area_Protected10'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.9)) & (stand_age.protection_class == 0)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result2 = grouped.sum()
result2 = result2.rename(columns={'area': 'Area_UnProtected10'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.7)) & (stand_age.protection_class == 1)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'Area_Protected30'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.7)) & (stand_age.protection_class == 0)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result4 = grouped.sum()
result4 = result4.rename(columns={'area': 'Area_UnProtected30'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result3, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result4, left_on='fertilityclass', right_index=True)


# As percentages
percentage_df = merged_df.apply(lambda x: x / x.sum() * 100)
percentage_df = percentage_df.rename(columns={'Area_Protected10': 'AP10_perc', 'Area_UnProtected10': 'AU10_perc', 'Area_Protected30': 'AP30_perc', 'Area_UnProtected30': 'AU30_perc'})

# combne with areas
merged_df2 = merged_df.merge(percentage_df, left_on='fertilityclass', right_index=True)

merged_df2['ProtPerc10'] = merged_df2.apply(lambda row: (row['Area_Protected10'] / (row['Area_UnProtected10']+row['Area_Protected10'])) * 100, axis=1)
merged_df2['ProtPerc30'] = merged_df2.apply(lambda row: (row['Area_Protected30'] / (row['Area_UnProtected30']+row['Area_Protected30'])) * 100, axis=1)
order = ['Area_Protected10', 'Area_UnProtected10', 'ProtPerc10','Area_Protected30','Area_UnProtected30','ProtPerc30']

Suojeltu10 = merged_df2.Area_Protected10.sum()/merged_df2.Area_UnProtected10.sum()*100
Suojeltu30 = merged_df2.Area_Protected30.sum()/merged_df2.Area_UnProtected30.sum()*100

print(f"Zonation raja-arvo top 10%:lle on {stand_age.Zonation_mean.quantile(0.9)}")
print(f"Zonation raja-arvo top 30%:lle on {stand_age.Zonation_mean.quantile(0.7)}")
print(f"Suojellun osuus parhaasta 10 %:sta on {Suojeltu10} %")
print(f"Suojellun osuus parhaasta 30 %:sta on {Suojeltu30} %")

merged_df2[order]

stand 421151
treestand 821547
treestandsummary 835517
specialfeature 835517
number of empty ages 6421
421151
groupingvariable (standid) is unique True
number of Nulls
maingroup                0
subgroup                 1
fertilityclass           1
soiltype                 1
drainagestate           44
ditchingyear        410554
thinningyear        421151
developmentclass     31245
standquality        421151
maintreespecies       6097
area                     0
areadecrease             0
creationtime             0
updatetime               0
Zonation_mean           29
Zonation_median         29
Zonation_min            29
Zonation_max            29
meanage               6421
protection_class         0
age_class                0
dtype: int64
Zonation raja-arvo top 10%:lle on 0.9577555265008583
Zonation raja-arvo top 30%:lle on 0.8983575892542326
Suojellun osuus parhaasta 10 %:sta on 36.59430677916196 %
Suojellun osuus parhaasta 30 %:sta on 22.34258186443472 %


Unnamed: 0_level_0,Area_Protected10,Area_UnProtected10,ProtPerc10,Area_Protected30,Area_UnProtected30,ProtPerc30
fertilityclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.0,621.949,1175.942,34.593254,757.936,1627.045,31.77954
2.0,4543.895,13539.788,25.127044,8152.331,38356.373,17.528614
3.0,6105.827,14339.486,29.864189,13070.007,55636.036,19.023082
4.0,960.612,3706.445,20.582821,2942.31,17193.372,14.612418
5.0,604.814,2359.566,20.402715,1389.455,6829.886,16.904701
6.0,248.602,776.392,24.253996,593.382,2154.088,21.597397
7.0,734.624,1868.701,28.218682,1637.537,5954.586,21.568894


## General

In [2]:
#the most occurring (mode) value within categorical variables, COI is ColumnsOfInterest
COI_categorical = [ 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('fertilityclass')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='fertilityclass')

#mean values for continuous variable
COI_continuous = ['fertilityclass','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage', 'protection_class']
grouped = stand_age[COI_continuous].groupby('fertilityclass')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['fertilityclass','area']
grouped = stand_age[COI_continuous_sum].groupby('fertilityclass')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result3, left_on='fertilityclass', right_index=True)

merged_df


Unnamed: 0_level_0,maingroup,subgroup,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,protection_class,size,area_sum
fertilityclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1.0,1,1.0,20.0,3.0,2.0,0.795596,0.911854,0.900321,0.763318,0.957586,62.280314,0.247998,4121,3278.65
2.0,1,1.0,20.0,3.0,2.0,1.111601,0.793638,0.785294,0.62349,0.889375,49.605609,0.122348,107905,119947.313
3.0,1,1.0,10.0,3.0,2.0,1.230421,0.753411,0.750418,0.618981,0.857389,55.547827,0.118834,202123,248696.37
4.0,1,1.0,10.0,3.0,1.0,1.380873,0.746067,0.742401,0.640823,0.840775,58.451587,0.108414,61846,85401.459
5.0,1,1.0,50.0,3.0,1.0,1.329877,0.810248,0.802833,0.704925,0.881627,66.523487,0.163116,16994,22599.933
6.0,1,1.0,50.0,3.0,1.0,1.123608,0.822142,0.815256,0.70308,0.896315,65.888926,0.187637,6390,7179.853
7.0,2,1.0,50.0,,1.0,0.984417,0.794278,0.787593,0.683067,0.872066,83.038338,0.160134,21763,21423.876
8.0,2,1.0,50.0,,1.0,0.460375,0.777842,0.7466,0.611158,0.860618,73.285714,0.0,8,3.683


In [3]:
#the most occurring (mode) value within categorical variables
COI_categorical = ['age_class', 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('age_class')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='age_class')

#mean values for continuous variable
COI_continuous = ['age_class','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage', 'protection_class']
grouped = stand_age[COI_continuous].groupby('age_class')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['age_class','area']
grouped = stand_age[COI_continuous_sum].groupby('age_class')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='age_class', right_index=True)
merged_df = merged_df.merge(result3, left_on='age_class', right_index=True)

merged_df

Unnamed: 0_level_0,maingroup,subgroup,fertilityclass,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,protection_class,size,area_sum
age_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,1,1.0,3.0,10.0,A0,2.0,0.92994,0.755666,0.749253,0.621906,0.851922,,0.14328,6421,5971.147
1,1,1.0,3.0,10.0,02,2.0,1.378734,0.709268,0.706526,0.571933,0.829149,22.056465,0.090369,112760,155465.994
2,1,1.0,3.0,10.0,03,2.0,1.148427,0.792856,0.787055,0.655903,0.881024,68.63857,0.136467,299903,344416.735
3,2,1.0,7.0,50.0,04,1.0,1.295882,0.737928,0.735112,0.637322,0.832403,161.990324,0.178036,2067,2678.588


In [4]:
#the most occurring (mode) value within categorical variables
COI_categorical = ['protection_class', 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('protection_class')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='protection_class')

#mean values for continuous variable
COI_continuous = ['protection_class','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage']
grouped = stand_age[COI_continuous].groupby('protection_class')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['protection_class','area']
grouped = stand_age[COI_continuous_sum].groupby('protection_class')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='protection_class', right_index=True)
merged_df = merged_df.merge(result3, left_on='protection_class', right_index=True)

merged_df

Unnamed: 0_level_0,maingroup,subgroup,fertilityclass,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,size,area_sum
protection_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,1,1.0,3.0,10.0,3,2.0,1.18703,0.765583,0.760387,0.628852,0.863217,55.530832,368746,437712.418
1,1,1.0,3.0,10.0,3,2.0,1.351399,0.798174,0.794743,0.660664,0.889216,62.844265,52405,70820.046


## saving

In [None]:
# Not protected
out_file = r"YourPath\Uusimaa_Top10_Zonation1_NotProtected.parquet"
condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.9)) & (stand_age.protection_class == 0)
print(f"Top 10% polygon count is {len(stand_age[condition])}")
print(f"Zonation top 10% threshold is {stand_age[condition].Zonation_mean.quantile(0.9)}")
print(f"Zonation top 30% threshold is {stand_age[condition].Zonation_mean.quantile(0.7)}")
stand_age[condition].to_parquet(out_file)
print(f"saving to {out_file}")

out_file = r"YourPath\Uusimaa_Top30_Zonation1_NotProtected.parquet"
condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.7)) & (stand_age.protection_class == 0)
print(f"Top 30% polygon count is {len(stand_age[condition])}")
print(f"Zonation top 10% threshold is {stand_age[condition].Zonation_mean.quantile(0.9)}")
print(f"Zonation top 30% threshold is {stand_age[condition].Zonation_mean.quantile(0.7)}")
stand_age[condition].to_parquet(out_file)
print(f"saving to {out_file}")

# Protected
out_file = r"YourPath\Uusimaa_Top10_Zonation1.parquet"
condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.9))
print(f"Top 10% polygon count is {len(stand_age[condition])}")
print(f"Zonation top 10% threshold is {stand_age[condition].Zonation_mean.quantile(0.9)}")
print(f"Zonation top 30% threshold is {stand_age[condition].Zonation_mean.quantile(0.7)}")
stand_age[condition].to_parquet(out_file)
print(f"saving to {out_file}")

out_file = r"YourPath\Uusimaa_Top30_Zonation1.parquet"
condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.7))
print(f"Top 30% polygon count is {len(stand_age[condition])}")
print(f"Zonation top 10% threshold is {stand_age[condition].Zonation_mean.quantile(0.9)}")
print(f"Zonation top 30% threshold is {stand_age[condition].Zonation_mean.quantile(0.7)}")
stand_age[condition].to_parquet(out_file)
print(f"saving to {out_file}")

# Zonation2

## Protection distribution

In [3]:
import geopandas as gpd
import pandas as pd
import numpy as np
import dask_geopandas as dask_gpd
from scipy.spatial import cKDTree
from rasterstats import zonal_stats
import rasterio
from scipy import stats
import fiona

#to see numbers as non scientific
np.set_printoptions(suppress=True)

filenameZonation = r"YuorPathToFileWithZonationValues"

filename = r"YourPath\MV_Uusimaa.gpkg"

#stand polygons
stand = dask_gpd.read_file(filenameZonation, npartitions=2)
stand = stand.compute()
print("stand",len(stand))
#standids to add age etc
treestand = dask_gpd.read_file(filename, layer='treestand', npartitions=2)
treestand = treestand.compute()
condition = treestand.type.isin([2,1])
treestand = treestand[condition]
print("treestand",len(treestand))
#wanted data to add
treestandsummary = dask_gpd.read_file(filename,layer='treestandsummary', npartitions=2)
treestandsummary = treestandsummary.compute()
print("treestandsummary",len(treestandsummary))
#wanted data to add
specialfeature = dask_gpd.read_file(filename,layer='specialfeature', npartitions=2)
specialfeature = specialfeature.compute()
print("specialfeature",len(treestandsummary))
columns = ['standid', 'featurecode']
featurecode = specialfeature[columns]
featurecode = featurecode.groupby('standid').min()
featurecode.reset_index(inplace=True)
 

#add standid to treestandsummaries and join to stand polygons based on that
joined_layer = treestandsummary.merge(treestand, left_on='treestandid', right_on='treestandid', how='left')
joined_layer = stand.merge(joined_layer, left_on='standid', right_on='standid', how='left')
len(joined_layer)

#select the columns to add+join key
columns = ['standid', 'meanage']
age = joined_layer[columns]

#one polygon includes many rows of subtable. add aggregation method, here its max
#select the larger one of inventointi- and laskenta-tyyppi
max_age = age.groupby('standid').max()
max_age.reset_index(inplace=True)
#print(max_age)

#add the desired column
stand_age = stand.merge(max_age, left_on='standid', right_on='standid', how='left')
print("number of empty ages",stand_age.meanage.isna().sum())
stand_age = stand_age.merge(featurecode, left_on='standid', right_on='standid', how='left')
print(len(stand_age))

#divide into age classes
conditions = [
    (stand_age['meanage'] < 40),
    (stand_age['meanage'] >= 40) & (stand_age['meanage'] <= 140),
    (stand_age['meanage'] > 140)
]
choices = [1,2,3]
stand_age['age_class'] = np.select(conditions, choices, default=0) #0 when nodata


out_file = r"YourPath"
combined = dask_gpd.read_parquet(out_file, npartitions=2)
combined = combined.compute()

joined = gpd.sjoin(stand_age, combined)
# Count the number of points in each polygon. use an unique id column to group
print(f"groupingvariable (standid) is unique {stand_age.standid.is_unique}")
counts = joined.groupby('standid').size()

# Convert the counts Series to a DataFrame
counts_df = counts.reset_index()
counts_df.columns = ['standid', 'count']

# Merge the counts with the original GeoDataFrame
stand_age = pd.merge(stand_age, counts_df, on='standid', how='left')

# Fill NaN values with 0 (assuming no overlap means a count of 0)
stand_age['protected'] = stand_age['count'].fillna(0)

#divide into protected classes
stand_age['protection_class']  = np.where(stand_age['protected']  > 0, 1, 0)


COI = [ 'maingroup', 'subgroup', 'fertilityclass',
       'soiltype', 'drainagestate', 'ditchingyear', 'thinningyear',
       'developmentclass', 'standquality', 'maintreespecies', 'area',
       'areadecrease', 'creationtime', 'updatetime', 'Zonation_mean', 'Zonation_median',
        'Zonation_min', 'Zonation_max',
        'meanage', 'protection_class','age_class']

print("number of Nulls")
print(stand_age[COI].isna().sum())

COI_continuous_sum = ['fertilityclass','area']
condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.9)) & (stand_age.protection_class == 1)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result = grouped.sum()
result = result.rename(columns={'area': 'Area_Protected10'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.9)) & (stand_age.protection_class == 0)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result2 = grouped.sum()
result2 = result2.rename(columns={'area': 'Area_UnProtected10'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.7)) & (stand_age.protection_class == 1)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'Area_Protected30'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.7)) & (stand_age.protection_class == 0)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result4 = grouped.sum()
result4 = result4.rename(columns={'area': 'Area_UnProtected30'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result3, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result4, left_on='fertilityclass', right_index=True)


# As percentages
percentage_df = merged_df.apply(lambda x: x / x.sum() * 100)
percentage_df = percentage_df.rename(columns={'Area_Protected10': 'AP10_perc', 'Area_UnProtected10': 'AU10_perc', 'Area_Protected30': 'AP30_perc', 'Area_UnProtected30': 'AU30_perc'})

# combne with areas
merged_df2 = merged_df.merge(percentage_df, left_on='fertilityclass', right_index=True)

merged_df2['ProtPerc10'] = merged_df2.apply(lambda row: (row['Area_Protected10'] / (row['Area_UnProtected10']+row['Area_Protected10'])) * 100, axis=1)
merged_df2['ProtPerc30'] = merged_df2.apply(lambda row: (row['Area_Protected30'] / (row['Area_UnProtected30']+row['Area_Protected30'])) * 100, axis=1)
order = ['Area_Protected10', 'Area_UnProtected10', 'ProtPerc10','Area_Protected30','Area_UnProtected30','ProtPerc30']

Suojeltu10 = merged_df2.Area_Protected10.sum()/merged_df2.Area_UnProtected10.sum()*100
Suojeltu30 = merged_df2.Area_Protected30.sum()/merged_df2.Area_UnProtected30.sum()*100

print(f"Zonation raja-arvo top 10%:lle on {stand_age.Zonation_mean.quantile(0.9)}")
print(f"Zonation raja-arvo top 30%:lle on {stand_age.Zonation_mean.quantile(0.7)}")
print(f"Suojellun osuus parhaasta 10 %:sta on {Suojeltu10} %")
print(f"Suojellun osuus parhaasta 30 %:sta on {Suojeltu30} %")

merged_df2[order]

stand 421151
treestand 821547
treestandsummary 835517
specialfeature 835517
number of empty ages 6421
421151
groupingvariable (standid) is unique True
number of Nulls
maingroup                0
subgroup                 1
fertilityclass           1
soiltype                 1
drainagestate           44
ditchingyear        410554
thinningyear        421151
developmentclass     31245
standquality        421151
maintreespecies       6097
area                     0
areadecrease             0
creationtime             0
updatetime               0
Zonation_mean           29
Zonation_median         29
Zonation_min            29
Zonation_max            29
meanage               6421
protection_class         0
age_class                0
dtype: int64
Zonation raja-arvo top 10%:lle on 0.9536609224340822
Zonation raja-arvo top 30%:lle on 0.8712530213026969
Suojellun osuus parhaasta 10 %:sta on 48.180832432905135 %
Suojellun osuus parhaasta 30 %:sta on 28.195699190006028 %


Unnamed: 0_level_0,Area_Protected10,Area_UnProtected10,ProtPerc10,Area_Protected30,Area_UnProtected30,ProtPerc30
fertilityclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.0,635.066,1028.347,38.178492,764.518,1583.321,32.562625
2.0,4545.974,11334.146,28.626824,8091.042,33090.897,19.647064
3.0,7643.739,12269.031,38.386116,15120.604,47965.938,23.968034
4.0,1226.489,3085.923,28.440905,3361.891,14653.394,18.661326
5.0,724.652,2362.563,23.472677,1830.017,6701.137,21.45099
6.0,386.254,900.993,30.006207,869.941,2429.539,26.366003
7.0,1024.684,2615.05,28.152717,2170.87,7809.093,21.752285


## General

In [4]:
#the most occurring (mode) value within categorical variables, COI is ColumnsOfInterest
COI_categorical = [ 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('fertilityclass')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='fertilityclass')

#mean values for continuous variable
COI_continuous = ['fertilityclass','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage', 'protection_class']
grouped = stand_age[COI_continuous].groupby('fertilityclass')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['fertilityclass','area']
grouped = stand_age[COI_continuous_sum].groupby('fertilityclass')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result3, left_on='fertilityclass', right_index=True)

merged_df


Unnamed: 0_level_0,maingroup,subgroup,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,protection_class,size,area_sum
fertilityclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1.0,1,1.0,20.0,3.0,2.0,0.795596,0.878526,0.86626,0.715837,0.938225,62.280314,0.247998,4121,3278.65
2.0,1,1.0,20.0,3.0,2.0,1.111601,0.72609,0.716435,0.546787,0.845572,49.605609,0.122348,107905,119947.313
3.0,1,1.0,10.0,3.0,2.0,1.230421,0.694203,0.690344,0.546805,0.818769,55.547827,0.118834,202123,248696.37
4.0,1,1.0,10.0,3.0,1.0,1.380873,0.69262,0.688555,0.571936,0.804984,58.451587,0.108414,61846,85401.459
5.0,1,1.0,50.0,3.0,1.0,1.329877,0.785246,0.777401,0.666259,0.867405,66.523487,0.163116,16994,22599.933
6.0,1,1.0,50.0,3.0,1.0,1.123608,0.807301,0.800806,0.675558,0.891456,65.888926,0.187637,6390,7179.853
7.0,2,1.0,50.0,,1.0,0.984417,0.787513,0.779345,0.662594,0.870757,83.038338,0.160134,21763,21423.876
8.0,2,1.0,50.0,,1.0,0.460375,0.748771,0.737653,0.628807,0.869965,73.285714,0.0,8,3.683


In [5]:
#the most occurring (mode) value within categorical variables
COI_categorical = ['age_class', 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('age_class')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='age_class')

#mean values for continuous variable
COI_continuous = ['age_class','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage', 'protection_class']
grouped = stand_age[COI_continuous].groupby('age_class')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['age_class','area']
grouped = stand_age[COI_continuous_sum].groupby('age_class')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='age_class', right_index=True)
merged_df = merged_df.merge(result3, left_on='age_class', right_index=True)

merged_df

Unnamed: 0_level_0,maingroup,subgroup,fertilityclass,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,protection_class,size,area_sum
age_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,1,1.0,3.0,10.0,A0,2.0,0.92994,0.699808,0.691877,0.557491,0.811835,,0.14328,6421,5971.147
1,1,1.0,3.0,10.0,02,2.0,1.378734,0.599208,0.595501,0.455353,0.750111,22.056465,0.090369,112760,155465.994
2,1,1.0,3.0,10.0,03,2.0,1.148427,0.757511,0.75085,0.605885,0.861129,68.63857,0.136467,299903,344416.735
3,2,1.0,7.0,50.0,04,1.0,1.295882,0.739436,0.734368,0.618132,0.836917,161.990324,0.178036,2067,2678.588


In [6]:
#the most occurring (mode) value within categorical variables
COI_categorical = ['protection_class', 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('protection_class')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='protection_class')

#mean values for continuous variable
COI_continuous = ['protection_class','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage']
grouped = stand_age[COI_continuous].groupby('protection_class')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['protection_class','area']
grouped = stand_age[COI_continuous_sum].groupby('protection_class')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='protection_class', right_index=True)
merged_df = merged_df.merge(result3, left_on='protection_class', right_index=True)

merged_df

Unnamed: 0_level_0,maingroup,subgroup,fertilityclass,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,size,area_sum
protection_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,1,1.0,3.0,10.0,3,2.0,1.18703,0.704601,0.698365,0.553801,0.823899,55.530832,368746,437712.418
1,1,1.0,3.0,10.0,3,2.0,1.351399,0.78139,0.778008,0.643015,0.877212,62.844265,52405,70820.046


# Zonation 3

## Protection distribution

In [8]:
import geopandas as gpd
import pandas as pd
import numpy as np
import dask_geopandas as dask_gpd
from scipy.spatial import cKDTree
from rasterstats import zonal_stats
import rasterio
from scipy import stats
import fiona

#to see numbers as non scientific
np.set_printoptions(suppress=True)

filenameZonation = r"YuorPathToFileWithZonationValues"

filename = r"YourPath\MV_Uusimaa.gpkg"

#stand polygons
stand = dask_gpd.read_file(filenameZonation, npartitions=2)
stand = stand.compute()
print("stand",len(stand))
#standids to add age etc
treestand = dask_gpd.read_file(filename, layer='treestand', npartitions=2)
treestand = treestand.compute()
condition = treestand.type.isin([2,1])
treestand = treestand[condition]
print("treestand",len(treestand))
#wanted data to add
treestandsummary = dask_gpd.read_file(filename,layer='treestandsummary', npartitions=2)
treestandsummary = treestandsummary.compute()
print("treestandsummary",len(treestandsummary))
#wanted data to add
specialfeature = dask_gpd.read_file(filename,layer='specialfeature', npartitions=2)
specialfeature = specialfeature.compute()
print("specialfeature",len(treestandsummary))
columns = ['standid', 'featurecode']
featurecode = specialfeature[columns]
featurecode = featurecode.groupby('standid').min()
featurecode.reset_index(inplace=True)
 

#add standid to treestandsummaries and join to stand polygons based on that
joined_layer = treestandsummary.merge(treestand, left_on='treestandid', right_on='treestandid', how='left')
joined_layer = stand.merge(joined_layer, left_on='standid', right_on='standid', how='left')
len(joined_layer)

#select the columns to add+join key
columns = ['standid', 'meanage']
age = joined_layer[columns]

#one polygon includes many rows of subtable. add aggregation method, here its max
#select the larger one of inventointi- and laskenta-tyyppi
max_age = age.groupby('standid').max()
max_age.reset_index(inplace=True)
#print(max_age)

#add the desired column
stand_age = stand.merge(max_age, left_on='standid', right_on='standid', how='left')
print("number of empty ages",stand_age.meanage.isna().sum())
stand_age = stand_age.merge(featurecode, left_on='standid', right_on='standid', how='left')
print(len(stand_age))

#divide into age classes
conditions = [
    (stand_age['meanage'] < 40),
    (stand_age['meanage'] >= 40) & (stand_age['meanage'] <= 140),
    (stand_age['meanage'] > 140)
]
choices = [1,2,3]
stand_age['age_class'] = np.select(conditions, choices, default=0) #0 when nodata


out_file = r"YourPath"
combined = dask_gpd.read_parquet(out_file, npartitions=2)
combined = combined.compute()

joined = gpd.sjoin(stand_age, combined)
# Count the number of points in each polygon. use an unique id column to group
print(f"groupingvariable (standid) is unique {stand_age.standid.is_unique}")
counts = joined.groupby('standid').size()

# Convert the counts Series to a DataFrame
counts_df = counts.reset_index()
counts_df.columns = ['standid', 'count']

# Merge the counts with the original GeoDataFrame
stand_age = pd.merge(stand_age, counts_df, on='standid', how='left')

# Fill NaN values with 0 (assuming no overlap means a count of 0)
stand_age['protected'] = stand_age['count'].fillna(0)

#divide into protected classes
stand_age['protection_class']  = np.where(stand_age['protected']  > 0, 1, 0)


COI = [ 'maingroup', 'subgroup', 'fertilityclass',
       'soiltype', 'drainagestate', 'ditchingyear', 'thinningyear',
       'developmentclass', 'standquality', 'maintreespecies', 'area',
       'areadecrease', 'creationtime', 'updatetime', 'Zonation_mean', 'Zonation_median',
        'Zonation_min', 'Zonation_max',
        'meanage', 'protection_class','age_class']

print("number of Nulls")
print(stand_age[COI].isna().sum())

COI_continuous_sum = ['fertilityclass','area']
condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.9)) & (stand_age.protection_class == 1)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result = grouped.sum()
result = result.rename(columns={'area': 'Area_Protected10'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.9)) & (stand_age.protection_class == 0)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result2 = grouped.sum()
result2 = result2.rename(columns={'area': 'Area_UnProtected10'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.7)) & (stand_age.protection_class == 1)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'Area_Protected30'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.7)) & (stand_age.protection_class == 0)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result4 = grouped.sum()
result4 = result4.rename(columns={'area': 'Area_UnProtected30'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result3, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result4, left_on='fertilityclass', right_index=True)


# As percentages
percentage_df = merged_df.apply(lambda x: x / x.sum() * 100)
percentage_df = percentage_df.rename(columns={'Area_Protected10': 'AP10_perc', 'Area_UnProtected10': 'AU10_perc', 'Area_Protected30': 'AP30_perc', 'Area_UnProtected30': 'AU30_perc'})

# combne with areas
merged_df2 = merged_df.merge(percentage_df, left_on='fertilityclass', right_index=True)

merged_df2['ProtPerc10'] = merged_df2.apply(lambda row: (row['Area_Protected10'] / (row['Area_UnProtected10']+row['Area_Protected10'])) * 100, axis=1)
merged_df2['ProtPerc30'] = merged_df2.apply(lambda row: (row['Area_Protected30'] / (row['Area_UnProtected30']+row['Area_Protected30'])) * 100, axis=1)
order = ['Area_Protected10', 'Area_UnProtected10', 'ProtPerc10','Area_Protected30','Area_UnProtected30','ProtPerc30']

Suojeltu10 = merged_df2.Area_Protected10.sum()/merged_df2.Area_UnProtected10.sum()*100
Suojeltu30 = merged_df2.Area_Protected30.sum()/merged_df2.Area_UnProtected30.sum()*100

print(f"Zonation raja-arvo top 10%:lle on {stand_age.Zonation_mean.quantile(0.9)}")
print(f"Zonation raja-arvo top 30%:lle on {stand_age.Zonation_mean.quantile(0.7)}")
print(f"Suojellun osuus parhaasta 10 %:sta on {Suojeltu10} %")
print(f"Suojellun osuus parhaasta 30 %:sta on {Suojeltu30} %")

merged_df2[order]

stand 421151
treestand 821547
treestandsummary 835517
specialfeature 835517
number of empty ages 6421
421151
groupingvariable (standid) is unique True
number of Nulls
maingroup                0
subgroup                 1
fertilityclass           1
soiltype                 1
drainagestate           44
ditchingyear        410554
thinningyear        421151
developmentclass     31245
standquality        421151
maintreespecies       6097
area                     0
areadecrease             0
creationtime             0
updatetime               0
Zonation_mean           29
Zonation_median         29
Zonation_min            29
Zonation_max            29
meanage               6421
protection_class         0
age_class                0
dtype: int64
Zonation raja-arvo top 10%:lle on 0.9620743811130524
Zonation raja-arvo top 30%:lle on 0.887726285566235
Suojellun osuus parhaasta 10 %:sta on 59.365738236633256 %
Suojellun osuus parhaasta 30 %:sta on 29.973451911608233 %


Unnamed: 0_level_0,Area_Protected10,Area_UnProtected10,ProtPerc10,Area_Protected30,Area_UnProtected30,ProtPerc30
fertilityclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.0,625.593,932.006,40.163932,765.201,1514.115,33.571519
2.0,4755.632,9967.427,32.30057,8157.528,31371.063,20.637032
3.0,9324.965,12325.84,43.06983,16229.413,49290.692,24.770127
4.0,1562.508,3066.796,33.752547,3672.133,15088.006,19.574125
5.0,840.303,2035.618,29.21857,1981.975,6313.415,23.892487
6.0,468.544,794.678,37.091184,945.393,2355.283,28.642405
7.0,1248.94,2590.346,32.530528,2298.505,7668.449,23.061258


## General

In [9]:
#the most occurring (mode) value within categorical variables, COI is ColumnsOfInterest
COI_categorical = [ 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('fertilityclass')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='fertilityclass')

#mean values for continuous variable
COI_continuous = ['fertilityclass','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage', 'protection_class']
grouped = stand_age[COI_continuous].groupby('fertilityclass')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['fertilityclass','area']
grouped = stand_age[COI_continuous_sum].groupby('fertilityclass')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result3, left_on='fertilityclass', right_index=True)

merged_df

Unnamed: 0_level_0,maingroup,subgroup,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,protection_class,size,area_sum
fertilityclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1.0,1,1.0,20.0,3.0,2.0,0.795596,0.8811,0.869889,0.743284,0.934081,62.280314,0.247998,4121,3278.65
2.0,1,1.0,20.0,3.0,2.0,1.111601,0.737115,0.728899,0.57748,0.84328,49.605609,0.122348,107905,119947.313
3.0,1,1.0,10.0,3.0,2.0,1.230421,0.718042,0.71472,0.587603,0.824015,55.547827,0.118834,202123,248696.37
4.0,1,1.0,10.0,3.0,1.0,1.380873,0.716611,0.713406,0.611036,0.811373,58.451587,0.108414,61846,85401.459
5.0,1,1.0,50.0,3.0,1.0,1.329877,0.798083,0.791739,0.697428,0.868195,66.523487,0.163116,16994,22599.933
6.0,1,1.0,50.0,3.0,1.0,1.123608,0.822181,0.817026,0.71021,0.893115,65.888926,0.187637,6390,7179.853
7.0,2,1.0,50.0,,1.0,0.984417,0.804108,0.797719,0.699244,0.874128,83.038338,0.160134,21763,21423.876
8.0,2,1.0,50.0,,1.0,0.460375,0.749606,0.740679,0.649262,0.862712,73.285714,0.0,8,3.683


In [10]:
COI_categorical = ['age_class', 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('age_class')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='age_class')

#mean values for continuous variable
COI_continuous = ['age_class','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage', 'protection_class']
grouped = stand_age[COI_continuous].groupby('age_class')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['age_class','area']
grouped = stand_age[COI_continuous_sum].groupby('age_class')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='age_class', right_index=True)
merged_df = merged_df.merge(result3, left_on='age_class', right_index=True)

merged_df

Unnamed: 0_level_0,maingroup,subgroup,fertilityclass,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,protection_class,size,area_sum
age_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,1,1.0,3.0,10.0,A0,2.0,0.92994,0.717345,0.71108,0.591982,0.814733,,0.14328,6421,5971.147
1,1,1.0,3.0,10.0,02,2.0,1.378734,0.619608,0.617245,0.489076,0.753932,22.056465,0.090369,112760,155465.994
2,1,1.0,3.0,10.0,03,2.0,1.148427,0.776619,0.770723,0.644387,0.863913,68.63857,0.136467,299903,344416.735
3,2,1.0,7.0,50.0,04,1.0,1.295882,0.755381,0.751772,0.650835,0.83889,161.990324,0.178036,2067,2678.588


In [11]:
#the most occurring (mode) value within categorical variables
COI_categorical = ['protection_class', 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('protection_class')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='protection_class')

#mean values for continuous variable
COI_continuous = ['protection_class','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage']
grouped = stand_age[COI_continuous].groupby('protection_class')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['protection_class','area']
grouped = stand_age[COI_continuous_sum].groupby('protection_class')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='protection_class', right_index=True)
merged_df = merged_df.merge(result3, left_on='protection_class', right_index=True)

merged_df

Unnamed: 0_level_0,maingroup,subgroup,fertilityclass,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,size,area_sum
protection_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,1,1.0,3.0,10.0,3,2.0,1.18703,0.723602,0.718403,0.589862,0.826764,55.530832,368746,437712.418
1,1,1.0,3.0,10.0,3,2.0,1.351399,0.803716,0.800559,0.687683,0.881639,62.844265,52405,70820.046


# Zonation 6

## Protection distribution

In [13]:
import geopandas as gpd
import pandas as pd
import numpy as np
import dask_geopandas as dask_gpd
from scipy.spatial import cKDTree
from rasterstats import zonal_stats
import rasterio
from scipy import stats
import fiona

#to see numbers as non scientific
np.set_printoptions(suppress=True)

filenameZonation = r"YuorPathToFileWithZonationValues"

filename = r"YourPath\MV_Uusimaa.gpkg"

#stand polygons
stand = dask_gpd.read_file(filenameZonation, npartitions=2)
stand = stand.compute()
print("stand",len(stand))
#standids to add age etc
treestand = dask_gpd.read_file(filename, layer='treestand', npartitions=2)
treestand = treestand.compute()
condition = treestand.type.isin([2,1])
treestand = treestand[condition]
print("treestand",len(treestand))
#wanted data to add
treestandsummary = dask_gpd.read_file(filename,layer='treestandsummary', npartitions=2)
treestandsummary = treestandsummary.compute()
print("treestandsummary",len(treestandsummary))
#wanted data to add
specialfeature = dask_gpd.read_file(filename,layer='specialfeature', npartitions=2)
specialfeature = specialfeature.compute()
print("specialfeature",len(treestandsummary))
columns = ['standid', 'featurecode']
featurecode = specialfeature[columns]
featurecode = featurecode.groupby('standid').min()
featurecode.reset_index(inplace=True)
 

#add standid to treestandsummaries and join to stand polygons based on that
joined_layer = treestandsummary.merge(treestand, left_on='treestandid', right_on='treestandid', how='left')
joined_layer = stand.merge(joined_layer, left_on='standid', right_on='standid', how='left')
len(joined_layer)

#select the columns to add+join key
columns = ['standid', 'meanage']
age = joined_layer[columns]

#one polygon includes many rows of subtable. add aggregation method, here its max
#select the larger one of inventointi- and laskenta-tyyppi
max_age = age.groupby('standid').max()
max_age.reset_index(inplace=True)
#print(max_age)

#add the desired column
stand_age = stand.merge(max_age, left_on='standid', right_on='standid', how='left')
print("number of empty ages",stand_age.meanage.isna().sum())
stand_age = stand_age.merge(featurecode, left_on='standid', right_on='standid', how='left')
print(len(stand_age))

#divide into age classes
conditions = [
    (stand_age['meanage'] < 40),
    (stand_age['meanage'] >= 40) & (stand_age['meanage'] <= 140),
    (stand_age['meanage'] > 140)
]
choices = [1,2,3]
stand_age['age_class'] = np.select(conditions, choices, default=0) #0 when nodata


out_file = r"YourPath"
combined = dask_gpd.read_parquet(out_file, npartitions=2)
combined = combined.compute()

joined = gpd.sjoin(stand_age, combined)
# Count the number of points in each polygon. use an unique id column to group
print(f"groupingvariable (standid) is unique {stand_age.standid.is_unique}")
counts = joined.groupby('standid').size()

# Convert the counts Series to a DataFrame
counts_df = counts.reset_index()
counts_df.columns = ['standid', 'count']

# Merge the counts with the original GeoDataFrame
stand_age = pd.merge(stand_age, counts_df, on='standid', how='left')

# Fill NaN values with 0 (assuming no overlap means a count of 0)
stand_age['protected'] = stand_age['count'].fillna(0)

#divide into protected classes
stand_age['protection_class']  = np.where(stand_age['protected']  > 0, 1, 0)


COI = [ 'maingroup', 'subgroup', 'fertilityclass',
       'soiltype', 'drainagestate', 'ditchingyear', 'thinningyear',
       'developmentclass', 'standquality', 'maintreespecies', 'area',
       'areadecrease', 'creationtime', 'updatetime', 'Zonation_mean', 'Zonation_median',
        'Zonation_min', 'Zonation_max',
        'meanage', 'protection_class','age_class']

print("number of Nulls")
print(stand_age[COI].isna().sum())

COI_continuous_sum = ['fertilityclass','area']
condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.9)) & (stand_age.protection_class == 1)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result = grouped.sum()
result = result.rename(columns={'area': 'Area_Protected10'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.9)) & (stand_age.protection_class == 0)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result2 = grouped.sum()
result2 = result2.rename(columns={'area': 'Area_UnProtected10'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.7)) & (stand_age.protection_class == 1)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'Area_Protected30'})

condition = (stand_age.Zonation_mean > stand_age.Zonation_mean.quantile(0.7)) & (stand_age.protection_class == 0)
grouped = stand_age[condition][COI_continuous_sum].groupby('fertilityclass')
result4 = grouped.sum()
result4 = result4.rename(columns={'area': 'Area_UnProtected30'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result3, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result4, left_on='fertilityclass', right_index=True)


# As percentages
percentage_df = merged_df.apply(lambda x: x / x.sum() * 100)
percentage_df = percentage_df.rename(columns={'Area_Protected10': 'AP10_perc', 'Area_UnProtected10': 'AU10_perc', 'Area_Protected30': 'AP30_perc', 'Area_UnProtected30': 'AU30_perc'})

# combne with areas
merged_df2 = merged_df.merge(percentage_df, left_on='fertilityclass', right_index=True)

merged_df2['ProtPerc10'] = merged_df2.apply(lambda row: (row['Area_Protected10'] / (row['Area_UnProtected10']+row['Area_Protected10'])) * 100, axis=1)
merged_df2['ProtPerc30'] = merged_df2.apply(lambda row: (row['Area_Protected30'] / (row['Area_UnProtected30']+row['Area_Protected30'])) * 100, axis=1)
order = ['Area_Protected10', 'Area_UnProtected10', 'ProtPerc10','Area_Protected30','Area_UnProtected30','ProtPerc30']

Suojeltu10 = merged_df2.Area_Protected10.sum()/merged_df2.Area_UnProtected10.sum()*100
Suojeltu30 = merged_df2.Area_Protected30.sum()/merged_df2.Area_UnProtected30.sum()*100

print(f"Zonation raja-arvo top 10%:lle on {stand_age.Zonation_mean.quantile(0.9)}")
print(f"Zonation raja-arvo top 30%:lle on {stand_age.Zonation_mean.quantile(0.7)}")
print(f"Suojellun osuus parhaasta 10 %:sta on {Suojeltu10} %")
print(f"Suojellun osuus parhaasta 30 %:sta on {Suojeltu30} %")

merged_df2[order]

stand 421151
treestand 821547
treestandsummary 835517
specialfeature 835517
number of empty ages 6421
421151
groupingvariable (standid) is unique True
number of Nulls
maingroup                0
subgroup                 1
fertilityclass           1
soiltype                 1
drainagestate           44
ditchingyear        410554
thinningyear        421151
developmentclass     31245
standquality        421151
maintreespecies       6097
area                     0
areadecrease             0
creationtime             0
updatetime               0
Zonation_mean           28
Zonation_median         28
Zonation_min            28
Zonation_max            28
meanage               6421
protection_class         0
age_class                0
dtype: int64
Zonation raja-arvo top 10%:lle on 0.9663586782223552
Zonation raja-arvo top 30%:lle on 0.8976882100105285
Suojellun osuus parhaasta 10 %:sta on 74.77823552981471 %
Suojellun osuus parhaasta 30 %:sta on 33.374616055985726 %


Unnamed: 0_level_0,Area_Protected10,Area_UnProtected10,ProtPerc10,Area_Protected30,Area_UnProtected30,ProtPerc30
fertilityclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.0,607.049,702.003,46.373177,748.689,1344.643,35.765421
2.0,4851.087,7637.34,38.84466,8280.203,27857.127,22.913157
3.0,10303.465,11165.791,47.991719,17635.863,48505.596,26.663855
4.0,2111.297,3505.799,37.586984,4015.656,14987.121,21.131943
5.0,1042.565,1666.947,38.477962,2147.395,5975.111,26.437592
6.0,527.528,579.046,47.672185,969.589,2243.13,30.179701
7.0,1397.532,2612.845,34.847896,2430.97,7637.929,24.143355


## General

In [15]:
#the most occurring (mode) value within categorical variables, COI is ColumnsOfInterest
COI_categorical = [ 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('fertilityclass')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='fertilityclass')

#mean values for continuous variable
COI_continuous = ['fertilityclass','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage', 'protection_class']
grouped = stand_age[COI_continuous].groupby('fertilityclass')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['fertilityclass','area']
grouped = stand_age[COI_continuous_sum].groupby('fertilityclass')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='fertilityclass', right_index=True)
merged_df = merged_df.merge(result3, left_on='fertilityclass', right_index=True)

merged_df

Unnamed: 0_level_0,maingroup,subgroup,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,protection_class,size,area_sum
fertilityclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1.0,1,1.0,20.0,3.0,2.0,0.795596,0.873907,0.866675,0.783262,0.915903,62.280314,0.247998,4121,3278.65
2.0,1,1.0,20.0,3.0,2.0,1.111601,0.743985,0.738257,0.624352,0.824551,49.605609,0.122348,107905,119947.313
3.0,1,1.0,10.0,3.0,2.0,1.230421,0.740785,0.738259,0.644406,0.81578,55.547827,0.118834,202123,248696.37
4.0,1,1.0,10.0,3.0,1.0,1.380873,0.741838,0.739678,0.665437,0.807646,58.451587,0.108414,61846,85401.459
5.0,1,1.0,50.0,3.0,1.0,1.329877,0.812601,0.808344,0.743347,0.861722,66.523487,0.163116,16994,22599.933
6.0,1,1.0,50.0,3.0,1.0,1.123608,0.838885,0.835179,0.761172,0.887924,65.888926,0.187637,6390,7179.853
7.0,2,1.0,50.0,,1.0,0.984417,0.830406,0.826622,0.760036,0.877305,83.038338,0.160134,21763,21423.876
8.0,2,1.0,50.0,,1.0,0.460375,0.700219,0.690084,0.639745,0.798544,73.285714,0.0,8,3.683


In [16]:
COI_categorical = ['age_class', 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('age_class')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='age_class')

#mean values for continuous variable
COI_continuous = ['age_class','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage', 'protection_class']
grouped = stand_age[COI_continuous].groupby('age_class')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['age_class','area']
grouped = stand_age[COI_continuous_sum].groupby('age_class')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='age_class', right_index=True)
merged_df = merged_df.merge(result3, left_on='age_class', right_index=True)

merged_df

Unnamed: 0_level_0,maingroup,subgroup,fertilityclass,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,protection_class,size,area_sum
age_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,1,1.0,3.0,10.0,A0,2.0,0.92994,0.755294,0.751765,0.664923,0.823189,,0.14328,6421,5971.147
1,1,1.0,3.0,10.0,02,2.0,1.378734,0.649827,0.649198,0.549829,0.749545,22.056465,0.090369,112760,155465.994
2,1,1.0,3.0,10.0,03,2.0,1.148427,0.790112,0.785534,0.694434,0.851709,68.63857,0.136467,299903,344416.735
3,2,1.0,7.0,50.0,04,1.0,1.295882,0.801991,0.799969,0.732402,0.855634,161.990324,0.178036,2067,2678.588


In [17]:
#the most occurring (mode) value within categorical variables
COI_categorical = ['protection_class', 'maingroup', 'subgroup', 'fertilityclass','soiltype', 'developmentclass', 'maintreespecies']
grouped = stand_age[COI_categorical].groupby('protection_class')
result = grouped.apply(lambda x: x.mode().iloc[0])
result = result.drop(columns='protection_class')

#mean values for continuous variable
COI_continuous = ['protection_class','area','Zonation_mean', 'Zonation_median','Zonation_min', 'Zonation_max','meanage']
grouped = stand_age[COI_continuous].groupby('protection_class')
result2 = grouped.mean().assign(size=grouped.size())

COI_continuous_sum = ['protection_class','area']
grouped = stand_age[COI_continuous_sum].groupby('protection_class')
result3 = grouped.sum()
result3 = result3.rename(columns={'area': 'area_sum'})


#combine most occurring categorical values with mean continuous values
merged_df = result.merge(result2, left_on='protection_class', right_index=True)
merged_df = merged_df.merge(result3, left_on='protection_class', right_index=True)

merged_df

Unnamed: 0_level_0,maingroup,subgroup,fertilityclass,soiltype,developmentclass,maintreespecies,area,Zonation_mean,Zonation_median,Zonation_min,Zonation_max,meanage,size,area_sum
protection_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,1,1.0,3.0,10.0,3,2.0,1.18703,0.742115,0.738508,0.643222,0.816636,55.530832,368746,437712.418
1,1,1.0,3.0,10.0,3,2.0,1.351399,0.822177,0.819496,0.741511,0.875327,62.844265,52405,70820.046
