In [1]:
import pandas as pd

In [2]:
# Load the table of features and the group each one belongs to
feature_groups_csv = "../source_data/feature_groups.csv"
feature_groups = pd.read_csv(feature_groups_csv)

In [3]:
# Display the loaded table (one row per feature with its group).
feature_groups

Unnamed: 0,feature,group
0,arable_prop,arable
1,arable_prop_buff_100,arable
2,arable_prop_buff_1000,arable
3,arable_prop_buff_500,arable
4,area,area
...,...,...
77,twi_mean,twi
78,twi_std,twi
79,urban_prop,urban
80,water_prop,water


In [4]:
# Add the workshop ML input selection column, initialised to 0 for every feature
feature_groups = feature_groups.assign(workshop_ml_input=0)

In [5]:
# Show the first rows to check that the workshop_ml_input column is present.
feature_groups.head()

Unnamed: 0,feature,group,workshop_ml_input
0,arable_prop,arable,0
1,arable_prop_buff_100,arable,0
2,arable_prop_buff_1000,arable,0
3,arable_prop_buff_500,arable,0
4,area,area,0


In [6]:
# Read SHAP values of models and merge them with feature groups.
# Each model contributes one column named "<model>_abs_mean_shap". Because the
# merge is a left join on "feature", features missing from a model's SHAP file
# keep NaN in that model's column.
for i in range(1, 5):
    model = f"tn_model_v{i}"
    shap_col = f"{model}_abs_mean_shap"
    shap_values = pd.read_csv(f"../source_data/{model}_shap.csv").rename(
        columns={"abs_mean_shap": shap_col}
    )
    feature_groups = (
        feature_groups
        # Drop the column left by a previous run of this cell; without this,
        # re-running the cell would create "_x"/"_y" suffixed duplicates.
        .drop(columns=shap_col, errors="ignore")
        # many_to_one: raise if a SHAP file lists a feature more than once
        # instead of silently duplicating rows of the feature table.
        .merge(shap_values, how="left", on="feature", validate="many_to_one")
    )

In [7]:
# Display the merged table; features with no match in a SHAP file show NaN (left merge).
feature_groups

Unnamed: 0,feature,group,workshop_ml_input,tn_model_v1_abs_mean_shap,tn_model_v2_abs_mean_shap,tn_model_v3_abs_mean_shap,tn_model_v4_abs_mean_shap
0,arable_prop,arable,0,0.183095,0.240194,0.249932,0.280003
1,arable_prop_buff_100,arable,0,,,,
2,arable_prop_buff_1000,arable,0,,,,
3,arable_prop_buff_500,arable,0,,,,
4,area,area,0,0.015212,0.013442,0.017646,0.030337
...,...,...,...,...,...,...,...
77,twi_mean,twi,0,0.112151,0.058157,0.106335,0.084019
78,twi_std,twi,0,,,,
79,urban_prop,urban,0,0.066393,0.038074,0.061920,0.040733
80,water_prop,water,0,0.096020,0.117541,0.063534,0.071921


# Create Excel file for feature selection

In [8]:
# Write the feature table to an Excel workbook with a 0/1 dropdown on the
# selection column. The writer is used as a context manager so that the
# workbook is saved and the writer is closed even if a step below raises;
# closing only the xlsxwriter workbook does not close the pandas writer itself.
sheet_name = "workshop_feature_selection"
with pd.ExcelWriter("workshop_feature_selection.xlsx", engine="xlsxwriter") as writer:
    # Add sheet with features
    feature_groups.to_excel(writer, sheet_name=sheet_name, index=False)
    worksheet = writer.sheets[sheet_name]

    # Add dropdown values for feature selection.
    # The sheet is written without the index, so a column's position in the
    # frame equals its zero-based position in the sheet. Looking the position
    # up keeps the dropdown on the right column if columns are reordered.
    n_rows = feature_groups.shape[0]
    selection_col = feature_groups.columns.get_loc("workshop_ml_input")
    if n_rows > 0:
        # Sheet row 0 holds the header; data occupies rows 1..n_rows (zero-based).
        worksheet.data_validation(
            1, selection_col, n_rows, selection_col,
            {"validate": "list", "source": [0, 1]},
        )

    # Autofit column width; the file is saved when the with-block exits
    worksheet.autofit()