In [1]:
import pandas as pd
import numpy as np
import rioxarray
import json, os

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif, mutual_info_classif
from sklearn.metrics import f1_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier, StackingClassifier

from imblearn.over_sampling import RandomOverSampler, SMOTE

In [2]:
seed = 42
verbose = False

In [3]:
INVASIVE_BIRDS_PATH = 'Datasets/Machine Learning/1km Rasters/Birds'
# Use this if using coordinates as separate columns
# df_1km = pd.read_csv('Datasets/Machine Learning/Dataframes/1km_All_Birds_DF.csv')

# Use this if using coordinates as indices
df_1km = pd.read_csv('Datasets/Machine Learning/Dataframes/1km_All_Birds_DF.csv', index_col=[0,1])

total_birds = (df_1km['Occurrence']==1).sum()
df_dicts = []

for file in os.listdir(INVASIVE_BIRDS_PATH):
    filename = os.fsdecode(file)
    if not filename.endswith('.tif') or filename.endswith('All_Invasive_Birds_1km.tif') :
         continue



    bird_name = filename[:-4].replace('_', ' ')

    bird_dataset = rioxarray.open_rasterio(f'{INVASIVE_BIRDS_PATH}/{file}')
    bird_dataset.name = 'data'
    bird_df = bird_dataset.squeeze().drop("spatial_ref").drop("band").to_dataframe()

    # Check if index matches
    if not df_1km.index.equals(bird_df.index):
        print('Warning: Index does not match')
        continue

    bird_df['Occurrence'] = [0 if x == -1 else 1 for x in bird_df['data']]
    bird_df = df_1km.drop(columns='Occurrence').join(bird_df.drop(columns='data'))
    
    bird_dict = {'name' : bird_name, 'dataframe' : bird_df }
    df_dicts.append(bird_dict)
    display(bird_df.sample(5))


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
653500.0,144500.0,0,0,0,0,0,0,0,0,0,100,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
294500.0,186500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1267500.0,262500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
989500.0,435500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
978500.0,299500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
347500.0,462500.0,2,0,39,36,0,0,0,0,0,0,...,8.818063,0.2670333,2.814865,-3.4000000000000003e+38,5.133586,-3.4000000000000003e+38,5.596289,2.809593,2.056187,0
246500.0,428500.0,1,0,38,52,0,0,0,0,0,0,...,8.034465,0.3647437,1.646565,0.2811041,5.809071,0.1311784,2.640456,5.60728,1.555872,0
1280500.0,501500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1118500.0,394500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
973500.0,289500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
912500.0,585500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
332500.0,625500.0,3,0,71,12,0,0,0,0,0,0,...,23.2925,9.086796,4.139837,16.03763,17.99913,4.549512,7.225822,7.373776,8.77857,0
550500.0,56500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
613500.0,655500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1176500.0,62500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
831500.0,133500.0,0,0,0,4,0,0,11,0,0,84,...,0.03611739,0.004510448,0.01928792,-3.4000000000000003e+38,0.01154389,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
820500.0,538500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
218500.0,295500.0,0,0,0,0,0,0,100,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
234500.0,517500.0,1,0,80,14,0,0,0,0,0,0,...,48.44122,15.31423,5.449437,4.64266,12.60467,5.594527,17.56232,8.214181,16.09901,0
202500.0,453500.0,3,0,0,3,8,0,0,0,0,0,...,0.2900374,0.04823495,0.08497092,0.02667059,0.1045926,0.0152072,0.0390262,0.007261924,0.03543094,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
100500.0,593500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1086500.0,575500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1073500.0,213500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
75500.0,475500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
273500.0,559500.0,0,0,22,14,0,0,0,1,0,0,...,9.55534,2.975117,1.655059,3.988291,7.107273,1.076231,4.031778,2.242249,4.193403,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
101500.0,147500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1210500.0,697500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
785500.0,476500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1290500.0,330500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
162500.0,155500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1267500.0,530500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1053500.0,600500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1009500.0,667500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
479500.0,149500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
789500.0,348500.0,0,0,0,0,0,0,0,0,100,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
356500.0,622500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
141500.0,382500.0,3,5,28,35,0,24,0,0,0,0,...,7.464071,1.569011,2.048008,1.62284,5.361934,0.8524584,4.508813,0.5514222,3.80446,0
1067500.0,353500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
993500.0,528500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
958500.0,38500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
569500.0,618500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1165500.0,468500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
939500.0,217500.0,3,0,0,0,0,0,0,0,1,88,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1259500.0,697500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1156500.0,276500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
964500.0,352500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1096500.0,279500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
69500.0,228500.0,20,0,0,49,0,0,9,0,0,0,...,0.201327,0.03093979,0.2140653,-3.4000000000000003e+38,0.09861873,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
697500.0,5500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
221500.0,1500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
876500.0,39500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
556500.0,278500.0,20,7,0,69,0,0,0,0,0,2,...,0.5160189,0.07756853,0.3353682,-3.4000000000000003e+38,0.2161183,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1129500.0,440500.0,0,0,0,0,0,0,0,0,0,78,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
643500.0,118500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1060500.0,51500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
831500.0,582500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
140500.0,200500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
15500.0,509500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
414500.0,87500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
403500.0,485500.0,0,0,91,0,9,0,0,0,0,0,...,28.0718,11.21038,6.162427,18.25872,14.60866,6.654371,17.10833,15.80072,9.995781,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
62500.0,382500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
654500.0,485500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
907500.0,500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
940500.0,546500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1161500.0,444500.0,0,0,0,0,0,0,0,0,23,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1049500.0,427500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1149500.0,101500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1063500.0,153500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
424500.0,346500.0,1,0,69,24,0,0,0,0,0,0,...,0.9001539,0.0930474,0.4441246,-3.4000000000000003e+38,0.9713068,-3.4000000000000003e+38,0.2311267,0.07226533,0.1353259,0
366500.0,62500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1212500.0,116500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
181500.0,11500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
371500.0,203500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
225500.0,41500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
775500.0,6500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1213500.0,564500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
815500.0,28500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
742500.0,406500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
488500.0,259500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
874500.0,660500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1064500.0,644500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
854500.0,447500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1053500.0,243500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1293500.0,358500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
471500.0,436500.0,13,0,79,8,0,0,0,0,0,0,...,15.95924,31.73957,4.868417,16.09438,19.54666,19.22021,25.31172,3.868119,10.51691,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1137500.0,153500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
357500.0,110500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
703500.0,123500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1290500.0,602500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
457500.0,55500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1244500.0,104500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
988500.0,672500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
912500.0,300500.0,0,0,0,7,0,0,0,0,0,0,...,0.005895692,0.001060366,0.004969717,-3.4000000000000003e+38,0.003122055,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
587500.0,202500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1156500.0,20500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1010500.0,132500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
594500.0,637500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
244500.0,353500.0,1,0,64,34,0,0,0,0,0,0,...,15.99536,34.05397,3.313028,2.322196,8.170328,6.756846,5.917507,5.760455,2.614203,0
343500.0,440500.0,9,1,5,77,0,0,0,0,0,0,...,0.9321775,0.08756886,0.7443675,-3.4000000000000003e+38,0.686553,-3.4000000000000003e+38,0.5989163,0.3734603,0.1657389,0
677500.0,539500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


In [4]:
# Data Cleaning
for dict in df_dicts:
    cur_df = dict["dataframe"]
    cur_df_name = dict["name"]

    print(f'{cur_df_name} data before drop: \n {cur_df.value_counts("Occurrence")} \n')
    
    no_occurences = cur_df[cur_df['Occurrence']==0].index
    sample_size = sum(cur_df['Occurrence']==0) - total_birds + sum(cur_df['Occurrence']==1)
    random_indices = np.random.choice(no_occurences, sample_size, replace=False)
    dict["dataframe"] =  cur_df.drop(random_indices)
    
    print(f'{cur_df_name} data after drop: \n {dict["dataframe"].value_counts("Occurrence")} \n')


# for dict in df_dicts:
#     cur_df = dict["dataframe"]
#     cur_df_name = dict["name"]

#     print(f'{cur_df_name} data before drop: \n {cur_df.value_counts("Occurrence")} \n')
    
#     no_occurences = cur_df[cur_df['Occurrence']==0].index
#     sample_size = sum(cur_df['Occurrence']==0) - sum(cur_df['Occurrence']==1)
#     random_indices = np.random.choice(no_occurences, sample_size, replace=False)
#     dict["dataframe"] =  cur_df.drop(random_indices)
    
#     print(f'{cur_df_name} data after drop: \n {dict["dataframe"].value_counts("Occurrence")} \n')

Barnacle Goose 1km data before drop: 
 Occurrence
0    909231
1       769
dtype: int64 

Barnacle Goose 1km data after drop: 
 Occurrence
0    32315
1      769
dtype: int64 

Canada Goose 1km data before drop: 
 Occurrence
0    899853
1     10147
dtype: int64 

Canada Goose 1km data after drop: 
 Occurrence
0    22937
1    10147
dtype: int64 

Egyptian Goose 1km data before drop: 
 Occurrence
0    909137
1       863
dtype: int64 

Egyptian Goose 1km data after drop: 
 Occurrence
0    32221
1      863
dtype: int64 

Gadwall 1km data before drop: 
 Occurrence
0    907795
1      2205
dtype: int64 

Gadwall 1km data after drop: 
 Occurrence
0    30879
1     2205
dtype: int64 

Goshawk 1km data before drop: 
 Occurrence
0    909554
1       446
dtype: int64 

Goshawk 1km data after drop: 
 Occurrence
0    32638
1      446
dtype: int64 

Grey Partridge 1km data before drop: 
 Occurrence
0    907877
1      2123
dtype: int64 

Grey Partridge 1km data after drop: 
 Occurrence
0    30961
1     21

In [5]:
# Standardisation
def standardise(X):
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Add headers back
    X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)

    # Revert 'Surface type' back to non-standardised column as it is a categorical feature
    X_scaled_df['Surface type'] = X['Surface type'].values
    return X_scaled_df

In [6]:
# Feature Selection

# Check if any columns have NaN in them
# nan_columns = []
# for column in X_scaled_df:
#     if X_scaled_df[column].isnull().values.any():
#         nan_columns.append(column)
# print(nan_columns if len(nan_columns)!= 0 else 'None')


# Using ANOVA F-Score as a feature selection method
def feature_select(X, y):
    k_nums = [10, 15, 20, 25, 30, 35]
    kbest_dict = {}
    for num in k_nums:
        # Needs to be 1d array, y.values.ravel() converts y into a 1d array
        best_X = SelectKBest(f_classif, k=num).fit(X, y.values.ravel())
        kbest_dict[str(num)] = best_X.get_feature_names_out().tolist()
    kbest_dict['40'] = list(X.columns)

    best_X = SelectKBest(f_classif, k='all').fit(X, y.values.ravel())

    feat_scores = pd.DataFrame()
    feat_scores["F Score"] = best_X.scores_
    feat_scores["P Value"] = best_X.pvalues_
    feat_scores["Attribute"] = X.columns
    kbest_dict['Dataframe'] = feat_scores.sort_values(["F Score", "P Value"], ascending=[False, False])

    print(f'K-Best Features Dataframe: \n{kbest_dict["Dataframe"]} \n')
    # print(json.dumps(kbest_dict, indent=4))
    return kbest_dict

In [7]:
# Resample (upsample) minority data
# for dict in df_dicts:
#     if sum(dict['dataframe']['Occurence']==1) > sum(dict['dataframe']['Occurence']==0):
#         continue

# from sklearn.utils import resample

# def upsample(X, y):
#     X_1 = X[y['Occurrence'] == 1] # Getting positive occurrences (minority)
#     X_0 = X[y['Occurrence'] == 0] # Getting negative occurrences (majority)
    
#     X_1_upsampled = resample(X_1 ,random_state=seed,n_samples=total_birds/2,replace=True)


#     print(f'Resampling: \n {y.value_counts()} \n')


In [8]:
def oversample(X_train, y_train):
    over = RandomOverSampler(sampling_strategy=1, random_state=seed)
    smote = SMOTE(random_state=seed, sampling_strategy='minority')
    X_smote, y_smote = smote.fit_resample(X_train, y_train)

    print(f'Resampled Value Counts: \n {y_smote.value_counts()} \n')

    return X_smote, y_smote

In [9]:
All_bird_occurrences = pd.DataFrame([(dict['name'],sum(dict['dataframe']['Occurrence'] == 1)) for dict in df_dicts], columns=['Name', 'Occurrence Count'])
All_bird_occurrences['Percentage'] = All_bird_occurrences['Occurrence Count']/total_birds

All_bird_occurrences.sort_values('Occurrence Count', ascending=False)

Unnamed: 0,Name,Occurrence Count,Percentage
9,Mute Swan 1km,19124,0.578044
1,Canada Goose 1km,10147,0.306704
10,Pheasant 1km,5855,0.176974
16,Rock Dove 1km,3919,0.118456
7,Little Owl 1km,3548,0.107242
14,Red-legged Partridge 1km,2953,0.089258
11,Pink-footed Goose 1km,2646,0.079978
19,Wigeon 1km,2317,0.070034
3,Gadwall 1km,2205,0.066649
5,Grey Partridge 1km,2123,0.06417


In [10]:
# Add model pipeline
estimators = [
    ('lr', LogisticRegression(max_iter=10000, solver='saga', random_state=seed, penalty='l2', verbose=verbose)),
    ('knn', KNeighborsClassifier(n_neighbors=5, weights='distance')),
    ('sgd', SGDClassifier( max_iter=10000, loss='modified_huber', random_state=seed, penalty='l2', verbose=verbose)),
    ('rf', RandomForestClassifier(n_estimators=20,max_features=None, random_state=seed, verbose=verbose))
]


for dict in df_dicts:
    print(f'Training with {dict["name"]} cells... \n')
    # Use this if using coordinates as separate columns
    # coords, X, y = data['dataframe'].iloc[:, :2], data['dataframe'].iloc[:, 2:-1], data['dataframe'].iloc[:, [-1]]
    # data['coords'] = coords
    
    # Use this if using coordinates as indices
    X, y = dict['dataframe'].iloc[:, 0:-1], dict['dataframe'].iloc[:, [-1]], 

    dict['X'] = standardise(X)
    dict['y'] = y
    dict['kbest'] = feature_select(X, y)

    X_train, X_test, y_train, y_test = train_test_split(dict['X'], dict['y'], random_state=seed)
    dict['X_train'], dict['X_test'], dict['y_train'], dict['y_test'] = X_train, X_test, y_train, y_test # for debugging purposes

    dict['X_smote'], dict['y_smote'] = oversample(X_train, y_train)

    stack_clf = StackingClassifier(
        estimators=estimators, 
        final_estimator=GradientBoostingClassifier(n_estimators=20, learning_rate=0.5, max_features=None, max_depth=2, random_state=seed)
    )

    stack_clf.fit(dict['X_smote'], dict['y_smote'])
    y_pred = stack_clf.predict(X_test)
    
    dict['predictions'] = y_pred
    dict['report'] = classification_report(y_test, y_pred, output_dict=True)
    
    print(f'Classification report: \n {json.dumps(dict["report"], indent=4)} \n')



Training with Barnacle Goose 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
29  1592.655375   0.000000e+00                 Chlorothalonil
30  1592.655375   0.000000e+00                     Glyphosate
31  1592.655375   0.000000e+00                       Mancozeb
32  1592.655375   0.000000e+00                     Mecoprop-P
34  1592.655375   0.000000e+00                  Pendimethalin
25  1588.727338   0.000000e+00   Inflowing drainage direction
23  1422.870245  6.634046e-305                   Surface type
22  1387.756006  1.384297e-297      Cumulative catchment area
18  1301.289099  1.575422e-279                      Saltmarsh
24  1227.846893  3.716570e-264  Outflowing drainage direction
21  1209.088956  3.176146e-260                      Elevation
13  1169.697230  5.831442e-252                     Freshwater
15  1085.742719  2.575851e-234         Supralittoral sediment
17   950.441801  8.749551e-206              Littoral sedimen

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9843403205918619,
        "recall": 0.9859207113745831,
        "f1-score": 0.9851298821496884,
        "support": 8097
    },
    "1": {
        "precision": 0.2919254658385093,
        "recall": 0.27011494252873564,
        "f1-score": 0.28059701492537314,
        "support": 174
    },
    "accuracy": 0.9708620481199371,
    "macro avg": {
        "precision": 0.6381328932151856,
        "recall": 0.6280178269516594,
        "f1-score": 0.6328634485375308,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9697737404048129,
        "recall": 0.9708620481199371,
        "f1-score": 0.9703083709784842,
        "support": 8271
    }
} 

Training with Canada Goose 1km cells... 

K-Best Features Dataframe: 
         F Score        P Value                      Attribute
29  31133.773751   0.000000e+00                 Chlorothalonil
31  31133.773751   0.000000e+00                       Mancozeb
32  31133.7

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.8842141269026851,
        "recall": 0.903056768558952,
        "f1-score": 0.8935361216730037,
        "support": 5725
    },
    "1": {
        "precision": 0.7710396039603961,
        "recall": 0.7340926944226237,
        "f1-score": 0.752112676056338,
        "support": 2546
    },
    "accuracy": 0.8510458227542014,
    "macro avg": {
        "precision": 0.8276268654315406,
        "recall": 0.8185747314907879,
        "f1-score": 0.8228243988646708,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.8493764609117447,
        "recall": 0.8510458227542014,
        "f1-score": 0.8500028013320498,
        "support": 8271
    }
} 

Training with Egyptian Goose 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  4897.758011   0.000000e+00                   Fertiliser K
27  4897.758011   0.000000e+00                   Fertiliser N
28  4897.758011

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.984215069675669,
        "recall": 0.9914285714285714,
        "f1-score": 0.9878086515254657,
        "support": 8050
    },
    "1": {
        "precision": 0.5740740740740741,
        "recall": 0.42081447963800905,
        "f1-score": 0.48563968668407315,
        "support": 221
    },
    "accuracy": 0.9761818401644299,
    "macro avg": {
        "precision": 0.7791445718748715,
        "recall": 0.7061215255332902,
        "f1-score": 0.7367241691047695,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9732561578115713,
        "recall": 0.9761818401644299,
        "f1-score": 0.9743907647850537,
        "support": 8271
    }
} 

Training with Gadwall 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  8253.517389   0.000000e+00                   Fertiliser K
27  8253.517389   0.000000e+00                   Fertiliser N
28  8253.517389   0.

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9625477707006369,
        "recall": 0.9764797105195141,
        "f1-score": 0.9694636900179624,
        "support": 7738
    },
    "1": {
        "precision": 0.5676959619952494,
        "recall": 0.44840525328330205,
        "f1-score": 0.5010482180293501,
        "support": 533
    },
    "accuracy": 0.9424495224277596,
    "macro avg": {
        "precision": 0.7651218663479431,
        "recall": 0.7124424819014081,
        "f1-score": 0.7352559540236563,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9371027200368753,
        "recall": 0.9424495224277596,
        "f1-score": 0.9392780478259747,
        "support": 8271
    }
} 

Training with Goshawk 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
23  1215.399920  1.509541e-261                   Surface type
21  1122.626402  4.502081e-242                      Elevation
29  1089.411835  4.3

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9867557715674362,
        "recall": 0.9953425664909915,
        "f1-score": 0.991030569284276,
        "support": 8159
    },
    "1": {
        "precision": 0.07317073170731707,
        "recall": 0.026785714285714284,
        "f1-score": 0.0392156862745098,
        "support": 112
    },
    "accuracy": 0.9822270583968081,
    "macro avg": {
        "precision": 0.5299632516373767,
        "recall": 0.5110641403883529,
        "f1-score": 0.515123127779393,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9743846526623058,
        "recall": 0.9822270583968081,
        "f1-score": 0.9781417690307282,
        "support": 8271
    }
} 

Training with Grey Partridge 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  8816.231258   0.000000e+00                   Fertiliser K
27  8816.231258   0.000000e+00                   Fertiliser N
28  8816.2312

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9516294585578651,
        "recall": 0.9612493519958528,
        "f1-score": 0.956415215989684,
        "support": 7716
    },
    "1": {
        "precision": 0.3731656184486373,
        "recall": 0.3207207207207207,
        "f1-score": 0.3449612403100775,
        "support": 555
    },
    "accuracy": 0.9182686494982469,
    "macro avg": {
        "precision": 0.6623975385032512,
        "recall": 0.6409850363582867,
        "f1-score": 0.6506882281498808,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9128134228595672,
        "recall": 0.9182686494982469,
        "f1-score": 0.9153854787750568,
        "support": 8271
    }
} 

Training with Indian Peafowl 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  1509.661172  5.928788e-323                   Fertiliser K
27  1509.661172  5.928788e-323                   Fertiliser N
28  1509.661172

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9903053805138148,
        "recall": 0.9978021978021978,
        "f1-score": 0.994039654543243,
        "support": 8190
    },
    "1": {
        "precision": 0.05263157894736842,
        "recall": 0.012345679012345678,
        "f1-score": 0.02,
        "support": 81
    },
    "accuracy": 0.9881513722645388,
    "macro avg": {
        "precision": 0.5214684797305916,
        "recall": 0.5050739384072718,
        "f1-score": 0.5070198272716214,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9811225032405851,
        "recall": 0.9881513722645388,
        "f1-score": 0.9845006372517422,
        "support": 8271
    }
} 

Training with Little Owl 1km cells... 

K-Best Features Dataframe: 
         F Score        P Value                      Attribute
26  19251.945273   0.000000e+00                   Fertiliser K
27  19251.945273   0.000000e+00                   Fertiliser N
28  19251.945273   0.000000e

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9258195115580514,
        "recall": 0.9593991067803491,
        "f1-score": 0.9423102485710487,
        "support": 7389
    },
    "1": {
        "precision": 0.511400651465798,
        "recall": 0.35600907029478457,
        "f1-score": 0.4197860962566845,
        "support": 882
    },
    "accuracy": 0.8950550114859146,
    "macro avg": {
        "precision": 0.7186100815119247,
        "recall": 0.6577040885375669,
        "f1-score": 0.6810481724138666,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.8816268583599657,
        "recall": 0.8950550114859146,
        "f1-score": 0.8865895010990055,
        "support": 8271
    }
} 

Training with Mandarin Duck 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  4832.339557   0.000000e+00                   Fertiliser K
27  4832.339557   0.000000e+00                   Fertiliser N
28  4832.339557

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9784668389319552,
        "recall": 0.9895470383275261,
        "f1-score": 0.9839757470766565,
        "support": 8036
    },
    "1": {
        "precision": 0.4166666666666667,
        "recall": 0.2553191489361702,
        "f1-score": 0.316622691292876,
        "support": 235
    },
    "accuracy": 0.9686857695562809,
    "macro avg": {
        "precision": 0.6975667527993109,
        "recall": 0.6224330936318482,
        "f1-score": 0.6502992191847663,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.962504677103598,
        "recall": 0.9686857695562809,
        "f1-score": 0.9650145612334466,
        "support": 8271
    }
} 

Training with Mute Swan 1km cells... 

K-Best Features Dataframe: 
         F Score        P Value                      Attribute
30  44547.855280   0.000000e+00                     Glyphosate
34  44547.855280   0.000000e+00                  Pendimethalin
29  44530.119154  

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9015748031496063,
        "recall": 0.8486316989737742,
        "f1-score": 0.874302496328928,
        "support": 3508
    },
    "1": {
        "precision": 0.8931374522036627,
        "recall": 0.9317656938904052,
        "f1-score": 0.9120427455815865,
        "support": 4763
    },
    "accuracy": 0.8965058638616854,
    "macro avg": {
        "precision": 0.8973561276766344,
        "recall": 0.8901986964320897,
        "f1-score": 0.8931726209552573,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.8967160070481035,
        "recall": 0.8965058638616854,
        "f1-score": 0.8960358788933594,
        "support": 8271
    }
} 

Training with Pheasant 1km cells... 

K-Best Features Dataframe: 
         F Score       P Value                      Attribute
29  16499.459522  0.000000e+00                 Chlorothalonil
30  16499.459522  0.000000e+00                     Glyphosate
31  16499.459522  0.

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.8772901576480614,
        "recall": 0.9133520626940707,
        "f1-score": 0.8949579831932772,
        "support": 6763
    },
    "1": {
        "precision": 0.5235772357723577,
        "recall": 0.4270557029177719,
        "f1-score": 0.4704163623082542,
        "support": 1508
    },
    "accuracy": 0.8246886712610325,
    "macro avg": {
        "precision": 0.7004336967102096,
        "recall": 0.6702038828059212,
        "f1-score": 0.6826871727507657,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.8127998800288424,
        "recall": 0.8246886712610325,
        "f1-score": 0.8175539493044349,
        "support": 8271
    }
} 

Training with Pink-footed Goose 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
29  7003.538445   0.000000e+00                 Chlorothalonil
31  7003.538445   0.000000e+00                       Mancozeb
32  7003.5

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9576040451186308,
        "recall": 0.9658689682228325,
        "f1-score": 0.96171875,
        "support": 7647
    },
    "1": {
        "precision": 0.532258064516129,
        "recall": 0.47596153846153844,
        "f1-score": 0.5025380710659899,
        "support": 624
    },
    "accuracy": 0.9289082335872325,
    "macro avg": {
        "precision": 0.74493105481738,
        "recall": 0.7209152533421854,
        "f1-score": 0.7321284105329949,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9255141053415832,
        "recall": 0.9289082335872325,
        "f1-score": 0.927076174295149,
        "support": 8271
    }
} 

Training with Pintail 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
25  1312.351703  7.668398e-282   Inflowing drainage direction
29  1306.817507  1.100426e-280                 Chlorothalonil
30  1306.817507  1.100426e-280  

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9814862104187947,
        "recall": 0.9494812252964426,
        "f1-score": 0.9652184831742843,
        "support": 8096
    },
    "1": {
        "precision": 0.0683371298405467,
        "recall": 0.17142857142857143,
        "f1-score": 0.0977198697068404,
        "support": 175
    },
    "accuracy": 0.9330189819852497,
    "macro avg": {
        "precision": 0.5249116701296707,
        "recall": 0.5604548983625071,
        "f1-score": 0.5314691764405624,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9621655612710261,
        "recall": 0.9330189819852497,
        "f1-score": 0.9468637186528477,
        "support": 8271
    }
} 

Training with Pochard 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  2527.073328   0.000000e+00                   Fertiliser K
27  2527.073328   0.000000e+00                   Fertiliser N
28  2527.073328   0.

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.970019579050416,
        "recall": 0.9896379525593009,
        "f1-score": 0.9797305648251142,
        "support": 8010
    },
    "1": {
        "precision": 0.16161616161616163,
        "recall": 0.06130268199233716,
        "f1-score": 0.08888888888888889,
        "support": 261
    },
    "accuracy": 0.9603433683955991,
    "macro avg": {
        "precision": 0.5658178703332888,
        "recall": 0.525470317275819,
        "f1-score": 0.5343097268570015,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9445095691422621,
        "recall": 0.9603433683955991,
        "f1-score": 0.951619130002317,
        "support": 8271
    }
} 

Training with Red-legged Partridge 1km cells... 

K-Best Features Dataframe: 
         F Score        P Value                      Attribute
26  13095.535859   0.000000e+00                   Fertiliser K
27  13095.535859   0.000000e+00                   Fertiliser N
28  1

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9255010714735913,
        "recall": 0.9745155295991506,
        "f1-score": 0.9493760910325209,
        "support": 7534
    },
    "1": {
        "precision": 0.4319526627218935,
        "recall": 0.19810040705563092,
        "f1-score": 0.27162790697674416,
        "support": 737
    },
    "accuracy": 0.9053318824809575,
    "macro avg": {
        "precision": 0.6787268670977424,
        "recall": 0.5863079683273907,
        "f1-score": 0.6105019990046325,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.8815226919245644,
        "recall": 0.9053318824809575,
        "f1-score": 0.8889843111208889,
        "support": 8271
    }
} 

Training with Ring-necked Parakeet 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
20  5088.050333   0.000000e+00                       Suburban
19  5040.841216   0.000000e+00                          Urban
26  24

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9893682023707686,
        "recall": 0.9948390267879086,
        "f1-score": 0.9920960725445744,
        "support": 8138
    },
    "1": {
        "precision": 0.5227272727272727,
        "recall": 0.3458646616541353,
        "f1-score": 0.41628959276018096,
        "support": 133
    },
    "accuracy": 0.9844033369604642,
    "macro avg": {
        "precision": 0.7560477375490207,
        "recall": 0.6703518442210219,
        "f1-score": 0.7041928326523776,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9818644853301949,
        "recall": 0.9844033369604642,
        "f1-score": 0.9828369428370028,
        "support": 8271
    }
} 

Training with Rock Dove 1km cells... 

K-Best Features Dataframe: 
         F Score       P Value                      Attribute
29  12090.235165  0.000000e+00                 Chlorothalonil
30  12090.235165  0.000000e+00                     Glyphosate
31  12090.235165  

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9200164068908941,
        "recall": 0.926221610461115,
        "f1-score": 0.9231085808354482,
        "support": 7265
    },
    "1": {
        "precision": 0.4399164054336468,
        "recall": 0.41848906560636184,
        "f1-score": 0.42893530310748856,
        "support": 1006
    },
    "accuracy": 0.864466207230081,
    "macro avg": {
        "precision": 0.6799664061622704,
        "recall": 0.6723553380337384,
        "f1-score": 0.6760219419714684,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.8616219441335503,
        "recall": 0.864466207230081,
        "f1-score": 0.8630023884289283,
        "support": 8271
    }
} 

Training with Ruddy Duck 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
13  1547.310000   0.000000e+00                     Freshwater
26   608.696172  3.448086e-133                   Fertiliser K
27   608.696172  3

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9983046742552676,
        "recall": 0.9990305380513815,
        "f1-score": 0.9986674742580254,
        "support": 8252
    },
    "1": {
        "precision": 0.38461538461538464,
        "recall": 0.2631578947368421,
        "f1-score": 0.3125,
        "support": 19
    },
    "accuracy": 0.9973401039777536,
    "macro avg": {
        "precision": 0.6914600294353261,
        "recall": 0.6310942163941118,
        "f1-score": 0.6555837371290127,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9968949176958238,
        "recall": 0.9973401039777536,
        "f1-score": 0.9970912220502025,
        "support": 8271
    }
} 

Training with Whooper Swan 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
25  1667.619516   0.000000e+00   Inflowing drainage direction
23  1539.400308   0.000000e+00                   Surface type
21  1389.577692  5.771891e-2

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9743117010816126,
        "recall": 0.9865588052271314,
        "f1-score": 0.9803970069878177,
        "support": 8035
    },
    "1": {
        "precision": 0.2,
        "recall": 0.11440677966101695,
        "f1-score": 0.14555256064690028,
        "support": 236
    },
    "accuracy": 0.9616733164067223,
    "macro avg": {
        "precision": 0.5871558505408063,
        "recall": 0.5504827924440742,
        "f1-score": 0.562974783817359,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.9522179323166192,
        "recall": 0.9616733164067223,
        "f1-score": 0.9565760313698204,
        "support": 8271
    }
} 

Training with Wigeon 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
29  4767.972096   0.000000e+00                 Chlorothalonil
30  4767.972096   0.000000e+00                     Glyphosate
31  4767.972096   0.000000e+00      

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
 {
    "0": {
        "precision": 0.9446194225721785,
        "recall": 0.9351695465765882,
        "f1-score": 0.9398707318665536,
        "support": 7697
    },
    "1": {
        "precision": 0.23348694316436253,
        "recall": 0.26480836236933797,
        "f1-score": 0.24816326530612245,
        "support": 574
    },
    "accuracy": 0.8886470801595938,
    "macro avg": {
        "precision": 0.5890531828682705,
        "recall": 0.5999889544729631,
        "f1-score": 0.594016998586338,
        "support": 8271
    },
    "weighted avg": {
        "precision": 0.8952674647460285,
        "recall": 0.8886470801595938,
        "f1-score": 0.8918668525526028,
        "support": 8271
    }
} 



In [11]:
for dict in df_dicts:
    print(dict['name'], '\n',dict['report'])

Barnacle Goose 1km 
 {'0': {'precision': 0.9843403205918619, 'recall': 0.9859207113745831, 'f1-score': 0.9851298821496884, 'support': 8097}, '1': {'precision': 0.2919254658385093, 'recall': 0.27011494252873564, 'f1-score': 0.28059701492537314, 'support': 174}, 'accuracy': 0.9708620481199371, 'macro avg': {'precision': 0.6381328932151856, 'recall': 0.6280178269516594, 'f1-score': 0.6328634485375308, 'support': 8271}, 'weighted avg': {'precision': 0.9697737404048129, 'recall': 0.9708620481199371, 'f1-score': 0.9703083709784842, 'support': 8271}}
Canada Goose 1km 
 {'0': {'precision': 0.8842141269026851, 'recall': 0.903056768558952, 'f1-score': 0.8935361216730037, 'support': 5725}, '1': {'precision': 0.7710396039603961, 'recall': 0.7340926944226237, 'f1-score': 0.752112676056338, 'support': 2546}, 'accuracy': 0.8510458227542014, 'macro avg': {'precision': 0.8276268654315406, 'recall': 0.8185747314907879, 'f1-score': 0.8228243988646708, 'support': 8271}, 'weighted avg': {'precision': 0.849