In [1]:
import pandas as pd
import numpy as np
import rioxarray
import json, os

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif, mutual_info_classif
from sklearn.metrics import f1_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier, StackingClassifier

In [2]:
INVASIVE_BIRDS_PATH = 'Datasets/Machine Learning/1km Rasters/Birds'
df_1km = pd.read_csv('Datasets/Machine Learning/Dataframes/1km_All_Birds_DF.csv', index_col=[0,1])

total_birds = (df_1km['Occurrence']==1).sum()
df_dicts = []

for file in os.listdir(INVASIVE_BIRDS_PATH):
    filename = os.fsdecode(file)
    if not filename.endswith('.tif') or filename.endswith('All_Invasive_Birds_1km.tif') :
         continue



    bird_name = filename[:-4].replace('_', ' ')

    bird_dataset = rioxarray.open_rasterio(f'{INVASIVE_BIRDS_PATH}/{file}')
    bird_dataset.name = 'data'
    bird_df = bird_dataset.squeeze().drop("spatial_ref").drop("band").to_dataframe()

    # Check if index matches
    if not df_1km.index.equals(bird_df.index):
        print('Warning: Index does not match')
        continue

    bird_df['Occurrence'] = [0 if x == -1 else 1 for x in bird_df['data']]
    bird_df = df_1km.drop(columns='Occurrence').join(bird_df.drop(columns='data'))
    
    bird_dict = {'name' : bird_name, 'dataframe' : bird_df }
    df_dicts.append(bird_dict)
    display(bird_df.sample(5))


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
909500.0,696500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1205500.0,632500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
490500.0,395500.0,0,0,0,84,0,13,0,0,0,0,...,0.2121255,0.04213508,0.2082516,-3.4000000000000003e+38,0.1470788,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
726500.0,248500.0,0,0,0,0,0,0,100,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
215500.0,675500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
317500.0,114500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
952500.0,643500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
682500.0,450500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
13500.0,243500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
921500.0,511500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1167500.0,267500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
801500.0,448500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1105500.0,111500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
573500.0,393500.0,19,0,68,12,0,0,0,0,0,0,...,13.26719,3.381187,8.610271,4.714598,12.86398,2.385122,6.830944,3.232311,7.68837,0
1175500.0,411500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
945500.0,262500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
225500.0,192500.0,2,0,0,98,0,0,0,0,0,0,...,0.9194584,0.1098393,1.355949,-3.4000000000000003e+38,0.5022678,-3.4000000000000003e+38,0.1761994,0.1365592,0.2296267,0
329500.0,619500.0,11,0,66,18,0,0,0,0,0,0,...,21.82448,55.5596,4.021284,30.81578,13.65216,21.29149,13.91516,9.202444,8.581655,0
641500.0,670500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
548500.0,6500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
886500.0,690500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1239500.0,580500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
254500.0,247500.0,13,5,0,81,0,0,0,0,0,0,...,0.8624901,0.1022491,0.6961395,-3.4000000000000003e+38,1.311996,-3.4000000000000003e+38,0.4560896,0.4708502,0.499624,0
1137500.0,684500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
625500.0,639500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
207500.0,452500.0,0,0,0,4,57,0,0,0,0,0,...,0.9846133,0.03117771,0.09988491,-3.4000000000000003e+38,0.7951632,-3.4000000000000003e+38,0.4263253,1.240713,0.3921932,0
284500.0,183500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
227500.0,183500.0,4,0,6,86,0,0,0,0,0,1,...,0.3801611,0.08446837,0.5228598,-3.4000000000000003e+38,0.2601777,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
66500.0,670500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
285500.0,374500.0,21,0,32,41,0,0,0,0,0,0,...,6.21179,0.6733531,2.535817,0.05295391,6.014146,0.02396427,2.245071,3.803451,2.647878,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1142500.0,110500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1103500.0,642500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
204500.0,61500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
125500.0,290500.0,22,0,8,69,0,0,0,0,0,0,...,1.186446,0.07590471,0.4946035,-3.4000000000000003e+38,1.245619,-3.4000000000000003e+38,0.6065602,0.1168546,0.4210978,0
1273500.0,432500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
817500.0,133500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
192500.0,632500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
559500.0,596500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
878500.0,40500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1259500.0,503500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1052500.0,433500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
325500.0,178500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
367500.0,75500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
267500.0,412500.0,0,0,0,82,0,0,0,0,0,0,...,0.6637642,0.08211117,0.4103971,-3.4000000000000003e+38,0.2801343,-3.4000000000000003e+38,0.1009818,0.06597009,0.1256569,0
1177500.0,1500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
699500.0,227500.0,14,39,0,0,0,0,0,0,0,42,...,0.1275387,0.01921859,0.0659483,-3.4000000000000003e+38,0.03575328,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
757500.0,351500.0,8,0,89,2,0,0,0,0,0,0,...,26.28178,126.0066,18.16816,26.17764,17.27526,35.54871,6.710961,28.05907,24.13024,0
158500.0,149500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
868500.0,588500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1222500.0,458500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
103500.0,133500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
487500.0,699500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1178500.0,286500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
788500.0,230500.0,1,0,0,9,0,0,82,0,5,3,...,0.0003224105,5.820755e-05,0.0002351651,-3.4000000000000003e+38,0.0001169223,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
50500.0,484500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
341500.0,383500.0,4,0,32,63,0,0,0,0,0,0,...,9.627043,1.351183,3.312051,0.2550458,7.007259,0.1362472,4.358396,4.124463,3.608174,0
741500.0,292500.0,0,89,0,0,0,0,5,0,6,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
897500.0,392500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
406500.0,324500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
759500.0,401500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
737500.0,89500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
301500.0,135500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1179500.0,399500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
123500.0,95500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
338500.0,68500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
580500.0,396500.0,0,0,0,89,0,0,10,0,0,1,...,0.278381,0.04016541,0.2789213,-3.4000000000000003e+38,0.1633772,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
569500.0,444500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1148500.0,680500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
734500.0,166500.0,0,0,0,0,0,0,84,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1199500.0,28500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1218500.0,292500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
817500.0,169500.0,15,35,0,2,0,0,0,0,0,48,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
844500.0,609500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
45500.0,17500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
611500.0,30500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
500500.0,111500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
946500.0,325500.0,0,51,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1207500.0,393500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
284500.0,387500.0,5,0,65,21,0,0,0,0,0,0,...,14.84014,48.46837,5.742386,21.92155,14.56819,13.43816,15.00157,6.773377,19.4797,0
126500.0,355500.0,0,0,87,13,0,0,0,0,0,0,...,30.01548,4.629825,4.704039,4.689373,16.93968,2.371009,13.98251,1.799003,9.419214,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1114500.0,272500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
125500.0,70500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
71500.0,637500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
575500.0,77500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
596500.0,642500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
672500.0,570500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
46500.0,186500.0,4,0,14,38,0,19,0,1,0,0,...,0.6125534,0.07539666,0.7223202,-3.4000000000000003e+38,0.2855014,-3.4000000000000003e+38,0.04064706,0.02992747,0.07541182,0
1099500.0,197500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
190500.0,654500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
925500.0,130500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
123500.0,517500.0,12,0,42,44,0,0,0,0,0,0,...,19.96625,11.7806,3.83259,12.71289,12.11293,7.890797,19.66655,3.548662,15.9121,0
518500.0,574500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
553500.0,271500.0,8,0,0,84,0,0,0,0,0,4,...,0.5108674,0.07631438,0.3170867,-3.4000000000000003e+38,0.2142551,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1267500.0,485500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
531500.0,520500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Glyphosate,Mancozeb,Mecoprop-P,Metamitron,Pendimethalin,PropamocarbHydrochloride,Prosulfocarb,Sulphur,Tri-allate,Occurrence
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
626500.0,331500.0,0,0,0,0,0,0,100,0,0,0,...,2.226481e-06,3.280333e-07,1.394949e-06,-3.4000000000000003e+38,8.527649e-07,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
980500.0,404500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
1090500.0,454500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
850500.0,603500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0
824500.0,613500.0,0,0,0,0,0,0,0,0,0,0,...,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,0


In [3]:
# Data Cleaning
for dict in df_dicts:
    cur_df = dict["dataframe"]
    cur_df_name = dict["name"]

    print(f'{cur_df_name} data before drop: \n {cur_df.value_counts("Occurrence")} \n')
    
    no_occurences = cur_df[cur_df['Occurrence']==0].index
    sample_size = sum(cur_df['Occurrence']==0) - total_birds + sum(cur_df['Occurrence']==1)
    random_indices = np.random.choice(no_occurences, sample_size, replace=False)
    dict["dataframe"] =  cur_df.drop(random_indices)
    
    print(f'{cur_df_name} data after drop: \n {dict["dataframe"].value_counts("Occurrence")} \n')


# for dict in df_dicts:
#     cur_df = dict["dataframe"]
#     cur_df_name = dict["name"]

#     print(f'{cur_df_name} data before drop: \n {cur_df.value_counts("Occurrence")} \n')
    
#     no_occurences = cur_df[cur_df['Occurrence']==0].index
#     sample_size = sum(cur_df['Occurrence']==0) - sum(cur_df['Occurrence']==1)
#     random_indices = np.random.choice(no_occurences, sample_size, replace=False)
#     dict["dataframe"] =  cur_df.drop(random_indices)
    
#     print(f'{cur_df_name} data after drop: \n {dict["dataframe"].value_counts("Occurrence")} \n')

Barnacle Goose 1km data before drop: 
 Occurrence
0    909231
1       769
dtype: int64 

Barnacle Goose 1km data after drop: 
 Occurrence
0    32315
1      769
dtype: int64 

Canada Goose 1km data before drop: 
 Occurrence
0    899853
1     10147
dtype: int64 

Canada Goose 1km data after drop: 
 Occurrence
0    22937
1    10147
dtype: int64 

Egyptian Goose 1km data before drop: 
 Occurrence
0    909137
1       863
dtype: int64 

Egyptian Goose 1km data after drop: 
 Occurrence
0    32221
1      863
dtype: int64 

Gadwall 1km data before drop: 
 Occurrence
0    907795
1      2205
dtype: int64 

Gadwall 1km data after drop: 
 Occurrence
0    30879
1     2205
dtype: int64 

Goshawk 1km data before drop: 
 Occurrence
0    909554
1       446
dtype: int64 

Goshawk 1km data after drop: 
 Occurrence
0    32638
1      446
dtype: int64 

Grey Partridge 1km data before drop: 
 Occurrence
0    907877
1      2123
dtype: int64 

Grey Partridge 1km data after drop: 
 Occurrence
0    30961
1     21

In [4]:
# Standardisation
def standardise(X):
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Add headers back
    X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)

    # Revert 'Surface type' back to non-standardised column as it is a categorical feature
    X_scaled_df['Surface type'] = X['Surface type'].values
    return X_scaled_df

In [5]:
# Feature Selection

# Check if any columns have NaN in them
# nan_columns = []
# for column in X_scaled_df:
#     if X_scaled_df[column].isnull().values.any():
#         nan_columns.append(column)
# print(nan_columns if len(nan_columns)!= 0 else 'None')


# Using ANOVA F-Score as a feature selection method
def feature_select(X, y):
    k_nums = [10, 15, 20, 25, 30, 35]
    kbest_dict = {}
    for num in k_nums:
        # Needs to be 1d array, y.values.ravel() converts y into a 1d array
        best_X = SelectKBest(f_classif, k=num).fit(X, y.values.ravel())
        kbest_dict[str(num)] = best_X.get_feature_names_out().tolist()
    kbest_dict['40'] = list(X.columns)

    best_X = SelectKBest(f_classif, k='all').fit(X, y.values.ravel())

    feat_scores = pd.DataFrame()
    feat_scores["F Score"] = best_X.scores_
    feat_scores["P Value"] = best_X.pvalues_
    feat_scores["Attribute"] = X.columns
    kbest_dict['Dataframe'] = feat_scores.sort_values(["F Score", "P Value"], ascending=[False, False])

    print(f'K-Best Features Dataframe: \n{kbest_dict["Dataframe"]} \n')
    # print(json.dumps(kbest_dict, indent=4))
    return kbest_dict

In [9]:
All_bird_occurrences = pd.DataFrame([(dict['name'],sum(dict['dataframe']['Occurrence'] == 1)) for dict in df_dicts], columns=['Name', 'Occurrence Count'])
All_bird_occurrences['Percentage'] = All_bird_occurrences['Occurrence Count']/total_birds

All_bird_occurrences.sort_values('Occurrence Count', ascending=False)

Unnamed: 0,Name,Occurrence Count,Percentage
9,Mute Swan 1km,19124,0.578044
1,Canada Goose 1km,10147,0.306704
10,Pheasant 1km,5855,0.176974
16,Rock Dove 1km,3919,0.118456
7,Little Owl 1km,3548,0.107242
14,Red-legged Partridge 1km,2953,0.089258
11,Pink-footed Goose 1km,2646,0.079978
19,Wigeon 1km,2317,0.070034
3,Gadwall 1km,2205,0.066649
5,Grey Partridge 1km,2123,0.06417


In [7]:
# Add model pipeline
seed = 42
verbose = False

estimators = [
    ('lr', LogisticRegression(max_iter=10000, solver='saga', random_state=seed, penalty='l2', verbose=verbose)),
    ('knn', KNeighborsClassifier(n_neighbors=5, weights='distance')),
    ('sgd', SGDClassifier( max_iter=10000, loss='modified_huber', random_state=seed, penalty='l2', verbose=verbose)),
    ('rf', RandomForestClassifier(n_estimators=20,max_features=None, random_state=seed, verbose=verbose))
]


for dict in df_dicts:
    print(f'Training with {dict["name"]} cells... \n')
    # coords, X, y = data['dataframe'].iloc[:, :2], data['dataframe'].iloc[:, 2:-1], data['dataframe'].iloc[:, [-1]]
    # data['coords'] = coords
    
    X, y = dict['dataframe'].iloc[:, 0:-1], dict['dataframe'].iloc[:, [-1]], 

    dict['X'] = standardise(X)
    dict['y'] = y
    dict['kbest'] = feature_select(X, y)

    X_train, X_test, y_train, y_test = train_test_split(dict['X'], dict['y'], random_state=seed)
    dict['X_train'], dict['X_test'], dict['y_train'], dict['y_test'] = X_train, X_test, y_train, y_test


    stack_clf = StackingClassifier(
        estimators=estimators, 
        final_estimator=GradientBoostingClassifier(n_estimators=20, learning_rate=0.5, max_features=None, max_depth=2, random_state=seed)
    )

    stack_clf.fit(X_train, y_train)
    y_pred = stack_clf.predict(X_test)
    
    dict['predictions'] = y_pred
    dict['report'] = classification_report(y_test, y_pred)
    
    print(f'Classification report: \n {dict["report"]} \n')



Training with Barnacle Goose 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
18  1572.310332   0.000000e+00                      Saltmarsh
25  1555.917994   0.000000e+00   Inflowing drainage direction
29  1553.503572   0.000000e+00                 Chlorothalonil
30  1553.503572   0.000000e+00                     Glyphosate
31  1553.503572   0.000000e+00                       Mancozeb
32  1553.503572   0.000000e+00                     Mecoprop-P
34  1553.503572   0.000000e+00                  Pendimethalin
23  1392.156996  1.672789e-298                   Surface type
22  1237.825824  3.015810e-266      Cumulative catchment area
24  1195.240618  2.546797e-257  Outflowing drainage direction
21  1187.867317  8.968456e-256                      Elevation
15  1056.190657  4.291752e-228         Supralittoral sediment
13  1055.674487  5.512584e-228                     Freshwater
17   924.239533  3.025910e-200              Littoral sedimen

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      8080
           1       0.41      0.12      0.19       191

    accuracy                           0.98      8271
   macro avg       0.70      0.56      0.59      8271
weighted avg       0.97      0.98      0.97      8271
 

Training with Canada Goose 1km cells... 

K-Best Features Dataframe: 
         F Score        P Value                      Attribute
29  30961.185106   0.000000e+00                 Chlorothalonil
30  30961.185106   0.000000e+00                     Glyphosate
31  30961.185106   0.000000e+00                       Mancozeb
32  30961.185106   0.000000e+00                     Mecoprop-P
34  30961.185106   0.000000e+00                  Pendimethalin
23  27930.947660   0.000000e+00                   Surface type
26  27285.748416   0.000000e+00                   Fertiliser K
27  27285.748416   0.000000e+00                   Fertiliser N
28  27285

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.94      0.86      0.90      5733
           1       0.74      0.88      0.81      2538

    accuracy                           0.87      8271
   macro avg       0.84      0.87      0.85      8271
weighted avg       0.88      0.87      0.87      8271
 

Training with Egyptian Goose 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  4895.111887   0.000000e+00                   Fertiliser K
27  4895.111887   0.000000e+00                   Fertiliser N
28  4895.111887   0.000000e+00                   Fertiliser P
22  4037.549196   0.000000e+00      Cumulative catchment area
29  3224.279226   0.000000e+00                 Chlorothalonil
30  3224.279226   0.000000e+00                     Glyphosate
31  3224.279226   0.000000e+00                       Mancozeb
32  3224.279226   0.000000e+00                     Mecoprop-P
34  3224.279226 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.99      1.00      0.99      8052
           1       0.76      0.46      0.57       219

    accuracy                           0.98      8271
   macro avg       0.87      0.73      0.78      8271
weighted avg       0.98      0.98      0.98      8271
 

Training with Gadwall 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  8423.681171   0.000000e+00                   Fertiliser K
27  8423.681171   0.000000e+00                   Fertiliser N
28  8423.681171   0.000000e+00                   Fertiliser P
32  7204.025331   0.000000e+00                     Mecoprop-P
29  7202.526421   0.000000e+00                 Chlorothalonil
30  7202.526421   0.000000e+00                     Glyphosate
31  7202.526421   0.000000e+00                       Mancozeb
34  7202.526421   0.000000e+00                  Pendimethalin
37  6669.832115   0.000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.97      0.98      0.97      7725
           1       0.67      0.50      0.57       546

    accuracy                           0.95      8271
   macro avg       0.82      0.74      0.77      8271
weighted avg       0.95      0.95      0.95      8271
 

Training with Goshawk 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
23  1230.786467  8.998959e-265                   Surface type
21  1136.658592  5.062553e-245                      Elevation
29  1093.763914  5.286183e-236                 Chlorothalonil
31  1093.763914  5.286183e-236                       Mancozeb
32  1093.763914  5.286183e-236                     Mecoprop-P
30  1093.534232  5.908334e-236                     Glyphosate
34  1093.534232  5.908334e-236                  Pendimethalin
24  1092.495315  9.773431e-236  Outflowing drainage direction
22  1024.446292  2.0993

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.99      1.00      0.99      8166
           1       0.50      0.02      0.04       105

    accuracy                           0.99      8271
   macro avg       0.74      0.51      0.52      8271
weighted avg       0.98      0.99      0.98      8271
 

Training with Grey Partridge 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  8930.052071   0.000000e+00                   Fertiliser K
27  8930.052071   0.000000e+00                   Fertiliser N
28  8930.052071   0.000000e+00                   Fertiliser P
38  8601.028632   0.000000e+00                     Tri-allate
37  8583.983150   0.000000e+00                        Sulphur
36  8578.217049   0.000000e+00                   Prosulfocarb
2   8228.752418   0.000000e+00                         Arable
29  7819.171381   0.000000e+00                 Chlorothalonil
30  7819.171381 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.93      1.00      0.96      7701
           1       0.16      0.01      0.02       570

    accuracy                           0.93      8271
   macro avg       0.54      0.50      0.49      8271
weighted avg       0.88      0.93      0.90      8271
 

Training with Indian Peafowl 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  1454.334656  1.860180e-311                   Fertiliser K
27  1454.334656  1.860180e-311                   Fertiliser N
28  1454.334656  1.860180e-311                   Fertiliser P
36  1170.087237  4.829586e-252                   Prosulfocarb
37  1168.826240  8.883996e-252                        Sulphur
38  1165.686509  4.052467e-251                     Tri-allate
31  1136.820393  4.681363e-245                       Mancozeb
32  1136.820393  4.681363e-245                     Mecoprop-P
29  1136.605140 

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.99      1.00      1.00      8197
           1       0.00      0.00      0.00        74

    accuracy                           0.99      8271
   macro avg       0.50      0.50      0.50      8271
weighted avg       0.98      0.99      0.99      8271
 

Training with Little Owl 1km cells... 

K-Best Features Dataframe: 
         F Score        P Value                      Attribute
26  19185.856510   0.000000e+00                   Fertiliser K
27  19185.856510   0.000000e+00                   Fertiliser N
28  19185.856510   0.000000e+00                   Fertiliser P
36  13986.404351   0.000000e+00                   Prosulfocarb
37  13970.376131   0.000000e+00                        Sulphur
38  13919.558480   0.000000e+00                     Tri-allate
29  13428.446713   0.000000e+00                 Chlorothalonil
30  13428.446713   0.000000e+00                     Glyphosate
31  13428.4

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.94      0.95      0.95      7438
           1       0.53      0.49      0.51       833

    accuracy                           0.90      8271
   macro avg       0.73      0.72      0.73      8271
weighted avg       0.90      0.90      0.90      8271
 

Training with Mandarin Duck 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  5131.456760   0.000000e+00                   Fertiliser K
27  5131.456760   0.000000e+00                   Fertiliser N
28  5131.456760   0.000000e+00                   Fertiliser P
29  3878.004167   0.000000e+00                 Chlorothalonil
30  3878.004167   0.000000e+00                     Glyphosate
31  3878.004167   0.000000e+00                       Mancozeb
32  3878.004167   0.000000e+00                     Mecoprop-P
34  3878.004167   0.000000e+00                  Pendimethalin
0   3576.924171  

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.98      0.99      0.99      8026
           1       0.53      0.32      0.40       245

    accuracy                           0.97      8271
   macro avg       0.75      0.65      0.69      8271
weighted avg       0.97      0.97      0.97      8271
 

Training with Mute Swan 1km cells... 

K-Best Features Dataframe: 
         F Score        P Value                      Attribute
30  44820.761242   0.000000e+00                     Glyphosate
34  44820.761242   0.000000e+00                  Pendimethalin
29  44802.890928   0.000000e+00                 Chlorothalonil
31  44785.031708   0.000000e+00                       Mancozeb
32  44767.183570   0.000000e+00                     Mecoprop-P
23  42947.806279   0.000000e+00                   Surface type
25  37956.118930   0.000000e+00   Inflowing drainage direction
21  32928.523256   0.000000e+00                      Elevation
24  25171.42

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.93      0.83      0.88      3492
           1       0.88      0.96      0.92      4779

    accuracy                           0.90      8271
   macro avg       0.91      0.89      0.90      8271
weighted avg       0.90      0.90      0.90      8271
 

Training with Pheasant 1km cells... 

K-Best Features Dataframe: 
         F Score       P Value                      Attribute
29  16125.592311  0.000000e+00                 Chlorothalonil
30  16125.592311  0.000000e+00                     Glyphosate
31  16125.592311  0.000000e+00                       Mancozeb
32  16125.592311  0.000000e+00                     Mecoprop-P
34  16125.592311  0.000000e+00                  Pendimethalin
23  14918.912287  0.000000e+00                   Surface type
26  13591.397026  0.000000e+00                   Fertiliser K
27  13591.397026  0.000000e+00                   Fertiliser N
28  13591.397026  0.00

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.91      0.89      0.90      6747
           1       0.57      0.63      0.60      1524

    accuracy                           0.84      8271
   macro avg       0.74      0.76      0.75      8271
weighted avg       0.85      0.84      0.85      8271
 

Training with Pink-footed Goose 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
29  6883.229428   0.000000e+00                 Chlorothalonil
30  6883.229428   0.000000e+00                     Glyphosate
31  6883.229428   0.000000e+00                       Mancozeb
32  6883.229428   0.000000e+00                     Mecoprop-P
34  6883.229428   0.000000e+00                  Pendimethalin
23  5677.204457   0.000000e+00                   Surface type
25  5654.731792   0.000000e+00   Inflowing drainage direction
37  5300.151096   0.000000e+00                        Sulphur
36  5275.9879

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.95      0.98      0.96      7595
           1       0.63      0.43      0.51       676

    accuracy                           0.93      8271
   macro avg       0.79      0.71      0.74      8271
weighted avg       0.92      0.93      0.93      8271
 

Training with Pintail 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
29  1309.707036  2.738523e-281                 Chlorothalonil
30  1309.707036  2.738523e-281                     Glyphosate
31  1309.707036  2.738523e-281                       Mancozeb
32  1309.707036  2.738523e-281                     Mecoprop-P
34  1309.707036  2.738523e-281                  Pendimethalin
25  1296.961484  1.265648e-278   Inflowing drainage direction
23  1139.764453  1.126681e-245                   Surface type
36  1129.215730  1.855356e-243                   Prosulfocarb
37  1127.107684  5.1461

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      8079
           1       0.33      0.02      0.03       192

    accuracy                           0.98      8271
   macro avg       0.66      0.51      0.51      8271
weighted avg       0.96      0.98      0.97      8271
 

Training with Pochard 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
26  2484.489301   0.000000e+00                   Fertiliser K
27  2484.489301   0.000000e+00                   Fertiliser N
28  2484.489301   0.000000e+00                   Fertiliser P
29  2380.704334   0.000000e+00                 Chlorothalonil
31  2380.704334   0.000000e+00                       Mancozeb
32  2380.704334   0.000000e+00                     Mecoprop-P
30  2380.194618   0.000000e+00                     Glyphosate
34  2380.194618   0.000000e+00                  Pendimethalin
37  2154.073576   0.000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.96      1.00      0.98      7976
           1       0.14      0.00      0.01       295

    accuracy                           0.96      8271
   macro avg       0.55      0.50      0.49      8271
weighted avg       0.94      0.96      0.95      8271
 

Training with Red-legged Partridge 1km cells... 

K-Best Features Dataframe: 
         F Score        P Value                      Attribute
26  13608.143410   0.000000e+00                   Fertiliser K
27  13608.143410   0.000000e+00                   Fertiliser N
28  13608.143410   0.000000e+00                   Fertiliser P
37  11722.706667   0.000000e+00                        Sulphur
38  11715.544994   0.000000e+00                     Tri-allate
36  11709.410362   0.000000e+00                   Prosulfocarb
29  10810.751816   0.000000e+00                 Chlorothalonil
30  10810.751816   0.000000e+00                     Glyphosate
3

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.92      0.99      0.95      7538
           1       0.51      0.09      0.15       733

    accuracy                           0.91      8271
   macro avg       0.71      0.54      0.55      8271
weighted avg       0.88      0.91      0.88      8271
 

Training with Ring-necked Parakeet 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
19  5031.366743   0.000000e+00                          Urban
20  4777.694309   0.000000e+00                       Suburban
26  2539.786602   0.000000e+00                   Fertiliser K
27  2539.786602   0.000000e+00                   Fertiliser N
28  2539.786602   0.000000e+00                   Fertiliser P
22  2146.454037   0.000000e+00      Cumulative catchment area
29  1756.478050   0.000000e+00                 Chlorothalonil
30  1756.478050   0.000000e+00                     Glyphosate
31  1756.4

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       1.00      0.98      0.99      8142
           1       0.34      0.71      0.46       129

    accuracy                           0.97      8271
   macro avg       0.67      0.84      0.72      8271
weighted avg       0.99      0.97      0.98      8271
 

Training with Rock Dove 1km cells... 

K-Best Features Dataframe: 
         F Score       P Value                      Attribute
29  12291.076586  0.000000e+00                 Chlorothalonil
31  12291.076586  0.000000e+00                       Mancozeb
32  12291.076586  0.000000e+00                     Mecoprop-P
30  12288.365426  0.000000e+00                     Glyphosate
34  12288.365426  0.000000e+00                  Pendimethalin
23   9854.175473  0.000000e+00                   Surface type
26   9684.742000  0.000000e+00                   Fertiliser K
27   9684.742000  0.000000e+00                   Fertiliser N
28   9684.742000  0.0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.90      0.98      0.94      7280
           1       0.62      0.22      0.33       991

    accuracy                           0.89      8271
   macro avg       0.76      0.60      0.63      8271
weighted avg       0.87      0.89      0.87      8271
 

Training with Ruddy Duck 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
13  1677.200441   0.000000e+00                     Freshwater
26   620.634967  9.733386e-136                   Fertiliser K
27   620.634967  9.733386e-136                   Fertiliser N
28   620.634967  9.733386e-136                   Fertiliser P
24   554.681726  1.212712e-121  Outflowing drainage direction
22   466.554370  9.254500e-103      Cumulative catchment area
29   419.923131   9.567252e-93                 Chlorothalonil
30   419.923131   9.567252e-93                     Glyphosate
31   419.923131   9.

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      8246
           1       0.00      0.00      0.00        25

    accuracy                           1.00      8271
   macro avg       0.50      0.50      0.50      8271
weighted avg       0.99      1.00      1.00      8271
 

Training with Whooper Swan 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
25  1669.363045   0.000000e+00   Inflowing drainage direction
23  1541.475695   0.000000e+00                   Surface type
21  1387.931811  1.272226e-297                      Elevation
29  1365.037363  7.601549e-293                 Chlorothalonil
30  1365.037363  7.601549e-293                     Glyphosate
31  1365.037363  7.601549e-293                       Mancozeb
32  1365.037363  7.601549e-293                     Mecoprop-P
34  1365.037363  7.601549e-293                  Pendimethalin
24  1295.195499  2

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.97      1.00      0.98      8012
           1       0.15      0.01      0.01       259

    accuracy                           0.97      8271
   macro avg       0.56      0.50      0.50      8271
weighted avg       0.94      0.97      0.95      8271
 

Training with Wigeon 1km cells... 

K-Best Features Dataframe: 
        F Score        P Value                      Attribute
32  4778.562566   0.000000e+00                     Mecoprop-P
29  4777.502052   0.000000e+00                 Chlorothalonil
30  4777.502052   0.000000e+00                     Glyphosate
31  4777.502052   0.000000e+00                       Mancozeb
34  4777.502052   0.000000e+00                  Pendimethalin
25  4531.339838   0.000000e+00   Inflowing drainage direction
23  4346.704608   0.000000e+00                   Surface type
21  3770.373168   0.000000e+00                      Elevation
37  3295.059573   0.0000

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classification report: 
               precision    recall  f1-score   support

           0       0.94      0.99      0.97      7704
           1       0.55      0.16      0.25       567

    accuracy                           0.93      8271
   macro avg       0.75      0.58      0.61      8271
weighted avg       0.91      0.93      0.92      8271
 



In [8]:
for dict in df_dicts:
    print(dict['name'], '\n',dict['report'])

Barnacle Goose 1km 
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      8080
           1       0.41      0.12      0.19       191

    accuracy                           0.98      8271
   macro avg       0.70      0.56      0.59      8271
weighted avg       0.97      0.98      0.97      8271

Canada Goose 1km 
               precision    recall  f1-score   support

           0       0.94      0.86      0.90      5733
           1       0.74      0.88      0.81      2538

    accuracy                           0.87      8271
   macro avg       0.84      0.87      0.85      8271
weighted avg       0.88      0.87      0.87      8271

Egyptian Goose 1km 
               precision    recall  f1-score   support

           0       0.99      1.00      0.99      8052
           1       0.76      0.46      0.57       219

    accuracy                           0.98      8271
   macro avg       0.87      0.73      0.78      8271
weighted avg 