In [1]:
import rasterio
import rasterio.plot
import pandas as pd
import matplotlib
import geopandas as gpd
import numpy as np
import rioxarray

####Objective####
# Label data
# Start creating a machine learning pipeline
# Upsample data to 10m and 25m cells


# All raster paths below are relative to the notebook's working directory and
# use forward slashes so they resolve on Windows, macOS and Linux alike.
# (Backslashes inside ordinary string literals are invalid escape sequences
# such as "\D" or "\i", which newer Python versions warn about, and they are
# not path separators on POSIX systems.)
DATASETS_DIR = 'Datasets'
DIGIMAP_DIR = f'{DATASETS_DIR}/Digimap'

# <<< INVASIVE SPECIES MAP >>>
INVASIVE_BIRDS_PATH = f'{DATASETS_DIR}/birbs.tif'


# <<< LAND COVER MAP >>>
# Dimensions: 700000x1300000
LAND_COVER_MAP_PATH = f'{DIGIMAP_DIR}/a3ff9411-3a7a-47e1-9b3e-79f21648237d/data/gb2021lcm1km_percentage_target.tif'

# <<< FERTILISER >>>
# Dimensions: 651000x1216000
# The dataset consists of maps of the predicted average annual application rates (2010-2015) of three different inorganic 
# chemical fertilisers – nitrogen (N), phosphorus (P) and potassium (K) – in England across a six-year period, along with 
# their respective estimates of uncertainty, at a 1 km x 1 km resolution. 
FERTILISER_DIR = f'{DIGIMAP_DIR}/Land-Cover-plus-Fertilisers_4731497'
FERTILISER_K_PATH = f'{FERTILISER_DIR}/fertiliser_k_prediction_uncertainty.tif'
FERTILISER_N_PATH = f'{FERTILISER_DIR}/fertiliser_n_prediction_uncertainty.tif'
FERTILISER_P_PATH = f'{FERTILISER_DIR}/fertiliser_p_prediction_uncertainty.tif'

# <<< PESTICIDE >>>
# Dimensions: 596000x1209000
PESTICIDE_PATH = f'{DIGIMAP_DIR}/Land-Cover-plus-Pesticides_4719434/Amidosulfuron.tif'


# <<< INTEGRATED HYDROLOGICAL DIGITAL TERRAIN MODEL >>>
# Dimensions: 700000x1300000
# These datasets all only have one band
IHDTM_DIR = f'{DIGIMAP_DIR}/ihdtm-2016_4731496'
ELEVATION_PATH = f'{IHDTM_DIR}/HGHT_1km.tif'
CUMULATIVE_CATCHMENT_AREA_PATH = f'{IHDTM_DIR}/CCAR_1km.tif'
SURFACE_TYPE_PATH = f'{IHDTM_DIR}/SURF_1km.tif'
OUTFLOWING_DRAINAGE_DIRECTION_PATH = f'{IHDTM_DIR}/OUTF_1km.tif'
INFLOWING_DRAINAGE_PATTERN_PATH = f'{IHDTM_DIR}/INFL_1km.tif'

# Maps the short IHDTM layer code (used later as the dataframe column name)
# to the raster file holding that layer.
IHDTM = {
    'HGHT': ELEVATION_PATH, 
    'CCAR': CUMULATIVE_CATCHMENT_AREA_PATH, 
    'SURF': SURFACE_TYPE_PATH, 
    'OUTF': OUTFLOWING_DRAINAGE_DIRECTION_PATH, 
    'INFL': INFLOWING_DRAINAGE_PATTERN_PATH
}


# <<< Opening in rasterio >>>
# dataset = rasterio.open(FERTILISER_P_PATH)
# data = dataset.read()
# np.max(data)

# <<< Opening in rioxarray >>>
# dataset = rioxarray.open_rasterio(ELEVATION_PATH)
# dataset.name = 'data'
# df = dataset.to_dataframe()


In [2]:
# Display names for the land cover raster bands, in band order: band 1 is the
# first entry, band 2 the second, and so on.
LCM_CLASSES = [
    'Deciduous woodland', 
    'Coniferous woodland', 
    'Arable', 
    'Improved grassland',  # was misspelt 'Improve grassland'
    'Neutral grassland', 
    'Calcareous grassland', 
    'Acid grassland',  
    'Fen', 
    'Heather', 
    'Heather grassland', 
    'Bog',
    'Inland rock', 
    'Saltwater',
    'Freshwater',
    'Supralittoral rock',
    'Supralittoral sediment',
    'Littoral rock',
    'Littoral sediment',
    'Saltmarsh',
    'Urban',
    'Suburban'
    ]

# Load the land cover raster; naming the array gives the dataframe column its
# name ('data') when it is converted below.
lcm = rioxarray.open_rasterio(LAND_COVER_MAP_PATH)
lcm.name = 'data'

# Guard against silently mislabelled columns: the rename further down maps
# band i to LCM_CLASSES[i - 1], so the two lengths have to agree.
assert lcm.sizes['band'] == len(LCM_CLASSES), (
    f"expected {len(LCM_CLASSES)} land cover bands, found {lcm.sizes['band']}"
)

# Long format: one row per (band, y, x) with a single 'data' column.
main_df = lcm.to_dataframe().drop(columns='spatial_ref')
# Quick look at how the raw cell values are distributed across all bands.
print(main_df.value_counts())

# Wide format: one row per (y, x) and one column per band, then replace the
# 1-based band numbers with their class names.
main_df = main_df.unstack(level='band')
main_df = main_df['data'].rename(columns=dict(enumerate(LCM_CLASSES, start=1)))
main_df

data
0       18203710
1         123393
2          71220
3          51085
4          39528
          ...   
89          3191
93          3155
87          3143
82          3140
90          3129
Length: 101, dtype: int64


Unnamed: 0_level_0,band,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Inland rock,Saltwater,Freshwater,Supralittoral rock,Supralittoral sediment,Littoral rock,Littoral sediment,Saltmarsh,Urban,Suburban
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1299500.0,500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1299500.0,1500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1299500.0,2500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1299500.0,3500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1299500.0,4500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500.0,695500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
500.0,696500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
500.0,697500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
500.0,698500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Amount added to both the y and x coordinates of every IHDTM layer so that
# its cell coordinates match the other datasets already held in main_df.
IHDTM_COORD_OFFSET = 25


def shift_index_levels(index, offset):
    """Return a copy of a MultiIndex with `offset` added to every level's values.

    Parameters
    ----------
    index : pandas.MultiIndex
        Index whose level values are numeric (here the y and x coordinates).
    offset : number
        Value added to each level's values.

    Returns
    -------
    pandas.MultiIndex
        Index with the same names and row order but shifted level values.
    """
    return index.set_levels(
        [level_values + offset for level_values in index.levels],
        level=list(range(index.nlevels)),
    )


# Join every IHDTM layer onto main_df as a column named after its layer code.
# NOTE(review): the displayed result contains values such as -9999, -1 and 255
# in rows where every land cover column is 0 - presumably nodata sentinels;
# confirm and mask them before using these columns as model features.
for key, path in IHDTM.items():
    ihdtm_data = rioxarray.open_rasterio(path)
    # squeeze() removes the length-1 band dimension; drop_vars then discards
    # the leftover scalar coordinates so that only y/x remain as the index.
    # (drop_vars replaces DataArray.drop, which is deprecated.)
    ihdtm_data = ihdtm_data.squeeze().drop_vars(["spatial_ref", "band"])
    ihdtm_data.name = key
    ihdtm_df = ihdtm_data.to_dataframe()

    # Adding 25 to x and y coordinates to match other datasets
    ihdtm_df.index = shift_index_levels(ihdtm_df.index, IHDTM_COORD_OFFSET)

    # Drop a column left over from a previous run of this cell first, so that
    # re-running it does not fail on overlapping column names in join().
    main_df = main_df.drop(columns=key, errors='ignore').join(ihdtm_df)
main_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Littoral rock,Littoral sediment,Saltmarsh,Urban,Suburban,HGHT,CCAR,SURF,OUTF,INFL
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1299500.0,500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,-9999,-9999,-1,-1,255
1299500.0,1500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,-9999,-9999,-1,-1,255
1299500.0,2500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,-9999,-9999,-1,-1,255
1299500.0,3500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,-9999,-9999,-1,-1,255
1299500.0,4500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,-9999,-9999,-1,-1,255
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500.0,695500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,-9999,-9999,-1,-1,255
500.0,696500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,-9999,-9999,-1,-1,255
500.0,697500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,-9999,-9999,-1,-1,255
500.0,698500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,-9999,-9999,-1,-1,255


In [4]:
# Maps the output column name to the fertiliser raster it is read from.
fertiliser = {'fert_k' : FERTILISER_K_PATH, 'fert_n' : FERTILISER_N_PATH, 'fert_p' : FERTILISER_P_PATH}


# Join band 1 of every fertiliser raster onto main_df, keyed on (y, x).
for key, path in fertiliser.items():
    dataset = rioxarray.open_rasterio(path)
    dataset.name = key
    # Keep band 1 only, selected before the conversion so that the discarded
    # band never has to be materialised as dataframe rows.
    # NOTE(review): presumably band 1 is the prediction and band 2 its
    # uncertainty (going by the file names) - confirm against the dataset docs.
    df = dataset.sel(band=1).drop_vars(['band', 'spatial_ref']).to_dataframe()
    # Drop a column left over from a previous run of this cell first, so that
    # re-running it does not fail on overlapping column names in join().
    main_df = main_df.drop(columns=key, errors='ignore').join(df)
main_df



Unnamed: 0_level_0,Unnamed: 1_level_0,Deciduous woodland,Coniferous woodland,Arable,Improve grassland,Neutral grassland,Calcareous grassland,Acid grassland,Fen,Heather,Heather grassland,...,Urban,Suburban,HGHT,CCAR,SURF,OUTF,INFL,fert_k,fert_n,fert_p
y,x,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1299500.0,500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,-9999,-9999,-1,-1,255,,,
1299500.0,1500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,-9999,-9999,-1,-1,255,,,
1299500.0,2500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,-9999,-9999,-1,-1,255,,,
1299500.0,3500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,-9999,-9999,-1,-1,255,,,
1299500.0,4500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,-9999,-9999,-1,-1,255,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500.0,695500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,-9999,-9999,-1,-1,255,,,
500.0,696500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,-9999,-9999,-1,-1,255,,,
500.0,697500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,-9999,-9999,-1,-1,255,,,
500.0,698500.0,0,0,0,0,0,0,0,0,0,0,...,0,0,-9999,-9999,-1,-1,255,,,


In [5]:
# Label data

# Load the invasive-birds raster and flatten it into a dataframe with a single
# 'data' column (the name assigned to the array below).
dataset = rioxarray.open_rasterio(INVASIVE_BIRDS_PATH)
dataset.name = 'data'
# squeeze() removes any length-1 dimensions; drop_vars then discards the
# 'spatial_ref' and 'band' coordinates so they do not become dataframe columns.
# (drop_vars replaces DataArray.drop, which is deprecated.)
df = dataset.squeeze().drop_vars(["spatial_ref", "band"]).to_dataframe()
df['data']

RasterioIOError: Datasets/: Permission denied

KeyError: "None of [MultiIndex([('294035', '394093')],\n           names=['y', 'x'])] are in the [index]"