In [1]:
import numpy as np
import rasterio
from rasterio.mask import mask
from rasterio.features import bounds as feature_bounds
import pandas as pd
import geopandas as gpd
import fiona

import numpy.ma as ma # for masked arrays. This allows us to handle NAs for integer arrays

import time 



#todo: align the subnational subtiffs to the overall 100m tif. 

In [2]:
nuts_path = "in_data/admin/Forest_area_PL.geojson"
current_nutsid = 'PL72'

In [3]:
admin_stats = "in_data/admin/PL_landcover.csv"

In [4]:
urban_mask_path = 'in_data/CLC/u2018_clc2018_v2020_20u1_raster100m/DATA/PL_urbanmask_1xx.tif'

In [5]:

foresttif_path    = "out_data/PL_expert_map_integer.tif"
grasslandtif_path = "in_data/Corine_landcover/Grass_ptobability_final_layer_Poland.tif"
croplandtif_path  = "in_data/Corine_landcover/Crops_ptobability_final_layer_Poland.tif"


In [6]:
aggregates = pd.read_csv(admin_stats)
aggregates['misc_ha'] = 0
aggregates['total_ha'] = 0
aggregates.loc[aggregates.NUTSID == "PL71",'total_ha'].item()

0

In [7]:
# Extract features (polygons) from the GeoJSON file using fiona
with fiona.open(nuts_path, "r") as geojson:
    features = [feature for feature in geojson]

In [8]:
# Extract features (polygons) from the GeoJSON file using fiona
with fiona.open(nuts_path, "r") as geojson:
    features = [feature for feature in geojson if feature['properties']['id'] == current_nutsid]

In [9]:
# here we just print the amout of data in a NUTS region

arrays = []

for input_path in [grasslandtif_path, croplandtif_path, foresttif_path]:
    
    
    print(input_path)
    with rasterio.open(input_path) as src:
        for feature in features:
            
            geometry = feature["geometry"]
            nutsid = feature['properties']['id']


            # Mask and crop the raster using the current polygon
            out_image, out_transform = mask(src, [geometry], invert=False, crop=True)
            
            west =  out_transform[2]
            north = out_transform[5]
            
            
            resolution = src.res  # (pixel width, pixel height)

            # Get CRS
            crs = src.crs
            if crs is not None:
                # Extract EPSG code
                epsg_code = crs.to_epsg()  # This will be None if the EPSG code can't be determined
            else:
                epsg_code = None

            affine = out_transform
            array = out_image
            array = np.squeeze(array, axis = 0)
            
            unmasked_pixels = np.count_nonzero(array != 255)
            
            # fill in the pandas admin data
            if input_path == 'in_data/Corine_landcover/Crops_ptobability_final_layer_Poland.tif':
                aggregates.loc[aggregates.NUTSID == nutsid,'total_ha'] = unmasked_pixels
            

            arrays.append(array)
            

            print(f"{nutsid}: {array.shape}. {unmasked_pixels} unmasked pixels")
            
print(epsg_code)
print(resolution)

in_data/Corine_landcover/Grass_ptobability_final_layer_Poland.tif
PL72: (1272, 1497). 1170883 unmasked pixels
in_data/Corine_landcover/Crops_ptobability_final_layer_Poland.tif
PL72: (1272, 1497). 1170883 unmasked pixels
out_data/PL_expert_map_integer.tif
PL72: (1272, 1497). 1148027 unmasked pixels
3035
(100.0, 100.0)


In [10]:
affine

Affine(100.0, 0.0, 5003700.0,
       0.0, -100.0, 3188700.0)

In [11]:
west

5003700.0

In [12]:
array3d = np.stack(arrays, axis = 0)


In [13]:
array3d

array([[[255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        ...,
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255]],

       [[255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        ...,
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255]],

       [[255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        ...,
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255],
        [255, 255, 255, ..., 255, 255, 255]]], dtype=uint8)

In [14]:
aggregates['misc_ha'] = aggregates['total_ha'] - aggregates['forest_ha'] - aggregates['arable_ha'] - aggregates['grass_ha']

aggregates

Unnamed: 0,NUTSID,forest_ha,arable_ha,grass_ha,misc_ha,total_ha
0,PL,9254668,11009210,3149870,-23413748,0
1,PL92,830702,1224200,475490,-2530392,0
2,PL22,394773,278930,81700,-755403,0
3,PL51,595054,711890,131990,-1438934,0
4,PL52,250928,446400,37410,-734738,0
5,PL61,421942,973940,99600,-1495482,0
6,PL41,769351,1479650,237570,-2486571,0
7,PL62,762736,606550,314380,-1683666,0
8,PL42,817085,651860,153440,-1622385,0
9,PL63,667061,613740,127130,-1407931,0


In [15]:
array_mask = array3d == 255
array_mask.shape

(3, 1272, 1497)

In [16]:
masked_array = ma.array(array3d, mask=array_mask)
masked_array.mask

array([[[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True]],

       [[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True]],

       [[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  Tr

In [17]:
# Flatten the array into a list of (row_index, column_index, value) tuples
flattened_with_indices = [(i, j, k, masked_array[i, j, k]) 
                          for i in range(masked_array.shape[0]) 
                          for j in range(masked_array.shape[1]) 
                          for k in range(masked_array.shape[2]) 
                          if not ma.is_masked(masked_array[i, j, k])]

flattened_with_indices

[(0, 0, 415, 31),
 (0, 1, 415, 31),
 (0, 1, 416, 0),
 (0, 1, 417, 23),
 (0, 1, 418, 31),
 (0, 1, 419, 0),
 (0, 1, 420, 0),
 (0, 1, 421, 0),
 (0, 2, 415, 0),
 (0, 2, 416, 0),
 (0, 2, 417, 53),
 (0, 2, 418, 31),
 (0, 2, 419, 0),
 (0, 2, 420, 0),
 (0, 2, 421, 0),
 (0, 2, 422, 0),
 (0, 2, 423, 0),
 (0, 2, 424, 53),
 (0, 2, 425, 31),
 (0, 2, 426, 0),
 (0, 2, 427, 0),
 (0, 3, 415, 0),
 (0, 3, 416, 0),
 (0, 3, 417, 0),
 (0, 3, 418, 0),
 (0, 3, 419, 31),
 (0, 3, 420, 0),
 (0, 3, 421, 0),
 (0, 3, 422, 0),
 (0, 3, 423, 0),
 (0, 3, 424, 23),
 (0, 3, 425, 0),
 (0, 3, 426, 31),
 (0, 3, 427, 0),
 (0, 3, 428, 0),
 (0, 3, 429, 0),
 (0, 3, 430, 0),
 (0, 3, 431, 0),
 (0, 4, 415, 0),
 (0, 4, 416, 0),
 (0, 4, 417, 0),
 (0, 4, 418, 53),
 (0, 4, 419, 0),
 (0, 4, 420, 0),
 (0, 4, 421, 0),
 (0, 4, 422, 0),
 (0, 4, 423, 0),
 (0, 4, 424, 23),
 (0, 4, 425, 0),
 (0, 4, 426, 0),
 (0, 4, 427, 0),
 (0, 4, 428, 0),
 (0, 4, 429, 0),
 (0, 4, 430, 0),
 (0, 4, 431, 0),
 (0, 4, 432, 0),
 (0, 4, 433, 0),
 (0, 4, 434, 0),
 

In [18]:
# Sort the list by value in descending order
flattened_sorted = sorted(flattened_with_indices, key=lambda x: x[3], reverse=True)
len(flattened_sorted)

3489793

In [19]:
flattened_sorted

[(0, 62, 467, 100),
 (0, 63, 467, 100),
 (0, 104, 399, 100),
 (0, 105, 399, 100),
 (0, 106, 398, 100),
 (0, 106, 399, 100),
 (0, 106, 400, 100),
 (0, 119, 881, 100),
 (0, 119, 882, 100),
 (0, 144, 861, 100),
 (0, 144, 862, 100),
 (0, 146, 855, 100),
 (0, 148, 856, 100),
 (0, 157, 780, 100),
 (0, 169, 769, 100),
 (0, 172, 770, 100),
 (0, 180, 427, 100),
 (0, 180, 770, 100),
 (0, 181, 769, 100),
 (0, 181, 770, 100),
 (0, 182, 769, 100),
 (0, 182, 770, 100),
 (0, 183, 764, 100),
 (0, 184, 763, 100),
 (0, 184, 764, 100),
 (0, 185, 761, 100),
 (0, 185, 762, 100),
 (0, 185, 763, 100),
 (0, 185, 764, 100),
 (0, 186, 761, 100),
 (0, 186, 762, 100),
 (0, 186, 763, 100),
 (0, 187, 260, 100),
 (0, 187, 261, 100),
 (0, 187, 263, 100),
 (0, 187, 264, 100),
 (0, 187, 265, 100),
 (0, 187, 761, 100),
 (0, 187, 762, 100),
 (0, 188, 261, 100),
 (0, 188, 264, 100),
 (0, 188, 265, 100),
 (0, 188, 266, 100),
 (0, 188, 267, 100),
 (0, 189, 250, 100),
 (0, 189, 254, 100),
 (0, 189, 264, 100),
 (0, 189, 265, 

In [20]:
d = [x[3] for x in flattened_sorted]

# Create an index for each float
index = list(range(len(d)))

# Create a scatter plot
plt.scatter(index, d)

# Adding labels for clarity (optional)
plt.xlabel('Index')
plt.ylabel('Value')
plt.title('Scatter Plot of Floats')

# Show the plot
plt.show()


NameError: name 'plt' is not defined

In [19]:
# we need to align the urban mask with the array!!!!

In [29]:
# Linda asked me to hard-code fix the urban areas. 
# load an urban mask. flatten it, only take the urban pixels and put them at the top of the flattened_sorted list so they get assigned first
# and added to the seen pixels set automatically. 

with rasterio.open(urban_mask_path) as src:
    urban_mask, urban_mask_transform = mask(src, [geometry], invert=False, crop=True)
    
#     urban_mask = ma.array(urban_mask, mask=array_mask[0,:,:])


fixed_pixels =  [(3, j, k, 100* urban_mask[0, j, k]) 
                              for j in range(masked_array.shape[1]) 
                              for k in range(masked_array.shape[2]) 
                              if not ma.is_masked(masked_array[0, j, k]) and urban_mask[0, j, k] == 1  ]


len(fixed_pixels)

84238

In [30]:
array.shape

(1272, 1497)

In [31]:
urban_mask.shape

(1, 1272, 1497)

In [32]:
flattened_sorted = fixed_pixels +  flattened_sorted 

flattened_sorted[:10]

[(3, 10, 415, 100),
 (3, 11, 415, 100),
 (3, 12, 415, 100),
 (3, 12, 423, 100),
 (3, 12, 424, 100),
 (3, 13, 415, 100),
 (3, 13, 416, 100),
 (3, 13, 417, 100),
 (3, 13, 418, 100),
 (3, 13, 419, 100)]

In [33]:
# this is from the input data
counter = [aggregates.loc[aggregates.NUTSID == nutsid,'grass_ha'].item(), 
          aggregates.loc[aggregates.NUTSID == nutsid,'arable_ha'].item(), 
          aggregates.loc[aggregates.NUTSID == nutsid,'forest_ha'].item()]

total_px = 1904184
# this is manually from Linda's PPT
# we need the order grass, crop, forest
counter = [98140, 464080, 339650 , len(fixed_pixels)]

In [34]:
flattened_sorted[15524]

(3, 330, 755, 100)

In [35]:
NUTSID = "PL91"
seen_px = set()
output = np.full(array3d.shape[1:3], 10, dtype = 'int')
output = ma.array(output, mask=array_mask[0])


for i, tupl in enumerate(flattened_sorted):
    idx = tupl[1:3]
    
    if idx in seen_px or counter[tupl[0]] < 1:
        continue
    else:
        output[idx] = tupl[0]
        seen_px.add(idx)
        counter[tupl[0]] -= 1

print(counter)

[0, 0, 0, 0]


In [36]:
from rasterio.transform import from_origin

data = output



# Define the transformation and metadata
transform = from_origin(west, north, resolution[0], resolution[1])
height, width = data.shape
crs = f"EPSG:{epsg_code}"  # Example CRS - replace with the appropriate CRS for your data

# Metadata dictionary
metadata = {
    'driver': 'GTiff',
    'height': height,
    'width': width,
    'count': 1,
    'dtype': 'int16',
    'crs': crs,
    'transform': transform,
    'nodata': 255 
}

# Write to a new TIFF file
with rasterio.open('out_data/predictions/landcover_fillup/PL72_prediction.tif', 'w', **metadata) as dst:
    dst.write(data, 1)


In [37]:
# Counting the frequency of each element
unique_elements, counts = np.unique(output, return_counts=True)

# Creating a dictionary for better readability
frequency = dict(zip(['grassland', 'cropland', 'forest', 'urban', 'misc', 'na'], counts))

print(frequency)

{'grassland': 98140, 'cropland': 464080, 'forest': 339650, 'urban': 84238, 'misc': 184775, 'na': 733301}
