In [None]:
# Bounding box around Fort Collins, CO (longitude / latitude extremes)
min_lon, max_lon = -105.115, -105.032
min_lat, max_lat = 40.521, 40.610

# Bounding box ordered as [min_lon, min_lat, max_lon, max_lat]
bbox_of_interest = [min_lon, min_lat, max_lon, max_lat]

In [None]:
# Open the Planetary Computer STAC API; the modifier signs items in place
# as they are returned by the client.
stac_api_url = "https://planetarycomputer.microsoft.com/api/stac/v1"
catalog = pystac_client.Client.open(stac_api_url, modifier=planetary_computer.sign_inplace)

In [None]:
# Search the Copernicus DEM GLO-30 collection for items intersecting the bounding box.
search = catalog.search(
    collections=["cop-dem-glo-30"],
    bbox=bbox_of_interest,
)
# ItemSearch.items() is the supported replacement for the deprecated get_items().
items = list(search.items())
print(f"Returned {len(items)} items")

In [None]:
# Fail early with a clear message instead of an opaque IndexError on items[0].
if not items:
    raise ValueError("STAC search returned no items for bbox_of_interest")

# Only the first returned item is used.
# NOTE(review): if the bounding box ever spans several DEM items, the remaining
# items are ignored here — confirm that is intended.
signed_asset = planetary_computer.sign(items[0].assets["data"])
data = (
    rioxarray.open_rasterio(signed_asset.href)
    .squeeze()
    .drop_vars("band")  # drop_vars replaces the deprecated DataArray.drop
    #.coarsen({"y": 5, "x": 5})
    #.mean()
)

data.rio.write_crs("EPSG:4326", inplace=True)  # Change "EPSG:4326" to the appropriate CRS if different

# Specify the path where you want to save the TIFF file
output_tif_path = "C:\\Users\\OneDrive\\Desktop\\GLOBALPCL\\DEMTIF\\output_dataDEM.tif"

# Save the data as a GeoTIFF file
data.rio.to_raster(output_tif_path)

In [None]:
# Cut the DEM into overlapping windows and write only the tiles that are exactly widthtile x heighttile pixels
import os
from itertools import product
import rasterio as rio
from rasterio import windows

# Source DEM location
in_path = 'C:\\Users\\OneDrive\\Desktop\\GLOBALPCL\\DEMTIF\\'
input_filename = 'output_dataDEM.tif'

# Destination folder and file-name pattern (filled with column and row offsets)
out_path = 'C:\\Users\\OneDrive\\Desktop\\GLOBALPCL\\DEMTILES3\\'
output_filename = 'tile_{}-{}.tif'

# Tile size in pixels
widthtile = heighttile = 300

def get_tiles(ds, width=widthtile, height=heighttile, stride=10):
    """Yield ``(window, transform)`` pairs that sweep across a raster dataset.

    Window origins advance by ``stride`` pixels along both axes, so windows
    overlap whenever ``stride`` is smaller than the tile size. Each window is
    clipped to the raster extent, which means windows near the right/bottom
    edges can be smaller than ``width`` x ``height``.

    Args:
        ds: Open rasterio dataset; ``ds.meta`` supplies the raster size and
            ``ds.transform`` its affine transform.
        width: Requested window width in pixels.
        height: Requested window height in pixels.
        stride: Step in pixels between consecutive window origins. The
            default of 10 matches the previously hard-coded value; pass the
            tile size to get non-overlapping tiles.

    Yields:
        Tuples of the clipped window and the affine transform of that window.

    Raises:
        ValueError: If ``stride`` is not positive.
    """
    if stride <= 0:
        raise ValueError("stride must be a positive number of pixels")

    ncols, nrows = ds.meta['width'], ds.meta['height']
    offsets = product(range(0, ncols, stride), range(0, nrows, stride))

    # Full-raster window used to clip tiles that run past the edges.
    big_window = windows.Window(col_off=0, row_off=0, width=ncols, height=nrows)
    for col_off, row_off in offsets:
        window = windows.Window(col_off=col_off, row_off=row_off, width=width, height=height).intersection(big_window)
        transform = windows.transform(window, ds.transform)
        yield window, transform

# rasterio does not create missing folders, so make sure the target exists.
os.makedirs(out_path, exist_ok=True)

with rio.open(os.path.join(in_path, input_filename)) as inds:
    tile_width, tile_height = widthtile, heighttile
    nodata = inds.nodata  # Get the NoData value from the dataset
    # NaN never compares equal to itself, so `data != nodata` would mark every
    # pixel as valid when the NoData value is NaN; detect that case explicitly.
    nodata_is_nan = nodata is not None and nodata != nodata
    meta = inds.meta.copy()

    for window, transform in get_tiles(inds):
        # Windows clipped by the raster edge are smaller than requested; skip them.
        if window.width != tile_width or window.height != tile_height:
            continue

        data = inds.read(window=window)
        if nodata is None or nodata_is_nan:
            # No usable NoData value to compare against: `data == data` is True
            # everywhere except at NaN pixels.
            valid_data_mask = (data == data)
        else:
            # Exact comparison against the declared NoData value.
            valid_data_mask = (data != nodata)

        if not valid_data_mask.any():
            continue  # Nothing but NoData inside this tile

        meta['transform'] = transform
        meta['width'], meta['height'] = window.width, window.height
        outpath = os.path.join(out_path, output_filename.format(int(window.col_off), int(window.row_off)))
        with rio.open(outpath, 'w', **meta) as outds:
            outds.write(data)

print("Done")

In [None]:
import os
import rasterio
from rasterio.transform import rowcol
import numpy as np

# Path to the directory containing your TIFF tiles
tiles_directory = 'C:\\Users\\OneDrive\\Desktop\\GLOBALPCL\\DEMTILES3\\'

# Path to the TIFF file containing the PCL data
pcl_tif_path = 'C:\\Users\\OneDrive\\Desktop\\GLOBALPCL\\Input\\pcl_west_wgs_CO.tif'

# List the tiles once; sorting makes the sample order reproducible.
tile_names = sorted(name for name in os.listdir(tiles_directory) if name.endswith('.tif'))
num_files = len(tile_names)
if num_files == 0:
    raise FileNotFoundError(f"No .tif tiles found in {tiles_directory}")

# `images` is allocated when the first usable tile reveals the tile shape and dtype
# (all tiles are expected to share the same shape and band count).
images = None
labels = np.zeros(num_files, dtype=np.float32)
idx = 0      # number of samples stored so far
skipped = 0  # tiles whose centre falls outside the PCL raster

# Open the PCL data TIFF file
with rasterio.open(pcl_tif_path) as pcl_src:
    pcl_data = pcl_src.read(1)  # Read the first band
    pcl_height, pcl_width = pcl_data.shape

    for filename in tile_names:
        filepath = os.path.join(tiles_directory, filename)
        with rasterio.open(filepath) as src:
            # Geographic coordinates of the tile's centre pixel
            center_row, center_col = src.height // 2, src.width // 2
            lon, lat = src.xy(center_row, center_col)

            # Convert those coordinates to a row/col in the PCL raster.
            # NOTE(review): assumes tiles and the PCL raster share a CRS — confirm.
            pcl_row, pcl_col = rowcol(pcl_src.transform, lon, lat)

            # A negative index would silently wrap around in numpy and return
            # the label of an unrelated pixel, so drop out-of-extent tiles.
            if not (0 <= pcl_row < pcl_height and 0 <= pcl_col < pcl_width):
                skipped += 1
                continue

            data = src.read()  # Read all bands of the tile

        if images is None:
            images = np.zeros((num_files, *data.shape), dtype=data.dtype)
        images[idx] = data
        labels[idx] = pcl_data[pcl_row, pcl_col]  # PCL value at the tile centre
        idx += 1

if images is None:
    raise ValueError("No tile centre falls inside the PCL raster; nothing to train on")

# Trim the slots reserved for tiles that were skipped.
images = images[:idx]
labels = labels[:idx]
if skipped:
    print(f"Skipped {skipped} tiles outside the PCL raster extent")

# Check shapes
print("Shape of the images array:", images.shape)
print("Shape of the labels array:", labels.shape)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Put the single band last so each sample is (300, 300, 1), as the CNN expects
images = images.reshape((len(labels), 300, 300, 1))

# Assemble the CNN layer by layer: three conv/pool stages followed by a dense head
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(300, 300, 1)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='linear'))  # Single linear unit: regression output

# Mean squared error loss, with mean absolute error reported as a metric
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Show the layer-by-layer summary
model.summary()

# Fit for one epoch, holding out 20% of the samples for validation
model.fit(images, labels, epochs=1, batch_size=64, validation_split=0.2)

## Optional: persist the trained model to an HDF5 file
#model.save('C:\\Users\\OneDrive\\Desktop\\GLOBALPCL\\SavedModels\\model_300by300.h5')

# # Later, or in another script after re-importing the necessary libraries,
# # load the model back from the file and inspect its architecture:
# loaded_model = tf.keras.models.load_model('my_model.h5')
# loaded_model.summary()

In [None]:
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error, r2_score

# Run the trained model on the same `images` array that was passed to model.fit;
# flatten the output to 1-D so it lines up with `labels`.
predicted_labels = model.predict(images).flatten()

# Actual vs. predicted scatter with a dashed identity line for reference
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(labels, predicted_labels, alpha=0.5)
ax.set_title('Actual vs. Predicted PCL Values')
ax.set_xlabel('Actual Labels')
ax.set_ylabel('Predicted Labels')
ax.grid(True)
label_range = [labels.min(), labels.max()]
ax.plot(label_range, label_range, 'k--')  # Perfect predictions fall on this line
plt.show()

# Error metrics over the same samples
mse = mean_squared_error(labels, predicted_labels)
r2 = r2_score(labels, predicted_labels)
print(f'Mean Squared Error: {mse:.2f}')
print(f'R^2 Score: {r2:.2f}')

In [None]:
import os
from itertools import product
import rasterio as rio
from rasterio import windows

# Source DEM location
in_path = 'C:\\Users\\OneDrive\\Desktop\\GLOBALPCL\\DEMTIF\\'
input_filename = 'output_dataDEM.tif'

# Destination folder and file-name pattern (filled with column and row offsets)
out_path = 'C:\\Users\\OneDrive\\Desktop\\GLOBALPCL\\DEMTILESInference\\'
output_filename = 'tile_{}-{}.tif'

# Inference tile size in pixels
widthtile = heighttile = 500

def get_tiles(ds, width=widthtile, height=heighttile, stride=10):
    """Yield ``(window, transform)`` pairs that sweep across a raster dataset.

    Window origins advance by ``stride`` pixels along both axes, so windows
    overlap whenever ``stride`` is smaller than the tile size. Each window is
    clipped to the raster extent, which means windows near the right/bottom
    edges can be smaller than ``width`` x ``height``.

    Args:
        ds: Open rasterio dataset; ``ds.meta`` supplies the raster size and
            ``ds.transform`` its affine transform.
        width: Requested window width in pixels.
        height: Requested window height in pixels.
        stride: Step in pixels between consecutive window origins. The
            default of 10 matches the previously hard-coded value; pass the
            tile size to get non-overlapping tiles.

    Yields:
        Tuples of the clipped window and the affine transform of that window.

    Raises:
        ValueError: If ``stride`` is not positive.
    """
    if stride <= 0:
        raise ValueError("stride must be a positive number of pixels")

    ncols, nrows = ds.meta['width'], ds.meta['height']
    offsets = product(range(0, ncols, stride), range(0, nrows, stride))

    # Full-raster window used to clip tiles that run past the edges.
    big_window = windows.Window(col_off=0, row_off=0, width=ncols, height=nrows)
    for col_off, row_off in offsets:
        window = windows.Window(col_off=col_off, row_off=row_off, width=width, height=height).intersection(big_window)
        transform = windows.transform(window, ds.transform)
        yield window, transform

# rasterio does not create missing folders, so make sure the target exists.
os.makedirs(out_path, exist_ok=True)

with rio.open(os.path.join(in_path, input_filename)) as inds:
    tile_width, tile_height = widthtile, heighttile
    nodata = inds.nodata  # Get the NoData value from the dataset
    # NaN never compares equal to itself, so `data != nodata` would mark every
    # pixel as valid when the NoData value is NaN; detect that case explicitly.
    nodata_is_nan = nodata is not None and nodata != nodata
    meta = inds.meta.copy()

    # Unlike the training tiles, edge-clipped (smaller) windows are written too.
    for window, transform in get_tiles(inds):
        data = inds.read(window=window)
        if nodata is None or nodata_is_nan:
            # No usable NoData value to compare against: `data == data` is True
            # everywhere except at NaN pixels.
            valid_data_mask = (data == data)
        else:
            # Exact comparison against the declared NoData value.
            valid_data_mask = (data != nodata)

        if not valid_data_mask.any():
            continue  # Nothing but NoData inside this tile

        meta['transform'] = transform
        meta['width'], meta['height'] = window.width, window.height
        outpath = os.path.join(out_path, output_filename.format(int(window.col_off), int(window.row_off)))
        with rio.open(outpath, 'w', **meta) as outds:
            outds.write(data)
print("done")

In [None]:
def extract_patches_overlap(image, transform, patch_size=300, overlap=299):
    """Slide a square window over ``image`` and collect every full-size patch.

    The window advances by ``patch_size - overlap`` pixels along rows and
    columns. For each patch, the coordinate stored alongside it is
    ``transform * (col + patch_size // 2, row + patch_size // 2)``, where
    ``(row, col)`` is the patch's top-left pixel.

    Args:
        image: Array indexed as ``[row, col]``.
        transform: Object supporting ``transform * (col, row)`` and returning
            an ``(x, y)`` pair.
        patch_size: Edge length of each square patch in pixels.
        overlap: Number of pixels shared by neighbouring patches.

    Returns:
        A tuple ``(patches, coords)``: ``patches`` is a numpy array stacking
        the extracted patches, ``coords`` is a list of ``(x, y)`` tuples in
        the same order.
    """
    step = patch_size - overlap
    center_offset = patch_size // 2

    patches, coords = [], []
    for top in range(0, image.shape[0] - patch_size + 1, step):
        for left in range(0, image.shape[1] - patch_size + 1, step):
            window = image[top:top + patch_size, left:left + patch_size]
            # Keep only windows that are exactly patch_size x patch_size.
            if window.shape[0] != patch_size or window.shape[1] != patch_size:
                continue
            patches.append(window)
            # Map the patch centre (col, row) through the transform.
            x, y = transform * (left + center_offset, top + center_offset)
            coords.append((x, y))
    return np.array(patches), coords

# Load one inference tile: its first band plus the affine transform needed to
# turn pixel positions into map coordinates.
inference_tile_path = 'C:\\Users\\OneDrive\\Desktop\\GLOBALPCL\\DEMTILESInference\\tile_0-10.tif'
with rasterio.open(inference_tile_path) as src:
    transform = src.transform
    large_image = src.read(1)

# Cut the tile into overlapping patches (function defaults: 300 px patches)
image_patches, patch_coords = extract_patches_overlap(large_image, transform)

# Append the channel axis expected by the CNN, then predict one value per patch
image_patches = image_patches.reshape((-1, 300, 300, 1))
predictions = model.predict(image_patches)

In [None]:
import geopandas as gpd
from shapely.geometry import Point
import rasterio as rio
import geopandas as gpd
import xarray as xr
import rioxarray

import xarray as xr

# Create a GeoDataFrame with one point per patch centre
gdf = gpd.GeoDataFrame({
    'geometry': [Point(x, y) for x, y in patch_coords]
}, crs="EPSG:4326")  # Make sure to set the correct coordinate reference system

# Extract x and y coordinates
gdf["x"] = gdf.geometry.x
gdf["y"] = gdf.geometry.y
# model.predict returns one row per patch; flatten to 1-D so each point gets a
# scalar prediction regardless of the trailing output axis.
gdf['prediction'] = predictions.ravel()
# Now you have gdf with x, y columns along with geometry
print(gdf.head())

# Pivot the points into a (y, x) grid. Sort y in descending order so the
# GeoTIFF is written north-up, and attach the CRS so the file is georeferenced
# (without write_crs the output raster carries no CRS).
da = (
    gdf.set_index(["y", "x"])
    .prediction
    .to_xarray()
    .sortby("y", ascending=False)
    .rio.write_crs("EPSG:4326")
)
da.rio.to_raster("C:\\Users\\OneDrive\\Desktop\\GLOBALPCL\\OutputTIF\\output_file0_10_300w300h.tif")