In [1]:
import os
import json
import pickle
import numpy as np
import pandas as pd
import xarray as xr
import cv2
from datetime import datetime
from PIL import Image
from libtiff import TIFF
import matplotlib.pyplot as plt
from dea_tools.plotting import display_map, rgb, xr_animation

In [2]:
def find_timestamps(directory):
    """Collect the unique scene timestamps found in a PSScene download directory.

    Every entry name in `directory` is cut down to its first 23 characters, so all
    entries that share the same 23-character prefix collapse into one timestamp.

    Parameters
    ----------
    directory: str
        Path of the directory to list

    Returns
    -------
    timestamps: list of str
        A sorted list of strings taken as the first 23 characters in each filename (except PSScene_collection.json)
    """
    timestamps = {filename[:23] for filename in os.listdir(directory)}
    # 'PSScene_collection.json' is exactly 23 characters long, so it survives the slicing unchanged.
    # discard() rather than remove(): a directory without that file must not raise KeyError.
    timestamps.discard('PSScene_collection.json')
    return sorted(timestamps)

In [3]:
def find_bboxs(directory, timestamps, tiff_prefix):
    """Read the projected bounding box of each scene from its JSON metadata file.

    For every timestamp the file `directory + timestamp + ".json"` is parsed and the
    'proj:bbox' entry of the asset named `timestamp + tiff_prefix + "_tif"` is collected.

    Parameters
    ----------
    directory: str
        Directory holding the metadata files; it is concatenated directly with the
        filename, so it must end with a path separator
    timestamps: list of str
        Scene identifiers, one metadata file per entry
    tiff_prefix: str
        Text between the timestamp and "_tif" in the asset key

    Returns
    -------
    bboxs: list of (list of float)
        The bounding box of each image using the local EPSG:32755 coordinate reference system (as opposed to the global EPSG:6933 CRS)
    """
    bboxs = []
    for timestamp in timestamps:
        filename = directory + timestamp + ".json"
        # Context manager so the file handle is closed even if parsing fails
        with open(filename) as file:
            metadata = json.load(file)
        asset_key = timestamp + tiff_prefix + "_tif"
        bbox = metadata['assets'][asset_key]['proj:bbox']  # epsg32755 (Local)
        # bbox = metadata['bbox']  # epsg6933 (Global)
        bboxs.append(bbox)
    return bboxs

In [4]:
def find_max_bbox(bboxs):
    """Compute the box that encloses every bounding box in `bboxs`.

    Parameters
    ----------
    bboxs: list of (list of float)
        Bounding boxes; entries 0 and 1 are treated as lower bounds, entries 2 and 3 as upper bounds

    Returns
    -------
    max_bbox: list of float
        [minimum of entry 0, minimum of entry 1, maximum of entry 2, maximum of entry 3]
    """
    summary = pd.DataFrame(bboxs).describe()
    # (statistic row, bbox column) to pick for each of the four output entries
    corner_stats = (("min", 0), ("min", 1), ("max", 2), ("max", 3))
    return [summary.at[stat, column] for stat, column in corner_stats]

In [5]:
def extract_good_timestamps(timestamps, bboxs, max_bbox):
    """Keep only the timestamps whose bounding box equals the maximum bounding box.

    `timestamps` and `bboxs` are walked in parallel, so entry i of one must describe
    the same scene as entry i of the other.

    Returns
    -------
    timestamps: list of str
    """
    matching = []
    for stamp, box in zip(timestamps, bboxs):
        if box == max_bbox:
            matching.append(stamp)
    return matching

In [6]:
def _read_tiff(filename):
    """Read the image stored in `filename` and close the TIFF handle afterwards."""
    tif = TIFF.open(filename)
    try:
        return tif.read_image()
    finally:
        tif.close()


def load_masked_images(directory, timestamps, tiff_prefix, udm_suffix="_3B_udm2_clip.tif"):
    """Load images and mask with udm-2

    For each timestamp the image `directory + timestamp + tiff_prefix + ".tif"` and the
    mask `directory + timestamp + udm_suffix` are read. Pixels where the first udm2 band
    is 0 are set to NaN in every band of the image.

    Info on PlanetScope bands: https://developers.planet.com/docs/apis/data/sensors/
    Info on udm2 bands: https://developers.planet.com/docs/data/udm-2/

    Parameters
    ----------
    directory: str
        Directory holding the files; concatenated directly with the filename, so it
        must end with a path separator
    timestamps: list of str
    tiff_prefix: str
        Text between the timestamp and ".tif" in the image filename
    udm_suffix: str
        Text after the timestamp in the udm2 mask filename

    Returns
    -------
    images: list of 3d-array
        One masked float array per timestamp.
        Assumes both files are read band-first, i.e. (band, row, column) - TODO confirm
    """
    images = []
    for timestamp in timestamps:
        image = _read_tiff(directory + timestamp + tiff_prefix + ".tif")
        udm2 = _read_tiff(directory + timestamp + udm_suffix)

        # First udm2 band, presumably the "clear" map described in the linked udm2 docs:
        # non-zero values are kept as multipliers, zeros become NaN
        clear_pixels = udm2[0].astype('float')
        clear_pixels[clear_pixels == 0] = np.nan
        masked_image = image * clear_pixels  # broadcasts the 2d mask over every band

        # Append the masked result (the unmasked `image` was appended before, discarding the mask)
        images.append(masked_image)
    return images

In [7]:
def create_datetimes(timestamps):
    """Convert timestamp strings into datetime objects.

    The date and time are read from fixed character positions: year [0:4], month [4:6],
    day [6:8], hour [9:11], minute [11:13] and second [13:15]. Character 8 and anything
    from position 15 onwards are ignored.

    Parameters
    ----------
    timestamps: list of str

    Returns
    -------
    datetimestamps: list of DateTime

    """
    # Positions of year, month, day, hour, minute, second - in datetime() argument order
    field_slices = (
        slice(0, 4), slice(4, 6), slice(6, 8),
        slice(9, 11), slice(11, 13), slice(13, 15),
    )
    return [
        datetime(*(int(stamp[field]) for field in field_slices))
        for stamp in timestamps
    ]

In [30]:
def calc_truncated_columns(shape_planetscope, shape_sentinel):
    """Number of PlanetScope columns to drop so its aspect ratio matches Sentinel's.

    Both shapes are indexed as (entry 0, entry 1). The PlanetScope entry 0 is scaled by
    the Sentinel ratio entry 1 / entry 0 and rounded to get the column count to keep;
    the return value is how many of the PlanetScope columns are left over.
    """
    sentinel_rows, sentinel_columns = shape_sentinel[0], shape_sentinel[1]
    planetscope_rows, planetscope_columns = shape_planetscope[0], shape_planetscope[1]
    columns_to_keep = round(planetscope_rows * (sentinel_columns / sentinel_rows))
    return planetscope_columns - columns_to_keep

In [9]:
def create_lat_lon(bbox, shape):
    """ Create the x and y coordinate values for a raster covering `bbox`

    The x values are built from bbox[0]/bbox[2] and the y values from bbox[1]/bbox[3].
    (Previously the two axes were swapped, so `x` held values from the y extent and
    vice versa.) Each coordinate marks the starting edge of its pixel.

    Parameters
    ----------
    bbox: list of float
        [min_x, min_y, max_x, max_y] in the coordinate reference system of the raster
    shape: list of int
        (number of rows, number of columns) of the raster

    Returns
    -------
    x: 1d-array
        `shape[1]` values increasing from min_x
    y: 1d-array
        `shape[0]` values decreasing from max_y.
        Assumes a north-up raster, i.e. row 0 is the max_y edge - TODO confirm
    """
    min_x, min_y, max_x, max_y = bbox
    n_rows, n_columns = shape[0], shape[1]
    pixel_width = (max_x - min_x) / n_columns
    pixel_height = (max_y - min_y) / n_rows
    # offset + index * step always yields exactly n values; np.arange with a float step
    # can return one element too many because of rounding
    x = min_x + np.arange(n_columns) * pixel_width
    y = max_y - np.arange(n_rows) * pixel_height
    return x, y

In [10]:
# Record when the run started so the elapsed time can be printed at the end of the notebook
start = datetime.now()
print(start)

2024-04-16 15:31:07.796549


In [11]:
# This Sentinel data was generated and downloaded by running "Sentinel_download.ipynb" on the Digital Earth Australia Sandbox
# NOTE(review): pickle.load can execute arbitrary code - only load pickle files from a trusted source
with open('sentinel_arbo_4bands_2019-2024.pickle', 'rb') as handle:
    ds_sentinel = pickle.load(handle)
# Shape of the first time slice of the red band; the PlanetScope images are resized to this shape below
sentinel_shape = ds_sentinel['nbart_red'][0].shape
sentinel_shape

(660, 657)

In [12]:
# Text that follows the timestamp in the filenames and asset keys of the 8-band PlanetScope images
tiff_prefix = "_3B_AnalyticMS_SR_8b_clip"

# Directory containing the 8-band PlanetScope data, and the pickle file the resulting dataset is written to
directory = "../Planet/Farms/ARBO/47bcb1be-9417-477d-9e6c-a1706bfe98da/PSScene/"
outpath = 'planetscope_arbo_8bands_2020-2021.pickle'

# Notebook times out when trying to load all 8 bands from this larger dataset. Probably need to convert to a .py file.
# directory = "../Planet/Farms/ARBO/7578f778-c226-4ac5-b466-9859407b67df/PSScene/"
# outpath = 'planetscope_arbo_8bands_2021-2024.pickle'

directory

'../Planet/Farms/ARBO/47bcb1be-9417-477d-9e6c-a1706bfe98da/PSScene/'

In [13]:
# List the unique scene timestamps present in the download directory
timestamps = find_timestamps(directory)
timestamps[0]

'20200319_231555_54_2310'

In [14]:
# Read one bounding box per timestamp from the scene metadata (same order as `timestamps`)
bboxs = find_bboxs(directory, timestamps, tiff_prefix)
len(bboxs)

167

In [15]:
# Box enclosing all of the scene bounding boxes
max_bbox = find_max_bbox(bboxs)
max_bbox

[685566.0, 6089898.0, 692232.0, 6096501.0]

In [16]:
# Can remove this if we get the padding working correctly
# Keep only the scenes whose bounding box equals max_bbox.
# Guard against re-running this cell: once filtered, `timestamps` no longer lines up with
# `bboxs` and zip() would silently pair timestamps with the wrong bounding boxes.
assert len(timestamps) == len(bboxs), "timestamps and bboxs are misaligned - re-run the find_timestamps cell first"
timestamps = extract_good_timestamps(timestamps, bboxs, max_bbox)
len(timestamps)

98

In [17]:
# Read the 8-band image and its udm2 mask for every remaining timestamp
images = load_masked_images(directory, timestamps, tiff_prefix)



In [18]:
# Stack the per-scene arrays into one float array; this only works when every scene has the same shape
image_array = np.array(images, dtype=float)
image_array.shape

(98, 8, 2201, 2222)

In [31]:
# Number of columns to drop from a single PlanetScope band so its aspect ratio matches the Sentinel data
truncated_columns = calc_truncated_columns(image_array[0][0].shape, sentinel_shape)
truncated_columns

31

In [32]:
# Drop the first `truncated_columns` columns of every band with a single slice on the last axis
# (replaces a Python-level loop over every scene and band).
# .copy() keeps the result independent of image_array, as the rebuilt array was before.
truncated_images = image_array[..., truncated_columns:].copy()
truncated_images.shape

(98, 8, 2201, 2191)

In [38]:
# cv2.resize takes its target size as (width, height), the reverse of the (rows, columns) order of sentinel_shape
reversed_shape = sentinel_shape[1], sentinel_shape[0]
# Resize every band of every scene to the Sentinel shape
resized_images = np.array([[cv2.resize(band, reversed_shape) for band in image] for image in truncated_images])
resized_images.shape

(98, 8, 660, 657)

In [39]:
# Parse the timestamp strings into datetime objects for the time coordinate
datetimestamps = create_datetimes(timestamps)
datetimestamps[0]

datetime.datetime(2020, 3, 19, 23, 15, 57)

In [40]:
# Coordinate values for the resized images
# NOTE(review): max_bbox describes the extent of the untruncated images, but the first
# `truncated_columns` columns were dropped before resizing - confirm whether the bbox should be shifted to match
x, y = create_lat_lon(max_bbox, sentinel_shape)
len(x), len(y)

(657, 660)

In [41]:
# Swap the first two axes so the band axis comes first and each band can be selected with a single index
transposed_images = resized_images.transpose(1,0,2,3)
transposed_images.shape

(8, 98, 660, 657)

In [42]:
# Variable name for each PlanetScope band, listed in the same order as the first axis of transposed_images
band_names = [
    "Coastal Blue", "nbart_blue", "Green I", "nbart_green",
    "Yellow", "nbart_red", "Red Edge", "nbart_nir_1",
]
band_dims = ["time", "y", "x"]

# One (time, y, x) data variable per band, sharing the time/y/x coordinates
ds_planetscope = xr.Dataset(
    data_vars={
        name: (band_dims, transposed_images[index])
        for index, name in enumerate(band_names)
    },
    coords={
        "time": datetimestamps,
        "y": ("y", y),
        "x": ("x", x),
    },
)
ds_planetscope = ds_planetscope.odc.assign_crs(crs='EPSG:32755')  # Required for xr_animation
ds_planetscope

In [43]:
# Write the dataset to `outpath` as a pickle file, using the newest pickle protocol available
with open(outpath, 'wb') as handle:
    pickle.dump(ds_planetscope, handle, protocol=pickle.HIGHEST_PROTOCOL)
outpath

'planetscope_arbo_8bands_2020-2021.pickle'

In [28]:
# Print the time elapsed since `start` was recorded
end = datetime.now()
print(end-start)

0:05:05.851762


In [29]:
# Took 5 mins for unpadded 8 band planetscope 2020-2021 (98 images)