In [24]:
import geopandas
import json
import shapely
import shapely.geometry
import xarray
import rasterio
import rioxarray
import os
import fiona
import nasa_hls
import urllib.request as urlreq
import pandas as pd
import numpy as np
import requests
import xmltodict
import shutil
import datetime
import boto3
import pyproj


from shapely.ops import transform
from shapely.geometry import Point
from shapely.geometry import Polygon
from pystac_client import Client 
from collections import defaultdict
from glob import glob
from rasterio.enums import Resampling
from rasterio import Affine
from rasterio.crs import CRS
import matplotlib.pyplot as plt
from subprocess import Popen, PIPE
from tqdm import tqdm
from netrc import netrc
from subprocess import Popen
from platform import system
from getpass import getpass
from rasterio.session import AWSSession
from rasterio.enums import Resampling
from pathlib import Path

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

%matplotlib inline

In [50]:
cdl_file = "/data/requirements/2022_30m_cdls_clipped.tif"

In [51]:
track_df = pd.read_csv(req_path + "track_df.csv")

In [52]:
def reproject_hls(tile_path, 
                  target_crs ="EPSG:5070", 
                  remove_original = True, 
                  resampling_method = Resampling.bilinear):
    
    """
    This function receives the path to a specific HLS tile and reproject it to the targeting crs.
    The option of removing the raw HLS tile is provided
    
    Assumptions:
    - tile_path is a full path that end with .tif
    
    
    Inputs:
    - tile_path: The full path to a specific HLS tile
    - target_crs: The crs that you wish to reproject the tile to, default is EPSG 4326
    - remove_original: The option to remove raw HLS tile after reprojecting, default is True
    - resampling_method: The method that rioxarray use to reproject, default is bilinear
    """
    

    xds = rioxarray.open_rasterio(tile_path)
    cdl = rioxarray.open_rasterio(cdl_file)
    xds_new = xds.rio.reproject_match(cdl, resampling = resampling_method)
    if remove_original:
        if Path(tile_path).is_file():
            os.remove(tile_path)
        xds_new.rio.to_raster(raster_path = tile_path)
    else:
        xds_new.rio.to_raster(raster_path = tile_path.replace(".tif", ".reproject.tif"))
        

In [53]:
track_df.head()

Unnamed: 0,tile,timestep,date,save_path,filename
0,T10SDJ,0,2022-03-08,/data/tiles/HLS.S30.T10SDJ.2022067T190231.v2.0/,HLS.S30.T10SDJ.2022067T190231.v2.0
1,T10SDJ,1,2022-07-16,/data/tiles/HLS.S30.T10SDJ.2022197T185931.v2.0/,HLS.S30.T10SDJ.2022197T185931.v2.0
2,T10SDJ,2,2022-09-29,/data/tiles/HLS.S30.T10SDJ.2022272T190159.v2.0/,HLS.S30.T10SDJ.2022272T190159.v2.0
3,T10SEH,0,2022-03-10,/data/tiles/HLS.S30.T10SEH.2022069T185109.v2.0/,HLS.S30.T10SEH.2022069T185109.v2.0
4,T10SEH,1,2022-06-23,/data/tiles/HLS.S30.T10SEH.2022174T184931.v2.0/,HLS.S30.T10SEH.2022174T184931.v2.0


In [54]:
tiles_to_reproj = track_df.tile.tolist()

In [58]:
def hls_process (dataframe, 
                 bands = ["B02", "B03", "B04", "B8A", "B11", "B12", "Fmask"],
                 remove_original = True):
    tiles_to_reproj = dataframe.tile.tolist()
    for tile in tqdm(tiles_to_reproj):
        path_df = dataframe[dataframe.tile == tile]
        assert len(path_df) == 3 
        for i in range(3):
            for band in bands:
                tile_path = f"{path_df.iloc[i].save_path}{path_df.iloc[i].filename}.{band}.tif"
                if band == "Fmask":
                    reproject_hls(tile_path, remove_original, resampling_method = Resampling.nearest)
                else :
                    reproject_hls(tile_path, remove_original)
        break
                    

In [59]:
hls_process(track_df, remove_original = False)

  0%|                                                                                          | 0/1752 [01:29<?, ?it/s]
