# Random Forest Data Preparation

## Import miscellaneous useful libraries

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from datetime import datetime
import os
import requests
import boto3
import numpy as np
import xarray as xr
import rasterio as rio
from rasterio.session import AWSSession
from rasterio.plot import show
import rioxarray
import geoviews as gv
import hvplot.xarray
import holoviews as hv
import csv
gv.extension('bokeh', 'matplotlib')

## Get Credentials
See the NDSI notebook for a detailed breakdown of this credential setup.

In [3]:
# URL queried below for temporary S3 credentials.
s3_cred_endpoint = 'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials'
def get_temp_creds(timeout=30):
    """Request temporary S3 credentials from ``s3_cred_endpoint``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The decoded JSON body of the response. The code that follows reads
        the ``accessKeyId``, ``secretAccessKey`` and ``sessionToken`` keys
        from it.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(s3_cred_endpoint, timeout=timeout)
    # Surface a rejected request here instead of letting it show up later as
    # a JSON decoding error or a missing-key error.
    response.raise_for_status()
    return response.json()
# Turn the temporary credentials into a boto3 session pinned to us-west-2.
temp_creds_req = get_temp_creds()
session = boto3.Session(
    aws_access_key_id=temp_creds_req['accessKeyId'],
    aws_secret_access_key=temp_creds_req['secretAccessKey'],
    aws_session_token=temp_creds_req['sessionToken'],
    region_name='us-west-2',
)

# Both GDAL cookie options point at the same file in the user's home directory.
cookie_path = os.path.expanduser('~/cookies.txt')
rio_env = rio.Env(
    AWSSession(session),
    GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR',
    GDAL_HTTP_COOKIEFILE=cookie_path,
    GDAL_HTTP_COOKIEJAR=cookie_path,
)
# The environment is entered by hand rather than through a `with` block;
# nothing in this cell exits it again.
rio_env.__enter__()

<rasterio.env.Env at 0x7fea33d83640>

## Load files

In [4]:
# Read the S3 link lists (one text file per band) into `links`, keyed by band
# code. Each file is read inside a `with` block so its handle is closed right
# away instead of being left for the garbage collector.
links = {}
for band_code in (
    "02",  # L30 and S30 for BLUE
    "03",  # L30 and S30 for GREEN
    "04",  # L30 and S30 for RED
    "06",  # L30 for SWIR 1
    "11",  # S30 for SWIR 1
):
    with open(f'../data/S3_T11SLB_B{band_code}_Links.txt') as link_file:
        links[band_code] = link_file.read().splitlines()

# Print the first link of every band as a quick sanity check.
for k in links.keys():
    print(links[k][0])

s3://lp-prod-protected/HLSS30.020/HLS.S30.T11SLB.2019182T183929.v2.0/HLS.S30.T11SLB.2019182T183929.v2.0.B02.tif
s3://lp-prod-protected/HLSS30.020/HLS.S30.T11SLB.2019182T183929.v2.0/HLS.S30.T11SLB.2019182T183929.v2.0.B03.tif
s3://lp-prod-protected/HLSS30.020/HLS.S30.T11SLB.2019182T183929.v2.0/HLS.S30.T11SLB.2019182T183929.v2.0.B04.tif
s3://lp-prod-protected/HLSL30.020/HLS.L30.T11SLB.2019189T183338.v2.0/HLS.L30.T11SLB.2019189T183338.v2.0.B06.tif
s3://lp-prod-protected/HLSS30.020/HLS.S30.T11SLB.2019182T183929.v2.0/HLS.S30.T11SLB.2019182T183929.v2.0.B11.tif


In [5]:
# Output key -> (key into `links`, position in that link list) for each HLS
# raster to open.
# NOTE(review): the "S"/"L" prefixes presumably stand for the HLS S30 and L30
# products; confirm that each index picks the intended granule.
band_sources = {
    "S02": ("02", 4),
    "S03": ("03", 4),
    "S04": ("04", 4),
    "S11": ("11", 3),
    "L02": ("02", 2),
    "L03": ("03", 2),
    "L04": ("04", 2),
    "L06": ("06", 0),
}

# `chunks=True` was previously misspelled `chuncks`. rioxarray forwards
# keywords it does not recognise to `rasterio.open`, so the misspelling was
# accepted silently and the rasters were never opened as lazy, chunked arrays.
hls_bands = {
    name: rioxarray.open_rasterio(links[band][idx], chunks=True).squeeze('band', drop=True)
    for name, (band, idx) in band_sources.items()
}

lidar = rioxarray.open_rasterio("../data/ASO_50M_SD_USCASF_20190714.tif", chunks=True).squeeze('band', drop=True)
# Mask every cell that is not above -100 (presumably the raster's no-data
# sentinel; confirm) and drop rows/columns that end up entirely masked.
lidar = lidar.where(-100 < lidar, drop=True)
# print(lidar)
# lidar.hvplot.image(x='x', y='y', cmap='coolwarm', rasterize=True, width=800, height=600, colorbar=True)

In [6]:
# Reproject every HLS band to match the lidar raster, replacing each
# dictionary entry in place.
for name, raster in hls_bands.items():
    hls_bands[name] = raster.rio.reproject_match(lidar)

In [7]:
# Replace each band raster with a flat 1-D copy of its values.
for name, raster in hls_bands.items():
    hls_bands[name] = raster.values.flatten()

In [8]:
# Keep lidar values above 0.01 and turn every other cell into NaN; the grid
# itself is left unchanged because nothing is dropped.
# NOTE(review): 0.01 is presumably a minimum snow depth; confirm the units.
thresholded = lidar.where(lidar > 0.01)

In [9]:
# Flatten the lidar raster and mark the cells that hold a real measurement.
baseline = lidar.values.flatten()
valid = np.logical_not(np.isnan(baseline))

# Flatten the thresholded raster the same way and keep only the valid cells.
# Cells that are valid but were masked by the threshold are still NaN here.
linear_lidar = thresholded.values.flatten()
linear_lidar_filtered = linear_lidar[valid]
def filter_post(arr):
    """Binarise ``arr`` in place: NaN becomes 0, then values above 0 become 1.

    Values that are already zero or negative are left untouched. Nothing is
    returned; the array passed in is modified directly.
    """
    # NOTE(review): the check on the negated array looks redundant (negating
    # a NaN still yields NaN); it is kept so behaviour stays exactly the same.
    missing = np.logical_or(np.isnan(arr), np.isnan(np.negative(arr)))
    arr[missing] = 0
    positive = arr > 0
    arr[positive] = 1
# Binarise the labels in place (filter_post returns nothing and edits the
# array it is given), then convert the resulting values to integers.
filter_post(linear_lidar_filtered)
linear_lidar_filtered = linear_lidar_filtered.astype(int)

In [10]:
# Keep, for every flattened band, only the cells selected by the `valid` mask.
for name, flat_band in hls_bands.items():
    hls_bands[name] = flat_band[valid]

In [11]:
def write_to_csv(labels, *arrs, file_name='output.csv'):
    """Write a header row followed by the data in ``arrs`` to a CSV file.

    Earlier this function ignored both ``arrs`` and ``file_name``: it always
    wrote to 'output.csv' and took its rows from a global named ``data``.

    Args:
        labels: Column names, written as the first row.
        *arrs: The table data. Either a single 2-D array whose rows become
            the CSV rows, or one or more 1-D arrays that each become one
            column. When nothing is passed only the header is written.
        file_name: Path of the CSV file to create or overwrite.

    Raises:
        ValueError: Propagated from ``np.column_stack`` when the arrays
            cannot be stacked side by side (for example unequal lengths).
    """
    with open(file_name, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)

        # Header first.
        writer.writerow(labels)

        if not arrs:
            return

        if len(arrs) == 1 and np.ndim(arrs[0]) == 2:
            # A ready-made table: write it as is and skip the copy that
            # stacking would make.
            rows = arrs[0]
        else:
            rows = np.column_stack(arrs)

        # One CSV row per table row.
        writer.writerows(rows)
#     # Extract data and attributes
#     data = []
#     attributes = []

#     for xarray in xarrays:
#         data.append(xarray.values.flatten())
#         attributes.append(xarray.attrs.get('long_name', ''))

#     # Transpose data for CSV writing
#     data_transposed = zip(*data)

#     # Write to CSV file
#     with open(file_name, 'w', newline='') as csvfile:
#         writer = csv.writer(csvfile)
        
#         # Write attribute row
#         writer.writerow(attributes)
        
#         # Write data rows
#         writer.writerows(data_transposed)

In [13]:
# Column names for the exported table: three colour bands plus the label.
labels = ["Blue", "Green", "Red", "Truth"]

# Stack the S02, S03 and S04 bands and the binarised lidar labels side by
# side, one column each.
data = np.column_stack([
    *(hls_bands[key] for key in ("S02", "S03", "S04")),
    linear_lidar_filtered,
])

write_to_csv(labels, data)
print("Done exporting data")

Done exporting data
