## Loading Packages

In [None]:
# import warnings
# warnings.filterwarnings("ignore") # suppress warnings

In [None]:
import os
import sys

## Add the helper-function directory to the import path

In [None]:
# Make the project's helper modules importable by appending their directory
# (with "~" expanded) to sys.path.
function_path = os.path.expanduser("~/geoscience/albedo_downscaling/functions")
sys.path.append(function_path)
# import all the helper functions.
# NOTE(review): later cells use names such as plt, rxr, rio, box, Path and json
# that are never imported in this notebook, so they presumably arrive through
# this star import -- confirm against data_preprocessing.
from data_preprocessing import *

# Apply the custom matplotlib style sheet and render figures inline.
plt.style.use("~/geoscience/carbon_estimation/MNRAS.mplstyle")
%matplotlib inline

## Get the Bounding Box of the East River and reproject to MODIS coordinate system

In [None]:
# Open an example MODIS albedo file (path is defined outside this notebook) with
# masked=True, and take its 'Albedo_BSA_shortwave' variable as the CRS reference.
modis_ds_ex = rxr.open_rasterio(modis_albedo_filepath, masked=True)
albedo_shortwave_data = modis_ds_ex['Albedo_BSA_shortwave']
# Reproject the study-area boundary into that CRS, then wrap its total bounds in
# a rectangle stored as a one-element list.
Boundary_reproj_albedo = Boundary.to_crs(albedo_shortwave_data.rio.crs)
Boundary_box_albedo = [box(*Boundary_reproj_albedo.total_bounds)]

# Same two steps for the Colorado boundary.
colorado_boundary_reproj_albedo = colorado_boundary.to_crs(albedo_shortwave_data.rio.crs)
colorado_boundary_box_albedo = [box(*colorado_boundary_reproj_albedo.total_bounds)]

# Names of the MODIS albedo variables of interest.
# NOTE(review): this list is not referenced by any other cell in this notebook;
# presumably it is meant for a clipping helper -- confirm.
desired_albedo_bands = ['Albedo_BSA_shortwave', 'Albedo_WSA_shortwave', 'BRDF_Albedo_Band_Mandatory_Quality_shortwave']

## Reproject Bounding Box to UTM 13N

In [None]:
# Reproject the study-area boundary to EPSG:32613 and keep the rectangle around
# its total bounds as a one-element list; this box is passed to the GOES and
# MODIS clipping steps further down.
Boundary_reproj_utm = Boundary.to_crs(epsg=32613)
boundary_box_utm = [box(*Boundary_reproj_utm.total_bounds)]
# Same for the Colorado boundary.
colorado_boundary_utm = colorado_boundary.to_crs(epsg=32613)
boundary_box_colorado_utm = [box(*colorado_boundary_utm.total_bounds)]

## Reproject GOES Bounding Box (currently to UTM 13N, EPSG:32613; the GOES CRS version is commented out)

In [None]:
# Reproject bounding box to GOES
# Boundary_reproj_goes = Boundary.to_crs(goes_crs)
# NOTE(review): despite the variable name, the active line reprojects to
# EPSG:32613 (the same code used for the UTM boxes), not to goes_crs, so
# Boundary_box_goes is built exactly like boundary_box_utm -- confirm intended.
Boundary_reproj_goes = Boundary.to_crs(epsg=32613)
Boundary_box_goes = [box(*Boundary_reproj_goes.total_bounds)]
# Display the resulting one-element list.
Boundary_box_goes

## Clip and reproject single GOES file as a test

In [None]:
# boundary_box_utm = gpd.GeoDataFrame(geometry=boundary_box_utm, crs="EPSG:32613")
# Display the UTM bounding-box list (the GeoDataFrame conversion above is disabled).
boundary_box_utm

In [None]:
# goes_rxr = rxr.open_rasterio(goes_data_file_valid)
# reproject_clip_and_upsample_goes_raster(goes_rxr, goes_data_file_valid_name, boundary_box_utm)

# Open one hard-coded GOES ABI-L2-LSAC file as the test input.
# NOTE(review): the variable name suggests this file has no usable data -- confirm.
goes_no_data_ex = rxr.open_rasterio("/bsuhome/tnde/scratch/felix/GOES/data/ABI-L2-LSAC/2023/133/18/OR_ABI-L2-LSAC-M6_G16_s20231331826170_e20231331828543_c20231331830258.nc")
# goes_no_data_ex = rxr.open_rasterio("/bsuhome/tnde/scratch/felix/GOES/data/ABI-L2-LSAC/2021/251/19/OR_ABI-L2-LSAC-M6_G16_s20212511926190_e20212511928563_c20212511929585.nc")
# Run the helper on that raster with the UTM bounding box.
# NOTE(review): the second argument is goes_data_file_valid_name, which is
# defined elsewhere and may not correspond to the file opened above -- verify
# that any output name derived from it is the intended one.
reproject_clip_and_upsample_goes_raster(goes_no_data_ex, goes_data_file_valid_name, boundary_box_utm)


In [None]:
# Run the batch GOES helper over goes_data_dir with the UTM bounding box.
# NOTE(review): the two date strings are presumably the start and end of the
# processing window, and the meaning of the positional 5 is defined in the
# helper module -- confirm before changing either.
clip_reproject_goes_data_loop_through_directories(goes_data_dir, "09/01/2021", "06/15/2023", 5, boundary_box_utm)
# End date: 02/04/2022
# NOTE(review): the end-date comment above disagrees with the "06/15/2023"
# argument actually passed; it looks stale.

## Reproject MODIS to Colorado Shape

In [None]:
# Destination folder for the reprojected Colorado MODIS rasters.
dest_str = base_modis_path + "/reprojected_colorado_modis_data_lat_lon_new"
# exist_ok=True keeps the cell safe to re-run and removes the gap between the
# former exists() check and the mkdir() call.
Path(dest_str).mkdir(parents=True, exist_ok=True)

# Reproject every regular file found in the clipped-albedo directory.
for file_path in Path(clipped_albedo_output_data).iterdir():
    if file_path.is_file():
        # Output path is the input stem plus a suffix; no extension is added here.
        dest_file = dest_str + '/' + file_path.stem + '_reprojected_new'
        reproject_raster(file_path, dest_file)
        # print(f'Reprojected {file_path.stem}')
print("Done!!!")

## Reproject and clip Colorado MODIS data to UTM/SAIL Location

In [None]:
# Display the input directory that the next cell iterates over.
blue_sky_albedo_colorado_data

In [None]:
# Output folder for MODIS rasters reprojected to EPSG:32613 and clipped to the
# UTM bounding box. The trailing "/" matters: file names are appended directly.
dest_str = base_modis_path + "/blue_sky_albedo_sail_new/"
# exist_ok=True keeps the cell safe to re-run.
Path(dest_str).mkdir(parents=True, exist_ok=True)

for file_path in Path(blue_sky_albedo_colorado_data).iterdir():
    # Skip anything that is not a regular file, as the other reprojection loops
    # in this notebook do.
    if not file_path.is_file():
        continue

    # Open inside a context manager so each raster is closed once it has been
    # written out, instead of accumulating open handles across the loop.
    with rxr.open_rasterio(file_path) as modis_file:
        # Reproject to UTM first, then clip to the UTM bounding box.
        modis_reproj = modis_file.rio.reproject('EPSG:32613')
        modis_clipped = modis_reproj.rio.clip(boundary_box_utm, all_touched=True, from_disk=True)

        # Keep the original file name inside the destination folder.
        output_path = dest_str + file_path.name
        modis_clipped.rio.to_raster(output_path)
    # print(modis_clipped)
print("Done!!!")

## Individual calculations

In [None]:
# Open the reference GOES file (path defined outside this notebook) and run the
# reproject/clip/upsample helper on it.
# NOTE(review): this call passes Boundary_box_goes, whereas the earlier test cell
# passes boundary_box_utm; both are built from EPSG:32613 in this notebook.
goes_perfect_ds = rxr.open_rasterio(goes_data_file_valid)   
reproject_clip_and_upsample_goes_raster(goes_perfect_ds, goes_data_file_valid_name, Boundary_box_goes)

## Plot Reprojected Rasters

In [None]:
# Show band 1 of one reprojected Colorado MODIS raster and print its metadata.
reprojected_modis_ex_lat_lon = '/bsuhome/tnde/scratch/felix/modis/reprojected_colorado_modis_data_lat_lon_new/MCD43A3.A2022314.h09v05.061.2022323034956_clipped_reprojected_new'
# reprojected_modis_ex_utm = '/bsuhome/tnde/scratch/felix/modis/reprojected_modis_data_lat_lon_new/MCD43A3.A2022314.h09v05.061.2022323034956_clipped_reprojected_new'
# reprojected_goes_ex = '/bsuhome/tnde/scratch/felix/GOES/data/goes_output_data/OR_ABI-L2-LSAC-M6_G17_s20212461741176_e20212461743549_c20212461744433_clipped_reprojected_new.tif'

# Explicit figure/axes handles instead of the implicit pyplot "current" figure.
fig, ax = plt.subplots(figsize=(7, 7))
with rio.open(reprojected_modis_ex_lat_lon) as src:
    first_band = src.read(1)
    image = ax.imshow(first_band)
    print(src.meta)
    # Kept inside the with-block so the cell ends without a bare expression
    # and therefore emits no extra repr output.
    fig.colorbar(image, ax=ax)

## Plot GOES and MODIS Data

In [None]:
# Call the visualisation helper on the GOES and MODIS output directories and keep
# its return value; the next cell indexes it as data_dates[0] and data_dates[1].
data_dates = visualize_all_tif_data(reprojected_clipped_goes_output_dir, blue_sky_albedo_final_data_dir)

In [None]:
# Print every entry of data_dates[0] as MM-DD-YYYY, then the sizes of both lists.
# NOTE(review): which list belongs to GOES and which to MODIS depends on the
# helper's return order -- confirm against visualize_all_tif_data.
for date in data_dates[0]:
    print(date.strftime("%m-%d-%Y"))
print(len(data_dates[0]))
print(len(data_dates[1]))

## Filter out MODIS images in which more than 40% of the data is null

In [None]:
#########################################################
# Flag every MODIS Blue Sky Albedo raster whose share of NaN pixels in band 1
# exceeds NAN_PIXEL_THRESHOLD, print the flagged dates and save them as JSON.

# Collect and sort all MODIS Blue Sky Albedo files
modis_files = list(Path(blue_sky_albedo_final_data_dir).iterdir())
sorted_modis_files = sorted(modis_files, key=lambda x: extract_date_from_modis_filename(x.name))

# List to store dates with too many NaN pixels
invalid_dates = []

# Loop through each MODIS file in date order and assess data quality.
# (The sorted list used to be built but ignored; iterating it makes the
# processing order deterministic.)
for modis_file in sorted_modis_files:
    with rxr.open_rasterio(modis_file) as modis:
        # Select the first band
        modis_bsa_img = modis.sel(band=1)

        # Get spatial dimensions (last two axes)
        height, width = modis_bsa_img.shape[-2], modis_bsa_img.shape[-1]
        total_pixels = height * width

        # Identify NaN (invalid) pixels.
        # NOTE(review): the raster is opened without masked=True, so only pixels
        # already stored as NaN are counted; a numeric nodata value would not
        # be -- confirm the files store NaN.
        nan_mask = modis_bsa_img.isnull()
        total_nan_pixels = nan_mask.sum().values

        # Extract the date from the filename
        modis_date = extract_date_from_modis_filename(modis_file.name)

        # Check if the fraction of NaN pixels exceeds the threshold
        if (total_nan_pixels / total_pixels) > NAN_PIXEL_THRESHOLD:
            invalid_dates.append(modis_date)

# Sort and serialize the list of invalid dates
sorted_dates = sorted(invalid_dates)
dates_serialized = [dt.strftime("%Y-%m-%dT%H:%M:%S") for dt in sorted_dates]

# Print summary of invalid dates
print(len(sorted_dates))  # Total number of invalid dates
for date in dates_serialized:
    print(date)

# Save invalid MODIS dates to a JSON file. Failures are reported but do not stop
# the notebook (deliberate best effort).
try:
    with open("/bsuhome/tnde/scratch/felix/modis/invalid_modis_dates_new.json", "w") as file:
        json.dump(dates_serialized, file)
except Exception as e:
    print(f"An error occurred while writing the JSON file: {e}")

## Match GOES images to MODIS images using a NaN mask, interpolating NaN values the same way MODIS is interpolated

In [None]:
# Run the masking helper with the GOES output directory, the MODIS blue-sky
# albedo directory and nan_goes_data_dir.
# NOTE(review): nan_goes_data_dir is presumably where the masked GOES rasters are
# written -- confirm in the helper module.
mask_goes_to_match_modis(reprojected_clipped_goes_output_dir, blue_sky_albedo_final_data_dir, nan_goes_data_dir)

## Extra Code - AOD

## Clip bounding box to AOD area

In [None]:
# Open an example AOD file with masked=True and take its first element as the
# CRS reference.
# NOTE(review): [0] is either the first dataset of a list or the first band of
# an array, depending on what open_rasterio returns for this file; the name
# suggests it is the 0.55 um optical depth -- confirm.
aod_ds_ex = rxr.open_rasterio(aod_data_file, masked=True)
aod_055_data = aod_ds_ex[0]
# Reproject the study-area boundary into the AOD CRS and box its total bounds.
Boundary_reproj_aod = Boundary.to_crs(aod_055_data.rio.crs)
# Boundary_reproj_aod = Boundary.to_crs(epsg=4326)
Boundary_box_aod = [box(*Boundary_reproj_aod.total_bounds)]

# AOD variable names passed to clip_modis_data in the next code cell.
desired_aod_bands = ['Optical_Depth_055', 'AOD_QA']

## Clip AOD data to East River Shapefile

In [None]:
# Clip the AOD files in aod_data_dir to the AOD-CRS bounding box, keeping the
# variables listed in desired_aod_bands, and write to clipped_aod_output.
# NOTE(review): the roles of the arguments, including the trailing positional
# True, are inferred from their names -- confirm in the helper module.
clip_modis_data(aod_data_dir, clipped_aod_output, desired_aod_bands, Boundary_box_aod, True)
# Optical_Depth_055

## Reproject AOD data

In [None]:
# Define source and destination directories for AOD data.
# The folders are joined through Path so a separator is always present, whether
# or not base_modis_path ends with "/". Plain string concatenation fused the
# folder name onto the last path component when it did not (the other cells in
# this notebook prepend "/" explicitly).
clipped_aod_data_str = str(Path(base_modis_path) / 'clipped_aod_data_new')  # Folder containing clipped AOD rasters
dest_str = str(Path(base_modis_path) / "reprojected_aod_data_new")          # Output folder for reprojected rasters

# clipped_aod_data_str = base_modis_path + 'clipped_aod_data'  # Folder containing clipped AOD rasters
# dest_str = base_modis_path + "reprojected_aod_data"          # Output folder for reprojected rasters

# Create the destination directory if it doesn't exist (safe to re-run).
Path(dest_str).mkdir(parents=True, exist_ok=True)

# Loop through each file in the clipped AOD directory
for file_path in Path(clipped_aod_data_str).iterdir():
    if file_path.is_file():
        # Build output file path (no extension yet)
        dest_file = dest_str + '/' + file_path.stem + '_reprojected_new'

        # Reproject the raster to the default CRS (EPSG:4326 inside reproject_raster)
        reproject_raster(file_path, dest_file)

        # print progress
        # print(f'Reprojected {file_path.stem}')

print("Done!!!")