## Loading Packages

In [None]:
import os
import sys
import glob

## Add the path to all helper functions

In [None]:
function_path = os.path.expanduser("~/geoscience/albedo_downscaling/functions")
sys.path.append(function_path)
# import all the helper functions.
from plot_fxns import *
from albedo_unet1_fxns import *
from data_preprocessing import *

plt.style.use("~/geoscience/carbon_estimation/MNRAS.mplstyle")
%matplotlib inline

## Write all 500m GOES data to a raster

In [None]:
# Dates that must be skipped when writing GOES rasters. Three sources are
# merged: the dates loaded from INVALID_DATES_PATH, the INVALID_GOES_DATES
# constant, and the hand-entered list below.
additional_invalid_dates = [
    datetime(2021, 9, 18), datetime(2021, 12, 11), datetime(2021, 12, 20),
    datetime(2022, 3, 5), datetime(2022, 3, 18), datetime(2022, 4, 8),
    datetime(2022, 5, 2), datetime(2022, 5, 19), datetime(2022, 5, 24),
    datetime(2022, 9, 13), datetime(2022, 11, 16), datetime(2022, 11, 17),
    datetime(2022, 12, 16), datetime(2022, 12, 18), datetime(2022, 12, 19),
    datetime(2023, 6, 1),
]

# Copy the loaded dates so the helper's return value is never mutated, then
# append every extra date exactly once (order of first appearance is kept).
invalid_dates = list(convert_dates(INVALID_DATES_PATH))
for extra_date in [*INVALID_GOES_DATES, *additional_invalid_dates]:
    if extra_date not in invalid_dates:
        invalid_dates.append(extra_date)

# Test period 1
start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 6, 15)

# # Test period 2
# start_date = datetime(2021, 9, 1)
# end_date = datetime(2023, 6, 15)
write_goes_to_raster(GOES_500m_masked_data_path, GOES_500m_masked_output_dir, invalid_dates, start_date, end_date)

## Write MODIS 500m interpolated data that is used as ground truth in the U-Net

In [None]:
# Write a gap-filled copy of every MODIS raster in MODIS_bsa_dir to
# MODIS_interpolated_data_dir, keeping the original file names.
# Sorting makes the processing order reproducible (iterdir order is arbitrary).
modis_files = sorted(Path(MODIS_bsa_dir).iterdir())
os.makedirs(MODIS_interpolated_data_dir, exist_ok=True)

for mfile in modis_files:
    if not mfile.is_file():
        continue  # skip sub-directories and other non-file entries

    # The context manager closes the raster handle after each file instead of
    # leaving one open handle per iteration.
    with rxr.open_rasterio(mfile, masked=True) as modis_rxr:
        # presumably returns an array shaped like the input with the masked
        # cells filled — verify against fill_inputs_interpolate
        interpolated_values = fill_inputs_interpolate(modis_rxr)  # interpolate_nan(modis_rxr)
        modis_rxr.values = interpolated_values
        output_file_name = os.path.join(MODIS_interpolated_data_dir, mfile.name)
        modis_rxr.rio.to_raster(output_file_name)
        # print(f"Wrote to {output_file_name}")
print("Execution complete!!!")

## Calculate $R^2$ Scores for 500m GOES and 500m MODIS for Results 

In [None]:
# Echo both directories. A tuple is used because a cell only renders its last
# expression, so two separate bare names would show just the second one.
GOES_500m_raster_dir, MODIS_interpolated_data_dir

In [None]:
# Gather the directory entries used by the score cells below:
#   - interpolated MODIS rasters
#   - masked GOES 500m rasters
#   - original (non-interpolated) MODIS rasters
modis_files = [*Path(MODIS_interpolated_data_dir).iterdir()]
# goes_500m_files = list(Path(GOES_500m_raster_dir).iterdir())
goes_500m_masked_files = [*Path(GOES_500m_masked_output_dir).iterdir()]

modis_non_interpolated_files = [*Path(MODIS_bsa_dir).iterdir()]

In [None]:
# Quick look at one of the non-interpolated MODIS rasters.
tif = modis_non_interpolated_files[10]
with rio.open(tif) as ds:
    arr = ds.read(1, masked=True)  # nodata -> mask
    extent = plotting_extent(ds)
    # Read the CRS while the dataset is still open; it is closed as soon as
    # the `with` block ends.
    crs_label = ds.crs.to_string()

plt.figure(figsize=(8, 6))
im = plt.imshow(arr, extent=extent, origin="upper")
plt.tick_params(axis='x', rotation=45)
# The file comes from the MODIS directory, so label it as MODIS and include
# the file name (index 10 of a directory listing is otherwise anonymous).
plt.title(f"MODIS Blue Sky Albedo (non-interpolated)\n{tif.name}")
plt.xlabel(crs_label)
plt.ylabel("y (map units)")
cbar = plt.colorbar(im, fraction=0.046, pad=0.04)
cbar.set_label("Albedo (0–1)")
plt.tight_layout()
plt.show()

In [None]:
# Restrict the comparison to one calendar year.
# A plain `"2023" in name` test is not enough: a MODIS name such as
# "2022023_..." (day 023 of 2022) also contains the substring "2023".
TARGET_YEAR = "2023"


def _goes_name_year(path):
    """Return the 4-character year token of a GOES raster name, or None.

    The name is expected to start with MM-DD-YYYY (the same layout the
    plotting cells parse with '%m-%d-%Y'); any other layout yields None.
    """
    parts = path.name.split("-")
    return parts[2][:4] if len(parts) >= 3 else None


goes_500m_masked_files2 = [p for p in goes_500m_masked_files if _goes_name_year(p) == TARGET_YEAR]
# MODIS names start with YYYYDDD, so the year is the leading four characters.
modis_non_interpolated_files2 = [p for p in modis_non_interpolated_files if p.name.startswith(TARGET_YEAR)]
modis_files2 = [p for p in modis_files if p.name.startswith(TARGET_YEAR)]
_ = calculate_r2_scores(goes_500m_masked_files2, modis_non_interpolated_files2)
# calculate_r2_scores(goes_500m_masked_files, modis_non_interpolated_files)#, modis_files)

In [None]:
# goes_500m_masked_files
# modis_non_interpolated_files
# modis_files

## Visualize RMSE of U-Net

In [None]:
# RMSE between the 2023 GOES rasters and the 2023 non-interpolated MODIS
# rasters; the return value is discarded so nothing is echoed by the cell.
_ = calculate_RMSE_scores(
    goes_500m_masked_files2,
    modis_non_interpolated_files2,
)

In [None]:
# _ = plot_pixels_per_date(goes_500m_masked_files2, modis_non_interpolated_files2)

## Visualize GOES U-Net 500m and MODIS 500m next to each other
### Visually compares daily GOES 500m albedo predictions to MODIS 500m interpolated albedo

In [None]:
# Load all entries from the directories containing interpolated MODIS data and GOES 500m raster data
modis_files = list(Path(MODIS_interpolated_data_dir).iterdir())
goes_500m_files = list(Path(GOES_500m_masked_output_dir).iterdir())
# goes_500m_files = list(Path(GOES_500m_raster_dir).iterdir())

# Keep regular files only. Doing this once up front avoids repeating the
# file-system check for every GOES/MODIS pair inside the loops.
only_goes_files = [f for f in goes_500m_files if f.is_file()]
only_modis_files = [f for f in modis_files if f.is_file()]

# Sort GOES files chronologically based on the date extracted from their filenames
sorted_goes_files = sorted(only_goes_files, key=lambda x: extract_date_from_goes_filename(x.name))

for file in sorted_goes_files:
    # The GOES file name starts with MM-DD-YYYY.
    goes_date = datetime.strptime("-".join(file.name.split("-")[0:3]), '%m-%d-%Y')
    date_label = goes_date.strftime('%m-%d-%Y')

    # MODIS files are matched on the same day written as YYYYDDD.
    modis_key = goes_date.strftime('%Y%j')
    matching_modis = [m for m in only_modis_files if modis_key in m.name]
    if not matching_modis:
        continue  # nothing to compare against; do not open the GOES raster

    # Context managers close each raster once its figure has been drawn, so
    # handles do not pile up over the whole date range.
    with rxr.open_rasterio(file) as goes_rxr:
        for modis_file in matching_modis:
            with rxr.open_rasterio(modis_file) as modis_rxr:
                # Two stacked panels: GOES on top, MODIS below.
                fig, (ax1, ax2) = plt.subplots(2, figsize=(10, 5))
                fig.subplots_adjust(hspace=0.4)  # Add vertical spacing between subplots

                # Plot GOES image
                goes_rxr.plot(ax=ax1)
                ax1.set_title(f"GOES 500m Albedo Data - {date_label}")
                ax1.set_xlabel("Meters west from Central Meridian")
                ax1.set_ylabel("Meters north from equator")

                # Plot MODIS image
                modis_rxr.plot(ax=ax2)
                ax2.set_title(f"MODIS 500m Albedo Data - {date_label}")
                ax2.set_xlabel("Meters west from Central Meridian")
                ax2.set_ylabel("Meters north from equator")

                # Automatically adjust layout to prevent overlap
                plt.tight_layout()
                plt.show()
                # Release the figure explicitly; one is created per date.
                plt.close(fig)


## Visualize GOES NaN Mask 500m and MODIS Ground Truth Data (non-masked)

In [None]:
# Load the list of MODIS blue sky albedo files and GOES 500m files with NaN values
modis_files = list(Path(MODIS_bsa_dir).iterdir())
goes_500m_files = list(Path(GOES_NaN_Data_dir).iterdir())

# Keep regular files only. Doing this once up front avoids repeating the
# file-system check for every GOES/MODIS pair inside the loops.
only_goes_files = [f for f in goes_500m_files if f.is_file()]
only_modis_files = [f for f in modis_files if f.is_file()]

# Sort the GOES files based on the datetime extracted from their filenames
sorted_goes_files = sorted(only_goes_files, key=lambda x: extract_date_from_goes_nan_filename(x.name))

for file in sorted_goes_files:
    # Extract datetime from the GOES filename using a custom parser
    goes_date = extract_date_from_goes_nan_filename(file.name)
    date_label = goes_date.strftime('%m-%d-%Y')

    # MODIS files are matched on the same day written as YYYYDDD.
    modis_key = goes_date.strftime('%Y%j')
    matching_modis = [m for m in only_modis_files if modis_key in m.name]
    if not matching_modis:
        continue  # nothing to compare against; do not open the GOES raster

    # Context managers close each raster once its figure has been drawn, so
    # handles do not pile up over the whole date range.
    with rxr.open_rasterio(file) as goes_rxr:
        for modis_file in matching_modis:
            with rxr.open_rasterio(modis_file) as modis_rxr:
                # Two stacked panels: GOES on top, MODIS below.
                fig, (ax1, ax2) = plt.subplots(2, figsize=(10, 5))
                fig.subplots_adjust(hspace=0.4)

                # Plot GOES data with title and axis labels
                goes_rxr.plot(ax=ax1)
                ax1.set_title(f"GOES 500m NaN Albedo Data - {date_label}")
                ax1.set_xlabel("Meters west from Central Meridian")
                ax1.set_ylabel("Meters north from Equator")

                # Plot MODIS data with title and axis labels
                modis_rxr.plot(ax=ax2)
                ax2.set_title(f"MODIS 500m Blue Sky Albedo Data - {date_label}")
                ax2.set_xlabel("Meters west from Central Meridian")
                ax2.set_ylabel("Meters north from Equator")

                # Improve layout and show the plot
                plt.tight_layout()
                plt.show()
                # Release the figure explicitly; one is created per date.
                plt.close(fig)


## Visualize snow albedo field data

In [None]:
# Load the SAIL field albedo table and index it by its 'time' column.
albedo_data = pd.read_csv(SAIL_field_data_file)
albedo_data['time'] = pd.to_datetime(albedo_data['time'])
albedo_data = albedo_data.set_index('time')

# Field value stored in column '0' for the comparison day.
date = datetime(2023, 5, 6)
albedo_at_date = albedo_data.loc[date, '0']
print(albedo_at_date)

# Open both 500m rasters for that day and stamp the GOES raster with the
# CRS carried by the MODIS raster.
modis_500m_rxr = rxr.open_rasterio(MODIS_500m_05_06_23_file, masked=True)
goes_500m_rxr = rxr.open_rasterio(GOES_500m_05_06_23_file, masked=True)
goes_500m_rxr = goes_500m_rxr.rio.write_crs(modis_500m_rxr.rio.crs)

# Site position in raster x/y coordinates
# (noted alongside as lon -106.986, lat 38.956).
sail_location_y_coord = 4313769.41
sail_location_x_coord = 327915.02
nearest_pixel = dict(x=sail_location_x_coord, y=sail_location_y_coord, method="nearest")

# Value of the nearest pixel in each raster.
goes_data_at_location = goes_500m_rxr.sel(**nearest_pixel).values
modis_data_at_location = modis_500m_rxr.sel(**nearest_pixel).values
print(goes_data_at_location)
print(modis_data_at_location)

# GOES on top, MODIS below.
fig, (ax1, ax2) = plt.subplots(2, figsize=(7,7))
for raster, axis in ((goes_500m_rxr, ax1), (modis_500m_rxr, ax2)):
    raster.plot(ax=axis)
plt.show()

## Individual data visualizations

In [None]:
# Visualize GOES Raw Data Clipped
goes_rxr = rxr.open_rasterio(GOES_2km_ex_may_6_2023, masked=True)
goes_reprojected = goes_rxr.rio.reproject("EPSG:32613")
goes_clipped = goes_reprojected["LSA"].rio.clip(boundary_box_utm, all_touched=True,from_disk=True, drop=True).squeeze()

# Plot
plt.figure(figsize=(10, 4))
goes_clipped.plot()
plt.title("GOES-R 2km Albedo Data")
plt.xlabel("Meters west from Central Meridian")
plt.ylabel("Meters north from equator")
plt.style.use("default")
plt.show()

In [None]:
# Visualize MODIS Blue Sky Albedo
modis_rxr = rxr.open_rasterio(MODIS_bsa_ex_sep_1, masked=True)

# Draw the raster on an explicit axes and label it through that axes.
fig, ax = plt.subplots(figsize=(10, 4))
modis_rxr.plot(ax=ax)
ax.set_title("MODIS Blue Sky Albedo Data")
ax.set_xlabel("Meters west from Central Meridian")
ax.set_ylabel("Meters north from equator")
plt.show()

## Plot MODIS BSA, WSA, blue-sky and GOES-R blue-sky

In [None]:
# Example clipped/reprojected GOES raster, built from its directory and name.
goes_output_dir = Path("/bsuhome/tnde/scratch/felix/GOES/data/goes_output_data_new")
goes_file = goes_output_dir / "OR_ABI-L2-LSAC-M6_G16_s20212441826171_e20212441828544_c20212441829544_clipped_reprojected_new.tif"
goes_file

In [None]:
# Split the timestamp returned by the helper into its pieces.
# presumably the string is laid out as YYYYDDDHHMM... — verify against
# extract_datetime_from_goes_filename
goes_timestamp_str = extract_datetime_from_goes_filename(goes_file.name)
day_token = goes_timestamp_str[:7]       # parsed below as year + day-of-year
hour_token = goes_timestamp_str[7:9]
minute_token = goes_timestamp_str[9:11]

actual_date = datetime.strptime(day_token, "%Y%j")
goes_display_hour = f"{hour_token}:{minute_token}"
goes_timestamp_str, actual_date, goes_display_hour

In [None]:
# Import the MODIS blue-sky albedo helper functions.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so the dependency is visible up front.
from modis_bluesky_albedo import *
# Load the two albedo collections from modis_albedo_data_dir.
# presumably the boolean selects black-sky (True) vs white-sky (False)
# albedo — verify against get_albedo_values.
black_sky_albedo_arr =  get_albedo_values(modis_albedo_data_dir, True) # black-sky request (flag=True)
white_sky_albedo_arr =  get_albedo_values(modis_albedo_data_dir, False) # white-sky request (flag=False)

In [None]:
# Day to inspect; switch by un-commenting one of the alternatives.
# date_str = "2021-11-27"
# date_str = "2021-09-01"
date_str = "2023-06-05"
dt = datetime.strptime(date_str, "%Y-%m-%d")
# Same day written as year + zero-padded day-of-year (YYYYDDD).
yyyydoy = dt.strftime("%Y%j")

print(yyyydoy)


In [None]:
# Input paths for the GOES / MODIS blue-sky comparison.
# modis_bsa = black_sky_albedo_arr["2021331"]
# modis_wsa = white_sky_albedo_arr["2021331"]
# NOTE(review): this MODIS path is for day 2021331 while the active GOES path
# below is for day 2023156 — confirm whether the two were meant to be the
# same day.
modis_blue_sky = "/bsuhome/tnde/scratch/felix/modis/blue_sky_albedo_sail_new/2021331_modis_blue_sky_albedo_.tif"

# Alternative GOES input (clipped/reprojected GeoTIFF for day 2021331):
# goes_blue_sky = "/bsuhome/tnde/scratch/felix/GOES/data/goes_output_data_new/OR_ABI-L2-LSAC-M6_G16_s20213311826176_e20213311828549_c20213311829572_clipped_reprojected_new.tif"
goes_blue_sky = "/bsuhome/tnde/scratch/felix/GOES/data/ABI-L2-LSAC/2023/156/18/OR_ABI-L2-LSAC-M6_G16_s20231561826179_e20231561828552_c20231561830350.nc"

In [None]:
# Visualize GOES Raw Data Clipped
# Reproject to EPSG:32613 and clip the "LSA" variable to boundary_box_utm.
goes_rxr = rxr.open_rasterio(goes_blue_sky, masked=True)
goes_reprojected = goes_rxr.rio.reproject("EPSG:32613")
goes_clipped = goes_reprojected["LSA"].rio.clip(boundary_box_utm, all_touched=True, from_disk=True, drop=True).squeeze()

# Switch the style BEFORE the figure is created. Style settings apply to
# artists created afterwards, so switching just before plt.show() made the
# look of this figure depend on whether the cell had been run before.
plt.style.use("default")

# Plot
plt.figure(figsize=(8, 6))
goes_clipped.plot()
plt.title("GOES-R 2km Albedo Data")
plt.xlabel("Easting")
plt.ylabel("Northing")
plt.show()

In [None]:
# Cloud-fraction table: report the raw row count, then keep rows whose
# 'cf_interp' is at most 0.10 and only the first row for each 'date'.
cf_file = "/bsuhome/tnde/scratch/felix/Sentinel-2/s2_albedo_outputs/tsi_cloud_fractions.csv"
cf_vals = pd.read_csv(cf_file)
print(len(cf_vals))

low_cloud_rows = cf_vals["cf_interp"] <= 0.10
cf_vals = cf_vals.loc[low_cloud_rows].drop_duplicates(subset=["date"])

display(cf_vals.head())
len(cf_vals)

In [None]:
# Collect the U-Net prediction rasters (files named predicted_*), in
# lexicographic path order, and preview the first five.
modis_test_path = "/bsuhome/tnde/scratch/felix/UNet/Unet_test_preds_modis_new/predicted_*"
modis_test_files = os.path.abspath(modis_test_path)
modis_unet_test_files_list = glob.glob(modis_test_files)
modis_unet_test_files_list.sort()
modis_unet_test_files_list[:5]

### U-Net actual vs downscaled images

In [None]:
# Inputs for the comparison. Only goes_blue_sky is read in this cell.
# NOTE(review): the MODIS path (day 2021331) and the predicted path
# (2023-01-12) do not match the active GOES day (2023, day 126) — confirm
# which day is intended.
# goes_blue_sky = "/bsuhome/tnde/scratch/felix/GOES/data/ABI-L2-LSAC/2023/012/18/OR_ABI-L2-LSAC-M6_G16_s20230121826172_e20230121828545_c20230121830487.nc"
goes_blue_sky = "/bsuhome/tnde/scratch/felix/GOES/data/ABI-L2-LSAC/2023/126/18/OR_ABI-L2-LSAC-M6_G16_s20231261826171_e20231261828544_c20231261830209.nc"
modis_blue_sky = "/bsuhome/tnde/scratch/felix/modis/blue_sky_albedo_sail_new/2021331_modis_blue_sky_albedo_.tif"
goes_blue_sky_predicted = "/bsuhome/tnde/scratch/felix/UNet/Unet_test_preds_modis_new/predicted_2023-01-12_modis_blue_sky_albedo_.tif"

# Visualize GOES Raw Data Clipped
# Reproject to EPSG:32613 and clip the "LSA" variable to boundary_box_utm.
goes_rxr = rxr.open_rasterio(goes_blue_sky, masked=True)
goes_reprojected = goes_rxr.rio.reproject("EPSG:32613")
goes_clipped = goes_reprojected["LSA"].rio.clip(boundary_box_utm, all_touched=True, from_disk=True, drop=True).squeeze()

# Switch the style BEFORE the figure is created. Style settings apply to
# artists created afterwards, so switching just before plt.show() made the
# look of this figure depend on whether the cell had been run before.
plt.style.use("default")

# Plot
plt.figure(figsize=(8, 6))
goes_clipped.plot()
plt.title("GOES-R 2km Albedo Data")
plt.xlabel("Easting")
plt.ylabel("Northing")
plt.show()

In [None]:
# Three-panel comparison for one day: GOES "LSA" (reprojected, clipped and
# scaled), the MODIS blue-sky raster, and the U-Net prediction, all drawn
# with one shared colour range and a single colourbar.

# plt.style.use("default")

# Alternative input set (2023, day 012):
# goes_blue_sky = "/bsuhome/tnde/scratch/felix/GOES/data/ABI-L2-LSAC/2023/012/18/OR_ABI-L2-LSAC-M6_G16_s20230121826172_e20230121828545_c20230121830487.nc"
# modis_blue_sky = "/bsuhome/tnde/scratch/felix/modis/blue_sky_albedo_sail_new/2021331_modis_blue_sky_albedo_.tif"
# goes_blue_sky_predicted = "/bsuhome/tnde/scratch/felix/UNet/Unet_test_preds_modis_new/predicted_2023-01-12_modis_blue_sky_albedo_.tif"

# Active input set (2023, day 126 = 2023-05-06):
goes_blue_sky = "/bsuhome/tnde/scratch/felix/GOES/data/ABI-L2-LSAC/2023/126/18/OR_ABI-L2-LSAC-M6_G16_s20231261826171_e20231261828544_c20231261830209.nc"
modis_blue_sky = "/bsuhome/tnde/scratch/felix/modis/blue_sky_albedo_sail_new/2023126_modis_blue_sky_albedo_.tif"
goes_blue_sky_predicted = "/bsuhome/tnde/scratch/felix/UNet/Unet_test_preds_modis_new/predicted_2023-05-06_modis_blue_sky_albedo_.tif"

# Alternative input set (2023, day 156 = 2023-06-05):
# goes_blue_sky = "/bsuhome/tnde/scratch/felix/GOES/data/ABI-L2-LSAC/2023/156/18/OR_ABI-L2-LSAC-M6_G16_s20231561826179_e20231561828552_c20231561830350.nc"
# modis_blue_sky = "/bsuhome/tnde/scratch/felix/modis/blue_sky_albedo_sail_new/2023156_modis_blue_sky_albedo_.tif"
# goes_blue_sky_predicted = "/bsuhome/tnde/scratch/felix/UNet/Unet_test_preds_modis_new/predicted_2023-06-05_modis_blue_sky_albedo_.tif"

# --- GOES: reproject, clip, scale ------------------------------------------
goes_rxr = rxr.open_rasterio(goes_blue_sky, masked=True)
goes_reprojected = goes_rxr.rio.reproject("EPSG:32613")

# boundary_box_utm must be a geometry / GeoDataFrame in EPSG:32613
goes_lsa = goes_reprojected["LSA"]
goes_clipped = goes_lsa.rio.clip(
    boundary_box_utm, all_touched=True, from_disk=True, drop=True
).squeeze()

scale_factor = 0.0001
goes_scaled = goes_clipped * scale_factor


# --- MODIS and U-Net prediction --------------------------------------------
def _open_first_band(raster_path):
    """Open a raster with masking and drop the 'band' dimension if present."""
    opened = rxr.open_rasterio(raster_path, masked=True)
    if "band" in opened.dims:
        opened = opened.sel(band=1, drop=True)
    return opened


modis_da = _open_first_band(modis_blue_sky)
pred_da = _open_first_band(goes_blue_sky_predicted)


# Optional physical clipping
def clip01(a):
    """Limit the values of `a` to the closed interval [0, 1]."""
    return a.clip(min=0.0, max=1.0)


goes_scaled, modis_da, pred_da = (
    clip01(layer) for layer in (goes_scaled, modis_da, pred_da)
)

# --- Shared vmin/vmax across all three layers ------------------------------
finite_parts = []
for layer in (goes_scaled, modis_da, pred_da):
    layer_values = layer.values
    finite_parts.append(layer_values[np.isfinite(layer_values)])
vals_all = np.concatenate(finite_parts)

# 2nd-98th percentile of the pooled finite values; 0-1 if nothing is finite.
vmin, vmax = np.nanpercentile(vals_all, [2, 98]) if vals_all.size > 0 else (0.0, 1.0)

# --- Plot: three panels + one colourbar ------------------------------------
fig, axes = plt.subplots(1, 3, figsize=(15, 4), constrained_layout=False)

panels = [
    (goes_scaled, "GOES LSAC Blue-sky Albedo"),
    (modis_da, "MODIS Blue-sky Albedo"),
    (pred_da, "U-Net Predicted GOES-R Blue-sky Albedo"),
]

images = []
for panel_ax, (layer, panel_title) in zip(axes, panels):
    images.append(
        layer.plot.imshow(
            ax=panel_ax,
            cmap="viridis",
            add_colorbar=False,
            vmin=vmin,
            vmax=vmax,
        )
    )
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel("Easting (m)")
    panel_ax.set_ylabel("Northing (m)")
    panel_ax.tick_params(axis="x", rotation=25)
im0, im1, im2 = images

# Shared colourbar on the right
cax = fig.add_axes([0.92, 0.15, 0.015, 0.7])  # [left, bottom, width, height]
cbar = fig.colorbar(im2, cax=cax)
cbar.set_label("Blue-sky Albedo")

plt.subplots_adjust(right=0.9, wspace=0.25)
plt.show()
