In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import stackstac
import planetary_computer as pc
from dask.distributed import Client, LocalCluster
import matplotlib.pyplot as plt
import warnings

# NOTE(review): with no category/module argument this suppresses *every*
# warning for the rest of the kernel session (deprecations, runtime warnings,
# etc.). Consider narrowing the filter once the noisy source is identified.
warnings.filterwarnings("ignore")

# Import your modular function
from env_function import environmental_variables

# Local Dask cluster: 4 workers with 2 threads each, every worker capped at 8 GB.
cluster = LocalCluster(n_workers=4, threads_per_worker=2, memory_limit="8GB")

# Attach a client so subsequent dask-backed computations run on this cluster.
client = Client(cluster)

# Last expression of the cell: renders the client/cluster summary widget.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 8,Total memory: 29.80 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:37825,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: Just now,Total memory: 29.80 GiB

0,1
Comm: tcp://127.0.0.1:44833,Total threads: 2
Dashboard: http://127.0.0.1:42767/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:33033,
Local directory: /tmp/dask-scratch-space/worker-t5bi4adt,Local directory: /tmp/dask-scratch-space/worker-t5bi4adt

0,1
Comm: tcp://127.0.0.1:37433,Total threads: 2
Dashboard: http://127.0.0.1:40989/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:45387,
Local directory: /tmp/dask-scratch-space/worker-7vn6fsli,Local directory: /tmp/dask-scratch-space/worker-7vn6fsli

0,1
Comm: tcp://127.0.0.1:43177,Total threads: 2
Dashboard: http://127.0.0.1:44165/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:40909,
Local directory: /tmp/dask-scratch-space/worker-ue5gz1h9,Local directory: /tmp/dask-scratch-space/worker-ue5gz1h9

0,1
Comm: tcp://127.0.0.1:45603,Total threads: 2
Dashboard: http://127.0.0.1:44843/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:44429,
Local directory: /tmp/dask-scratch-space/worker-9fov20d0,Local directory: /tmp/dask-scratch-space/worker-9fov20d0


In [2]:
# Study area: Tampa Bay. Presumably ordered (min_lon, min_lat, max_lon, max_lat)
# -- confirm against environmental_variables().
bbox = (-82.7167, 27.5833, -82.3833, 28.0333)
start_date, end_date = "2019-01-01", "2024-12-31"

# Request both variables in a single call; the result is indexed by variable name.
env_data = environmental_variables(
    bbox=bbox, start_date=start_date, end_date=end_date, variables=["sst", "precip"]
)

# SST comes back as an xarray DataArray (lazy, per the original author's note);
# precipitation comes back as a list (of STAC Items, per the original note).
sst_lazy, precip_items = env_data["sst"], env_data["precip"]

# Show the container types as the cell output.
type(sst_lazy), type(precip_items)


Note: SST conversion (Kelvin to Celsius) applied.


(xarray.core.dataarray.DataArray, list)

In [3]:
# Uniform subsample across the entire time range, capped at max_items.
#
# The sample size is limited to the number of available items and the rounded
# indices are de-duplicated: asking linspace for more points than there are
# items yields repeated indices, and a repeated scene would be counted twice
# by the monthly sum computed later.
max_items = 800

# Fail early with a readable message instead of an IndexError further down.
if not precip_items:
    raise ValueError(
        "No precipitation items were returned for the requested bbox/date range."
    )

n_samples = min(max_items, len(precip_items))
indices = np.unique(
    np.linspace(0, len(precip_items) - 1, n_samples, dtype=int)
)
precip_sample = [precip_items[i] for i in indices]

# Refresh SAS tokens (sign each item) -- only the items that will actually be
# read need a fresh token, so sign the sample rather than the full list.
for item in precip_sample:
    pc.sign_inplace(item)

len(precip_sample)

800

In [4]:
# Build a lazy (time, band, y, x) cube from the sampled items.
#
# bounds_latlon clips the cube to the study bbox. Without it stackstac uses the
# combined footprint of all input items, so the spatial mean below would be
# taken over the full product extent instead of Tampa Bay, and far more data
# than necessary would be read.
precip_stack = stackstac.stack(
    precip_sample,
    assets=["cog"],
    epsg=4326,
    bounds_latlon=bbox,
    fill_value=np.nan,
)

# Reduce to one value per scene, then aggregate to monthly totals.
precip_monthly = (
    precip_stack
    .mean(dim=["x", "y"])    # area average of each scene over the bbox
    .squeeze(drop=True)      # drop length-1 dims (single "cog" band)
    .resample(time="1ME")    # month-end bins
    .sum(min_count=1)        # a month with no valid scene stays NaN instead of 0
)

In [None]:
# Combine both series into one Dataset keyed by variable name.
# NOTE(review): precip_monthly is binned on month-end stamps ("1ME"); confirm the
# SST series uses matching time stamps, otherwise the two variables will
# presumably land on different rows with NaN gaps after alignment.
final_ds = xr.Dataset(dict(sst=sst_lazy, precip=precip_monthly))

# Materialise the result once, then flatten it into a pandas DataFrame.
final_df = final_ds.compute().to_dataframe()

final_df.head()

In [None]:
# SST (°C) and precipitation (mm) have different units and value ranges, so
# each series gets its own y-axis instead of sharing one axis labelled "Value".
fig, ax_sst = plt.subplots(figsize=(12, 5))
ax_precip = ax_sst.twinx()  # second y-axis sharing the same time axis

(sst_line,) = ax_sst.plot(
    final_df.index, final_df["sst"], color="tab:red", label="Monthly SST (°C)"
)
(precip_line,) = ax_precip.plot(
    final_df.index, final_df["precip"], color="tab:blue", label="Monthly Precip (mm)"
)

ax_sst.set_title("Monthly SST & Precipitation (2019–2024)")
ax_sst.set_xlabel("Time")
ax_sst.set_ylabel("SST (°C)")
ax_precip.set_ylabel("Precipitation (mm)")

# One combined legend: the two lines live on different Axes objects, so their
# handles have to be passed explicitly.
ax_sst.legend(handles=[sst_line, precip_line], loc="upper left")
ax_sst.grid(True)
plt.show()

In [None]:
# Output locations for the two single-variable CSV files.
sst_path = "kalu_sst_data.csv"
precip_path = "kalu_precip_data.csv"

# One single-column frame per variable, copied so they are independent of final_df.
sst_df = final_df.loc[:, ["sst"]].copy()
precip_df = final_df.loc[:, ["precip"]].copy()

# Write each frame with its index included, under the header 'time'.
sst_df.to_csv(sst_path, index_label="time")
print(f"✔ Successfully saved SST data to: {sst_path}")

precip_df.to_csv(precip_path, index_label="time")
print(f"✔ Successfully saved Precipitation data to: {precip_path}")

# --- 4. Cleanup Dask Resources (Crucial) ---
# The cluster is closed in a `finally` so its worker processes are released
# even if closing the client raises.
# NOTE(review): any later cell that still relies on this client/cluster must be
# run before this point, or the cluster has to be restarted.
try:
    try:
        client.close()
    finally:
        cluster.close()
    print("\nDask client and cluster successfully shut down.")
except Exception as e:
    print(f"Could not shut down Dask resources: {e}")

In [None]:
from kalu_analysis import (
    load_env_timeseries,
    join_indices_and_env,
    compute_correlations_and_rmse,
)
from grace_functions import compute_indices

# Study box (Tampa Bay) and analysis period -- same values as used earlier in
# the notebook, repeated here so this cell can be run on its own.
bbox = (-82.7167, 27.5833, -82.3833, 28.0333)
start_date, end_date = "2019-01-01", "2024-12-31"

# Water-quality indices for the study box (monthly NDWI/NDTI/NDCI per the
# original author's note -- confirm against grace_functions.compute_indices).
indices_ds = compute_indices(bbox=bbox, start_date=start_date, end_date=end_date)

# Environmental time series (monthly SST per the original author's note --
# confirm against kalu_analysis.load_env_timeseries).
sst_da = load_env_timeseries(bbox=bbox, start_date=start_date, end_date=end_date)

# Join the two sources into one table, then derive the correlation/RMSE summary.
df = join_indices_and_env(indices_ds, sst_da)
stats = compute_correlations_and_rmse(df)

# Last expression of the cell: display the summary.
stats

