# Data collection

## [NASA Prediction of Worldwide Energy Resources (POWER)](https://aws.amazon.com/marketplace/pp/prodview-agetvmpkkzb7a?sr=0-1&ref_=beagle&applicationId=AWSMPContessa#resources)

In [25]:
'''
*Version: 1.0 Published: 2024/02/14* Source: [NASA POWER](https://power.larc.nasa.gov/)
POWER Remotely Connect to, Slice, and Download from a POWER Zarr via Python
This is an overview of the process to connect to and download from a POWER Zarr-formatted ARD via Python.
'''

import os
import fsspec

import pandas as pd
import xarray as xr

from datetime import datetime

"Between 2000 and 2016, all countries experienced incidences of drought: Botswana and Zimbabwe experienced the highest number of drought events with 5 and 7 years out of 16, respectively." - [link](https://www.frontiersin.org/articles/10.3389/fsufs.2023.1159901/full)
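So the time range selected below (2001-01-01 to 2024-04-01) covers most of that 2000–2016 period as well as the more recent years.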

In [26]:
# Daily time window to download, given as ISO dates (YYYY-MM-DD)
start_date, end_date = '2001-01-01', '2024-04-01'

# Latitude/longitude bounds of the study area.
# Alternative region kept for reference -- Zimbabwe:
# lat_range = (-22.4, -15.6)  # South to North
# lon_range = (25, 33)   # West to East

# Active region: South Sudan and Ethiopia
lat_range = (4, 13.5)  # South to North
lon_range = (23, 42)  # West to East


In [27]:
import folium
from folium import Rectangle

def visualize_area(lat_range: tuple, lon_range: tuple, *, zoom_start: int = 6):
    """
    Build a Folium map with the given bounding box drawn on it as a rectangle.

    Parameters:
    - lat_range (tuple): A tuple of (min_latitude, max_latitude) for the area.
    - lon_range (tuple): A tuple of (min_longitude, max_longitude) for the area.
    - zoom_start (int, keyword-only): Initial zoom level handed to folium.Map.
      Defaults to 6, the value that used to be hard-coded, so existing calls
      behave exactly as before; pass a smaller value for very large areas.

    Returns:
    - A Folium Map object centered on the midpoint of the specified area, with a
      rectangle overlay of the area added. The map is only returned, not displayed;
      the caller decides how to render it.
    """
    south, north = lat_range[0], lat_range[1]
    west, east = lon_range[0], lon_range[1]

    # The map is centered on the midpoint of the bounding box
    center = [(south + north) / 2, (west + east) / 2]
    area_map = folium.Map(location=center, zoom_start=zoom_start)

    # The rectangle is described by two opposite corners, each as (lat, lon)
    corners = [(south, west), (north, east)]
    folium.Rectangle(
        bounds=corners,
        color='#ff7800',
        fill=True,
        fill_opacity=0.2,
    ).add_to(area_map)

    return area_map

In [28]:
# Build the map for the lat_range / lon_range bounding box
map_visualization = visualize_area(lat_range, lon_range)

# To display the map in a Jupyter Notebook, simply call the map object:
# a bare expression on the last line of a cell is shown as the cell's output
map_visualization

In [29]:
# URLs of the Zarr stores to download from, one entry per dataset.
# Going by the store names, these hold the daily, UTC-based meteorology,
# precipitation and radiation data respectively.
filepaths = [
    'https://power-analysis-ready-datastore.s3.us-west-2.amazonaws.com/power_901_daily_meteorology_utc.zarr',
    'https://power-analysis-ready-datastore.s3.us-west-2.amazonaws.com/power_901_daily_precipitation_utc.zarr',
    'https://power-analysis-ready-datastore.s3.us-west-2.amazonaws.com/power_901_daily_radiation_utc.zarr'
]

In [30]:
def load_and_process_dataset(filepath: str, start_date: str, end_date: str, lat_range: tuple, lon_range: tuple) -> xr.Dataset:
    """
    Opens a Zarr store and downloads every data variable in it, restricted to the
    requested days and to the requested latitude/longitude box.

    Parameters:
    - filepath (str): URL to the Zarr store.
    - start_date (str): Start date in the format YYYY-MM-DD.
    - end_date (str): End date in the format YYYY-MM-DD.
    - lat_range (tuple): Latitude range as (start, end).
    - lon_range (tuple): Longitude range as (start, end).

    Returns:
    - xr.Dataset: An xarray Dataset holding the selected part of every data
      variable, already loaded into memory.

    Notes:
    - The time selection is an exact-label lookup of one timestamp per day between
      start_date and end_date; a day that is absent from the store's time
      coordinate is expected to make `.sel` raise a KeyError.
    - lat_range and lon_range are expanded into label-based slices, so (start, end)
      should follow the ordering of the store's coordinates (this notebook passes
      south-to-north and west-to-east).
    - Progress is reported by printing one line per variable.
    """
    store = fsspec.get_mapper(filepath)
    ds = xr.open_zarr(store, consolidated=True)

    # The indexers are identical for every variable, so build them once here
    # instead of regenerating the full daily date index on each loop iteration.
    selection = {
        'time': pd.date_range(start=start_date, end=end_date, freq='D'),
        'lat': slice(*lat_range),
        'lon': slice(*lon_range),
    }

    ds_selected_all = {}
    for variable in ds.data_vars:
        print(f"Downloading {variable}...")
        # .load() pulls the selected values into memory right away
        ds_selected_all[variable] = ds[variable].sel(**selection).load()

    return xr.Dataset(ds_selected_all)

In [31]:
# Download every configured store for the same time window and bounding box;
# the resulting list has one dataset per entry of `filepaths`, in the same order
datasets = [
    load_and_process_dataset(store_url, start_date, end_date, lat_range, lon_range)
    for store_url in filepaths
]

Downloading CDD0...
Downloading CDD10...
Downloading CDD18_3...
Downloading DISPH...
Downloading EVLAND...
Downloading EVPTRNS...
Downloading FROST_DAYS...
Downloading FRSEAICE...
Downloading FRSNO...
Downloading GWETPROF...
Downloading GWETROOT...
Downloading GWETTOP...
Downloading HDD0...
Downloading HDD10...
Downloading HDD18_3...
Downloading PBLTOP...
Downloading PRECSNOLAND...
Downloading PRECTOTCORR...
Downloading PS...
Downloading QV10M...
Downloading QV2M...
Downloading RH2M...
Downloading RHOA...
Downloading SLP...
Downloading SNODP...
Downloading T10M...
Downloading T10M_MAX...
Downloading T10M_MIN...
Downloading T10M_RANGE...
Downloading T2M...
Downloading T2MDEW...
Downloading T2MWET...
Downloading T2M_MAX...
Downloading T2M_MIN...
Downloading T2M_RANGE...
Downloading TO3...
Downloading TQV...
Downloading TROPPB...
Downloading TROPQ...
Downloading TROPT...
Downloading TS...
Downloading TS_MAX...
Downloading TS_MIN...
Downloading TS_RANGE...
Downloading U10M...
Downloading U

In [32]:
# Take the first dataset's lat / lon / time coordinates as the common reference,
# reindex every dataset onto them (nearest-label matching) and merge the results
reference = datasets[0]
common_coords = {'lat': reference.lat, 'lon': reference.lon, 'time': reference.time}
reindexed_datasets = [ds.reindex(method='nearest', **common_coords) for ds in datasets]
ds_combined = xr.merge(reindexed_datasets)

In [33]:
# Bare expression: shows the merged dataset as the output of this notebook cell
ds_combined

In [34]:
# Directory the NetCDF file is written to. With an empty string the path below is
# just the bare file name, so the file lands in the current working directory.
output = r''

# Create the target directory when one was given, so that a folder that does not
# exist yet does not make the export fail; skipped for the empty default.
if output:
    os.makedirs(output, exist_ok=True)

# Export the merged region as a NetCDF file
ds_combined.to_netcdf(path=os.path.join(output, "power_nasa_data.nc"))