In [85]:
import gzip
import struct
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import rasterio
from rasterio.transform import from_origin
from rasterio.mask import mask
import urllib.request
from datetime import date
from datetime import timedelta



def read_persiann_css_online(url, nrow, ncol, dtype=np.int16):
    """Download a gzipped PERSIANN-CCS binary grid and return it as a 2-D array.

    The payload at ``url`` is fetched, gunzipped and interpreted as a flat
    sequence of ``dtype`` integers. The values are converted to float32, the
    no-data sentinel -9999 is replaced by NaN, everything is divided by 100
    and the result is reshaped to ``(nrow, ncol)``.

    Args:
        url: URL of the ``.bin.gz`` file (any scheme ``urllib`` can open).
        nrow: Number of rows of the output grid.
        ncol: Number of columns of the output grid.
        dtype: NumPy dtype of the raw integers stored in the file
            (default: ``np.int16``, i.e. native byte order).
            NOTE(review): ``read_persiann_ccs`` in this file decodes its input
            as big-endian (``'>h'``); if the downloaded files use the same
            layout, callers should pass ``dtype='>i2'`` -- TODO confirm.

    Returns:
        A float32 NumPy array of shape ``(nrow, ncol)``.

    Raises:
        urllib.error.URLError: If an error occurs while downloading the file.
        ValueError: If the decompressed data cannot be reshaped to
            ``(nrow, ncol)``.
    """
    try:
        with urllib.request.urlopen(url) as response:
            compressed_data = response.read()
    except urllib.error.URLError as e:
        # Keep the exception type callers already catch, but preserve the
        # original error as the cause for easier debugging.
        raise urllib.error.URLError(f"Error reading file from {url}: {e}") from e

    raw_values = np.frombuffer(gzip.decompress(compressed_data), dtype=dtype)

    # float32 represents every 16-bit integer exactly. float16 does not: it
    # rounds -9999 to -10000 (so valid neighbouring values would be masked as
    # no-data) and quantizes any magnitude above 2048.
    data_array = raw_values.astype(np.float32)
    # Set -9999 values to NaN
    data_array[data_array == -9999] = np.nan
    # Divide all values by 100
    data_array /= 100
    # Raises ValueError when the element count does not equal nrow * ncol.
    return data_array.reshape(nrow, ncol)

def iterate_between_days(start_date, end_date):
    """Generate every calendar day from ``start_date`` through ``end_date``.

    Both endpoints are included. Because this is a generator, the range check
    runs when the first item is requested, not when the function is called.

    Args:
        start_date: First day to produce (``datetime.date``).
        end_date: Last day to produce (``datetime.date``).

    Yields:
        One ``datetime.date`` per day, in ascending order.

    Raises:
        ValueError: If ``start_date`` falls after ``end_date``.
    """
    if end_date < start_date:
        raise ValueError("Start date must be less than or equal to end date.")

    one_day = timedelta(days=1)
    cursor = start_date
    while not cursor > end_date:
        yield cursor
        cursor = cursor + one_day

def format_number_with_zeros(number, desired_digits):
    """Left-pad the decimal text of an integer with '0' characters.

    The padding is purely textual: zeros are prepended to ``str(number)``
    until it is ``desired_digits`` characters long. Text that is already that
    long (or longer) is returned unchanged, and a minus sign counts as one of
    the characters.

    Args:
        number: The integer to format.
        desired_digits: Minimum length of the returned string; must be > 0.

    Returns:
        The zero-padded string.

    Raises:
        ValueError: If ``number`` is not an int or ``desired_digits`` is not
            positive.
    """
    error_text = "Invalid input: number must be an integer and desired_digits must be positive."
    if not isinstance(number, int):
        raise ValueError(error_text)
    if desired_digits <= 0:
        raise ValueError(error_text)

    # rjust only pads when the text is shorter than the requested width.
    return str(number).rjust(desired_digits, "0")

def iter_url(start_year, start_month, start_day, end_year, end_month, end_day, interval):
    """Build the download URLs for every time step in an inclusive date range.

    For each day in the range and each hour ``0, interval, 2*interval, ...``
    below 24, a URL of the form
    ``<base>/<interval>hrly/rgccs<interval>h<YY><DDD><HH>.bin.gz`` is produced,
    where ``YY`` is the two-digit year, ``DDD`` the zero-padded day of year and
    ``HH`` the zero-padded hour.

    Args:
        start_year: The starting year of the date range.
        start_month: The starting month of the date range.
        start_day: The starting day of the date range.
        end_year: The ending year of the date range.
        end_month: The ending month of the date range.
        end_day: The ending day of the date range.
        interval: The interval between URLs in hours (e.g., 3 for every 3 hours).

    Returns:
        A set containing all generated URLs. Sets are unordered; sort the
        result if the files must be processed chronologically.

    Raises:
        ValueError: If ``interval`` is not positive, if a date is invalid, or
            if the start date is after the end date.
    """
    if interval <= 0:
        raise ValueError("interval must be a positive number of hours.")

    start_date = date(start_year, start_month, start_day)
    end_date = date(end_year, end_month, end_day)
    url_base = "https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/"
    # Part of the URL that is identical for every file of this interval.
    prefix = url_base + str(interval) + "hrly/" + "rgccs" + str(interval) + "h"

    urls = set()
    for day in iterate_between_days(start_date, end_date):
        doy = day.timetuple().tm_yday  # Day of the year
        year_2d = str(day.year)[-2:]  # Last two digits of the year
        doy_formatted = format_number_with_zeros(doy, 3)
        # Cover the whole day: an upper bound of 24 (not 22) keeps hours 22
        # and 23 when interval is 1, and is unchanged for intervals 3 and 6.
        for hh in range(0, 24, interval):
            hh_formatted = format_number_with_zeros(hh, 2)  # Hour with leading zeros
            urls.add(prefix + year_2d + doy_formatted + hh_formatted + '.bin.gz')
    return urls

def read_persiann_ccs(file_path, nrows=3000, ncols=9000):
    """Read a local gzipped PERSIANN-CCS binary grid into a float32 array.

    The file is decoded as big-endian signed 16-bit integers in row-major
    order. The no-data value -9999 becomes NaN; every other value is divided
    by 100.

    Args:
        file_path: Path to the ``.bin.gz`` file.
        nrows: Number of grid rows (default 3000).
        ncols: Number of grid columns (default 9000).

    Returns:
        A float32 NumPy array of shape ``(nrows, ncols)``.

    Raises:
        ValueError: If the file holds fewer than ``nrows * ncols`` values.
    """
    expected_bytes = nrows * ncols * 2  # two bytes per cell

    with gzip.open(file_path, 'rb') as f:
        # Only the first nrows * ncols values are used; trailing bytes are ignored.
        payload = f.read(expected_bytes)

    if len(payload) < expected_bytes:
        raise ValueError(
            f"{file_path} holds {len(payload)} bytes; expected at least "
            f"{expected_bytes} for a {nrows}x{ncols} grid."
        )

    # Decode the whole buffer at once instead of unpacking one value at a
    # time in Python, which is prohibitively slow for 27 million cells.
    raw = np.frombuffer(payload, dtype='>i2').reshape(nrows, ncols)

    # Convert to physical units, handling the no-data value.
    data = raw.astype(np.float32) / 100.0
    data[raw == -9999] = np.nan
    return data

def convert_to_geotiff(data, geotiff_path):
    """Write a 2-D array to disk as a single-band float32 GeoTIFF.

    The raster is georeferenced with its upper-left corner at x=-180, y=60,
    a pixel size of 0.04 x 0.04 and the '+proj=latlong' CRS.

    Args:
        data: 2-D array; its shape supplies the raster height and width.
        geotiff_path: Destination path of the GeoTIFF file.
    """
    n_rows, n_cols = data.shape[0], data.shape[1]
    profile = dict(
        driver='GTiff',
        height=n_rows,
        width=n_cols,
        count=1,
        dtype='float32',
        crs='+proj=latlong',
        transform=from_origin(-180, 60, 0.04, 0.04),
    )

    with rasterio.open(geotiff_path, 'w', **profile) as dst:
        # Band indices are 1-based; the array goes into the only band.
        dst.write(data, 1)

def clip_raster_with_gpkg(raster_path, gpkg_path, clipped_raster_path):
    """Clip a raster to the geometries of a vector file and save the result.

    The vector layer is reprojected to '+proj=latlong', used to mask and crop
    the raster, and the cropped image is written as a GeoTIFF that reuses the
    source metadata with updated size and transform.

    Args:
        raster_path: Path of the raster to clip.
        gpkg_path: Path of the vector file holding the clip geometries.
        clipped_raster_path: Destination path of the clipped GeoTIFF.
    """
    shapes = gpd.read_file(gpkg_path).to_crs(crs='+proj=latlong').geometry

    with rasterio.open(raster_path) as src:
        clipped, clipped_transform = mask(src, shapes, crop=True)

        # The clipped array is laid out as (bands, rows, cols).
        profile = src.meta.copy()
        profile.update(
            driver='GTiff',
            height=clipped.shape[1],
            width=clipped.shape[2],
            transform=clipped_transform,
        )

        with rasterio.open(clipped_raster_path, 'w', **profile) as dest:
            dest.write(clipped)

def plot_clipped_data(clipped_raster_path):
    """Show the first band of a raster file as a colour-mapped image.

    Args:
        clipped_raster_path: Path of the raster file to display.
    """
    with rasterio.open(clipped_raster_path) as src:
        band = src.read(1)
        fig, ax = plt.subplots(figsize=(12, 6))
        image = ax.imshow(band, cmap='viridis', origin='upper')
        fig.colorbar(image, ax=ax, label='Precipitation (mm/3hr)')
        ax.set_title('Clipped PERSIANN-CCS Precipitation')
        ax.set_xlabel('Longitude')
        ax.set_ylabel('Latitude')
        plt.show()

# Example end-to-end workflow for a local file: read -> GeoTIFF -> clip -> plot.
# Only `file_path` is active; the remaining steps are left disabled below.
# NOTE(review): every path here is an absolute, machine-specific location
# (one Windows path, several /home/hidrologia paths) -- adjust before running.
# Paths
file_path = "C:/Users/liang.yang/Downloads/rgccs3h0300100.bin.gz"
# geotiff_path = '/home/hidrologia/Downloads/rgccs3h0300100.tif'
# gpkg_path = '/home/hidrologia/Pronostico_hidro/Inputs/Manduriacu_delimitada.gpkg'
# clipped_raster_path = '/home/hidrologia/Downloads/clipped_rgccs3h0300100.tif'

# Read and convert the data to GeoTIFF
# data = read_persiann_ccs(file_path)
# convert_to_geotiff(data, geotiff_path)

# Clip the GeoTIFF with the geopackage layer
# clip_raster_with_gpkg(geotiff_path, gpkg_path, clipped_raster_path)

# Plot the clipped data
# plot_clipped_data(clipped_raster_path)


In [50]:
# Grid dimensions (rows x columns) used to reshape each downloaded file.
nrow = 3000
ncol = 9000
# Inclusive date range and time step (in hours) of the files to download.
start_year=2024
start_month=1
start_day=1
end_year=2024
end_month=1
end_day=3
interval=3
# iter_url returns an unordered set. Sorting makes the download order (and the
# grid left in `data` after the loop) reproducible; because the file names
# embed YYDDDHH, sorted order is also chronological.
for url in sorted(iter_url(start_year,start_month,start_day,end_year,end_month,end_day,interval)):
  try:
    data = read_persiann_css_online(url,nrow,ncol)
    # Process the decompressed data here (e.g., print(data))
  except urllib.error.URLError as e:
    # A failed download is reported and skipped so the remaining files are still fetched.
    print(f"An error occurred: {e}")
  # TODO: integrate all the iterated data together (`data` is currently
  # overwritten on every iteration).
  

(3000, 9000)

In [88]:
# Preview the URL set generated for 1-3 January 2014 at a 3-hour interval.
iter_url(2014,1,1,2014,1,3,3)

{'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400100.bin.gz',
 'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400103.bin.gz',
 'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400106.bin.gz',
 'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400109.bin.gz',
 'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400112.bin.gz',
 'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400115.bin.gz',
 'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400118.bin.gz',
 'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400121.bin.gz',
 'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400200.bin.gz',
 'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400203.bin.gz',
 'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400206.bin.gz',
 'https://persiann.eng.uci.edu/CHRSdata/PERSIANN-CCS/3hrly/rgccs3h1400209.bin.gz',
 'ht