In [32]:
import numpy as np
import pandas as pd

In [30]:
# max and min locations for streams
max_lat = 61.56044
min_lat = 48.57835
max_lon = -114.53583
min_lon = -137.97775

In [2]:
# %pip install netCDF4
from netCDF4 import Dataset

# Define the path to the file
data_dir = 'data/'
file_path = data_dir + 'Rainf_WFDE5_CRU_201101_v2.1.nc'

# Load the data
test_data = Dataset(file_path)

# Display the data
print(test_data)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    title: WATCH Forcing Data methodology applied to ERA5 data
    institution: Copernicus Climate Change Service
    contact: http://copernicus-support.ecmwf.int
    comment: Methodology implementation for ERA5 and dataset production by B-Open Solutions for the Copernicus Climate Change Service in the context of contract C3S_25c
    Conventions: CF-1.7
    summary: ERA5 data regridded to half degree regular lat-lon; Genuine land points from CRU grid and ERA5 land-sea mask only; Rainf bias-corrected using CRU TS4.04 wet days & CRU TS4.04 precip totals, catch correction, and precip phase correction according to elevation and bias-corrected Tair
    reference: Cucchi et al., 2020, Earth Syst. Sci. Data, 12(3), 2097–2120, doi:10.5194/essd-12-2097-2020; Weedon et al., 2014, Water Resources Res., 50, 7505-7514, doi:10.1002/2014WR015638; Harris et al., 2020, Scientific Data, 7(1), doi:10.1038/s41597-020-045

In [None]:
test_data.variables.keys()

dict_keys(['lat', 'lon', 'time', 'Rainf'])

In [10]:
test_data.variables['lat'][:]

masked_array(data=[-89.75, -89.25, -88.75, -88.25, -87.75, -87.25, -86.75,
                   -86.25, -85.75, -85.25, -84.75, -84.25, -83.75, -83.25,
                   -82.75, -82.25, -81.75, -81.25, -80.75, -80.25, -79.75,
                   -79.25, -78.75, -78.25, -77.75, -77.25, -76.75, -76.25,
                   -75.75, -75.25, -74.75, -74.25, -73.75, -73.25, -72.75,
                   -72.25, -71.75, -71.25, -70.75, -70.25, -69.75, -69.25,
                   -68.75, -68.25, -67.75, -67.25, -66.75, -66.25, -65.75,
                   -65.25, -64.75, -64.25, -63.75, -63.25, -62.75, -62.25,
                   -61.75, -61.25, -60.75, -60.25, -59.75, -59.25, -58.75,
                   -58.25, -57.75, -57.25, -56.75, -56.25, -55.75, -55.25,
                   -54.75, -54.25, -53.75, -53.25, -52.75, -52.25, -51.75,
                   -51.25, -50.75, -50.25, -49.75, -49.25, -48.75, -48.25,
                   -47.75, -47.25, -46.75, -46.25, -45.75, -45.25, -44.75,
                   -44.25

In [25]:
test_data.variables['time']

<class 'netCDF4._netCDF4.Variable'>
int64 time(time)
    standard_name: time
    long_name: Time
    axis: T
    units: hours since 1900-01-01
    calendar: proleptic_gregorian
unlimited dimensions: 
current shape = (744,)
filling on, default _FillValue of -9223372036854775806 used

In [29]:
test_data.variables['Rainf']

<class 'netCDF4._netCDF4.Variable'>
float32 Rainf(time, lat, lon)
    _FillValue: 1e+20
    units: kg m-2 s-1
    long_name: Rainfall Flux
    standard_name: rainfall_flux
unlimited dimensions: 
current shape = (744, 360, 720)
filling on

In [20]:
test_time = test_data.variables['time'][:][0]

In [None]:
import datetime
# convert from hours since 1900-01-01 00:00:00 to a human-readable date
print(datetime.datetime.fromtimestamp(test_time))

1970-01-11 22:16:48


In [17]:
# get only v2.1 data

# collect all the files
import os
files = os.listdir(data_dir)
filtered_files = []

# filter out the v2.1 files
for file in files:
    version = file.split('_')[-1]
    if version == 'v2.1.nc':
        filtered_files.append(file)

len(filtered_files)

108

In [None]:
# Initialize time and rainfall arrays
times = np.array([])
rainfall = None  # Start with None, initialize after first file

for file in filtered_files[:3]:
    data = Dataset(data_dir + file)

    # Append times
    times = np.append(times, data.variables['time'][:])

    # Initialize rainfall array if it's the first file
    if rainfall is None:
        rainfall = data.variables['Rainf'][:]
    else:
        # Concatenate along the first axis
        rainfall = np.concatenate((rainfall, data.variables['Rainf'][:]), axis=0)

rainfall.shape


(744, 360, 720)
(744, 360, 720)
(672, 360, 720)
(1416, 360, 720)
(744, 360, 720)
(2160, 360, 720)


(2160, 360, 720)