In [1]:
import os
import pandas as pd
import geopandas as gpd

from osgeo import gdal
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def get_capa_daily_data(year, month, day):
    """Read one day of 24 h CaPA/RDPA accumulated precipitation.

    Three remote GRIB2 locations are tried in order (archived "nowcast",
    recent "nowcast", archived "hindcast"); the first one GDAL can open is
    used.

    Parameters
    ----------
    year, month, day : str or int
        Date components. Values are zero-padded to 4/2/2 characters, so both
        pre-formatted strings ('2021', '01', '05') and plain integers
        (2021, 1, 5) produce the same URLs.

    Returns
    -------
    dataset : numpy.ndarray or float
        Band 1 of the file with NoData cells replaced by NaN and the row
        axis flipped. ``0.0`` when no source could be opened or read.
    flag : int
        ``0`` when data were read, ``1`` when the day is missing.
    """
    stamp = f"{str(year).zfill(4)}{str(month).zfill(2)}{str(day).zfill(2)}"
    candidate_urls = (
        # Archived "nowcast"
        f"/vsicurl/https://collaboration.cmc.ec.gc.ca/science/outgoing/capa.grib/{stamp}12_000_CMC_RDPA_APCP-024-0700cutoff_SFC_0_ps10km.grib2",
        # Recent "nowcast"
        f"/vsicurl/https://dd.weather.gc.ca/analysis/precip/rdpa/grib2/polar_stereographic/24/CMC_RDPA_APCP-024-0700cutoff_SFC_0_ps10km_{stamp}12_000.grib2",
        # Archived "hindcast"
        f"/vsicurl/https://collaboration.cmc.ec.gc.ca/science/outgoing/capa.grib/hindcast/capa_hindcast_v2.4/24h/{stamp}12_000_CMC_RDPA_APCP-024_SFC_0_ps10km.grib2",
    )

    ds = None
    for url in candidate_urls:
        try:
            ds = gdal.Open(url)
        except RuntimeError:
            # With GDAL exceptions enabled a failed open raises instead of
            # returning None; treat it the same way and try the next source.
            ds = None
        if ds is not None:
            break

    if ds is None:
        return 0.0, 1

    # https://eccc-msc.github.io/open-data/msc-data/nwp_rdpa/readme_rdpa-datamart_en/
    # Band 0 (1) is Analysis of Accumulated Precipitation on a 06hr or 24hr interval
    # Band 1 (2) is Confidence Index for Analysis
    try:
        band = ds.GetRasterBand(1)
        dataset = band.ReadAsArray() if band is not None else None
        nodata = band.GetNoDataValue() if band is not None else None
    except RuntimeError:
        dataset, nodata = None, None

    if dataset is None:
        return 0.0, 1

    # NaN cannot be stored in an integer array, so promote before masking.
    if not np.issubdtype(dataset.dtype, np.floating):
        dataset = dataset.astype(float)
    if nodata is not None:
        dataset[dataset == nodata] = np.nan

    # Reverse the row order (axis 0) of the raster.
    dataset = np.flip(dataset, 0)
    return dataset, 0

In [15]:
# Accumulate daily CaPA precipitation into one total grid per calendar year.
start_yr = 2021
end_yr = 2021
n_years = end_yr - start_yr + 1

# (rows, cols) of the accumulation grid; every daily array returned by
# get_capa_daily_data must broadcast to this shape.
GRID_SHAPE = (824, 935)

tot = np.zeros((n_years, *GRID_SHAPE))  # yearly total per grid cell
flg = np.zeros(n_years)                 # number of days with no data, per year

# Every day from 1 January of start_yr to 31 December of end_yr, inclusive.
dt = pd.date_range(f'{start_yr}-01-01', f'{end_yr}-12-31', freq='D')

for day in dt:
    daily, missing = get_capa_daily_data(day.strftime('%Y'),
                                         day.strftime('%m'),
                                         day.strftime('%d'))  # get total precip for each grid
    yr_idx = day.year - start_yr
    flg[yr_idx] += missing
    # A missing day comes back as the scalar 0.0, so it adds nothing here and
    # is only visible through flg.
    tot[yr_idx, :, :] += daily

In [4]:
# Area-weighted mean of the yearly precipitation totals (MAP) for each station,
# using the grid cells listed in that station's CaPA_grid_<station>.shp file.
flow_stn = ['07BE001', '07DD001', '07DA001', '07DA018', '07DA040', '07DA033', '07CE013', '07CE002', '07CE007',
            '07CD005', '07CD001', '07DB002', '07DB003', '07DA038', '07DA039', '07DA032', '07DA041', '07DC001',
            '07DC003', '07CE008', '07CD004', '07CD008', '07CD009', '07CB002', '07DA027', '07CE005', '07DA026',
            '07DA030', '07DB006', '07DB001', '07DC004', '07DA035', '07DA029', '07DA028', '07DA008', '07DA034',
            '07CE003', '07DA007', '07DA042', '07DA044', '07DA006', '07CE010', '07DA037', '07DA045']
level_stn = ['07CE001', '07DA024', '07DA023', '07DA025']
stations = [*flow_stn, *level_stn]

# NOTE(review): this folder is "06_Oil Sands Monitoring Group", while another
# cell of this notebook reads from "5_Oil Sands Monitoring Group" -- confirm
# which one is correct.
grid_dir = os.path.join(r'C:\Users\LeachJ\Documents\06_Oil Sands Monitoring Group\gis',
                        'rdpa_grids')

map_records = []          # one dict per (station, year)
missing_shapefiles = []   # stations skipped because no shapefile was found
for sta in stations:
    shapefile = os.path.join(grid_dir, f'CaPA_grid_{sta}.shp')
    if not os.path.exists(shapefile):
        missing_shapefiles.append(sta)
        continue

    gdf = gpd.read_file(shapefile)
    # Each polygon's share of the total polygon area.
    # NOTE(review): areas are computed in whatever CRS the shapefile carries;
    # confirm it is a projected CRS.
    gdf['Weight'] = gdf.area / np.sum(gdf.area)
    weights = gdf['Weight'].to_numpy()
    # -1 for nj and ni because the index in the shapefiles start at 1 instead of 0
    rows = gdf['nj'].to_numpy().astype(int) - 1
    cols = gdf['ni'].to_numpy().astype(int) - 1

    for cnt, yyyy in enumerate(range(start_yr, end_yr+1)):
        # Weighted sum over all cells of the station at once; the weights
        # already sum to 1, so no further division is needed.
        MAP = float(np.sum(tot[cnt, rows, cols] * weights))
        map_records.append({'StationID': sta, 'YEAR': yyyy, 'VALUE': MAP, 'MISSING': flg[cnt]})

if missing_shapefiles:
    print(f"No grid shapefile found for {len(missing_shapefiles)} station(s): "
          f"{', '.join(missing_shapefiles)}")
if not map_records:
    raise FileNotFoundError(f"No CaPA_grid_<station>.shp files were found in {grid_dir}")

df_map_rdpa = pd.DataFrame(map_records, columns=['StationID', 'YEAR', 'VALUE', 'MISSING'])
df_map_rdpa.to_csv(f"CaPA_MAP_{start_yr}-{end_yr}.csv", index=False)

In [5]:
# Export the yearly precipitation total of every grid cell listed in the
# 07DD001 shapefile, one row per (cell, year).
shapefile = os.path.join(r'C:\Users\LeachJ\Documents\5_Oil Sands Monitoring Group\gis', 
                         'rdpa_grids', 
                         'CaPA_grid_07DD001.shp')
if not os.path.exists(shapefile):
    raise FileNotFoundError(f"Grid shapefile not found: {shapefile}")

gdf = gpd.read_file(shapefile)
# -1 for nj and ni because the index in the shapefiles start at 1 instead of 0
rows = gdf['nj'].to_numpy().astype(int) - 1
cols = gdf['ni'].to_numpy().astype(int) - 1

# Build an independent frame for each year. Shared lists that keep growing
# across years would repeat earlier years' rows under a later Year label.
grid_frames = []
for cnt, yyyy in enumerate(range(start_yr, end_yr+1)):
    grid_frames.append(pd.DataFrame({
        'ni': gdf['ni'].to_numpy(),
        'nj': gdf['nj'].to_numpy(),
        'Longitude': gdf['Longitude'].to_numpy(),
        'Latitude': gdf['Latitude'].to_numpy(),
        'TotalPrecipitation': tot[cnt, rows, cols],
        'Year': yyyy,
    }))

df_grid_rdpa = pd.concat(grid_frames, ignore_index=True)
df_grid_rdpa.to_csv(f"CaPA_grid_total_precip_{start_yr}-{end_yr}.csv", index=False)

In [6]:
"""dfcoord = pd.read_csv('10km_res')
# coordinates of the first grid point should be 18.1429° N 142.8968° W
dfcoord['Longitude'] = dfcoord['Longitude'] - 360  - 0.0043 + 0.000044
dfcoord['Latitude'] = dfcoord['Latitude'] - 0.002130
# set starting index to 0,0
dfcoord['ni'] = dfcoord['ni'] - 1
dfcoord['nj'] = dfcoord['nj'] - 1
lon = pd.pivot_table(dfcoord, values='Longitude', index=['nj'], columns=['ni'])
lat = pd.pivot_table(dfcoord, values='Latitude', index=['nj'], columns=['ni'])

plt.scatter(lon.values, lat.values, c=tot)"""

"dfcoord = pd.read_csv('10km_res')\n# coordinates of the first grid point should be 18.1429° N 142.8968° W\ndfcoord['Longitude'] = dfcoord['Longitude'] - 360  - 0.0043 + 0.000044\ndfcoord['Latitude'] = dfcoord['Latitude'] - 0.002130\n# set starting index to 0,0\ndfcoord['ni'] = dfcoord['ni'] - 1\ndfcoord['nj'] = dfcoord['nj'] - 1\nlon = pd.pivot_table(dfcoord, values='Longitude', index=['nj'], columns=['ni'])\nlat = pd.pivot_table(dfcoord, values='Latitude', index=['nj'], columns=['ni'])\n\nplt.scatter(lon.values, lat.values, c=tot)"

In [7]:
#dfCSL = pd.read_csv(r'C:\Users\leachj\Documents\5_Oil Sands Monitoring Group\gis\Climate_station_locations.csv')

In [8]:
"""for i in range(0, len(dfCSL)):
    staidx = np.argmin(np.sqrt((dfcoord['Latitude'] - dfCSL['Latitude'][i]) ** 2 + 
                               (dfcoord['Longitude'] - dfCSL['Longitude'][i]) ** 2))
    grid_tot = tot[:, int(dfcoord.loc[staidx]['nj']), int(dfcoord.loc[staidx]['ni'])]
    print(f'{dfCSL["Name"][i]} precip for 2002: {grid_tot[0]:.0f} mm')"""

'for i in range(0, len(dfCSL)):\n    staidx = np.argmin(np.sqrt((dfcoord[\'Latitude\'] - dfCSL[\'Latitude\'][i]) ** 2 + \n                               (dfcoord[\'Longitude\'] - dfCSL[\'Longitude\'][i]) ** 2))\n    grid_tot = tot[:, int(dfcoord.loc[staidx][\'nj\']), int(dfcoord.loc[staidx][\'ni\'])]\n    print(f\'{dfCSL["Name"][i]} precip for 2002: {grid_tot[0]:.0f} mm\')'