# Make Daymet Projection Data
Collect Daymet precipitation (`pr`) data and write one site-averaged projection CSV per region, plus a CONUS-wide aggregate.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
from glob import glob
import json
import geopandas as gpd
import pandas as pd
import numpy as np
import logging
from concurrent.futures import ProcessPoolExecutor
import matplotlib.pyplot as plt
import xarray as xr

from rex import Resource, MultiYearResource, init_logger
from region_classifier import RegionClassifier

from sup3r.preprocessing.data_handling.base import DataHandler
from sup3r.preprocessing.data_handling import DataHandlerNCforCC
from sup3r.preprocessing.data_handling import DataHandlerNCforCCwithPowerLaw
from sup3r.bias.bias_calc import SkillAssessment

from make_projection_summaries_cmip import get_fps, get_states_shape, get_targets_shapes, make_summary_files, FEATURES, MODELS, TAGS, REGIONS

# Override the class-level CHUNKS attribute of DataHandlerNCforCC so that no
# explicit chunk size is set for the time/lat/lon dimensions.
# NOTE(review): presumably this controls how the handler chunks NetCDF reads —
# confirm against sup3r. No data handler is instantiated in the visible cells.
DataHandlerNCforCC.CHUNKS = {'time': None, 'lat': None, 'lon': None}

In [None]:
# Notebook-level logger; messages from the cells below go through it.
logger = logging.getLogger(__name__)

# Initialise default-level loggers for this notebook and for the projection
# summary helpers.
# NOTE(review): the helper module imported above is
# `make_projection_summaries_cmip`, while the logger initialised here is
# `make_projection_summaries` — confirm this is the name the helper logs under.
for _logger_name in (__name__, 'make_projection_summaries'):
    init_logger(_logger_name)

# sup3r is initialised at DEBUG level.
init_logger('sup3r', log_level='DEBUG')

In [None]:
# One Daymet V4 precipitation file per year, 1980 through 2019 inclusive
# (range() excludes the 2020 stop value).
fps = [
    f"/projects/alcaps/daymet/DaymetV4_VIC4_pr_{year}.h5"
    for year in range(1980, 2020)
]

In [None]:
# Notebook-level handle on the multi-year Daymet files. Later cells read
# `res.meta` (site metadata) and `res.time_index` (CSV index) from it.
# NOTE(review): this handle is never explicitly closed in the visible cells.
res = MultiYearResource(fps)

In [None]:
# Classify the Daymet site metadata against the states shapes, then expose the
# classifier's 'NAME' column under the name 'state', which the region masks in
# the cells below rely on.
states, states_col = get_states_shape()
meta = (
    RegionClassifier(res.meta, states, states_col)
    .classify()
    .rename(columns={'NAME': 'state'})
)

# Bare expression so the notebook renders the classified table.
meta

In [None]:
def read(i_split_gids):
    """Sum the Daymet ``pr`` dataset over a subset of sites.

    A fresh ``MultiYearResource`` is opened on the notebook-level ``fps`` for
    every call and closed again before returning, so the function does not
    depend on any already-open file handle.

    Parameters
    ----------
    i_split_gids : array-like of int
        Indices used to select along the last axis of the ``pr`` dataset
        (all entries along the preceding axis are read).

    Returns
    -------
    Result of ``.sum(axis=1)`` on the selected data. The caller uses it as a
    column indexed by the resource time index, i.e. one total per time step
    (assumes ``pr`` is laid out as (time, site) — TODO confirm).
    """
    with MultiYearResource(fps) as handle:
        return handle['pr', :, i_split_gids].sum(axis=1)

In [None]:
# Build one site-averaged precipitation series per region and cache it as
# ./projections/<region>_daymet_pr.csv. A region whose CSV already exists is
# skipped, so this cell is cheap to re-run. 'conus' is not computed here.
for region, rstates in REGIONS.items():
    region = region.lower().replace(' ', '_')
    if region == 'conus':
        continue

    # Every state listed for the region must be present in the classified
    # meta; otherwise the regional mean would silently miss part of the area.
    mask = meta['state'].isin(rstates)
    missing = sorted(set(rstates) - set(meta.loc[mask, 'state'].unique()))
    if missing:
        raise ValueError(
            f'Region "{region}" has states with no Daymet sites: {missing}'
        )

    rgids = np.where(mask)[0]
    if rgids.size == 0:
        # Nothing to average; avoids a divide-by-zero further down.
        logger.warning('No Daymet sites found for %s, skipping', region)
        continue

    fp_out = f'./projections/{region}_daymet_pr.csv'
    if os.path.exists(fp_out):
        continue

    logger.info('Working on %s', region)
    os.makedirs(os.path.dirname(fp_out), exist_ok=True)

    # At most 100 read tasks, but never more tasks than sites so that no
    # worker is handed an empty index array.
    split_gids = np.array_split(rgids, min(100, rgids.size))

    out = 0
    with ProcessPoolExecutor() as exe:
        futures = [exe.submit(read, i_split_gids)
                   for i_split_gids in split_gids]

        # Collect in submission order; each result is the per-time-step sum
        # over that chunk's sites.
        for i, future in enumerate(futures):
            out = out + future.result()
            logger.info('Finished future %d out of %d', i + 1, len(futures))

    # Sum over all sites -> mean over all sites. Not done in place so that an
    # integer-typed accumulator cannot make the true division fail.
    out = out / len(rgids)

    df = pd.DataFrame({'DAYMET': out}, index=res.time_index)
    df.to_csv(fp_out)
    logger.info('Saved: %s', fp_out)


In [None]:
# Recombine the cached per-region means into a CONUS-wide mean. Each regional
# series is weighted by its number of sites, so the result equals the mean
# over all sites of all regions. The unweighted regional frames are kept in
# `idfs` for later plotting.
df_conus = None
idfs = {}
n_total = 0
for region, rstates in REGIONS.items():
    region = region.lower().replace(' ', '_')
    if region == 'conus':
        continue

    # Same consistency check as when the regional CSVs were written: every
    # listed state must have sites, otherwise the weights would be wrong.
    mask = meta['state'].isin(rstates)
    missing = sorted(set(rstates) - set(meta.loc[mask, 'state'].unique()))
    if missing:
        raise ValueError(
            f'Region "{region}" has states with no Daymet sites: {missing}'
        )
    rgids = np.where(mask)[0]

    fp_out = f'./projections/{region}_daymet_pr.csv'

    idf = pd.read_csv(fp_out, index_col=0)
    idf.index = pd.to_datetime(idf.index)
    idfs[region] = idf

    # Mean * site count = regional sum. This creates a new frame, so the
    # copy stored in `idfs` stays unscaled.
    weighted = idf * len(rgids)
    n_total += len(rgids)

    df_conus = weighted if df_conus is None else df_conus + weighted

if df_conus is None:
    raise RuntimeError('No regional Daymet projection files were loaded; '
                       'cannot build the CONUS aggregate.')

df_conus = df_conus / n_total

In [None]:
# Write the CONUS-wide series next to the per-region CSVs, then show it.
conus_fp = './projections/conus_daymet_pr.csv'
df_conus.to_csv(conus_fp)
df_conus

In [None]:
# Compare the 365-sample rolling mean of the CONUS series with that of the
# 'south' region on a single axis.
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
for frame in (df_conus, idfs['south']):
    frame.rolling(365).mean().plot(ax=ax)

# Both frames carry a single column with the same name, so the default legend
# would show two identical entries; label the curves explicitly in plot order.
ax.legend(ax.get_lines(), ['conus', 'south'])