# Make ERA5 Projection Data
Collect ERA5 data and write one projection summary CSV file per region (the spatially averaged time series of each feature).

In [None]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# are reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
import os
from glob import glob
import json
import geopandas as gpd
import pandas as pd
import numpy as np
import logging
from concurrent.futures import ProcessPoolExecutor
import matplotlib.pyplot as plt
import xarray as xr

from rex import Resource, MultiYearNSRDB, init_logger
from region_classifier import RegionClassifier

from sup3r.preprocessing.data_handling.base import DataHandler
from sup3r.preprocessing.data_handling import DataHandlerNCforERA, DataHandlerNCforCC
from sup3r.preprocessing.data_handling import DataHandlerNCforCCwithPowerLaw
from sup3r.bias.bias_calc import SkillAssessment

from make_projection_summaries_cmip import get_fps, get_countries_shape, get_states_shape, get_eez_shape, get_targets_shapes, make_summary_files, FEATURES, MODELS, TAGS, REGIONS

# Override the class-level CHUNKS attribute of DataHandlerNCforCC, setting
# the 'time', 'lat' and 'lon' entries to None.
# NOTE(review): the handler instantiated later in this notebook is
# DataHandlerNCforERA, not DataHandlerNCforCC -- confirm that this override
# actually affects the ERA handler, otherwise it has no effect here.
DataHandlerNCforCC.CHUNKS = {'time': None, 'lat': None, 'lon': None}

In [None]:
# Logger used by the cells below to report each file that is written.
logger = logging.getLogger(__name__)
# Initialise logging for this notebook, for the helper module that provides
# the region shapes/definitions, and for sup3r (the latter at DEBUG level).
init_logger(__name__)
init_logger('make_projection_summaries_cmip')
init_logger('sup3r', log_level='DEBUG')

In [None]:
# Available run configurations, keyed by a short name. Each entry holds
# (output CSV template, feature names, yearly input files). The 'conus_' part
# of the output template is replaced by the region name when files are written.
_RUN_CONFIGS = {
    'trh': (
        './projections/conus_era5_trh.csv',
        ['temperature_2m', 'relativehumidity_2m'],
        [f"/projects/alcaps/era5_surface/era5_surface_{y}.nc"
         for y in range(1980, 2020)],
    ),
    'windspeed_100m': (
        './projections/conus_era5_windspeed_100m.csv',
        ['windspeed_100m'],
        [f"/projects/alcaps/era5/conus/yearly/era5_uv_{y}_combined_all_interp.nc"
         for y in range(2000, 2020)],
    ),
}

# The 100 m wind speed configuration is the one in effect; switch the key
# below to process temperature / relative humidity instead.
fp_out, features, fps = _RUN_CONFIGS['windspeed_100m']

In [None]:
# Build the ERA5 data handler from the selected input files and features.
dh = DataHandlerNCforERA(fps, features)
# Bare expression so the notebook displays the handler.
dh

In [None]:
# Tag every row of the handler meta data with the country, state and EEZ
# polygon it falls in, by chaining three RegionClassifier passes.
countries, countries_col = get_countries_shape()
states, states_col = get_states_shape()
eez, eez_col = get_eez_shape()
meta = RegionClassifier(dh.meta, countries, countries_col).classify()
meta = RegionClassifier(meta, states, states_col).classify()
meta = RegionClassifier(meta, eez, eez_col).classify()

# Offshore points: no country label but an EEZ label.
# NOTE(review): '-999' is presumably the label RegionClassifier assigns to
# points outside every polygon -- confirm against RegionClassifier.
offshore = (meta[countries_col] == '-999') & (meta[eez_col] != '-999')
lon = meta['longitude']

# Split the offshore points into basins by longitude (degrees east, so more
# negative is further west). The Pacific lies west of -105 and the Atlantic
# east of -81; these two labels were previously swapped. Half-open intervals
# make sure a point sitting exactly on -105 or -81 still lands in exactly
# one basin instead of being dropped from all three.
meta['atlantic'] = offshore & (lon >= -81)
meta['gulf'] = offshore & (lon >= -105) & (lon < -81)
meta['pacific'] = offshore & (lon < -105)

In [None]:
meta

In [None]:
def _regional_mean(arr, iloc, feature_names, time_index):
    """Return the spatial mean time series of every feature for one region.

    Parameters
    ----------
    arr : np.ndarray
        Data indexed as (site, time, feature).
    iloc : np.ndarray
        Integer positions along the site axis that belong to the region.
    feature_names : list
        Column name for each entry along the feature axis of ``arr``.
    time_index : index-like
        Index of the returned frame; must match the time axis of ``arr``.

    Returns
    -------
    pd.DataFrame
        One column per feature holding the mean over the selected sites.
    """
    means = {dset: arr[iloc, :, idf].mean(0)
             for idf, dset in enumerate(feature_names)}
    return pd.DataFrame(means, index=time_index)


# Regional summary frames keyed by region name (reused by the plot cell).
all_df = {}

# Collapse the two leading axes of the handler data into a single site axis
# so that row positions taken from ``meta`` can index it directly.
# NOTE(review): assumes the rows of ``meta`` are ordered like the flattened
# leading axes of dh.data -- confirm against the data handler.
arr = dh.data.reshape((dh.data.shape[0] * dh.data.shape[1],
                       dh.data.shape[2], dh.data.shape[3]))

# One boolean mask per region: first the state groups defined in REGIONS,
# then the three offshore basins flagged on ``meta``.
# NOTE(review): 'NAME' is presumably the states column (states_col) --
# confirm.
region_masks = [(rname, meta['NAME'].isin(members))
                for rname, members in REGIONS.items()]
region_masks += [(rname, meta[rname])
                 for rname in ('atlantic', 'gulf', 'pacific')]

for rname, mask in region_masks:
    iloc = np.where(mask)[0]
    if iloc.size == 0:
        # The summary is still produced so that later lookups in all_df
        # keep working, but the problem is made visible.
        logger.warning('No grid points found for region "%s"; its summary '
                       'will contain NaN.', rname)

    df = _regional_mean(arr, iloc, dh.features, dh.time_index)
    all_df[rname] = df

    # Swap the 'conus_' placeholder in the output template for the region.
    r_fp_out = fp_out.replace('conus_', rname.lower().replace(' ', '_') + '_')
    out_dir = os.path.dirname(r_fp_out)
    if out_dir:
        # Avoid failing on a missing output directory after the data load.
        os.makedirs(out_dir, exist_ok=True)
    df.to_csv(r_fp_out)
    logger.info('Finished writing: %s', r_fp_out)

In [None]:
# Plot the first feature for every region in REGIONS over the last 500 time
# steps as a quick visual check of the regional summaries.
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
tslice = slice(-500, None)
for rname in REGIONS:
    # Positional slice of the time series; does not rely on the index
    # labels being unique.
    series = all_df[rname][features[0]].iloc[tslice]
    ax.plot(series, label=rname)

ax.legend()
plt.show()
plt.close()