In [4]:
import xarray as xr
import pandas as pd

In [2]:
def to_anomaly(data, rolling=11):
    """
    Convert a dataset into anomalies relative to a per-calendar-month rolling mean.

    The input is grouped by calendar month (``time.month``). Within each month
    group, and separately for every value of ``eco_regions``, a time-based
    rolling mean is computed over a window of ``rolling * 365.25`` days. The
    window uses ``min_periods=1``, so the earliest samples are averaged over
    however many observations the window already contains. The rolling mean is
    subtracted from that month's data, and the per-month anomalies are
    concatenated along ``time`` and sorted by ``time``.

    :param data: xarray object that supports ``groupby('time.month')`` and
        ``to_dataframe()``, and whose dataframe form carries an ``eco_regions``
        index level (assumed to be indexed by ``time`` and ``eco_regions`` --
        confirm against the caller).
    :param rolling: Length of the rolling window, in units of 365.25 days.
    :return: The anomalies (data minus rolling mean) for all months, sorted by
        ``time``.
    """
    # The window is loop-invariant, so build the pandas offset string once.
    window = str(rolling * 365.25) + 'D'

    anomaly_per_month = {}
    for month, month_data in data.groupby('time.month'):
        month_df = month_data.to_dataframe()

        rolling_mean_per_region = {}
        for region, region_df in month_df.groupby('eco_regions'):
            # Remove the region level so only the remaining index is left for
            # the offset-based rolling window to operate on.
            region_df = region_df.reset_index(level='eco_regions').drop(['eco_regions'], axis=1)
            rolling_mean_per_region[region] = region_df.rolling(window, min_periods=1).mean()

        # Stack the per-region means back together along a new 'eco_regions'
        # dimension labelled with the region keys.
        monthly_mean_ds = xr.concat(
            [region_mean.to_xarray() for region_mean in rolling_mean_per_region.values()],
            dim=pd.Index(data=list(rolling_mean_per_region.keys()), name="eco_regions"),
        )
        anomaly_per_month[month] = month_data - monthly_mean_ds

    monthly_anomaly = xr.concat(list(anomaly_per_month.values()), dim='time').sortby('time')
    return monthly_anomaly

In [5]:
# Variables that are carried over unchanged; everything else in the file is
# treated as a candidate for the anomaly transform.
passthrough_vars = ['regions', 'grid_cell_weight', 'grid_cell_area', 'TER', 'monthly_flux', 'smoothed_flux', 'opt_flux',
                    'prior_flux_per_s', 'scaling_factor', 'sf_per_eco', 'monthly_sf', 'smoothed_sf', 'eco_area']

with xr.open_dataset('../data_files/vars_per_eco_update.nc') as ds:
    # open_dataset reads lazily; pull the values into memory while the file is
    # still open so both datasets stay usable after the context manager closes it.
    pred_dat = ds.drop_vars(passthrough_vars).load()
    remaining_vars = ds[passthrough_vars].load()

print(pred_dat.data_vars)

# Only the first 37 data variables of pred_dat are passed to to_anomaly.
anomaly_data = to_anomaly(pred_dat[list(pred_dat.data_vars)[:37]])

print(anomaly_data)

Data variables:
    g10m_MAX   (eco_regions, time) float64 ...
    swvl1_MIN  (eco_regions, time) float64 ...
    swvl1_MAX  (eco_regions, time) float64 ...
    slhf_MIN   (eco_regions, time) float64 ...
    slhf_MAX   (eco_regions, time) float64 ...
    slhf_AVG   (eco_regions, time) float64 ...
    sshf_MIN   (eco_regions, time) float64 ...
    sshf_MAX   (eco_regions, time) float64 ...
    sshf_AVG   (eco_regions, time) float64 ...
    ssrd_MIN   (eco_regions, time) float64 ...
    ssrd_MAX   (eco_regions, time) float64 ...
    ssrd_AVG   (eco_regions, time) float64 ...
    u10m_AVG   (eco_regions, time) float64 ...
    v10m_AVG   (eco_regions, time) float64 ...
    blh_MAX    (eco_regions, time) float64 ...
    d2m_MIN    (eco_regions, time) float64 ...
    d2m_MAX    (eco_regions, time) float64 ...
    d2m_AVG    (eco_regions, time) float64 ...
    lsp_SUM    (eco_regions, time) float64 ...
    skt_MIN    (eco_regions, time) float64 ...
    skt_MAX    (eco_regions, time) float64 .

In [None]:
# Combine the untouched variables with the anomaly-transformed ones into a
# single dataset.
new_var_ds = xr.merge([remaining_vars, anomaly_data])

file_name= '../data_files/vars_per_eco_monthly_anomaly.nc'
# Pass the path itself rather than an already-opened Python file object:
# xarray documents file-like targets as supported only by a restricted set of
# backends, whereas a path lets it pick its default netCDF engine and manage
# the file handle on its own.
new_var_ds.to_netcdf(file_name)