In [21]:
import xarray as xr
import pandas as pd
import numpy as np

In [22]:
# Seed for the synthetic data so that Restart Kernel -> Run All reproduces
# exactly the same `me_values` (and therefore the same downstream results).
SEED = 42
rng = np.random.default_rng(SEED)

# Coordinate labels for the toy panel.
years = np.array([2020, 2021])        # 2 years
months = np.array([1, 2, 3])          # 3 months
days = np.array([1, 2, 3, 4])         # 4 days

# Simulate unique permnos that map onto non-unique permcos: each permco
# (company) can have multiple permnos (stocks). The two lists are positionally
# aligned, i.e. permnos 1 and 2 belong to permco 1, permnos 3 and 4 to permco 2.
permnos       = [1, 2, 3, 4]
permco_values = [1, 1, 2, 2]

# Axis lengths, in the order the data array is laid out.
Y = years.shape[0]
M = months.shape[0]
D = days.shape[0]
A = len(permnos)

# Random integer "market equity" values in [0, Y*M*D*A), one per
# (year, month, day, permno) cell. Drawn from the seeded generator above
# instead of the unseeded global `np.random.randint`, which produced
# different data on every run.
me_values = rng.integers(low=0, high=(Y * M * D * A), size=(Y, M, D, A))

In [23]:
# Sanity check: the array was drawn with size=(Y, M, D, A), so the shape shown
# here should have one axis each for year, month, day and permno.
me_values.shape

(2, 3, 4, 4)

In [24]:
# Wrap the simulated values in a labelled Dataset with a single variable "me".
# "permco" is attached as an extra (non-dimension) coordinate along "permno",
# so every permno label carries the company it belongs to.
me_dims = ("year", "month", "day", "permno")

dataset_coords = {
    "year": years,
    "month": months,
    "day": days,
    "permno": permnos,
    "permco": ("permno", permco_values),
}

dataset_attrs = {"my_custom_dataset_attr": "structure is original."}

ds = xr.Dataset(
    data_vars={"me": (me_dims, me_values)},
    coords=dataset_coords,
    attrs=dataset_attrs,
)

In [28]:
# Goal: within each permco (company) and for every (year, month, day), put the
# company's total `me` on the single permno with the largest `me`; all other
# permnos keep their own value.
me_by_permco = ds.me.groupby(ds.permco)

# Per-company aggregates; both have dims (year, month, day, permco).
summe = me_by_permco.sum(dim="permno")
maxme = me_by_permco.max(dim="permno")

# Label of the largest-`me` permno per company and date. `idxmax` reports only
# the first maximum along "permno", so exactly one permno is chosen even when
# several permnos of a company tie. The earlier test `ds.me == maxme_broadcast`
# flagged every tied permno and wrote the company total onto each of them,
# double counting it.
max_permno = me_by_permco.map(
    lambda group: group.idxmax(dim="permno").rename("max_permno")
)

# Vectorised label lookup: indexing the permco axis with the per-permno permco
# coordinate expands each per-company result back onto the permno dimension.
maxme_broadcast = maxme.sel(permco=ds.permco)  # not used below; kept defined in case later cells reference it
summe_broadcast = summe.sel(permco=ds.permco)
max_permno_broadcast = max_permno.sel(permco=ds.permco)

# True only for the one selected permno of each company on each date.
is_max_permno = max_permno_broadcast == ds.permno

# Selected permno gets the company total, everything else is left as is.
# The transpose pins the dimension order to that of the original variable.
new_me = xr.where(is_max_permno, summe_broadcast, ds.me).transpose(*ds.me.dims)

# Write the result into a copy so the original `ds` stays untouched.
ds_updated = ds.copy()
ds_updated["me"] = new_me



In [32]:
# Display `ds`. The aggregated values were assigned to the copy `ds_updated`,
# so this shows the dataset as originally constructed.
ds