# Multi-dimensional data access with Xarray

![Xarray Dataset diagram](http://xarray.pydata.org/en/stable/_images/dataset-diagram.png)

Henrik Andersson, 2022-05-19

In [None]:
import xarray
# Keep the data and attribute sections collapsed in xarray's rich display
# so the notebook output stays compact.
xarray.set_options(display_expand_data=False, display_expand_attrs=False)

In [None]:
# Open the dataset stored in "data/gefs.nc"; ending the cell with the bare
# name makes the notebook display its summary.
ds = xarray.open_dataset("data/gefs.nc")
ds

In [None]:
# Pull the `prmslmsl` variable out of the dataset and display it.
# NOTE(review): presumably pressure reduced to mean sea level - confirm
# against the variable's attributes.
prmsl = ds["prmslmsl"]
prmsl

In [None]:
# Drop down to a plain NumPy array: the values are kept, but the axes are
# from here on identified by position only.
X = ds["tmp2m"].to_numpy()

In [None]:
# Size of each axis of the NumPy array.
X.shape

In [None]:
# Axis order of X:
#   0 = ens
#   1 = time
#   2 = lat
#   3 = lon
# Average over the ensemble by collapsing axis 0.
X_mean = X.mean(axis=0) # axis == ens

In [None]:
# The ensemble axis is gone, so the remaining axes have shifted down by one:
#   0 = time
#   1 = lat
#   2 = lon
# Collapsing axis 0 now averages over time.
X2_mean = X_mean.mean(axis=0)

In [None]:
import matplotlib.pyplot as plt

# Draw the averaged array through the explicit Axes/Figure interface.
# No coordinates are supplied, so both plot axes are in array-index units.
ax = plt.gca()
mesh = ax.pcolormesh(X2_mean)
ax.set_title("Time averaged - ensemble average")
cbar = ax.figure.colorbar(mesh, ax=ax)
cbar.set_label("2m temperature (K)")

In [None]:
# Same field, this time passing ds.lon and ds.lat as the x/y coordinates so
# the plot axes can be labelled as longitude and latitude.
ax = plt.gca()
mesh = ax.pcolormesh(ds.lon, ds.lat, X2_mean, shading="auto")
ax.set_title("Time averaged - ensemble average")
cbar = ax.figure.colorbar(mesh, ax=ax)
cbar.set_label("2m temperature (K)")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")

In [None]:
# Average over "ens", then over "time", addressing the dimensions by name
# instead of by axis position, and plot the result with xarray.
(
    ds["tmp2m"]
    .mean(dim="ens")
    .mean(dim="time")
    .plot()
)

In [None]:
# Select one time stamp and the entry labelled ens=1, then plot that field.
ds["tmp2m"].sel(time="2022-05-22 12:00", ens=1).plot()

In [None]:
# Keep the single-time, ens=1 selection of tmp2m for further calculations.
example = ds["tmp2m"].sel(time="2022-05-22 12:00", ens=1)

In [None]:
# Deviation of each value from the mean taken over all of `example`.
anomaly = example - example.mean()

In [None]:
# Plot the deviation-from-mean field.
anomaly.plot()

In [None]:
# Faceted plot for a single time stamp: one panel per "ens" entry,
# wrapping to a new row after every five panels.
ds["tmp2m"].sel(time="2022-05-31 12:00").plot(
    x="lon",
    y="lat",
    col="ens",
    col_wrap=5,
)

In [None]:
# Interpolate the field to a single point: lat 55.67, lon 12.56 (Copenhagen).
prmsl_cph = prmsl.interp(lat=55.67, lon=12.56)

In [None]:
# Plot the interpolated series for the entry labelled ens=1.
prmsl_cph.sel(ens=1).plot()

In [None]:
# Line plot against time with the legend turned off; the trailing
# semicolon keeps the notebook from echoing the returned objects.
prmsl_cph.plot.line(x='time', add_legend=False);

In [None]:
# 0.1 / 0.5 / 0.9 quantiles taken across "ens", drawn as lines over time.
(prmsl_cph
   .quantile(q=[0.1, 0.5, 0.9], dim="ens")
   .plot.line(x="time"));

In [None]:
# The whole analysis as one chain: pick the variable, interpolate to a
# point, take quantiles across "ens", and plot the result against time.
(
    ds["prmslmsl"]
    .interp(lat=55.67, lon=12.56)  # Copenhagen
    .quantile(q=[0.1, 0.5, 0.9], dim="ens")
    .plot.line(x="time")
);

In [None]:
# Minimum across "ens", converted to a DataFrame for tabular display.
prmsl_cph.min(dim="ens").to_dataframe()

In [None]:
# Write the across-"ens" minimum to "pressure_cph.csv" via a DataFrame.
prmsl_cph.min(dim="ens").to_dataframe().to_csv("pressure_cph.csv")

In [None]:
# Average over the "time" dimension and display the result.
prmsl.mean(dim="time")

In [None]:
# Save the time-averaged field to its own NetCDF file.
filename = "time_averaged_ensemble_mslp.nc"
prmsl.mean(dim="time").to_netcdf(filename)

In [None]:
# Store the derived fields as new variables on the dataset.
# The first key follows the source variable name `prmslmsl`
# (it was previously misspelt as "prmslmls_avg").
ds["prmslmsl_avg"] = prmsl.mean(dim="time")
ds["prmsl_cph"] = prmsl_cph

In [None]:
# Display the dataset summary again to inspect its current variables.
ds

In [None]:
# Write the whole dataset to a single NetCDF file.
ds.to_netcdf("all_my_data_in_one_file.nc")