In [1]:
import xarray as xr 
import numpy as np 
import pandas as pd

In [2]:
times = pd.DatetimeIndex(start='2000-01-01', end='2005-12-31', freq='1M')

In [3]:
times

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-30',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-30', '2000-10-31', '2000-11-30', '2000-12-31',
               '2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30',
               '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31',
               '2001-09-30', '2001-10-31', '2001-11-30', '2001-12-31',
               '2002-01-31', '2002-02-28', '2002-03-31', '2002-04-30',
               '2002-05-31', '2002-06-30', '2002-07-31', '2002-08-31',
               '2002-09-30', '2002-10-31', '2002-11-30', '2002-12-31',
               '2003-01-31', '2003-02-28', '2003-03-31', '2003-04-30',
               '2003-05-31', '2003-06-30', '2003-07-31', '2003-08-31',
               '2003-09-30', '2003-10-31', '2003-11-30', '2003-12-31',
               '2004-01-31', '2004-02-29', '2004-03-31', '2004-04-30',
               '2004-05-31', '2004-06-30', '2004-07-31', '2004-08-31',
      

In [4]:
len(times)

72

In [5]:
temp = xr.DataArray(np.random.randn(72, 180, 360), coords={'time': times, 'lat': np.arange(180)-90, 'lon': np.arange(360)-180}, dims=('time', 'lat', 'lon'), name='T')
temp

<xarray.DataArray 'T' (time: 72, lat: 180, lon: 360)>
array([[[-0.336891, -2.530147, ..., -0.08898 ,  2.33301 ],
        [-2.013169, -0.314178, ...,  1.14589 , -0.54565 ],
        ...,
        [ 0.50125 , -0.838776, ...,  1.717061, -0.379004],
        [ 0.840366,  0.201914, ..., -0.836495, -0.079808]],

       [[-1.123021, -0.5934  , ...,  0.455241, -1.142908],
        [-1.133499, -0.165356, ...,  0.178867,  1.570098],
        ...,
        [-0.580287, -0.217049, ..., -1.367889,  0.348764],
        [-1.098016,  1.74528 , ...,  0.810879,  1.187659]],

       ...,

       [[ 0.153952, -0.566678, ..., -0.205883, -0.063712],
        [ 0.249094,  0.493843, ...,  0.749748,  0.332812],
        ...,
        [ 0.105701,  0.142899, ...,  0.880594, -0.515978],
        [-1.055244,  0.180521, ...,  0.04043 , -0.820333]],

       [[-0.39891 ,  0.335855, ..., -1.215697, -1.016974],
        [-0.772851,  0.233387, ..., -0.351151, -0.969042],
        ...,
        [ 1.239201,  2.056613, ..., -0.327199, -1

In [6]:
sst = xr.DataArray(np.random.randn(72, 180, 360), coords={'time': times, 'lat': np.arange(180)-90, 'lon': np.arange(360)-180}, dims=('time', 'lat', 'lon'), name='SST')

In [7]:
xr.Dataset({'T': temp, 'SST': sst})

<xarray.Dataset>
Dimensions:  (lat: 180, lon: 360, time: 72)
Coordinates:
  * time     (time) datetime64[ns] 2000-01-31 2000-02-29 ... 2005-12-31
  * lat      (lat) int64 -90 -89 -88 -87 -86 -85 -84 ... 83 84 85 86 87 88 89
  * lon      (lon) int64 -180 -179 -178 -177 -176 -175 ... 175 176 177 178 179
Data variables:
    T        (time, lat, lon) float64 -0.3369 -2.53 -0.8213 ... 0.06546 -0.4847
    SST      (time, lat, lon) float64 0.3463 -0.4228 0.4959 ... 1.073 -0.5717

In [8]:
!mkdir -p data

In [9]:
def create_data_array(time, lat, lon, name):
    data_array = xr.DataArray(np.random.randn(len([time]), len(lat), len(lon)), 
                      coords={'time': [time], 'lat': lat, 'lon': lon},
                      dims=('time', 'lat', 'lon'),
                      name=name)
    return data_array 

def generate_fake_data(time, suffix):
    lat = np.linspace(start=-90, stop=90, num=180, dtype='int')
    lon = np.linspace(start=-180, stop=180, num=360, dtype='int')
    sst = create_data_array(time, lat, lon, name='sst')
    prec = create_data_array(time, lat, lon, name='prec')
    pressure = create_data_array(time, lat, lon, name='pressure')
    meta = xr.DataArray(np.random.randn(len(lat), len(lon)), 
                        coords={'lat': lat, 'lon': lon}, 
                        dims=('lat', 'lon'),
                        name='meta_var')
    dset = xr.Dataset({'sst': sst, 'pressure': pressure, 'prec': prec, 'meta_var': meta})
    path = f'data/tslice{str(suffix)}.nc'
    # print(dset)
    dset.to_netcdf(path, engine='netcdf4', mode='w')

In [10]:
times = pd.DatetimeIndex(start='2000-01-01', freq='1M', periods=24)
times

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-30',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-30', '2000-10-31', '2000-11-30', '2000-12-31',
               '2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30',
               '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31',
               '2001-09-30', '2001-10-31', '2001-11-30', '2001-12-31'],
              dtype='datetime64[ns]', freq='M')

In [11]:
for index, time in enumerate(times):
    generate_fake_data(time, index)

In [12]:
!ncdump -h data/tslice0.nc

netcdf tslice0 {
dimensions:
	time = 1 ;
	lat = 180 ;
	lon = 360 ;
variables:
	int64 time(time) ;
		time:units = "days since 2000-01-31 00:00:00" ;
		time:calendar = "proleptic_gregorian" ;
	int64 lat(lat) ;
	int64 lon(lon) ;
	double sst(time, lat, lon) ;
		sst:_FillValue = NaN ;
	double pressure(time, lat, lon) ;
		pressure:_FillValue = NaN ;
	double prec(time, lat, lon) ;
		prec:_FillValue = NaN ;
	double meta_var(lat, lon) ;
		meta_var:_FillValue = NaN ;
}


In [13]:
!du -s -h data/ 

 48M	data/


In [14]:
xr.open_mfdataset('data/*.nc')

<xarray.Dataset>
Dimensions:   (lat: 180, lon: 360, time: 24)
Coordinates:
  * lat       (lat) int64 -90 -88 -87 -86 -85 -84 -83 ... 83 84 85 86 87 88 90
  * lon       (lon) int64 -180 -178 -177 -176 -175 -174 ... 175 176 177 178 180
  * time      (time) datetime64[ns] 2000-01-31 2000-02-29 ... 2000-10-31
Data variables:
    sst       (time, lat, lon) float64 dask.array<shape=(24, 180, 360), chunksize=(1, 180, 360)>
    pressure  (time, lat, lon) float64 dask.array<shape=(24, 180, 360), chunksize=(1, 180, 360)>
    prec      (time, lat, lon) float64 dask.array<shape=(24, 180, 360), chunksize=(1, 180, 360)>
    meta_var  (time, lat, lon) float64 dask.array<shape=(24, 180, 360), chunksize=(1, 180, 360)>

In [15]:
%load_ext version_information
%version_information dask, numpy, xarray, netcdf4

Software,Version
Python,3.6.6 64bit [GCC 4.2.1 Compatible Apple LLVM 6.1.0 (clang-602.0.53)]
IPython,7.0.1
OS,Darwin 17.7.0 x86_64 i386 64bit
dask,0.19.4
numpy,1.15.1
xarray,0.10.9
netcdf4,1.4.1
Thu Oct 18 11:43:02 2018 MDT,Thu Oct 18 11:43:02 2018 MDT
