In [1]:
import os
import xarray as xr
import time
import pandas as pd
import numpy as np
from google.cloud import storage
import netCDF4

In [2]:
# Google Cloud Storage client used below to list the blobs in the bucket.
# No project or credentials are passed explicitly here.
client = storage.Client()

In [3]:
# Name of the bucket whose blobs are listed.
BUCKET = 'solar-pv-nowcasting-data'
# Blob-name prefix passed to `list_blobs` to select which objects are returned.
# NOTE(review): the trailing components look like year/month/day (2019-06-03) — confirm.
SATELLITE_DATA_PATH = 'satellite/EUMETSAT/SEVIRI_RSS/reprojected/just_UK/2019/06/03/'

In [4]:
# Materialise the listing into a list in one step so it can be counted here
# and iterated over more than once by the loader functions.
blobs = list(client.list_blobs(BUCKET, prefix=SATELLITE_DATA_PATH))
print(f'{len(blobs)} netcdf files found.')

18 netcdf files found.


In [5]:
def open_netcdf_from_gcp(blob):
    """Download a NetCDF blob and open it as an xarray Dataset held in memory.

    Adapted from
    https://github.com/pydata/xarray/issues/1075#issuecomment-373541528

    Parameters
    ----------
    blob
        Object exposing ``download_as_bytes()`` that returns the raw bytes of
        a NetCDF file (in this notebook, the items of ``blobs``).

    Returns
    -------
    xarray.Dataset
        Dataset backed by a ``netCDF4.Dataset`` built from the downloaded
        bytes; no file is written to disk.  The caller is responsible for
        closing it (``ds.close()`` or a ``with`` block).
    """
    netcdf_bytes = blob.download_as_bytes()
    # With `memory=` supplied, netCDF4 reads from the given buffer; the first
    # argument is then just a name for the dataset, not a path that is opened.
    nc4_ds = netCDF4.Dataset('EUMETSAT', memory=netcdf_bytes)
    store = xr.backends.NetCDF4DataStore(nc4_ds)
    # Do not pass `engine=`: the input is already a data store.  Forcing
    # engine='netcdf4' makes newer xarray releases hand the store to the
    # netCDF4 backend, which only accepts path-like inputs and raises.
    # Leaving the engine unset works on both old and new xarray.
    return xr.open_dataset(store)

In [6]:
def load_all():
    """Download every blob in ``blobs`` and load its entire dataset into memory.

    This is a timing workload: the loaded data is discarded and nothing is
    returned.
    """
    for blob in blobs:
        # The `with` block closes each dataset (and the in-memory netCDF4
        # handle behind it) as soon as it has been loaded, rather than
        # leaving one open handle per blob until garbage collection.
        with open_netcdf_from_gcp(blob) as ds:
            ds.load()

In [7]:
def load_rectange():
    """Download every blob in ``blobs`` and load a fixed sub-region of 'HRV'.

    For each blob the 'HRV' variable is sliced to ``[:, 128:256, 128:256]``
    and only that slice is loaded.  This is a timing workload: the loaded
    data is discarded and nothing is returned.

    The (misspelt) function name is kept as-is so existing callers still work.
    """
    for blob in blobs:
        # Close the parent dataset once the slice has been loaded so the
        # in-memory netCDF4 handle is released on every iteration.
        with open_netcdf_from_gcp(blob) as ds:
            hrv = ds['HRV']
            # Dimensions are time, x, y
            hrv_rect = hrv[:, 128:256, 128:256]
            hrv_rect.load()

In [8]:
# Number of times each loader function is timed.
NUM_RUNS = 3
# Column label -> zero-argument loader function to benchmark.
col_to_func = {'all': load_all, 'rectangle': load_rectange}
# One row per run, one column per loader, pre-filled with NaN.
# `np.nan` is used because the capitalised alias `np.NaN` was removed in
# NumPy 2.0; `np.nan` works on every NumPy version.
runtimes = pd.DataFrame(np.nan, columns=col_to_func.keys(), index=range(NUM_RUNS))

In [9]:
# Time every loader NUM_RUNS times and record the elapsed seconds in `runtimes`.
for run_i in range(NUM_RUNS):
    print(run_i)
    for col, func in col_to_func.items():
        print(col)
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be skewed by system clock adjustments.
        t0 = time.perf_counter()
        func()
        t1 = time.perf_counter()
        runtime = t1 - t0
        # Single-step .loc assignment writes into the DataFrame itself.
        # The chained form `runtimes[col][run_i] = ...` can write to a
        # temporary copy (SettingWithCopyWarning; no effect under pandas
        # Copy-on-Write).
        runtimes.loc[run_i, col] = runtime

0
all
rectangle
1
all
rectangle
2
all
rectangle


In [10]:
# Show the recorded runtimes in seconds (rows: run index, columns: loader).
runtimes

Unnamed: 0,all,rectangle
0,14.519056,7.030767
1,11.673636,6.619871
2,10.836033,6.894945
