In [1]:
import dcachefs
import gcsfs
import getpass
import os
import tqdm

# Copying tiles from GCS to dCache storage

In order to access a Google Cloud Storage (GCS) bucket via [GCSFS](https://gcsfs.readthedocs.io) ([Filesystem Spec](https://filesystem-spec.readthedocs.io) compatible file system module for GCS), one needs to set up authentication credentials:
* Make sure to have sufficient "Bucket" and "Object" permissions (being "Bucket Owner" is not sufficient to download the data); 
* Download and uncompress the [*Google Cloud SDK*](https://cloud.google.com/sdk/docs/quickstart) tarball;
* Run `./google-cloud-sdk/bin/gcloud init --no-launch-browser` and, when prompted, provide the authentication credentials and the default project (a new project can be created if needed);
* Install `gcsfs` using `pip`:
```shell
pip install gcsfs
```

In [2]:
# Ask for the Google account name without echoing it. The credentials path
# built from this value appends "@gmail.com", so only the part before the "@"
# is expected here. An explicit prompt replaces getpass's default "Password: ",
# which would wrongly suggest that a password is being requested.
google_account = getpass.getpass("Google account name (without @gmail.com): ")

 ·················


In [46]:
# read authentication credentials created by `gcloud`
# The path is resolved relative to the current user's home directory rather
# than a hard-coded absolute path, so the cell works for any user.
# NOTE(review): assumes the default gcloud config location on Linux/macOS
# (~/.config/gcloud) -- confirm if CLOUDSDK_CONFIG is customised.
gcloud_credentials = os.path.join(
    os.path.expanduser("~"),
    ".config",
    "gcloud",
    "legacy_credentials",
    f"{google_account}@gmail.com",
    "adc.json",
)
gcs_fs = gcsfs.GCSFileSystem(token=gcloud_credentials)

In [4]:
# list every composite tile object in the export bucket that matches the pattern
tiles = gcs_fs.glob("gs://ee-data_export/S2_composite_2019-11-1_2020-3-1_tile_*.tif")
# preview the first ten matches
tiles[0:10]

['ee-data_export/S2_composite_2019-11-1_2020-3-1_tile_0.tif',
 'ee-data_export/S2_composite_2019-11-1_2020-3-1_tile_1.tif',
 'ee-data_export/S2_composite_2019-11-1_2020-3-1_tile_10.tif',
 'ee-data_export/S2_composite_2019-11-1_2020-3-1_tile_100.tif',
 'ee-data_export/S2_composite_2019-11-1_2020-3-1_tile_101.tif',
 'ee-data_export/S2_composite_2019-11-1_2020-3-1_tile_102.tif',
 'ee-data_export/S2_composite_2019-11-1_2020-3-1_tile_103.tif',
 'ee-data_export/S2_composite_2019-11-1_2020-3-1_tile_104.tif',
 'ee-data_export/S2_composite_2019-11-1_2020-3-1_tile_105.tif',
 'ee-data_export/S2_composite_2019-11-1_2020-3-1_tile_106.tif']

In [5]:
# number of object paths returned by the glob on the bucket
len(tiles)

341

In [6]:
# set up the dCache file system object; the credentials are requested
# interactively (username first, then password) so they are not stored
# in the notebook
dcache_fs = dcachefs.dCacheFileSystem(
    api_url="https://dcacheview.grid.surfsara.nl:22880/api/v1/",
    webdav_url="https://webdav.grid.surfsara.nl:2880",
    username=getpass.getpass("username"),
    password=getpass.getpass("password"),
)

username ·················
password ················


In [45]:
# Copy every tile from the GCS bucket to dCache, skipping tiles that are
# already present at the destination with the same size. This makes the cell
# safe to re-run after an interruption: only missing/incomplete tiles are
# transferred again.

# destination directory on dCache (identical for every tile)
destination_dir = "/pnfs/grid.sara.nl/data/iceshelf/disk/S2_composite_2019-11-1_2020-3-1"

for tile in tqdm.tqdm(tiles):

    # os.path.basename avoids unpacking into `_`, which at notebook top level
    # would shadow IPython's "last output" variable
    filename = os.path.basename(tile)

    source_uri = f"gs://{tile}"
    destination_uri = f"{destination_dir}/{filename}"

    # a tile is complete when it exists at the destination with the same size
    # as the source; sizes are only queried when the destination exists
    if (
        dcache_fs.exists(destination_uri)
        and gcs_fs.size(source_uri) == dcache_fs.size(destination_uri)
    ):
        continue

    # download missing/incomplete tiles
    # block_size=0 opens the destination file in stream mode; the open source
    # file object is handed to write() as-is.
    # NOTE(review): timeout=900 is presumably in seconds -- confirm against dcachefs.
    with gcs_fs.open(source_uri) as f_read, dcache_fs.open(
        destination_uri, "wb", block_size=0, timeout=900
    ) as f:
        f.write(f_read)

100%|██████████| 341/341 [5:51:21<00:00, 61.82s/it]   
