In [4]:
# zarrtraj is imported only for its import-time side effects (presumably this
# is what makes format='ZARRTRAJ' known to MDAnalysis -- confirm). A plain
# import runs the same package initialisation as `from zarrtraj import *`
# without dumping unknown names into the notebook namespace.
import zarrtraj
from MDAnalysisTests.datafiles import PSF, DCD
import fsspec
import s3fs
import os
import time
import MDAnalysis as mda
import zarr

# Benchmark: write every frame of the PSF/DCD test trajectory into a local
# zarr group. mode='w' recreates the group, so the cell can be re-run safely.
z = zarr.open_group("hdd-test.zarrtraj", mode='w')

u = mda.Universe(PSF, DCD)
# perf_counter() is monotonic and high-resolution; time.time() can jump if the
# system clock is adjusted while the benchmark is running.
start = time.perf_counter()
with mda.Writer(z, u.trajectory.n_atoms, format='ZARRTRAJ') as w:
    for ts in u.trajectory:
        w.write(u.atoms)
stop = time.perf_counter()
print(f"Writing time is {stop-start} seconds")

  warn("ignoring keyword argument %r" % k)


Writing time is 0.38862156867980957 seconds


In [20]:
# TRY WRITING USING DASK- MUCH FASTER
#
# Copies the position array of the local "hdd-test.zarrtraj" store into an S3
# backed zarr group, letting dask move the chunks.
# Relies on `os`, `s3fs` and `zarr` having been imported by an earlier cell.

from zarr.storage import LRUStoreCache
import dask.array as da


store1 = zarr.DirectoryStore("hdd-test.zarrtraj")
local = zarr.open_group(store=store1, mode='r')
# Look the source array up once instead of repeating the four-level path.
src_positions = local['particles']['trajectory']['position']['value']

key = os.getenv('AWS_KEY')
secret = os.getenv('AWS_SECRET_KEY')
storage_options = {
    'key': key,
    'secret': secret
}
s3 = s3fs.S3FileSystem(key=key, secret=secret)
store2 = s3fs.S3Map(root='zarrtraj-test/s3-test-copying.zarrtraj', s3=s3, check=False)
r = zarr.open_group(store=store2, mode='a')
# The remote store is opened in append mode, so on a second run the groups
# already exist. require_group() returns the existing group instead of raising
# like create_group() does, which makes the cell re-runnable.
p = r.require_group('particles')
t = p.require_group('trajectory')
pos = t.require_group('position')
# Mirror the source dtype and chunking. Without them the target is created
# with zarr's default dtype (float64) and automatic chunks, which changes the
# on-disk size and forces dask to rechunk during the copy.
v = pos.require_dataset(
    'value',
    shape=src_positions.shape,
    dtype=src_positions.dtype,
    chunks=src_positions.chunks,
)

dask_array = da.from_zarr(src_positions)
dask_array.to_zarr(v, overwrite=True)


In [None]:
# Benchmark: iterate over every frame of a local zarrtraj store and touch one
# coordinate per frame so each frame is actually read.
z = zarr.open_group("ssd-test.zarrtraj", mode='r')

# The trajectory is the zarr group opened above. The previous version opened
# `z` but then built the Universe from the DCD file, so the measured "reading
# time" had nothing to do with the zarrtraj store.
u = mda.Universe(PSF, z)
start = time.perf_counter()
num = 0
for ts in u.trajectory:
    # No per-frame print here: console output would dominate the timing and
    # flood the cell output with one line per frame.
    num += ts[0][0]
stop = time.perf_counter()
print(f"Reading time is {stop-start} seconds")

In [None]:
import MDAnalysis as mda
import MDAnalysisData
import zarr
import zarrtraj

# Fetch the short YiiP equilibrium dataset and load it as a Universe.
yiip = MDAnalysisData.yiip_equilibrium.fetch_yiip_equilibrium_short()
# NOTE: change this to five before doing true benchmark test
u = mda.Universe(yiip.topology, yiip.trajectory)

# Destination group; mode='w' starts from an empty 'yiip.zarrtraj'.
out = zarr.open_group('yiip.zarrtraj', mode='w')

# Stream the trajectory frame by frame into the zarr group.
with mda.Writer(out, u.trajectory.n_atoms, format='ZARRTRAJ') as w:
    for ts in u.trajectory:
        w.write(u.atoms)

In [3]:
# TRY WRITING TO CACHE FIRST AND THEN S3- FAILS, NOT FASTER

import s3fs
import zarr
from zarr.storage import LRUStoreCache
import zarrtraj
import MDAnalysis as mda
import os
import time
from MDAnalysisTests.datafiles import PSF, DCD
import MDAnalysisData

# Read the credentials before building anything from them. Previously
# storage_options was built above these two lines, so on a fresh kernel the
# cell raised NameError, and otherwise it silently picked up whatever `key` /
# `secret` an earlier cell had left behind.
key = os.getenv('AWS_KEY')
secret = os.getenv('AWS_SECRET_KEY')
storage_options = {
    'key': key,
    'secret': secret
}
yiip = MDAnalysisData.yiip_equilibrium.fetch_yiip_equilibrium_short()
# NOTE: change this to five before doing true benchmark test

s3 = s3fs.S3FileSystem(key=key, secret=secret)
store = s3fs.S3Map(root='zarrtraj-test-data/s3-test-cacheing.zarrtraj', s3=s3, check=False)
# Wrap the S3 mapping in an in-memory LRU cache of 2**25 bytes (32 MiB).
cache = LRUStoreCache(store, max_size=2**25)
root = zarr.group(store=cache)

u = mda.Universe(yiip.topology, yiip.trajectory)

# NOTE(review): the last recorded run of this cell failed with
# "TypeError: shape do not match existing array; expected (1, 3, 3), got
# (0, 3, 3)". That message is what zarr's require_dataset raises when an array
# of a different shape is already present, so the remote store probably still
# held data from an earlier run -- confirm, and clear the store before
# re-running.
# NOTE(review): this call passes `frames=` while the buffered-writer cell
# further down passes `n_frames=`; check which keyword the writer accepts.
start = time.perf_counter()  # monotonic clock for interval timing
with mda.Writer(root, frames=u.trajectory.n_frames,n_atoms=u.trajectory.n_atoms, format='ZARRTRAJ', chunks=(10, u.trajectory.n_atoms, 3)) as w:
    for ts in u.trajectory:
        w.write(u.atoms)
stop = time.perf_counter()
print(stop-start)



TypeError: shape do not match existing array; expected (1, 3, 3), got (0, 3, 3)

In [12]:
# TRY READING INTO CACHE FIRST 
#
# Times random-access reads of a handful of frames from the S3-backed
# trajectory through an in-memory LRU cache.
# Relies on `os`, `time`, `s3fs`, `zarr`, `mda`, `PSF` and the zarrtraj import
# from earlier cells.
import logging
from zarr.storage import LRUStoreCache
#logging.basicConfig(level=logging.DEBUG)

key = os.getenv('AWS_KEY')
secret = os.getenv('AWS_SECRET_KEY')
storage_options = {
    'key': key,
    'secret': secret
}

# aprox 2^19 bytes (0.5mb) needed for cache

s3 = s3fs.S3FileSystem(key=key, secret=secret)
store = s3fs.S3Map(root='zarrtraj-test/s3-test.zarrtraj', s3=s3, check=False)
cache = LRUStoreCache(store, max_size=2**19)
root = zarr.group(store=cache)


u = mda.Universe(PSF, root)

# Disabled alternative benchmark: sequential pass over the whole trajectory.
# num = 0
# start = time.time()
# for ts in u.trajectory:
#     num += ts[0][0]
# stop = time.time()
# print(stop-start)

start = time.perf_counter()
# Indexing a trajectory with a list only builds a lazy frame iterator; frames
# are fetched when it is iterated. The bare expression used before therefore
# timed no reads at all.
for ts in u.trajectory[[1, 11, 97, 43, 61]]:
    pass
stop = time.perf_counter()
# Report the measurement; it was previously computed and then discarded.
print(stop-start)

In [None]:
# TEST ACCESS SPEED FOR SAMPLE ZARR DATA
#
# Opens the 'zarr-demo/store' bucket path anonymously and lists the members of
# its root group as (name, object) pairs.

import s3fs
import zarr

s3 = s3fs.S3FileSystem(
    anon=True,
    client_kwargs={'region_name': 'eu-west-2'},
)
store = s3fs.S3Map(root='zarr-demo/store', s3=s3, check=False)
root = zarr.group(store=store)
for item in root.items():
    print(item)

In [None]:
# BASIC DASK TEST
#
# Loads the position array straight from S3 as a dask array and times a mean
# over axis 0, including the compute() that triggers the remote reads.

import os
import time

import dask.array as da
import s3fs
import zarr


key = os.getenv('AWS_KEY')
secret = os.getenv('AWS_SECRET_KEY')
s3 = s3fs.S3FileSystem(key=key, secret=secret)
store = s3fs.S3Map(
    root='zarrtraj-test/s3-test.zarrtraj/particles/trajectory/position/value',
    s3=s3,
    check=False,
)
dask_root = da.from_zarr(store)

start = time.time()
result = dask_root.mean(axis=0)  # lazy: only builds the task graph
computed_result = result.compute()  # executes the graph
stop = time.time()

print(stop-start)
print(computed_result)



In [3]:
import logging
import os
import time

import s3fs
import zarr
from MDAnalysisTests.datafiles import PSF, DCD
#logging.basicConfig(level=logging.DEBUG)

# Credentials come from the environment; both are None when unset.
key = os.getenv('AWS_KEY')
secret = os.getenv('AWS_SECRET_KEY')
storage_options = dict(key=key, secret=secret)

# Open the root group of the S3-hosted test trajectory.
s3 = s3fs.S3FileSystem(key=key, secret=secret)
store = s3fs.S3Map(
    root='zarrtraj-test-data/s3-test.zarrtraj',
    s3=s3,
    check=False,
)
root = zarr.group(store=store)



In [None]:
import dask.array as da

# Design sketch: build a Universe from lazily loaded dask arrays.
#
# da.from_zarr() must point at an array, not at the group root, and a dask
# array cannot be indexed with a string. The array inside the store is selected
# with `component=` instead.
# The position path matches the layout used elsewhere in this notebook
# ('particles/trajectory/position/value'). The velocity and force paths are
# assumed to mirror it -- TODO confirm against the actual store.
pos = da.from_zarr('s3://zarrtraj-test/s3-test.zarrtraj',
                   component='particles/trajectory/position/value')
vel = da.from_zarr('s3://zarrtraj-test/s3-test.zarrtraj',
                   component='particles/trajectory/velocity/value')
force = da.from_zarr('s3://zarrtraj-test/s3-test.zarrtraj',
                     component='particles/trajectory/force/value')

# NOTE(review): nothing in this notebook shows a reader that accepts a tuple of
# dask arrays as the trajectory argument -- confirm this is supported before
# relying on it. Also relies on `mda` and `PSF` from earlier cells.
u = mda.Universe(PSF, (pos, vel, force))
# Select several frames with a list of indices; a bare tuple (1, 5, 10) is not
# a valid trajectory index.
u.trajectory[[1, 5, 10]] # not actually loaded into memory until .compute() or .to_zarr() is called somewhere

In [1]:
import s3fs
import zarr

# Check which store class zarr ends up using when it is handed an fsspec
# mapping for the public demo bucket.
s3 = s3fs.S3FileSystem(anon=True, client_kwargs=dict(region_name='eu-west-2'))
store = s3fs.S3Map(root='zarr-demo/store', s3=s3, check=False)
root = zarr.group(store=store)

print(type(store))
# `root.` was left unfinished, which is a syntax error. The recorded output
# (<class 'zarr.storage.FSStore'>) shows the attribute being inspected was the
# group's store.
print(type(root.store))

zarr.storage.FSStore

<class 'fsspec.mapping.FSMap'>
<class 'zarr.storage.FSStore'>


In [2]:
# Create (or overwrite) an empty zarr group on disk at 'y.zarrtraj'.
out = zarr.open_group(store='y.zarrtraj', mode='w')

In [3]:
# Re-open 'y.zarrtraj' read-only and report how many members the group has.
z = zarr.open_group(store='y.zarrtraj', mode='r')
print(len(z))

0


In [4]:
# TEST- New buffered writer
#
# Writes the PSF/DCD test trajectory directly to an S3-backed zarr group and
# times it.

# Plain import instead of `from zarrtraj import *`: the package is only needed
# for its import-time side effects here, and the star import hides where names
# in the notebook come from.
import zarrtraj
from MDAnalysisTests.datafiles import PSF, DCD
import fsspec
import s3fs
import os
import time
import MDAnalysis as mda
import zarr
key = os.getenv('AWS_KEY')
secret = os.getenv('AWS_SECRET_KEY')
storage_options = {
    'key': key,
    'secret': secret
}
s3 = s3fs.S3FileSystem(key=key, secret=secret)
store = s3fs.S3Map(root='zarrtraj-test-data/s3-cached-write-test.zarrtraj', s3=s3, check=False)
z = zarr.group(store=store)

u = mda.Universe(PSF, DCD)
# NOTE(review): the last recorded run of this cell ended with
# "TypeError: object of type 'bool' has no len()". The cause is not visible in
# this notebook -- investigate in the writer before trusting the timing.
start = time.perf_counter()  # monotonic clock for interval timing
with mda.Writer(z, u.trajectory.n_atoms, n_frames=u.trajectory.n_frames, format='ZARRTRAJ') as w:
    for ts in u.trajectory:
        w.write(u.atoms)
stop = time.perf_counter()
print(f"Writing time is {stop-start} seconds")




TypeError: object of type 'bool' has no len()

In [1]:
# SECURITY: credentials must never be written into the notebook. This cell
# previously hardcoded an AWS access key pair in `%env` magics and echoed it in
# the saved output. That key pair must be treated as compromised and
# revoked/rotated; removing it here does not undo the exposure.
#
# Run this cell first. It leaves AWS_KEY / AWS_SECRET_KEY untouched when they
# are already set in the environment, and otherwise prompts for them without
# echoing the value or storing it in the notebook.
import os
from getpass import getpass

for _var in ("AWS_KEY", "AWS_SECRET_KEY"):
    if not os.environ.get(_var):
        os.environ[_var] = getpass(f"{_var}: ")
