Benchmarking RMSF analysis speed when reading from cloud storage and from local disk

Setup: 

1. Download the 90ns YiiP trajectory to the local filesystem
2. Write it into the zarrtraj format on disk
3. Upload the zarrtraj-formatted trajectory to an accessible AWS S3 bucket

In [5]:
import zarrtraj
import MDAnalysis as mda
import MDAnalysisData
import zarr

# 1. Fetch the 90 ns YiiP equilibrium trajectory; files are placed under
#    data_home on the local filesystem.
yiip = MDAnalysisData.yiip_equilibrium.fetch_yiip_equilibrium_long(
    data_home='/nfs/homes3/ljwoods2/workspace/zarrtraj/notebooks/notebook_data_tmp'
)

# 2. Load the trajectory and re-write it frame by frame into a zarrtraj
#    group on disk.
u = mda.Universe(yiip.topology, yiip.trajectory)
z = zarr.open_group("yiip.zarrtraj", mode="w")

# Chunk shape is (10, n_atoms, 3) -- presumably 10 frames of per-atom xyz
# data per chunk; confirm against the zarrtraj writer documentation.
writer_kwargs = dict(
    n_frames=u.trajectory.n_frames,
    chunks=(10, u.trajectory.n_atoms, 3),
    format="ZARRTRAJ",
)
with mda.Writer(z, u.atoms.n_atoms, **writer_kwargs) as W:
    # Iterating the trajectory advances the Universe to each frame; the
    # full atom group is written at every step.
    for ts in u.trajectory:
        W.write(u.atoms)



(9001,)
(9002,)
[0 0 0 ... 0 0 0]
(9002,)
(9003,)
[500000      0      0 ...      0      0      0]
(9003,)
(9004,)
[500000 505000      0 ...      0      0      0]
(9004,)
(9005,)
[500000 505000 510000 ...      0      0      0]
(9005,)
(9006,)
[500000 505000 510000 ...      0      0      0]
(9006,)
(9007,)
[500000 505000 510000 ...      0      0      0]
(9007,)
(9008,)
[500000 505000 510000 ...      0      0      0]
(9008,)
(9009,)
[500000 505000 510000 ...      0      0      0]
(9009,)
(9010,)
[500000 505000 510000 ...      0      0      0]
(9010,)
(9011,)
[500000 505000 510000 ...      0      0      0]
(9011,)
(9012,)
[500000 505000 510000 ...      0      0      0]
(9012,)
(9013,)
[500000 505000 510000 ...      0      0      0]
(9013,)
(9014,)
[500000 505000 510000 ...      0      0      0]
(9014,)
(9015,)
[500000 505000 510000 ...      0      0      0]
(9015,)
(9016,)
[500000 505000 510000 ...      0      0      0]
(9016,)
(9017,)
[500000 505000 510000 ...      0      0      0]
(9017,

In [None]:
import os 
import s3fs

# 3. Upload the on-disk zarrtraj store to an S3 bucket.
#
# The environment variables read below must be set before running this
# cell, e.g. with Jupyter's "%env KEY=VALUE" magic.

# NOTE(review): the two credential values are read here but never passed to
# s3fs explicitly; s3fs is presumably picking credentials up from the
# environment on its own -- confirm against the s3fs documentation.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
BUCKET_NAME = os.getenv("BUCKET_NAME")

# Fail fast: os.getenv returns None for an unset variable, which would
# otherwise be formatted into the store root as the literal "None".
if not BUCKET_NAME:
    raise RuntimeError(
        "BUCKET_NAME is not set; run '%env BUCKET_NAME=<bucket>' before "
        "executing this cell"
    )

# Local store written in step 2, opened read-only as the copy source.
source = zarr.open_group("yiip.zarrtraj", mode='r')

s3_fs = s3fs.S3FileSystem(
    # anon must be false to allow authentication
    anon=False,
    client_kwargs=dict(
        region_name='us-east-1',
    )
)

# Key-value mapping rooted at <bucket>/yiip.zarrtraj, used as the
# destination store for the copy.
cloud_store = s3fs.S3Map(
    root=f'{BUCKET_NAME}/yiip.zarrtraj',
    s3=s3_fs,
    check=False
)

# Copy every key from the local store to the bucket, replacing any keys
# that already exist at the destination.
zarr.convenience.copy_store(source.store, cloud_store,
                            if_exists='replace')