## 1. Scrap API with fsspec, s3 & zarr stores.

We are searching for the group EFM.

Variables: fwtmp, magpr_ang, magpr_len, magpr_r, magpr_z, magpr_x, magpr_c


In [5]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
import fsspec
import zarr

from pathlib import Path

from joblib import Parallel, delayed

In [6]:
class ShotLoader:
    def __init__(self):
        self.endpoint_url = "https://s3.echo.stfc.ac.uk"
        self.url = "s3://mast/level1/shots/{shot}.zarr"

        self.fs = fsspec.filesystem(
            **dict(
                protocol="filecache",
                target_protocol="s3",
                cache_storage=".cache",
                target_options=dict(anon=True, endpoint_url=self.endpoint_url),
            )
        )

    def load(self, shot: int, group: str) -> xr.Dataset:
        url = self.url.format(shot=shot)
        store = zarr.storage.FSStore(fs=self.fs, url=url)
        dataset = xr.open_zarr(store, group=group)
        return dataset

In [7]:
def get_remote_store(path: str, endpoint_url: str):
    fs = fsspec.filesystem(
        **dict(
            protocol="simplecache",
            target_protocol="s3",
            cache_storage=".cache",
            target_options=dict(anon=True, endpoint_url=endpoint_url),
        )
    )
    return fs.get_mapper(path)

In [None]:
def process_shot(shot_id: int, output_dir: str):
    store = get_remote_store(
        f"s3://mast/level1/shots/{shot_id}.zarr",
        endpoint_url="https://s3.echo.stfc.ac.uk",
    )
    efm = xr.open_zarr(store, group="efm")
    print(efm)
    print(efm.data_vars)


    fwtmp = efm.fwtmp
    magpr_r = efm.magpr_r
    magpr_z = efm.magpr_z
    magpr_x = efm.magpr_x
    magpr_c = efm.magpr_c
    magpr_len = efm.magpr_len
    magpr_ang = efm.magpr_ang

    summary = xr.open_zarr(store, group="summary")
    ip = summary.ip

    ds = xr.Dataset(
        dict(
            ip=ip,
            fwtmp=fwtmp,
            magpr_r=magpr_r,
            magpr_z=magpr_z,
            magpr_x=magpr_x,
            magpr_c=magpr_c,
            magpr_len=magpr_len,
            magpr_ang=magpr_ang
        )
    )

    file_name = Path(output_dir) / f"{shot_id}.parquet"
    frame = ds.to_dataframe()
    frame.to_parquet(file_name)
    return shot_id

In [9]:
shots_df = pd.read_parquet(f'https://mastapp.site/parquet/level2/shots')
shot_list = shots_df['shot_id'].tolist()
shot_list = shot_list[0:10]
len(shot_list)

10

In [10]:
output_dir = Path().absolute().parent / "result_files/efit_analysis/efit_parquet"
output_dir.mkdir(exist_ok=True, parents=True)
# tasks = (delayed(process_shot)(shot_id, output_dir) for shot_id in shot_list)
# pool = Parallel(n_jobs=2, verbose=10)
# pool(tasks)
process_shot(shot_list[0], output_dir)

  return cls(


<xarray.Dataset> Size: 2MB
Dimensions:            (time: 50, normalizedpsi: 65, n_iterations: 250,
                        fcoil_seg_n: 1004, fcoil_n: 167, ff_knot_points: 36,
                        ffp_knot_points: 36, ffprime_coefs_n: 2, r: 65, z: 65,
                        kff_knot_points: 36, kpp_knot_points: 36,
                        kww_knot_points: 36, lcfs_coords: 74, limiter_n: 37,
                        mag_probe_n: 78, height: 65, radius: 65,
                        pp_knot_points: 36, pprime_coefs_n: 2, n_rot_coefs: 2,
                        psi_loop_n: 46, ww_knot_points: 36)
Coordinates: (12/14)
  * fcoil_n            (fcoil_n) float32 668B 0.0 1.0 2.0 ... 164.0 165.0 166.0
  * ffprime_coefs_n    (ffprime_coefs_n) float32 8B 0.0 1.0
  * height             (height) float32 260B -2.0 -1.938 -1.875 ... 1.938 2.0
  * lcfs_coords        (lcfs_coords) float32 296B 0.0 1.0 2.0 ... 71.0 72.0 73.0
  * mag_probe_n        (mag_probe_n) float32 312B 0.0 1.0 2.0 ... 75.0 76.0 77

AttributeError: 'Dataset' object has no attribute 'fwtmp'

In [26]:
import torch
import torch.nn as nn

x_rand = torch.rand((4, 100, 1))  # (batch_size, seq_len, input_dim)

x_conved = nn.Conv1d(1, 4, kernel_size=(5,), stride=(2,), padding=(2,))(x_rand.permute(0, 2, 1)).permute(0, 2, 1)
print(x_conved.shape)

torch.Size([4, 50, 4])
