---
title: JUNO Magnetic field data pipeline
---

JUNO Magnetic field data can be downloaded from the [PDS](https://pds-ppi.igpp.ucla.edu/mission/JUNO/JNO/FGM) website.

In [2]:
%load_ext autoreload
%autoreload 2

In [11]:
# | default_exp missions/juno/fgm
# | hide
#| export
from functools import partial
from typing import Callable, Literal, Optional

import polars as pl
import pooch
from pipe import filter
from tqdm import tqdm

```txt
File Naming Convention                                                        
==============================================================================
Convention:                                                                   
   fgm_jno_LL_CCYYDDDxx_vVV.ext                                               
Where:                                                                        
   fgm - Fluxgate Magnetometer three character instrument abbreviation        
   jno - Juno                                                                 
    LL - CODMAC Data level, for example, l3 for level 3                       
    CC - The century portion of a date, 20                                    
    YY - The year of century portion of a date, 00-99                         
   DDD - The day of year, 001-366                                             
    xx - Coordinate system of data (se = Solar equatorial, ser = Solar        
         equatorial resampled, pc = Planetocentric, ss = Sun-State,           
         pl = Payload)                                                        
     v - separator to denote Version number                                   
    VV - version                                                              
   ext - file extension (sts = Standard Time Series (ASCII) file, lbl = Label 
         file)                                                                
Example:                                                                      
   fgm_jno_l3_2014055se_v00.sts    
```

## Downloading data 

In [4]:
#| code-summary: type definitions
#| export
# Allowed values for the mission-phase segment of the download URL built in `download_data`.
JunoPhases = Literal["CRUISE", "JUPITER"]
# Allowed values for the coordinate-system segment of the download URL.
JunoFGMCoords = Literal['SE', 'SS', 'PL']
# Allowed values for the time-resolution segment of the download URL.
JunoFGMTimeResolutions = Literal["1SEC", "1MIN", "FULL"]

In [5]:
#| export
from space_analysis.utils.lbl import load_lbl

import os
from zipfile import ZipFile

from pooch.processors import ExtractorProcessor

In [6]:
# | export
def load_func(file: str):
    """Load one FGM label/data pair and return it as a time-sorted lazy frame.

    The table is read through ``load_lbl`` and converted to polars. A ``time``
    column is built from the first four characters of ``SAMPLE UTC`` (parsed
    as a year) plus ``DECIMAL DAY - 1`` days expressed in milliseconds.
    NOTE(review): this presumes ``DECIMAL DAY`` is a 1-based fractional day
    of year; confirm against the label file.

    Parameters
    ----------
    file : str
        Path of the ``.lbl`` file handed to ``load_lbl``.

    Returns
    -------
    polars.LazyFrame
        The data without the raw time, range and position columns, sorted by
        ``time``.
    """
    frame: pl.DataFrame = pl.from_dataframe(load_lbl(file))

    # Midnight on 1 January of the sample's year.
    year_start = pl.col("SAMPLE UTC").str.slice(0, 4).str.to_datetime("%Y")
    # Offset into the year; the decimal day is shifted by one before scaling.
    offset_in_year = pl.duration(
        milliseconds=(pl.col("DECIMAL DAY") - 1) * 24 * 60 * 60 * 1000
    )
    dropped_columns = ["SAMPLE UTC", "DECIMAL DAY", "INSTRUMENT RANGE", "X", "Y", "Z"]

    with_time = frame.lazy().with_columns(time=year_start + offset_in_year)
    return with_time.drop(dropped_columns).sort("time")


def process_member(member, zip_file, extract_dir, clean=True):
    """Extract one label/data pair from an archive and convert it to Arrow IPC.

    Parameters
    ----------
    member : str
        Archive name of the ``.lbl`` file. The matching ``.sts`` member is
        expected under the same name with the extension swapped.
    zip_file : zipfile.ZipFile
        Open archive to extract from.
    extract_dir : str
        Directory the members are extracted into.
    clean : bool
        When true, delete the extracted ``.lbl`` and ``.sts`` files once the
        Arrow file has been written.

    Returns
    -------
    str
        Path of the written ``.arrow`` file, located next to the extracted
        label file inside ``extract_dir``.
    """
    # Swap only the final extension; a replace-all would also touch ".lbl"
    # appearing elsewhere in the member path.
    member_stem, _ = os.path.splitext(member)

    lbl_fp = zip_file.extract(member, path=extract_dir)
    sts_fp = zip_file.extract(member_stem + ".sts", path=extract_dir)

    # Write the Arrow file beside the extracted label. Deriving the path from
    # `member` alone would make it relative to the current working directory
    # instead of `extract_dir`.
    arrow_fp = os.path.splitext(lbl_fp)[0] + ".arrow"
    load_func(lbl_fp).collect().write_ipc(arrow_fp)

    # Remove the lbl and sts files to keep disk usage down.
    if clean:
        os.remove(lbl_fp)
        os.remove(sts_fp)

    return arrow_fp


def unpack_and_convert(fname, extract_dir):
    """
    Post-processing hook that unzips an archive and converts each data file
    to another format as it goes, so the raw unzipped files do not pile up
    on the user's disk.

    Parameters
    ----------
    fname : str
       Full path of the zipped file in local storage
    extract_dir : str
       Directory passed on to ``process_member`` as the extraction target

    Returns
    -------
    list
       The value returned by ``process_member`` for every ``.lbl`` member,
       in archive order

    """

    with ZipFile(fname, "r") as archive:
        # Each label file identifies one label/data pair to convert.
        label_members = [
            name for name in archive.namelist() if name.endswith(".lbl")
        ]
        return [
            process_member(name, zip_file=archive, extract_dir=extract_dir)
            for name in tqdm(label_members)
        ]


class Unpack(ExtractorProcessor):
    """Pooch extractor processor that unpacks a zip archive through ``unpack_and_convert``."""

    # Suffix attribute read by ``ExtractorProcessor``. NOTE(review): presumably
    # appended to the archive path to name the extraction directory (the cached
    # paths shown in this notebook end in "-download.unzip") — confirm in pooch.
    suffix = ".unzip"

    def _extract_file(self, fname, extract_dir):
        """Unpack ``fname`` into ``extract_dir``, converting members on the fly.

        The list returned by ``unpack_and_convert`` is discarded.
        """
        unpack_and_convert(fname, extract_dir)

In [7]:
# | export
def download_data(
    dataset="JNO-SS-3-FGM-CAL-V1.0",
    phase: JunoPhases = "CRUISE",
    coord: JunoFGMCoords = "SE",
    datatype: JunoFGMTimeResolutions = "1SEC",  # time resolution
    processor: Optional[Callable] = None,  # pooch processor; a fresh `Unpack()` when None
    format="arrow",
) -> list[str]:
    """Download one FGM data directory from PDS and return the converted files.

    Parameters
    ----------
    dataset
        PDS data set identifier placed in the download URL.
    phase, coord, datatype
        Mission phase, coordinate system and time resolution; each becomes
        one path segment of the download URL.
    processor
        Post-download processor passed to ``pooch.retrieve``. When ``None``
        a new ``Unpack`` instance is created for this call.
    format
        File extension (without the dot) of the files to return.

    Returns
    -------
    list[str]
        Sorted paths, among those returned by ``pooch.retrieve``, that end
        in ``.{format}``.
    """
    # Build the processor per call rather than as a default argument: a default
    # instance is created once and shared by every call, and pooch extractor
    # processors keep their extraction directory on the instance, so a shared
    # one can carry the first download's directory over to later downloads.
    if processor is None:
        processor = Unpack()

    url = f"https://pds-ppi.igpp.ucla.edu/ditdos/download?id=pds://PPI/{dataset}/DATA/{phase}/{coord}/{datatype}"

    files = pooch.retrieve(
        url=url,
        known_hash=None,
        progressbar=True,
        processor=processor,
    )

    return sorted(files | filter(lambda x: x.endswith(f".{format}")))

In [8]:
# Fetch the "FULL" time-resolution files, keeping the default phase ("CRUISE")
# and coordinate system ("SE").
download_data(datatype="FULL")

['/Users/zijin/Library/Caches/pooch/f4058d9a1f2f53f904b7fb2614f91bad-download.unzip/2011/fgm_jno_l3_2011237se_v01.arrow',
 '/Users/zijin/Library/Caches/pooch/f4058d9a1f2f53f904b7fb2614f91bad-download.unzip/2011/fgm_jno_l3_2011238se_v01.arrow',
 '/Users/zijin/Library/Caches/pooch/f4058d9a1f2f53f904b7fb2614f91bad-download.unzip/2011/fgm_jno_l3_2011239se_v01.arrow',
 '/Users/zijin/Library/Caches/pooch/f4058d9a1f2f53f904b7fb2614f91bad-download.unzip/2011/fgm_jno_l3_2011240se_v01.arrow',
 '/Users/zijin/Library/Caches/pooch/f4058d9a1f2f53f904b7fb2614f91bad-download.unzip/2011/fgm_jno_l3_2011241se_v01.arrow',
 '/Users/zijin/Library/Caches/pooch/f4058d9a1f2f53f904b7fb2614f91bad-download.unzip/2011/fgm_jno_l3_2011242se_v01.arrow',
 '/Users/zijin/Library/Caches/pooch/f4058d9a1f2f53f904b7fb2614f91bad-download.unzip/2011/fgm_jno_l3_2011243se_v01.arrow',
 '/Users/zijin/Library/Caches/pooch/f4058d9a1f2f53f904b7fb2614f91bad-download.unzip/2011/fgm_jno_l3_2011244se_v01.arrow',
 '/Users/zijin/Library/C

In [12]:
#| hide
from nbdev import nbdev_export
# Run nbdev's export step (writes the cells marked `#| export` to the library module).
nbdev_export()

## Dataset Overview

### Index

In [None]:
# Base URL of the PDS-PPI data tree.
pds_dir = "https://pds-ppi.igpp.ucla.edu/data"

# Values that can appear in FGM file names / data directories.
possible_coords = ["se", "ser", "pc", "ss", "pl"]
possible_exts = ["sts", "lbl"]
possible_data_rates = ["1s", "1min", "1h"]

# Index label location for the JNO-SS-3-FGM-CAL-V1.0 data set.
juno_ss_config = dict(
    DATA_SET_ID="JNO-SS-3-FGM-CAL-V1.0",
    FILE_SPECIFICATION_NAME="INDEX/INDEX.LBL",
)

# Index label location for the JNO-J-3-FGM-CAL-V1.0 data set.
juno_j_config = dict(
    DATA_SET_ID="JNO-J-3-FGM-CAL-V1.0",
    FILE_SPECIFICATION_NAME="INDEX/INDEX.LBL",
)

#### Process index

In [None]:
import pandas
import pdpipe as pdp

In [None]:
def process_jno_index(df: pandas.DataFrame):
    """Clean a Juno FGM index table.

    Spaces are removed from the column names, the ``PRODUCT_ID``, ``CR_DATE``
    and ``PRODUCT_LABEL_MD5CHECKSUM`` columns are dropped, trailing whitespace
    is stripped from ``SID`` and ``FILE_SPECIFICATION_NAME``, and
    ``START_TIME`` / ``STOP_TIME`` are parsed as year + day-of-year
    timestamps.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw index table. It is not modified; the column renaming is done on
        a copy.

    Returns
    -------
    The result of applying the pdpipe pipeline to the renamed frame.
    """
    _index_time_format = "%Y-%jT%H:%M:%S.%f"

    def _parse_time(column: str):
        # Frame-level parser for one timestamp column.
        return lambda frame: pandas.to_datetime(
            frame[column], format=_index_time_format
        )

    # Rename on a copy: assigning to `df.columns` would change the caller's
    # DataFrame as a side effect.
    df = df.rename(columns=lambda name: name.replace(" ", ""))

    jno_index_pipeline = pdp.PdPipeline(
        [
            pdp.ColDrop(["PRODUCT_ID", "CR_DATE", "PRODUCT_LABEL_MD5CHECKSUM"]),
            pdp.ApplyByCols("SID", str.rstrip),
            pdp.ApplyByCols("FILE_SPECIFICATION_NAME", str.rstrip),
            # Timestamps are converted a whole column at a time; going through
            # pdp.ApplyByCols with pandas.to_datetime was noted as slow.
            pdp.ColByFrameFunc("START_TIME", _parse_time("START_TIME")),
            pdp.ColByFrameFunc("STOP_TIME", _parse_time("STOP_TIME")),
        ]
    )

    return jno_index_pipeline(df)