# Basic data processing

In [47]:
import os
import pathlib
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum

import numpy as np
from spacepy import pycdf

## Loading data from ftp server
Replace `FTP_PASSWORD` in the command below with your actual password, then run the cell to download the data from the FTP server. If the password contains special characters, remember to escape them with `\`.

In [2]:
# Create the download directory used by every later cell (../data).
# -p keeps the cell idempotent: re-running it does not fail if the directory already exists.
!mkdir -p ../data

In [3]:
# Recursively fetch the FTP directory into ../data/ as a flat set of files:
# the remote directory hierarchy is not recreated locally and wget never ascends to the parent directory.
!wget --recursive --no-directories --no-parent --directory-prefix=../data/ --user=ifjagh --password=FTP_PASSWORD ftp://ftptrans.psi.ch/to_radem/

--2024-02-18 21:26:02--  ftp://ftptrans.psi.ch/to_radem/
           => ‘../data/.listing’
Resolving ftptrans.psi.ch (ftptrans.psi.ch)... 192.33.120.71
Connecting to ftptrans.psi.ch (ftptrans.psi.ch)|192.33.120.71|:21... connected.
Logging in as ifjagh ... Logged in!
==> SYST ... done.    ==> PWD ... done.
==> TYPE I ... done.  ==> CWD (1) /to_radem ... done.
==> PASV ... done.    ==> LIST ... done.

.listing                [ <=>                ]  33.74K  --.-KB/s    in 0.05s   

2024-02-18 21:26:03 (683 KB/s) - ‘../data/.listing’ saved [34547]

Removed ‘../data/.listing’.
--2024-02-18 21:26:03--  ftp://ftptrans.psi.ch/to_radem/Bard%20timeline-review-Oct-2022%20(1).xlsx
           => ‘../data/Bard timeline-review-Oct-2022 (1).xlsx’
==> CWD not required.
==> PASV ... done.    ==> RETR Bard timeline-review-Oct-2022 (1).xlsx ... done.
Length: 21543 (21K)


2024-02-18 21:26:03 (484 KB/s) - ‘../data/Bard timeline-review-Oct-2022 (1).xlsx’ saved [21543]

--2024-02-18 21:26:03--  ftp://ftptran

## Extracting `.tar.gz` files

In [4]:
# Extract every .tar.gz archive found in ../data/ into ../data/.
# "$$" makes IPython hand a literal "$" to the shell, so the shell loop variable is used
# even when a Python variable named `f` happens to exist in the kernel namespace.
!for f in ../data/*.tar.gz; do tar -xvf "$$f" -C ../data/; done;

juicepsa-pds4-PI-01-juice_rad-20230416T180019/juice_rad/data_raw/rad_raw_sc_20230416.lblx
juicepsa-pds4-PI-01-juice_rad-20230416T180019/juice_rad/data_raw/rad_raw_sc_20230416.cdf
juicepsa-pds4-PI-01-juice_rad-20230416T180019/juicepsa-pds4-PI-01-juice_rad-20230416T180019-checksum_manifest.tab
juicepsa-pds4-PI-01-juice_rad-20230416T180019/juicepsa-pds4-PI-01-juice_rad-20230416T180019-transfer_manifest.tab
juicepsa-pds4-PI-01-juice_rad-20230416T180019/juicepsa-pds4-PI-01-juice_rad-20230416T180019.xml
juicepsa-pds4-PI-01-juice_rad-20230419T213312/juice_rad/data_raw/rad_raw_sc_20230418.lblx
juicepsa-pds4-PI-01-juice_rad-20230419T213312/juice_rad/data_raw/rad_raw_sc_20230418.cdf
juicepsa-pds4-PI-01-juice_rad-20230419T213312/juicepsa-pds4-PI-01-juice_rad-20230419T213312-checksum_manifest.tab
juicepsa-pds4-PI-01-juice_rad-20230419T213312/juicepsa-pds4-PI-01-juice_rad-20230419T213312-transfer_manifest.tab
juicepsa-pds4-PI-01-juice_rad-20230419T213312/juicepsa-pds4-PI-01-juice_rad-20230419T21331

In [6]:
# Delete every regular file that sits directly in ../data (-maxdepth 1 -type f).
# Sub-directories and everything inside them are left untouched.
# NOTE(review): presumably this clears the downloaded .tar.gz archives and other loose
# files once extraction is done — confirm nothing else at the top level is still needed.
!find ../data -maxdepth 1 -type f -delete

In [7]:
# Print the path of every entry at the top level of the data directory.
data_root = "../data/"
for entry in os.listdir(data_root):
    print(os.path.join(data_root, entry))

../data/juicepsa-pds4-PI-01-juice_rad-20230416T180019
../data/juicepsa-pds4-PI-01-juice_rad-20230419T213312
../data/juicepsa-pds4-PI-01-juice_rad-20230419T213444
../data/juicepsa-pds4-PI-01-juice_rad-20230421T000051
../data/juicepsa-pds4-PI-01-juice_rad-20230421T000226
../data/juicepsa-pds4-PI-01-juice_rad-20230425T092621
../data/juicepsa-pds4-PI-01-juice_rad-20230426T084435
../data/juicepsa-pds4-PI-01-juice_rad-20230426T084607
../data/juicepsa-pds4-PI-01-juice_rad-20230707T185430
../data/juicepsa-pds4-PI-01-juice_rad-20230709T185431
../data/juicepsa-pds4-PI-01-juice_rad-20230720T153618
../data/juicepsa-pds4-PI-01-juice_rad-20230901T105022
../data/juicepsa-pds4-PI-01-juice_rad-20230912T144122
../data/juicepsa-pds4-PI-01-juice_rad-20230912T144248
../data/juicepsa-pds4-PI-01-juice_rad-20230912T144414
../data/juicepsa-pds4-PI-01-juice_rad-20230912T144541
../data/juicepsa-pds4-PI-01-juice_rad-20230912T144708
../data/juicepsa-pds4-PI-01-juice_rad-20230912T144834
../data/juicepsa-pds4-PI-01-

# Reading raw CDF data 

In [60]:
@dataclass
class RawCDF:
    """A raw CDF file together with the metadata parsed from its file name."""
    name: str        # file name, e.g. "rad_raw_sc_20230416.cdf"
    date: date       # calendar date parsed from the file name (a date, not a datetime)
    type: str        # 'science' or 'housekeeping'
    data: pycdf.CDF  # CDF object opened from the file

In [61]:
def parse_date(filename: str) -> date:
    """Extract the date embedded at the end of a raw CDF file name.

    The name is expected to end with ``YYYYMMDD`` followed by a four-character
    suffix such as ``.cdf`` (for example ``rad_raw_sc_20230416.cdf``).

    Args:
        filename: File name whose last twelve characters are ``YYYYMMDD.cdf``.

    Returns:
        The calendar date encoded in the name (a ``date``, not a ``datetime``).

    Raises:
        ValueError: If the eight characters before the suffix are not a valid
            ``YYYYMMDD`` date.
    """
    # Characters -12..-5 are the eight date digits that precede the 4-character suffix.
    date_string = filename[-12:-4]
    return datetime.strptime(date_string, '%Y%m%d').date()

In [66]:
def parse_type(filename: str) -> str:
    """Classify a raw CDF file as science or housekeeping data from its name.

    The two characters at positions 8-9 of the file name carry the type code
    (for example ``sc`` in ``rad_raw_sc_20230416.cdf``).

    Args:
        filename: Bare file name (no directory part).

    Returns:
        ``'science'`` when the type code is ``sc``, otherwise ``'housekeeping'``.
    """
    # FIXME: non-exhaustive match — every code other than 'sc' is reported as housekeeping.
    return 'science' if filename[8:10] == 'sc' else 'housekeeping'

In [69]:
# Open every *.cdf file below ../data and attach the metadata parsed from its file name.
# rglob() yields paths in file-system order, which is not guaranteed to be stable across
# machines or runs, so the paths are sorted to make positional indexing reproducible.
# The pycdf.CDF objects are kept open so that later cells can read from them.
cdfs = [
    RawCDF(name=path.name,
           date=parse_date(path.name),
           type=parse_type(path.name),
           data=pycdf.CDF(str(path)))
    for path in sorted(pathlib.Path('../data').rglob('*.cdf'))
]
# Keep only the files classified as science data.
science_cdfs = [cdf for cdf in cdfs if cdf.type == 'science']

# Experimentation

In [78]:
# Pick the CDF object of the science file at index 40 for ad-hoc inspection.
# NOTE(review): the index is a magic number and depends on the ordering of science_cdfs;
# it raises IndexError when fewer than 41 science files were loaded.
cdf = science_cdfs[40].data

In [82]:
# Read the "ELECTRONS" and "TIME_UTC" variables of the selected file into lists,
# then print one "<time> -> <electron counts>" line per paired record.
electron_records = list(cdf["ELECTRONS"])
time_records = list(cdf["TIME_UTC"])
for time, electron_counts in zip(time_records, electron_records):
    print(f"{time} -> {electron_counts}")

2023-09-25 00:00:54.023700 -> [  5  16  16  16  11  19  51 627   0]
2023-09-25 00:01:54.025594 -> [  3   9  11  16  15  21  65 660   0]
2023-09-25 00:02:54.027427 -> [  4  10  12  19  16  20  61 681   0]
2023-09-25 00:03:54.029291 -> [  5  23  14  16   8  21  68 630   0]
2023-09-25 00:04:54.030651 -> [  6  17   9   9  16  17  65 674   0]
2023-09-25 00:05:54.032011 -> [  4  11  15  10   9  18  64 657   0]
2023-09-25 00:06:54.033081 -> [  5  19  13  10  12  21  52 679   0]
2023-09-25 00:07:54.035235 -> [  7  16  17  22  19  25  68 689   0]
2023-09-25 00:08:54.037480 -> [  3  16  12  13  11  20  62 714   0]
2023-09-25 00:09:54.038261 -> [  4  15  12  13  20  18  50 620   0]
2023-09-25 00:10:54.039804 -> [  4  18  16  17  14  23  67 629   0]
2023-09-25 00:11:54.041652 -> [  2  11   9  14  12   9  51 665   0]
2023-09-25 00:12:54.043318 -> [  6  14  14  11  23  28  52 639   0]
2023-09-25 00:13:54.045410 -> [  4  15   9  19  10  22  56 621   0]
2023-09-25 00:14:54.046938 -> [  4  11  10   9  