In [1]:
%matplotlib notebook

In [2]:
from ztfquery import sedm
import pandas
import numpy as np
import os

***
# 1 PharosIO | What has been observed when.

### 1.1 Structure
PharosIO has two sets of data attributes:

- `whatdata`: (multi-index) DataFrame serving as observing log
- `pharosfile`: list of files on disk that you have access to.

The `PharosIO` object is built upon `whatdata`; `pharosfile` is loaded only if necessary. This is done automatically, so you should not need to do it manually.

### 1.2 Individual date files
Each night has one `whatfile` (the file containing the `whatdata`) and one `pharosfile`. Once downloaded (see `download_whatfiles()` and `download_pharosfiles()`), they are stored in `$ZTFDATA/sedm/whatfiles` and `$ZTFDATA/sedm/pharosfile` respectively.


### 1.3 `whatdata`, `store` and `load_local()`
To avoid having to load many small files, the `PharosIO()` object is able to store the whatdata MultiIndex DataFrame, which concatenates the data of all the individual `whatfile`s. Calling `store()` saves it as `$ZTFDATA/sedm/whatfiles/stored_data.parquet`.

Once you have called `store()` at least once, you can load a `PharosIO()`
directly from this stored dataframe. This avoids reopening each individual whatfile and is much, much faster.

```python
pio = sedm.PharosIO.load_local(stored=True)
```

### 1.4 `update()`

Once you have instantiated a `PharosIO` object, you can `update()` it. This will: 1. download the missing whatfiles, 2. reload the whatdata from the individual whatfiles to make sure it is up to date, and 3. `store()` the new `whatdata`, so that the next time you use `load_local()` you get the updated `whatdata`.


### 1.5: Parquet format

Whatfiles are stored in the parquet format because it is one of the fastest formats to read and write.

### 1.6 Dask `client`

`sedm.bulk_download()` accepts a Dask `client` as input for multiprocess downloading that can scale effortlessly. Hence, the `download_whatfiles()` and `download_pharosfiles()` methods (and therefore `update()`) also accept a `client`, as they use `bulk_download()`.

In [3]:
pio = sedm.PharosIO()

Nothing loaded

In [4]:
pio.whatdata

In [7]:
# Set to False to run the downloads without a Dask cluster (client=None).
USE_DASK = True

if not USE_DASK:
    client = None
else:
    # Imported here so that dask is only needed when USE_DASK is enabled.
    from dask.distributed import Client

    # Default local cluster. Client(n_workers=16) was tried as an alternative;
    # the default seemed faster (not benchmarked here).
    client = Client()
    print(client)

<Client: 'tcp://127.0.0.1:55459' processes=4 threads=4, memory=17.18 GB>


In [6]:
pio.update(client=client)



In [7]:
pio.whatdata

Unnamed: 0,Unnamed: 1,filename,airmass,shutter,exptime,target
20210215,0,ifu20210215_00_01_05.fits,1.354,2.0,0.0,Calib bias 1 of 10
20210215,1,ifu20210215_00_01_10.fits,1.354,2.0,0.0,Calib bias 2 of 10
20210215,2,ifu20210215_00_01_15.fits,1.354,2.0,0.0,Calib bias 3 of 10
20210215,3,ifu20210215_00_01_20.fits,1.354,2.0,0.0,Calib bias 4 of 10
20210215,4,ifu20210215_00_01_25.fits,1.354,2.0,0.0,Calib bias 5 of 10
...,...,...,...,...,...,...
20190425,45,ifu20190425_06_08_19.fits,1.302,0.1,1200.0,ZTF19aarinmw
20190425,46,ifu20190425_06_31_48.fits,1.603,0.1,2250.0,ZTF19aanesgt
20190425,47,ifu20190425_07_28_00.fits,1.006,0.1,180.0,STD-HZ44
20190425,48,ifu20190425_07_34_30.fits,1.942,0.1,2250.0,ZTF19aaphifl


## Let's use the load_local() now

In [4]:
%time pio = sedm.PharosIO.load_local() # stored = False by default

CPU times: user 2.41 s, sys: 290 ms, total: 2.7 s
Wall time: 2.49 s


In [3]:
%time pio = sedm.PharosIO.load_local(stored=True)

CPU times: user 103 ms, sys: 27.3 ms, total: 130 ms
Wall time: 121 ms


In [6]:
# Count the whatdata entries of each ZTF target, least-observed first.
# `ascending` is passed by keyword: pandas >= 2.0 rejects positional arguments
# to Series.sort_values, and the former positional `False` was bound to `axis`
# (not `ascending`), so the sort was ascending all along.
pio.get_whatdata(ztf_only=True).groupby("target").size().sort_values(ascending=True)

target
ZTF02wi          1
ZTF20aabcouv     1
ZTF20aabcemq     1
ZTF20aabbvcx     1
ZTF20aaazhtk     1
                ..
ZTF20abwxywy    14
ZTF20aaelulu    15
ZTF19acxpuql    15
ZTF19abucwzt    17
ZTF19abqwtfu    21
Length: 5931, dtype: int64

## Massive downloading of the pharosfiles

In [11]:
# Bulk-download the pharosfiles, passing the Dask client created earlier.
# NOTE(review): force_dl=True presumably re-downloads files that already exist
# locally, and the returned value is presumably a collection of Dask futures —
# confirm against the ztfquery documentation.
futures = pio.download_pharosfiles(client=client, force_dl=True) # progress can be followed on the Dask dashboard (typically localhost:8787)



In [12]:
# Check that all worked

In [4]:
pio.update()



### pharosfiles


In [4]:
pio.get_pharosdata("20180822", force_dl=False)

['/data/20180822/20180822_Flat.fits',
 '/data/20180822/20180822_HexaGrid.pkl',
 '/data/20180822/20180822_TraceMatch.pkl',
 '/data/20180822/20180822_TraceMatch_WithMasks.pkl',
 '/data/20180822/20180822_WaveSolution.pkl',
 '/data/20180822/20180822_WaveSolution_range_0_201.pkl',
 '/data/20180822/20180822_WaveSolution_range_1005_1206.pkl',
 '/data/20180822/20180822_WaveSolution_range_1206_1406.pkl',
 '/data/20180822/20180822_WaveSolution_range_1406_1606.pkl',
 '/data/20180822/20180822_WaveSolution_range_201_402.pkl',
 '/data/20180822/20180822_WaveSolution_range_402_603.pkl',
 '/data/20180822/20180822_WaveSolution_range_603_804.pkl',
 '/data/20180822/20180822_WaveSolution_range_804_1005.pkl',
 '/data/20180822/20180822_flat3d.png',
 '/data/20180822/20180822_wavesolution_dispersionmap.png',
 '/data/20180822/Cd.fits',
 '/data/20180822/Hg.fits',
 '/data/20180822/Xe.fits',
 '/data/20180822/bkgd_crr_b_ifu20180822_03_25_34.fits',
 '/data/20180822/bkgd_crr_b_ifu20180822_03_31_14.fits',
 '/data/2018

In [7]:
# Per-night listing of the ".fits" spectra available for one target,
# leaving out any file whose name contains "redo".
pio.get_target_pharosdata(
    "ZTF20aaelulu",
    kind="spec",
    extension=".fits",
    not_contains="redo",
)

{'20200108': ['spec_auto_robot_lstep1__crr_b_ifu20200108_09_40_27_ZTF20aaelulu.fits'],
 '20200111': ['spec_auto_robot_lstep1__crr_b_ifu20200111_09_33_10_ZTF20aaelulu.fits'],
 '20200113': ['spec_auto_robot_lstep1__crr_b_ifu20200113_08_19_35_ZTF20aaelulu.fits'],
 '20200119': ['spec_auto_robot_lstep1__crr_b_ifu20200119_09_42_23_ZTF20aaelulu.fits'],
 '20200123': ['spec_auto_robot_lstep1__crr_b_ifu20200123_11_20_33_ZTF20aaelulu.fits'],
 '20200125': ['spec_auto_robot_lstep1__crr_b_ifu20200125_12_07_05_ZTF20aaelulu.fits'],
 '20200129': ['spec_auto_robot_lstep1__crr_b_ifu20200129_07_32_00_ZTF20aaelulu.fits'],
 '20200204': ['spec_auto_robot_lstep1__crr_b_ifu20200204_11_56_38_ZTF20aaelulu.fits'],
 '20200205': ['spec_auto_robot_lstep1__crr_b_ifu20200205_07_26_36_ZTF20aaelulu.fits'],
 '20200214': ['spec_auto_robot_lstep1__crr_b_ifu20200214_06_46_15_ZTF20aaelulu.fits'],
 '20200221': ['spec_auto_robot_lstep1__crr_b_ifu20200221_05_54_25_ZTF20aaelulu.fits'],
 '20200228': ['spec_auto_robot_lstep1__crr_

***
# 2 SEDmQuery | Download what you need

In [3]:
from ztfquery import sedm

In [4]:
squery = sedm.SEDMQuery()

In [9]:
squery.get_target_spectra("ZTF20aaelulu", client=client, not_contains="redo")

['/Users/mrigault/Data/ZTF/sedm/redux/20200108/spec_auto_robot_lstep1__crr_b_ifu20200108_09_40_27_ZTF20aaelulu.fits',
 '/Users/mrigault/Data/ZTF/sedm/redux/20200111/spec_auto_robot_lstep1__crr_b_ifu20200111_09_33_10_ZTF20aaelulu.fits',
 '/Users/mrigault/Data/ZTF/sedm/redux/20200113/spec_auto_robot_lstep1__crr_b_ifu20200113_08_19_35_ZTF20aaelulu.fits',
 '/Users/mrigault/Data/ZTF/sedm/redux/20200119/spec_auto_robot_lstep1__crr_b_ifu20200119_09_42_23_ZTF20aaelulu.fits',
 '/Users/mrigault/Data/ZTF/sedm/redux/20200123/spec_auto_robot_lstep1__crr_b_ifu20200123_11_20_33_ZTF20aaelulu.fits',
 '/Users/mrigault/Data/ZTF/sedm/redux/20200125/spec_auto_robot_lstep1__crr_b_ifu20200125_12_07_05_ZTF20aaelulu.fits',
 '/Users/mrigault/Data/ZTF/sedm/redux/20200129/spec_auto_robot_lstep1__crr_b_ifu20200129_07_32_00_ZTF20aaelulu.fits',
 '/Users/mrigault/Data/ZTF/sedm/redux/20200204/spec_auto_robot_lstep1__crr_b_ifu20200204_11_56_38_ZTF20aaelulu.fits',
 '/Users/mrigault/Data/ZTF/sedm/redux/20200205/spec_auto

In [None]:
squery.get_ni