In [27]:
import os
import requests
import tarfile
import glob
import healpy as hp
from astropy.io import fits
from astropy.table import Table
from multiprocessing import Pool
import numpy as np
from tqdm import tqdm

url = "http://vipers.inaf.it/data/pdr2/spectra/"
files = ["VIPERS_W1_SPECTRA_1D_PDR2.tar.gz", "VIPERS_W4_SPECTRA_1D_PDR2.tar.gz"]

header_keys = ['ID', 'RA', 'DEC', 'REDSHIFT', 'REDFLAG', 'EXPTIME', 'NORM', 'MAG']


def download_data(vipers_data_path: str = ''):
    """Download the VIPERS data from the web and unpack it into the specified directory."""
    # Create the output directory if it does not exist
    if not os.path.exists(vipers_data_path):
        os.makedirs(vipers_data_path)

    subdirectory_path = os.path.join(vipers_data_path, "vipers")

    # Download each file
    for file in files:
        local_path = os.path.join(vipers_data_path, file)

        # Check if file needs to be downloaded
        if not os.path.exists(local_path):
            print(f"Downloading {file}...")
            response = requests.get(url + file, stream=True)
            if response.status_code == 200:
                with open(local_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            else:
                print(f"Failed to download {file}. Status code: {response.status_code}")
                continue

        # Unpack the tar.gz file into its specific subdirectory
        print(f"Unpacking {file} into {subdirectory_path}...")
        with tarfile.open(local_path, "r:gz") as tar:
            tar.extractall(path=subdirectory_path)
        print(f"{file} unpacked successfully!")

        # Remove the tar files
        os.remove(local_path)


def extract_data(filename):
    """Extract the contents of a tar file to a dictionary for each file"""
    hdu = fits.open(filename)
    header = hdu[1].header
    data = hdu[1].data

    results = {}

    # Loop through the header keys and add them to the results dictionary
    for key in header_keys:
        results[key.lower()] = header[key]
    
    # Add the spectrum data to the results dictionary
    results['spectrum_flux'] = data['FLUXES']
    results['spectrum_wave'] = data['WAVES']
    results['spectrum_ivar'] = data['NOISE']
    results['spectrum_mask'] = data['MASK']
    
    hdu.close()
    return results


def main(vipers_data_path: str = '', nside: int = 16, num_processes: int = 10):

    # Download the VIPERS data if it does not exist
    if not os.path.exists(vipers_data_path):
        download_data(vipers_data_path)

    # Load all fits file, standardize them and append to HDF5 file
    files = glob.glob(os.path.join(vipers_data_path, '*/*.fits'))
    files = files

    # Run the parallel processing
    with Pool(num_processes) as pool:
        results = list(tqdm(pool.imap(extract_data, files), total=len(files)))

    # Convert to table
    table = Table(results)

    # Get healpix files
    healpix_indices = hp.ang2pix(nside, table['RA'], table['DEc'], lonlat=True, nest=True)
    unique_indices = np.unique(healpix_indices)




    return 
    


In [2]:
mkdir /mnt/ceph/users/polymathic/AstroPile/vipers/vipers

In [1]:
ls /mnt/ceph/users/polymathic/AstroPile/sdss16/

[0m[01;34mboss[0m/  [01;34meboss[0m/  [01;34msdss[0m/  [01;36msdss16.py[0m@  [01;34msegue1[0m/  [01;34msegue2[0m/


In [28]:
vipers_data_path = '/mnt/ceph/users/polymathic/AstroPile/vipers/'

results = main(vipers_data_path, num_processes=30)

100%|██████████| 91507/91507 [00:58<00:00, 1553.11it/s]


In [25]:
from astropy.table import Table

table = Table(results)

In [26]:
table

ra,dec,redshift,redflag,exptime,norm,mag,spectrum_flux,spectrum_wave,spectrum_ivar,spectrum_mask
float64,float64,float64,float64,float64,float64,float64,float32[557],float32[557],float32[557],int32[557]
36.587488,-5.87282,0.5282,0.5282,540.0,4.10179033084178,21.8407,2.3823306e-18 .. 1.9335241e-18,5514.28 .. 9484.12,5.1994865e-18 .. 1.2869239e-18,2 .. 2
36.8297429,-5.6403521,0.674,0.674,540.0,3.51227436980568,21.2643,4.9038398e-18 .. 2.2705266e-18,5514.28 .. 9484.12,2.1399899e-18 .. 1.1310551e-18,2 .. 2
37.9509886,-4.3851945,0.7268,0.7268,540.0,3.20371418646672,21.4765,1.9446907e-18 .. 4.443808e-18,5514.28 .. 9484.12,3.875907e-18 .. 1.73436e-18,2 .. 0
38.1825541,-5.0315352,0.602,0.602,540.0,3.60485094849837,21.9827,1.819438e-18 .. 2.3870528e-18,5514.28 .. 9484.12,1.36540785e-17 .. 1.1807854e-18,2 .. 1
32.1458921,-4.9582418,0.9201,0.9201,540.0,3.51399440500701,21.4144,1.805379e-18 .. 1.6961524e-18,5514.28 .. 9484.12,2.2603422e-18 .. 6.2513224e-19,2 .. 3
34.5842437,-4.5026261,0.7084,0.7084,540.0,2.72324289071621,22.3618,1.8365136e-18 .. 1.238279e-18,5514.28 .. 9484.12,1.8876553e-18 .. 2.2781586e-18,2 .. 2
35.5375799,-5.6754759,0.8488,0.8488,540.0,4.64565290854139,22.2298,3.6352342e-19 .. 2.5730293e-18,5514.28 .. 9484.12,5.305859e-18 .. 1.6058797e-18,2 .. 1
36.5182922,-5.5703414,0.5774,0.5774,540.0,2.65741869331687,21.5781,2.2099321e-18 .. 2.9648943e-18,5514.28 .. 9484.12,6.479856e-18 .. 7.4023886e-19,2 .. 1
37.7940448,-4.900404,0.6419,0.6419,540.0,2.86914320197045,20.5048,3.59577e-18 .. 1.0528536e-17,5514.28 .. 9484.12,2.9602077e-18 .. 1.2539307e-18,2 .. 1
37.9535528,-5.9771072,0.5191,0.5191,540.0,1.82564727639019,22.3682,7.1668433e-19 .. 1.6308515e-18,5514.28 .. 9484.12,1.9315333e-18 .. 1.109179e-18,2 .. 2


In [2]:
base_dset = '/mnt/ceph/users/polymathic/AstroPile/vipers/VIPERS_W1_SPECTRA_1D_PDR2/'
sample_file = 'VIPERS_101121877.fits'

extract_data(base_dset + sample_file)

{'ra': 30.8151647,
 'dec': -5.974833,
 'redshift': 0.2096,
 'redflag': 0.2096,
 'exptime': 540.0,
 'norm': 26.2311112823193,
 'mag': 19.2464,
 'spectrum_flux': array([5.4272670e-17, 5.4272670e-17, 5.4272670e-17, 5.4272670e-17,
        5.4272670e-17, 5.4272670e-17, 5.4272670e-17, 5.4272670e-17,
        5.4272670e-17, 5.4272670e-17, 5.4272670e-17, 5.4272670e-17,
        5.4272670e-17, 5.4272670e-17, 5.4272670e-17, 5.4272670e-17,
        5.4272670e-17, 5.4272670e-17, 5.4272670e-17, 5.4272670e-17,
        5.9429466e-17, 6.0549535e-17, 6.1833028e-17, 6.2154616e-17,
        4.2078373e-17, 5.2663939e-17, 5.6291176e-17, 4.2719726e-17,
        5.8214179e-17, 4.6110098e-17, 3.9270072e-17, 5.8511382e-17,
        6.5032172e-17, 6.5937147e-17, 6.8442479e-17, 6.1203477e-17,
        5.7858485e-17, 5.2497633e-17, 5.0166999e-17, 4.9738383e-17,
        5.1694176e-17, 4.5273587e-17, 4.2983610e-17, 4.5155283e-17,
        4.8909218e-17, 5.0450295e-17, 5.9191026e-17, 6.4730251e-17,
        5.1856022e-17, 5.

In [6]:
ls /mnt/ceph/users/polymathic/AstroPile/vipers/VIPERS_W1_SPECTRA_1D_PDR2 | head -n 10

VIPERS_101121877.fits
VIPERS_101122063.fits
VIPERS_101122068.fits
VIPERS_101122081.fits
VIPERS_101122084.fits
VIPERS_101122144.fits
VIPERS_101122212.fits
VIPERS_101122214.fits
VIPERS_101122224.fits
VIPERS_101122250.fits
ls: write error: Broken pipe
