# The Sloan Digital Sky Survey Quasar Catalog: sixteenth data release (DR16Q)

This notebook works with the DR16Q superset catalogue (`DR16Q_Superset_v3.fits`). The column descriptions can be found in the SDSS data model; see also:

https://www.sdss.org/dr16/algorithms/qso_catalog/

In [1]:
from astropy.io import fits
from astropy.table import Table
from matplotlib import pyplot as plt
from astroML.datasets import fetch_sdss_spectrum
import multiprocessing
import pandas as pd
from functools import partial
import numpy as np
import glob
import re
import wget
import os
import shutil
import os.path

import sys
sys.path.append('../')
from file_path import *
from tqdm import tqdm

# Read the catalogue

In [2]:
# Load the DR16Q superset catalogue, then keep the six columns used by the
# rest of this notebook as a pandas DataFrame.
catalogue_file = '../../SDSS-data/DR16Q_Superset_v3.fits'
catalogue = Table.read(catalogue_file, format='fits')
names = catalogue[
    ('PLATE', 'MJD', 'FIBERID', 'CLASS_PERSON', 'Z_VI', 'Z_QN')
].to_pandas()

# Define multiprocessing helper functions

In [3]:
def parallelize_dataframe(df, func, data_home='./', number=8):
    """Apply ``func`` to row-wise chunks of ``df`` in parallel worker processes.

    The frame is split by row position into contiguous, near-equal chunks and
    each chunk is handed to ``func(data_home, chunk)`` in a
    ``multiprocessing.Pool``. Return values of ``func`` are discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose rows are distributed over the workers.
    func : callable
        Called as ``func(data_home, chunk)``. It is sent to the worker
        processes, so it must be picklable (e.g. a module-level function).
    data_home : str
        Passed unchanged as the first argument of ``func``.
    number : int
        Maximum number of partitions / worker processes. No more workers
        than rows are started; an empty frame is processed as one empty chunk.

    Raises
    ------
    ValueError
        If ``number`` is smaller than 1.
    """
    if number < 1:
        raise ValueError('number must be a positive integer')

    # Starting more processes than there are rows only creates idle workers.
    num_partitions = max(1, min(number, len(df)))

    # Split row positions instead of the frame itself: np.array_split on a
    # DataFrame goes through a deprecated pandas code path.
    position_chunks = np.array_split(np.arange(len(df)), num_partitions)
    df_split = [df.iloc[positions] for positions in position_chunks]

    f = partial(func, data_home)
    # The context manager terminates the workers if a task raises, so a failed
    # run does not leave orphaned processes behind.
    with multiprocessing.Pool(num_partitions) as pool:
        pool.map(f, df_split)
        # Normal path: shut the workers down gracefully, as before.
        pool.close()
        pool.join()

def download(data_home, df):
    """Download the spectrum file of every row of ``df`` into ``data_home``.

    For each row the file name ``spec-<plate>-<mjd>-<fiber>.fits`` is built
    from the ``PLATE``, ``MJD`` and ``FIBERID`` columns and fetched from the
    DR16 eBOSS ``v5_13_0`` spectra tree with ``wget``. Files that already
    exist in ``data_home`` are skipped. A failed download is reported on
    stdout and does not stop the loop.

    Parameters
    ----------
    data_home : str
        Directory that receives the downloaded files.
    df : pandas.DataFrame
        Must provide ``PLATE``, ``MJD`` and ``FIBERID`` columns whose values
        can be converted with ``int()``.
    """
    root_url = 'https://data.sdss.org/sas/dr16/eboss/spectro/redux/v5_13_0/spectra/full/'
    for plate_raw, mjd_raw, fiber_raw in zip(df['PLATE'], df['MJD'], df['FIBERID']):
        # SDSS spectrum names use a plate number zero-padded to at least four
        # digits; padding is a no-op for the 4- and 5-digit plates used here.
        plate = str(int(plate_raw)).zfill(4)
        mjd = str(int(mjd_raw)).zfill(5)
        fiberID = str(int(fiber_raw)).zfill(4)
        filename = 'spec-' + plate + '-' + mjd + '-' + fiberID + '.fits'

        # Already downloaded on a previous run: nothing to do.
        if os.path.exists(os.path.join(data_home, filename)):
            continue

        url = root_url + plate + '/' + filename
        try:
            wget.download(url, data_home)
        except Exception as err:
            # Best effort: report and move on. ``Exception`` (not a bare
            # ``except``) lets Ctrl-C / SystemExit still abort the run.
            print("!Failed:", plate, mjd, fiberID, err, '\n')


# Download Quasars

In [4]:
# Partition the catalogue rows into plate-number bins (lower bound exclusive,
# upper bound inclusive). Rows with PLATE <= 3523 fall into no bin.
plate_number = names['PLATE']
names10000 = names[plate_number > 10000]
names9000 = names[(plate_number > 9000) & (plate_number <= 10000)]
names8000 = names[(plate_number > 8000) & (plate_number <= 9000)]
names7000 = names[(plate_number > 7000) & (plate_number <= 8000)]
names6000 = names[(plate_number > 6000) & (plate_number <= 7000)]
names5000 = names[(plate_number > 5000) & (plate_number <= 6000)]
names4000 = names[(plate_number > 4000) & (plate_number <= 5000)]
names3000 = names[(plate_number > 3523) & (plate_number <= 4000)]

In [7]:
# Download the spectra listed in `remain` into the 8000 plate directory,
# requesting 16 partitions.
# NOTE(review): `remain` is built in a cell that appears further down in this
# notebook (In [6]); that cell must be run first. `data_path` is not defined in
# the visible cells — presumably it comes from `file_path`; confirm.
parallelize_dataframe(remain,download,data_home=data_path+'../../SDSS-data/DR16Q_Superset_v3/8000/',number=16)

In [11]:
# Count the catalogue rows whose Z_QN value is positive.
z_qn_positive = catalogue['Z_QN'] > 0
len(catalogue[z_qn_positive])

In [8]:
# Display the rows with 9000 < PLATE <= 10000.
names9000

Unnamed: 0,PLATE,MJD,FIBERID,CLASS_PERSON,Z_VI,Z_QN
38,9345,57713,519,3,1.360,1.349210
377,9403,58018,485,0,-1.000,0.482406
435,9345,57713,493,0,-1.000,2.042758
464,9403,58018,500,0,-1.000,2.607313
483,9403,58018,513,0,-1.000,1.857009
...,...,...,...,...,...,...
1440554,9179,57682,10,0,-1.000,2.040523
1440566,9179,57682,994,0,-1.000,1.820021
1440569,9179,57682,56,3,3.066,3.060739
1440573,9159,57666,9,0,-1.000,1.772507


In [25]:
# Show the rows with CLASS_PERSON == 3 and a positive Z_VI.
# NOTE(review): `ids` is not defined in any visible cell — presumably an
# earlier name for the `names` frame; confirm before re-running.
ids[(ids['CLASS_PERSON'] == 3) & (ids['Z_VI']>0)]

Unnamed: 0,PLATE,MJD,FIBERID,CLASS_PERSON,Z_VI,Z_QN
15,6173,56238,528,3,2.309,2.311340
16,7596,56945,162,3,2.309,2.309355
19,4216,55477,310,3,0.250,-1.000000
27,6172,56269,366,3,2.497,2.498696
28,6177,56268,595,3,2.497,2.511271
...,...,...,...,...,...,...
1440600,6182,56190,380,3,2.415,2.419373
1440606,7696,57655,120,3,2.452,2.443098
1440607,6511,56540,912,3,2.452,2.451875
1440608,7134,56566,408,3,2.452,2.452158


In [19]:
# Print the number of catalogue rows for each CLASS_PERSON code, followed by
# the size of the whole catalogue.
class_person_codes = (
    ('Not Inspected:', 0),
    ('Star:', 1),
    ('Quasar:', 3),
    ('Galaxy:', 4),
    ('BAL Quasar:', 30),
    ('Blazar:', 50),
)
for label, code in class_person_codes:
    print(label, len(catalogue[catalogue['CLASS_PERSON'] == code]))
print('Total:', len(catalogue))

Not Inspected: 731347
Star: 233859
Quasar: 396843
Galaxy: 39054
BAL Quasar: 39501
Blazar: 11
Total: 1440615


In [24]:
# Same six columns as `names`, restricted to rows with a positive Z_VI.
zVIs = (
    catalogue[('PLATE', 'MJD', 'FIBERID', 'CLASS_PERSON', 'Z_VI', 'Z_QN')]
    .to_pandas()
    .loc[lambda frame: frame['Z_VI'] > 0]
)

In [25]:
# Print the number of rows in `zVIs` for each CLASS_PERSON code, followed by
# the total number of rows in `zVIs`.
zvi_class_codes = (
    ('zVI Not Inspected:', 0),
    ('zVI Star:', 1),
    ('zVI Quasar:', 3),
    ('zVI Galaxy:', 4),
    ('zVI BAL Quasar:', 30),
    ('zVI Blazar:', 50),
)
for zvi_label, zvi_code in zvi_class_codes:
    print(zvi_label, len(zVIs[zVIs['CLASS_PERSON'] == zvi_code]))
print('zVI Total:', len(zVIs))

zVI Not Inspected: 4789
zVI Star: 3406
zVI Quasar: 396831
zVI Galaxy: 30666
zVI BAL Quasar: 39501
zVI Blazar: 1
zVI Total: 475194


In [None]:
# Ratio of the 'Quasar' count to the 'Star' count from the CLASS_PERSON tally
# recorded in this notebook (396843 and 233859).
396843/233859

In [14]:
# NOTE(review): the origin of these two counts is not shown in the visible
# cells — record where they come from or remove this scratch cell.
274967/192925

In [None]:
# Number of catalogue rows with 7000 < PLATE <= 8000.
len(names7000)

In [23]:
# Display the rows with PLATE > 10000.
names10000

Unnamed: 0,PLATE,MJD,FIBERID,CLASS_PERSON,Z_VI,Z_QN
0,11279,58449,85,0,-1.0,2.029856
9,11279,58449,978,0,-1.0,1.565547
14,11277,58450,705,0,-1.0,2.033733
34,11277,58450,294,0,-1.0,0.856487
40,11546,58488,936,0,-1.0,0.542567
...,...,...,...,...,...,...
1440577,11279,58449,947,0,-1.0,2.321162
1440590,11546,58488,78,0,-1.0,1.280721
1440596,11277,58450,700,0,-1.0,1.656114
1440601,11277,58450,685,0,-1.0,2.418494


In [None]:
# Scratch cell: trivial arithmetic, not used by any other cell shown here.
1+1

In [None]:
# Collect PLATE / MJD / FIBERID of every row in `names8000` whose spectrum file
# is not yet present in the 8000 download directory, then print how many are
# missing. The three lists are turned into the `remain` frame in a later cell.
# NOTE(review): `data_path` is not defined in the visible cells — presumably it
# comes from `file_path`; confirm.
spectra_dir = data_path+'../../SDSS-data/DR16Q_Superset_v3/8000'
plates = []
mjds = []
fiberIDs = []
# `total` lets tqdm show a percentage and ETA; iterrows() alone has no length.
for index, row in tqdm(names8000.iterrows(), total=len(names8000)):
    plate = str(int(row['PLATE']))
    mjd = str(int(row['MJD'])).zfill(5)
    fiberID = str(int(row['FIBERID'])).zfill(4)
    filename = 'spec-'+plate+'-'+mjd+'-'+fiberID+'.fits'
    if not os.path.exists(os.path.join(spectra_dir, filename)):
        # File is missing: remember the raw row values for a later download.
        plates.append(row['PLATE'])
        mjds.append(row['MJD'])
        fiberIDs.append(row['FIBERID'])
# Number of missing files; kept in `j` as before.
j = len(plates)
print(j)

124607it [00:16, 7585.88it/s]

In [6]:
# One row per missing spectrum, built from the three parallel lists.
remaining_rows = list(zip(plates, mjds, fiberIDs))
remain = pd.DataFrame(remaining_rows, columns=['PLATE', 'MJD', "FIBERID"])

In [20]:
# The (PLATE, MJD, FIBERID) tuples of the missing spectra as a plain list.
# `a` is not used by any other cell shown here.
a = list(zip(plates,mjds,fiberIDs))

In [8]:
# Inspect the MJD column of `remain`; the recorded output shows dtype float64.
remain['MJD']

0        57328.0
1        56564.0
2        57654.0
3        56604.0
4        58073.0
          ...   
16700    58402.0
16701    57328.0
16702    57654.0
16703    57339.0
16704    57655.0
Name: MJD, Length: 16705, dtype: float64