In [2]:
#%pip install --upgrade pip
# (json is part of the Python standard library and must not be pip-installed)
#%pip install numpy networkx scipy joblib matplotlib pandas astropy requests

# imports
%matplotlib inline

import numpy as np
import pandas as pd 
from astropy.io import fits
import requests

import os
import json

In [3]:
###### Download the Catalog SDSS DR10 QSFIT
# Local target and remote location of the QSFit 1.2.4 catalog.
filename = "dataset/qsfit.fits"
url = "https://qsfit.inaf.it/cat_1.24/fits/qsfit_1.2.4.fits"
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Reuse an existing local copy so that "Restart & Run All" does not fetch the
# whole catalog again; delete the file to force a fresh download.
if not os.path.exists(filename):
    r = requests.get(url, timeout=60)
    # Fail loudly on HTTP errors instead of saving an error page as a FITS file.
    r.raise_for_status()
    # Write to a temporary name first so an interrupted write never leaves a
    # truncated file under the final name.
    tmp_filename = filename + ".part"
    with open(tmp_filename, 'wb') as f:
        f.write(r.content)
    os.replace(tmp_filename, filename)

In [4]:
# Open the downloaded catalog and take the table stored in HDU 1.
# NOTE(review): hdu_list is left open here; close it once no later cell needs it.
hdu_list = fits.open(filename, memmap=True)
catalog_table = hdu_list[1].data

# Build the DataFrame; reset_index() adds an explicit "index" column so that
# every row carries its position in the catalog.
df = pd.DataFrame(catalog_table).reset_index()

In [5]:
# Number of rows (sources) in the catalog.
df.shape[0]

71261

In [6]:
# Preview the first five catalog rows.
df.head(n=5)

Unnamed: 0,index,SPEC,SDSS_NAME,RA,DEC,REDSHIFT,PLATE,FIBER,MJD,E_BV,...,BALMER__LOGNE,BALMER__LOGNE_ERR,BALMER__LOGTAU,BALMER__LOGTAU_ERR,BALMER__FWHM,BALMER__FWHM_ERR,BALMER__QUALITY,CHISQ,DOF,ELAPSED_TIME
0,0,spec-0685-52203-0467,000006.53+003055.2,0.027228,0.515341,1.8246,685,467,52203,0.025235,...,9.0,0.0,0.0,,5040.799805,0.0,2,3016.012695,3135,6.814029
1,1,spec-0685-52203-0470,000008.13+001634.6,0.0339,0.276301,1.8373,685,470,52203,0.031247,...,9.0,0.0,0.0,,5040.799805,0.0,2,3236.74292,3131,4.35846
2,2,spec-0650-52143-0199,000009.42-102751.9,0.039271,-10.464426,1.8449,650,199,52143,0.036447,...,9.0,0.0,0.0,,5040.799805,0.0,2,4900.529785,3461,4.998147
3,3,spec-0750-52235-0499,000011.41+145545.6,0.047549,14.929355,0.4597,750,499,52235,0.040418,...,9.0,0.0,0.0,,5040.799805,0.0,0,4066.879639,3308,9.377895
4,4,spec-0387-51791-0200,000011.96+000225.3,0.049839,0.040365,0.4789,387,200,51791,0.031272,...,9.0,0.0,0.0,,5040.799805,0.0,0,3972.735352,3412,15.738253


In [10]:
# Keep only the sources with redshift below 0.3 and show how many remain.
low_redshift_mask = df['REDSHIFT'].lt(0.3)
idx = df.loc[low_redshift_mask]
idx.shape[0]

1937

In [11]:
# Remote directory holding one sub-directory of spectra per plate, and the
# local locations of the downloaded spectra and of the JSON manifest.
url_spectra = 'https://dr10.sdss.org/sas/dr10/sdss/spectro/redux/26/spectra/'
path_fits = 'dataset/fits/'
path_json = 'dataset/fits.json'

# Catalog column names, reused as the keys of each manifest record.
PATH_FILE = "PATH_FILE"
SPEC = "SPEC"
PLATE = "PLATE"
REDSHIFT = "REDSHIFT"

# List of records (one dict per spectrum stored on disk) written to the json
fits_dic = []


def _looks_like_fits(candidate_path):
    """Return True if `candidate_path` is readable and starts with the FITS 'SIMPLE' keyword."""
    try:
        with open(candidate_path, 'rb') as fh:
            return fh.read(6) == b'SIMPLE'
    except OSError:
        return False


# The target directory is the same for every spectrum: create it once.
os.makedirs(path_fits, exist_ok=True)

# A single session reuses the HTTP connection across the many requests.
session = requests.Session()
try:
    #### LOOP to download spectra ######
    for index, spectra in idx.iterrows():
        spec = spectra[SPEC]
        # Cast to built-in types so the values are always JSON-serialisable.
        plate = int(spectra[PLATE])
        redshift = float(spectra[REDSHIFT])

        path = path_fits + spec + '.fits'

        # Only download spectra that are not already stored as valid FITS files.
        if not _looks_like_fits(path):
            # Plate directories are zero-padded to four digits on the server.
            zeroplate = str(plate).zfill(4)
            url = url_spectra + zeroplate + '/' + spec + '.fits'
            try:
                r = session.get(url, timeout=60)
                r.raise_for_status()
            except requests.RequestException as err:
                # Skip this spectrum rather than saving an error page as .fits.
                print(f"Skipping {spec}: {err}")
                continue
            if r.content[:6] != b'SIMPLE':
                print(f"Skipping {spec}: response is not a FITS file")
                continue

            # store the file
            with open(path, 'wb') as f:
                f.write(r.content)

        # store the information in the dictionary
        fits_dic.append({
            PATH_FILE: path,
            SPEC: spec,
            PLATE: plate,
            REDSHIFT: redshift,
        })
finally:
    session.close()
    # Write the manifest once, even if the loop was interrupted, instead of
    # rewriting the whole file after every spectrum.
    with open(path_json, 'w') as outfile:
        json.dump(fits_dic, outfile)