# Script for obtaining SDSS bulk field image and psf fits files 

# 1) Images:
## These can be downloaded from http://data.sdss3.org/bulkFields/runCamcolFields
## by uploading your own files, or you can download directly as below,  using the form:
http://data.sdss3.org/sas/dr12/boss/photoObj/frames/301/RUN(4)/CAMCOL(1)/frame-u-RUN(6)-CAMCOL(1)-FIELD(4).fits.bz2

In [30]:
import numpy as np
from astropy.table import Table
from astropy.io import fits
import matplotlib.pyplot as plt
import bz2
import pdb
import warnings
import os.path
%matplotlib inline
from matplotlib.colors import LogNorm
import matplotlib.gridspec as gridspec


## Fits file containing data for the new ferengi candidates. Important columns are 'run', 'camcol', and 'field.'

In [31]:
data = Table.read('/home/mel/Documents/GZ_HUBBLE/gzh_red_disks/new_ferengi/data/ferengi_candidates_1527.fits')

In [3]:
def get_filename(run,camcol,field,bnd):
    r = run
    c = camcol
    f = field
    strname = strname = 'http://data.sdss3.org/sas/dr12/boss/photoObj/frames/301/'+'{:04d}'.format(r)+'/'+'{:1d}'.format(c)+'/frame-'+'{:1s}'.format(bnd)+'-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'{:04d}'.format(f)+'.fits.bz2'+'\n'
    return strname

## Create text file with each run/camcol/field combo. Some galaxies are in the same bulk image, so there are duplicate combinations of run/camcol/field. Only one of each combination are needed, so duplicates are removed. 

In [106]:
text_file = open("sdss_ugriz_downloads.txt","w")
n=0
combos = []
bands = ['u','g','r','i','z']
for gal in data:
    combo = '{:04d}'.format(gal['run'])+'{:1d}'.format(gal['camcol']) + '{:04d}'.format(gal['field'])
    if combo not in combos: #new tile, save name for download
        for band in bands: #download file for u,g,r,i, and z for each run/camcol/field combo
            text_file.write(get_filename(gal['run'],gal['camcol'],gal['field'],band))
            combos.append(combo) #record this combination so it's not used twice later
            n=n+1 #number of unique files
    else: #tile already recorded for downlaod, skip 
        pass

text_file.close()


## Run wget -i sdss_ugriz_downloads.txt to download all the files!

## Check - make sure all were downloaded correctly

In [14]:
bands = ['u','g','r','i','z']
missing_files = []
files=[]
for gal in data:
    r = gal['run']
    c = gal['camcol']
    f = gal['field']

    for bnd in bands: 
        fname = 'frame-'+'{:1s}'.format(bnd)+'-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'{:04d}'.format(f)+'.fits.bz2'
        if os.path.isfile('../ugriz_bulk_images/'+fname) == False:
            missing_files.append(fname)
        files.append(fname)
    
print '{:d} files were not downloaded'.format(len(missing_files))

115 files were not downloaded


In [17]:
missing_files

['frame-u-000756-1-0474.fits.bz2',
 'frame-g-000756-1-0474.fits.bz2',
 'frame-r-000756-1-0474.fits.bz2',
 'frame-i-000756-1-0474.fits.bz2',
 'frame-z-000756-1-0474.fits.bz2',
 'frame-u-000752-3-0363.fits.bz2',
 'frame-g-000752-3-0363.fits.bz2',
 'frame-r-000752-3-0363.fits.bz2',
 'frame-i-000752-3-0363.fits.bz2',
 'frame-z-000752-3-0363.fits.bz2',
 'frame-u-000756-2-0505.fits.bz2',
 'frame-g-000756-2-0505.fits.bz2',
 'frame-r-000756-2-0505.fits.bz2',
 'frame-i-000756-2-0505.fits.bz2',
 'frame-z-000756-2-0505.fits.bz2',
 'frame-u-000756-4-0127.fits.bz2',
 'frame-g-000756-4-0127.fits.bz2',
 'frame-r-000756-4-0127.fits.bz2',
 'frame-i-000756-4-0127.fits.bz2',
 'frame-z-000756-4-0127.fits.bz2',
 'frame-u-000752-4-0264.fits.bz2',
 'frame-g-000752-4-0264.fits.bz2',
 'frame-r-000752-4-0264.fits.bz2',
 'frame-i-000752-4-0264.fits.bz2',
 'frame-z-000752-4-0264.fits.bz2',
 'frame-u-000756-2-0122.fits.bz2',
 'frame-g-000756-2-0122.fits.bz2',
 'frame-r-000756-2-0122.fits.bz2',
 'frame-i-000756-2-0

In [24]:
#The error - run names 752 and 756 shouldn't have a leading zero - the url was a lie! (should be RUN, not RUN(4))

#new file to download extras:
missing_data_756 = data[(data['run']==756)]
missing_data_752 = data[data['run']==752]
missing_data_745 = data[data['run']==745]
def get_filename_3run(run,camcol,field,bnd):
    r = run
    c = camcol
    f = field
    strname = strname = 'http://data.sdss3.org/sas/dr12/boss/photoObj/frames/301/'+'{:03d}'.format(r)+'/'+'{:1d}'.format(c)+'/frame-'+'{:1s}'.format(bnd)+'-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'{:04d}'.format(f)+'.fits.bz2'+'\n'
    return strname

text_file = open("sdss_ugriz_downloads_752_756.txt","w")
n=0
combos = []
bands = ['u','g','r','i','z']
for gal in missing_data_752:
    combo = '{:04d}'.format(gal['run'])+'{:1d}'.format(gal['camcol']) + '{:04d}'.format(gal['field'])
    if combo not in combos: #new tile, save name for download
        for band in bands: #download file for u,g,r,i, and z for each run/camcol/field combo
            text_file.write(get_filename_3run(gal['run'],gal['camcol'],gal['field'],band))
            combos.append(combo) #record this combination so it's not used twice later
            n=n+1 #number of unique files
    else: #tile already recorded for downlaod, skip 
        pass
for gal in missing_data_756:
    combo = '{:04d}'.format(gal['run'])+'{:1d}'.format(gal['camcol']) + '{:04d}'.format(gal['field'])
    if combo not in combos: #new tile, save name for download
        for band in bands: #download file for u,g,r,i, and z for each run/camcol/field combo
            text_file.write(get_filename_3run(gal['run'],gal['camcol'],gal['field'],band))
            combos.append(combo) #record this combination so it's not used twice later
            n=n+1 #number of unique files
    else: #tile already recorded for downlaod, skip 
        pass


text_file.close()

text_file = open("sdss_ugriz_downloads_745.txt","w")
n=0
combos = []
bands = ['u','g','r','i','z']
for gal in missing_data_745:
    combo = '{:04d}'.format(gal['run'])+'{:1d}'.format(gal['camcol']) + '{:04d}'.format(gal['field'])
    if combo not in combos: #new tile, save name for download
        for band in bands: #download file for u,g,r,i, and z for each run/camcol/field combo
            text_file.write(get_filename_3run(gal['run'],gal['camcol'],gal['field'],band))
            combos.append(combo) #record this combination so it's not used twice later
            n=n+1 #number of unique files
    else: #tile already recorded for downlaod, skip 
        pass

text_file.close()


In [25]:
#check again:
bands = ['u','g','r','i','z']
missing_files = []
files=[]
for gal in data:
    r = gal['run']
    c = gal['camcol']
    f = gal['field']

    for bnd in bands: 
        fname = 'frame-'+'{:1s}'.format(bnd)+'-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'{:04d}'.format(f)+'.fits.bz2'
        if os.path.isfile('../ugriz_bulk_images/'+fname) == False:
            missing_files.append(fname)
        files.append(fname)
    
print '{:d} files were not downloaded'.format(len(missing_files))

0 files were not downloaded


# 2) PSF Field Files

## These can be downloaded with the form:
https://data.sdss.org/sas/dr12/boss/photo/redux/301/RUN(2)/objcs/CAMCOL(1)/psField-RUN(6)-CAMCOL(1)-FIELD(4).fit

In [24]:
def get_psfname(run,camcol,field):
    r = run
    c = camcol
    f = field
    strname = strname = 'https://data.sdss.org/sas/dr12/boss/photo/redux/301/'+'{:02d}'.format(r)+'/objcs/'+'{:1d}'.format(c)+'/psField-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'{:04d}'.format(f)+'.fit'+'\n'
    return strname

In [25]:
text_file = open("sdss_psf_downloads.txt","w")

combos = []
for gal in data:
    combo = '{:04d}'.format(gal['run'])+'{:1d}'.format(gal['camcol']) + '{:04d}'.format(gal['field'])
    if combo not in combos: #new tile, save name for download
        text_file.write(get_psfname(gal['run'],gal['camcol'],gal['field']))
        combos.append(combo) #record this combination so it's not used twice later
    else: #tile already recorded for downlaod, skip 
        pass

text_file.close()

## Check - make sure all were downloaded correctly

In [34]:
missing_files = []
for gal in data:
    r = gal['run']
    c = gal['camcol']
    f = gal['field']
    fname = 'psField-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'{:04d}'.format(f)+'.fit'
    if os.path.isfile('../psField_files/'+fname) == False:
        missing_files.append(fname)
print '{:d} files were downloaded incorrectly'.format(len(missing_files))

0 files were downloaded incorrectly


# 3) TSF Field Files
## These contain the zero-point magnitudes for each band, and are downloaded with the form:
http://das.sdss.org/imaging/RUN(4)/40/calibChunks/CAMCOL(1)/tsField-RUN(6)-CAMCOL(1)-40-FIELD(4).fit


In [32]:
def get_tsfname(run,camcol,field):
    r = run
    c = camcol
    f = field
    strname = strname = 'http://das.sdss.org/imaging/'+'{:02d}'.format(r)+'/40/calibChunks/'+'{:1d}'.format(c)+'/tsField-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'40-'+'{:04d}'.format(f)+'.fit'+'\n'
    return strname

In [127]:
text_file = open("sdss_tsf_downloads.txt","w")

combos = []
for gal in data:
    combo = '{:04d}'.format(gal['run'])+'{:1d}'.format(gal['camcol']) + '{:04d}'.format(gal['field'])
    if combo not in combos: #new tile, save name for download
        text_file.write(get_tsfname(gal['run'],gal['camcol'],gal['field']))
        combos.append(combo) #record this combination so it's not used twice later
    else: #tile already recorded for downlaod, skip 
        pass

text_file.close()

## Check - make sure all were downloaded correctly

In [45]:
missing_files = []
missing_objid = []
for gal in data:
    r = gal['run']
    c = gal['camcol']
    f = gal['field']
    fname = 'tsField-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'40-'+'{:04d}'.format(f)+'.fit'
    if os.path.isfile('../tsField_files/'+fname) == False:
        missing_files.append(fname)
        missing_objid.append(gal['dr12objid'])
print '{:d} files were not downloaded'.format(len(missing_files))

424 files were not downloaded


In [55]:
missing_objid[0]in missing_objid


True

In [58]:
#these should all be 41 instead of 40, for some reason. 

def get_tsfname_41(run,camcol,field):
    r = run
    c = camcol
    f = field
    strname = strname = 'http://das.sdss.org/imaging/'+'{:02d}'.format(r)+'/41/calibChunks/'+'{:1d}'.format(c)+'/tsField-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'41-'+'{:04d}'.format(f)+'.fit'+'\n'
    return strname

text_file = open("sdss_tsf_downloads_41.txt","w")

combos = []
for gal in data:
    if gal['dr12objid'] in missing_objid: #tsf file for galaxy hasn't been downloaded yet
        combo = '{:04d}'.format(gal['run'])+'{:1d}'.format(gal['camcol']) + '{:04d}'.format(gal['field'])
        if combo not in combos: #new tile, save name for download
            text_file.write(get_tsfname_41(gal['run'],gal['camcol'],gal['field']))
            combos.append(combo) #record this combination so it's not used twice later
        else: #tile already recorded for downlaod, skip 
            pass

text_file.close()

In [61]:
#recheck - downloaded correctly? 
missing_files = []
missing_objid_2 = []
for gal in data:
    r = gal['run']
    c = gal['camcol']
    f = gal['field']
    if gal['dr12objid'] in missing_objid: #should be 41 maybe
        fname = 'tsField-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'41-'+'{:04d}'.format(f)+'.fit'
    else: #should be 40
        fname = 'tsField-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'40-'+'{:04d}'.format(f)+'.fit'

    if os.path.isfile('../tsField_files/'+fname) == False:
        missing_files.append(fname)
        missing_objid_2.append(gal['dr12objid'])
print '{:d} files were not downloaded'.format(len(missing_files))

103 files were not downloaded


In [62]:
missing_files

['tsField-000756-1-41-0474.fit',
 'tsField-000756-2-41-0505.fit',
 'tsField-001302-6-41-0374.fit',
 'tsField-001302-3-41-0382.fit',
 'tsField-000756-4-41-0127.fit',
 'tsField-002126-4-41-0442.fit',
 'tsField-000756-2-41-0122.fit',
 'tsField-000756-5-41-0217.fit',
 'tsField-001302-1-41-0336.fit',
 'tsField-002168-5-41-0147.fit',
 'tsField-000756-6-41-0421.fit',
 'tsField-001302-1-41-0336.fit',
 'tsField-003058-6-41-0024.fit',
 'tsField-001302-2-41-0269.fit',
 'tsField-001302-6-41-0331.fit',
 'tsField-001302-1-41-0337.fit',
 'tsField-002168-5-41-0148.fit',
 'tsField-001302-1-41-0337.fit',
 'tsField-001302-5-41-0374.fit',
 'tsField-000756-1-41-0474.fit',
 'tsField-000756-2-41-0502.fit',
 'tsField-000756-3-41-0300.fit',
 'tsField-003063-2-41-0044.fit',
 'tsField-002336-3-41-0035.fit',
 'tsField-002247-3-41-0335.fit',
 'tsField-000756-1-41-0367.fit',
 'tsField-003063-1-41-0050.fit',
 'tsField-007717-1-41-0119.fit',
 'tsField-003063-3-41-0047.fit',
 'tsField-005390-2-41-0033.fit',
 'tsField-

In [90]:
#these should all be 44 instead of 40, for some reason. 

def get_tsfname_44(run,camcol,field):
    r = run
    c = camcol
    f = field
    strname = strname = 'http://das.sdss.org/imaging/'+'{:02d}'.format(r)+'/44/calibChunks/'+'{:1d}'.format(c)+'/tsField-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'44-'+'{:04d}'.format(f)+'.fit'+'\n'
    return strname

text_file = open("sdss_tsf_downloads_44.txt","w")

combos = []
for gal in data:
    if gal['dr12objid'] in missing_objid_2: #tsf file for galaxy hasn't been downloaded yet
        combo = '{:04d}'.format(gal['run'])+'{:1d}'.format(gal['camcol']) + '{:04d}'.format(gal['field'])
        if combo not in combos: #new tile, save name for download
            text_file.write(get_tsfname_44(gal['run'],gal['camcol'],gal['field']))
            combos.append(combo) #record this combination so it's not used twice later
        else: #tile already recorded for downlaod, skip 
            pass

text_file.close()

In [94]:
#recheck - downloaded correctly? 
missing_files = []
missing_objid_3 = []
for gal in data:
    r = gal['run']
    c = gal['camcol']
    f = gal['field']
        
    fname_41 = 'tsField-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'41-'+'{:04d}'.format(f)+'.fit'
    fname_44 = 'tsField-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'44-'+'{:04d}'.format(f)+'.fit'
    fname_40 = 'tsField-'+'{:06d}'.format(r)+'-'+'{:1d}'.format(c)+'-'+'40-'+'{:04d}'.format(f)+'.fit'
        
    #if no versions exist:     
    if os.path.isfile('../tsField_files/'+fname_41) == False and os.path.isfile('../tsField_files/'+fname_44) == False and os.path.isfile('../tsField_files/'+fname_40) == False:
        missing_files.append(fname_40)
        missing_objid_3.append(gal['dr12objid'])
print '{:d} files were not downloaded'.format(len(missing_files))

89 files were not downloaded


In [95]:
missing_files

['tsField-001302-6-40-0374.fit',
 'tsField-001302-3-40-0382.fit',
 'tsField-002126-4-40-0442.fit',
 'tsField-001302-1-40-0336.fit',
 'tsField-002168-5-40-0147.fit',
 'tsField-001302-1-40-0336.fit',
 'tsField-003058-6-40-0024.fit',
 'tsField-001302-2-40-0269.fit',
 'tsField-001302-6-40-0331.fit',
 'tsField-001302-1-40-0337.fit',
 'tsField-002168-5-40-0148.fit',
 'tsField-001302-1-40-0337.fit',
 'tsField-001302-5-40-0374.fit',
 'tsField-003063-2-40-0044.fit',
 'tsField-002336-3-40-0035.fit',
 'tsField-002247-3-40-0335.fit',
 'tsField-003063-1-40-0050.fit',
 'tsField-007717-1-40-0119.fit',
 'tsField-003063-3-40-0047.fit',
 'tsField-005390-2-40-0033.fit',
 'tsField-001331-1-40-0274.fit',
 'tsField-003063-3-40-0072.fit',
 'tsField-002190-1-40-0031.fit',
 'tsField-003058-3-40-0071.fit',
 'tsField-003058-2-40-0065.fit',
 'tsField-003903-2-40-0021.fit',
 'tsField-002322-6-40-0044.fit',
 'tsField-003059-2-40-0152.fit',
 'tsField-002304-4-40-0119.fit',
 'tsField-002322-6-40-0024.fit',
 'tsField-

In [97]:
#89 of the 1527 galaxies just don't have tsf files. I don't think investigating why this is the case is worth it right now due to time. 

In [105]:
from astropy.table import Column
c1 = Column([obj for obj in missing_objid_3],name='dr12objid')

ex_table = Table()
ex_table.add_columns([c1])
ex_table.write('missing_galaxies_objids.fits',overwrite=True)

In [103]:
missing_objid_3

[1237651067889909771,
 1237651066279821325,
 1237654605873676293,
 1237651065203064903,
 1237654786779840587,
 1237651065203064912,
 1237658609829544019,
 1237651065735544848,
 1237651067887091821,
 1237651065203130479,
 1237654786779906160,
 1237651065203130387,
 1237651067353038970,
 1237658629158207659,
 1237655507253264573,
 1237655125020836042,
 1237658628621730055,
 1237678617404047646,
 1237658629695275302,
 1237668623546384411,
 1237651189753053513,
 1237658629696913469,
 1237654879114035273,
 1237658608222011488,
 1237658607684747281,
 1237662236929228903,
 1237655448734924906,
 1237658611985416295,
 1237655370356686967,
 1237655448733614107,
 1237658628621861003,
 1237658629158863005,
 1237662239077498913,
 1237658612513112067,
 1237658629695275065,
 1237656529992614121,
 1237658607147483189,
 1237658608221946049,
 1237661351631454347,
 1237655448732762298,
 1237657222025576517,
 1237659342661681257,
 1237658614129754192,
 1237661435917762567,
 1237668624620257434,
 123765862

In [106]:
1438+89

1527