In [10]:
import pandas as pd
from pprint import pprint
from astropy.table import Table, join
from astroquery.sdss import SDSS

In [11]:
# astroquery is weird - can query by region, plate, fiber id, or mjd
# query sdss objects by plate number ~ 500 objects per plate


# read in the csv file, and get objects on one plate
file = 'sdss_objects.csv'
df = pd.read_csv(file)
print(f"Total number of objects: {len(df)}")

plate_number = [445] 

# filter the data set by plate number
objects = df[df['plate'].isin(plate_number)]

print(f"Number of objects from plate: {len(objects)}")

# query sdss by plate numer, print available columns

results = SDSS.query_specobj(plate = 445)
print(results.columns)


# match the queries objects with the objects in the csv
objects = objects.copy()
query_df = results.to_pandas()

objects['specobjid'] = objects['specobjid'].astype('int64')
query_df['specobjid'] = query_df['specobjid'].astype('int64')

# merge the data frames to get the matched objects
matched_objects = pd.merge(objects, query_df, on='specobjid', how='inner')


obj_matches = Table.from_pandas(matched_objects)
obj_matches



Total number of objects: 860684
Number of objects from plate: 587
<TableColumns names=('ra','dec','objid','run','rerun','camcol','field','z','plate','mjd','fiberID','specobjid','run2d')>


specobjid,SOURCETYPE,RA,Dec,plate_x,ra,dec,objid,run,rerun,camcol,field,z,plate_y,mjd,fiberID,run2d
int64,str25,float64,float64,int64,float64,float64,uint64,int64,int64,int64,int64,float64,int64,int64,int64,int64
501025764846626816,GALAXY,130.61346,49.754185,445,130.61349960302,49.7542050007829,1237651189746565323,1331,301,1,175,0.1864123,445,51873,1,26
501026039724533760,GALAXY,130.50245,49.952646,445,130.502496814982,49.9526452500273,1237651249874534681,1345,301,1,151,0.3371834,445,51873,2,26
501026314602440704,GALAXY,130.80185,50.011891,445,130.801833171431,50.0118885755988,1237651249874600266,1345,301,1,152,0.186489,445,51873,3,26
501026589480347648,GALAXY,130.62652,50.037049,445,130.62652811744,50.037047131032,1237651249874600531,1345,301,1,152,0.4206361,445,51873,4,26
501026864358254592,GALAXY,130.66302,50.07183,445,130.663029522217,50.0718255938113,1237651249874600173,1345,301,1,152,0.09534973,445,51873,5,26
501027139236161536,GALAXY,130.65008,49.928713,445,130.650100893538,49.9287158883059,1237651249874534526,1345,301,1,151,0.1664238,445,51873,6,26
501027414114068480,QSO,130.55542,49.843956,445,130.555470999751,49.8439569299889,1237651249874534583,1345,301,1,151,2.030451,445,51873,7,26
501027688991975424,GALAXY,130.44962,49.955264,445,130.449688077166,49.9552790767358,1237651065191858693,1302,301,1,165,0.402256,445,51873,8,26
501027963869882368,GALAXY,130.54073,50.024759,445,130.540791289279,50.0247573027193,1237651190283501764,1331,301,2,176,0.18413,445,51873,9,26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [12]:
# adjust columns such that dataframes match

if 'plate_y' in obj_matches.columns:
    obj_matches.remove_columns('plate_y')


if 'plate_x' in obj_matches.columns:
    obj_matches.rename_column('plate_x', 'plate')

# printt available columns
print("Columns in obj_matches:", obj_matches.columns)

# get spectra of matched objects
spec = SDSS.get_spectra(matches=obj_matches)


Columns in obj_matches: <TableColumns names=('specobjid','SOURCETYPE','RA','Dec','plate','ra','dec','objid','run','rerun','camcol','field','z','mjd','fiberID','run2d')>


In [13]:
# download data from spectra
spectra_table = []

for hdu_list in spec:
    # Extract the spectrum's binary table (usually in the 1st HDU)
    data = hdu_list[1].data
    header = hdu_list[0].header
    
    # get wavelength and flux data
    
    wavelength = data['loglam']
    flux = data['flux']


    # get information from header
    obj_id = header['SPEC_ID']      
    g_sn = header['SPEC1_G']
    r_sn = header['SPEC1_R']
    i_sn = header['SPEC1_I']
    
    red_shift = header['SHIFT']
    red_shift_err = header['SHIFTERR']
    
    central_wavelength = header['COEFF0']
    
    # append the data to a table
    spectra_table.append([obj_id, wavelength, flux, red_shift, red_shift_err, g_sn, r_sn, i_sn, central_wavelength])

# create an astropy table
spectra_table = Table(rows=spectra_table, names=['specobjid', 'wavelength', 'red shift', 'red shift error', 'flux', 'g sn', 'r sn', 'i sn', 'central wavelength'])

# add the spectra 
spectra_table['specobjid'] = spectra_table['specobjid'].astype('int64')
matched_with_sourcetype = join(obj_matches, spectra_table, keys='specobjid', join_type='left')

# save to csv
spectra_df = matched_with_sourcetype.to_pandas()
spectra_table.write('spectra_table.fits', overwrite=True)
spectra_df.to_csv('spectral_data2.csv', index=False)
print(len(spectra_df))

print(f"Rows in obj_matches: {len(obj_matches)}")



587
Rows in obj_matches: 587


In [19]:
from astropy.table import vstack

# repeat above process but query over multiple plate numbers to get over 1000 spectra
# List of plate numbers
plates = [945, 946, 947]

objects = []
for plate in plates: 
    # Filter rows based on the current plate
    print(plate)
    objects_df = df[df['plate'] == plate]
    objects_table = Table.from_pandas(objects_df)
    objects.append(objects_table)
    # Append the filtered DataFrame to the list
    
    
    # Print the number of objects for the current plate
    print(f"Number of objects from plate {plate}: {len(objects_df)}")
    
objects = vstack(objects)


# Loop through the plates and query SDSS
results = []
for plate in plates:
    query = SDSS.query_specobj(plate = plate)
    if query is not None:
        results.append(query)

# Combine results into a single table
results = vstack(results)



# match the queries objects with the objects in the csv
objects = objects.to_pandas()
query_df = results.to_pandas()

objects['specobjid'] = objects['specobjid'].astype('int64')
query_df['specobjid'] = query_df['specobjid'].astype('int64')

# merge the data frames to get the matched objects
matched_objects = pd.merge(objects, query_df, on='specobjid', how='inner')


obj_matches = Table.from_pandas(matched_objects)
obj_matches


945
Number of objects from plate 945: 578
946
Number of objects from plate 946: 568
947
Number of objects from plate 947: 496


specobjid,SOURCETYPE,RA,Dec,plate_x,ra,dec,objid,run,rerun,camcol,field,z,plate_y,mjd,fiberID,run2d
int64,str25,float64,float64,int64,float64,float64,uint64,int64,int64,int64,int64,float64,int64,int64,int64,int64
1063975731337390080,GALAXY,152.69115,54.19951,945,152.691169940386,54.199516953674,1237657772321538135,2863,301,6,186,0.0471576,945,52652,1,26
1063976006215297024,GALAXY,152.46477,54.285261,945,152.464779875372,54.2852746020127,1237657772321472648,2863,301,6,185,0.04465302,945,52652,2,26
1063976281093203968,GALAXY,152.54256,54.308625,945,152.542524520288,54.3086405602035,1237657772321472671,2863,301,6,185,0.04384306,945,52652,3,26
1063976555971110912,GALAXY,152.4358,54.36866,945,152.435768775204,54.3686711829338,1237655106759360789,2243,301,1,215,0.3776088,945,52652,4,26
1063976830849017856,GALAXY,152.43383,54.387151,945,152.433787775776,54.3871666343831,1237655106759360750,2243,301,1,215,0.3766828,945,52652,5,26
1063977105726924800,GALAXY,152.69627,54.392,945,152.69625694128,54.3919810415265,1237657772321538231,2863,301,6,186,0.04771653,945,52652,6,26
1063977380604831744,GALAXY,152.42572,54.288932,945,152.4257838094,54.2889841760877,1237657772321472799,2863,301,6,185,-0.0005457028,945,52652,7,26
1063977930360645632,GALAXY,152.61384,54.333608,945,152.613768396619,54.3336386403989,1237657772321538401,2863,301,6,186,0.489183,945,52652,9,26
1063978205238552576,QSO,152.60462,54.360848,945,152.604606636525,54.3608459656354,1237657772321538195,2863,301,6,186,3.172779,945,52652,10,26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [21]:
if 'plate_y' in obj_matches.columns:
    obj_matches.remove_columns('plate_y')


if 'plate_x' in obj_matches.columns:
    obj_matches.rename_column('plate_x', 'plate')

# printt available columns
print("Columns in obj_matches:", obj_matches.columns)

# get spectra of matched objects
spec = SDSS.get_spectra(matches=obj_matches)


Columns in obj_matches: <TableColumns names=('specobjid','SOURCETYPE','RA','Dec','plate','ra','dec','objid','run','rerun','camcol','field','z','mjd','fiberID','run2d')>


In [22]:
# download data from spectra
spectra_table = []

for hdu_list in spec:
    # Extract the spectrum's binary table (usually in the 1st HDU)
    data = hdu_list[1].data
    header = hdu_list[0].header
    
    # get wavelength and flux data
    
    wavelength = data['loglam']
    flux = data['flux']


    # get information from header
    obj_id = header['SPEC_ID']      
    g_sn = header['SPEC1_G']
    r_sn = header['SPEC1_R']
    i_sn = header['SPEC1_I']
    
    red_shift = header['SHIFT']
    red_shift_err = header['SHIFTERR']
    
    central_wavelength = header['COEFF0']
    
    # append the data to a table
    spectra_table.append([obj_id, wavelength, flux, red_shift, red_shift_err, g_sn, r_sn, i_sn, central_wavelength])

# create an astropy table
spectra_table = Table(rows=spectra_table, names=['specobjid', 'wavelength', 'red shift', 'red shift error', 'flux', 'g sn', 'r sn', 'i sn', 'central wavelength'])

# add the spectra 
spectra_table['specobjid'] = spectra_table['specobjid'].astype('int64')
matched_with_sourcetype = join(obj_matches, spectra_table, keys='specobjid', join_type='left')

# save to csv
spectra_df = matched_with_sourcetype.to_pandas()
spectra_table.write('spectra_table.fits', overwrite=True)
spectra_df.to_csv('spectral_data3.csv', index=False)
print(len(spectra_df))

print(f"Rows in obj_matches: {len(obj_matches)}")



1636
Rows in obj_matches: 1636


In [23]:
# save each spectrum as fits file
import os

output_folder = "spectra_fits_files2"
os.makedirs(output_folder, exist_ok=True)

# Save each spectrum
for spectrum in spec:
    data = spectrum[1].data
    header = spectrum[0].header

    # Save to FITS file
    output_filename = os.path.join(output_folder, f"spectrum_{spectrum}.fits")
    data.writeto(output_filename, overwrite=True)
    print(f"Saved: {output_filename}")

print(f"All spectra saved in folder: {output_folder}")


AttributeError: recarray has no attribute writeto