### This notebook produces .txt query lists of object IDs (Plate, MJD, Fiber)
### for objects in the Sloan Extended Quasar, Emission Line Galaxy, and Luminous Red Galaxy
### Survey (SEQUELS) DR7 sample.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the data file into a DataFrame, splitting fields on runs of whitespace.
# header=None means no line is treated as column names, so the file's first
# line is parsed as data.
# The separator is a raw string so that `\s` reaches the regex engine intact
# instead of being read as an (invalid) string escape sequence.
datframe = pd.read_csv(
    'SEQUELS_TDSS_visualclass.txt',
    sep=r'\s+',
    header=None,
    engine='python',
)

# Take a look at the first 5 entries.
datframe.head(5)

# It looks like it's got a weird format; let's fix that.

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,0
"plate,mjd,fiber,ra,dec,zpipe,targselflag,class-subclass-pipe(string),class-subclass-pipe(numeric),class1eye,class2eye,class3eye,class4eye(emission),zeye,comment(string)",,,,,,,,,,,,,,
438,51884.0,145.0,121.35873,46.711475,-0.00011,-999.0,STARK3,1053.0,1053.0,-999.0,-999.0,0.0,-0.00011,nc
439,51877.0,582.0,123.3413,45.469274,-0.000303,-999.0,STARCV,1200.0,1200.0,1300.0,-999.0,1.0,-0.000303,CVorsymbiotic;veryinteresting;nice;composite
440,51885.0,305.0,121.7117,48.670706,-0.000532,-999.0,STARM6,1066.0,1300.0,1066.0,-999.0,1.0,-0.000532,veryinterestingcomposite;blueflux
442,51882.0,120.0,126.98843,50.724368,-0.000734,-999.0,STARF5,1035.0,1035.0,-999.0,-999.0,0.0,-0.000734,nc


In [3]:
# Re-read the data file, skipping its first line (the row of column names)
# so that only the data rows are parsed.
# The separator is a raw string so that `\s` is passed to the regex engine
# as-is rather than being read as an (invalid) string escape sequence.
datframe = pd.read_csv(
    'SEQUELS_TDSS_visualclass.txt',
    sep=r'\s+',
    header=None,
    skiprows=1,
    engine='python',
)

datframe.head()

# Now, without the first row, the table is formatted correctly and everything
# aligns nicely.

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,438,51884,145,121.35873,46.711475,-0.00011,-999,STARK3,1053,1053,-999,-999,0.0,-0.00011,nc
1,439,51877,582,123.3413,45.469274,-0.000303,-999,STARCV,1200,1200,1300,-999,1.0,-0.000303,CVorsymbiotic;veryinteresting;nice;composite
2,440,51885,305,121.7117,48.670706,-0.000532,-999,STARM6,1066,1300,1066,-999,1.0,-0.000532,veryinterestingcomposite;blueflux
3,442,51882,120,126.98843,50.724368,-0.000734,-999,STARF5,1035,1035,-999,-999,0.0,-0.000734,nc
4,442,51882,377,125.43852,51.57009,-6.4e-05,-999,STARF9,1039,1039,-999,-999,0.0,-6.4e-05,nc


In [12]:
# Pull the column names out of the file's first line: strip any
# leading/trailing whitespace, then split on the comma delimiter to get a
# list of strings.

with open('SEQUELS_TDSS_visualclass.txt', 'r') as header_file:
    col = header_file.readline().strip().split(',')

print('col =', col)

# `col` now holds a list of names that can be used to set the column
# names in the dataframe.

col = ['plate', 'mjd', 'fiber', 'ra', 'dec', 'zpipe', 'targselflag', 'class-subclass-pipe(string)', 'class-subclass-pipe(numeric)', 'class1eye', 'class2eye', 'class3eye', 'class4eye(emission)', 'zeye', 'comment(string)']


In [5]:
# Assign the column names read from the file's header line (the `col` list)
# to the dataframe, replacing the default integer column labels.
datframe.columns = col

In [6]:
# Glance at the first five rows to confirm that the column names now line up
# with the data.

datframe.head()

Unnamed: 0,plate,mjd,fiber,ra,dec,zpipe,targselflag,class-subclass-pipe(string),class-subclass-pipe(numeric),class1eye,class2eye,class3eye,class4eye(emission),zeye,comment(string)
0,438,51884,145,121.35873,46.711475,-0.00011,-999,STARK3,1053,1053,-999,-999,0.0,-0.00011,nc
1,439,51877,582,123.3413,45.469274,-0.000303,-999,STARCV,1200,1200,1300,-999,1.0,-0.000303,CVorsymbiotic;veryinteresting;nice;composite
2,440,51885,305,121.7117,48.670706,-0.000532,-999,STARM6,1066,1300,1066,-999,1.0,-0.000532,veryinterestingcomposite;blueflux
3,442,51882,120,126.98843,50.724368,-0.000734,-999,STARF5,1035,1035,-999,-999,0.0,-0.000734,nc
4,442,51882,377,125.43852,51.57009,-6.4e-05,-999,STARF9,1039,1039,-999,-999,0.0,-6.4e-05,nc


In [7]:
# Isolate the M dwarfs in the sample using the Sloan class1eye cuts: codes
# 1060-1069 (both ends inclusive) mark spectral types M0-M9.
M_objects = datframe[datframe['class1eye'].between(1060, 1069)]

In [8]:
# Display the rows selected as M dwarfs.
M_objects

Unnamed: 0,plate,mjd,fiber,ra,dec,zpipe,targselflag,class-subclass-pipe(string),class-subclass-pipe(numeric),class1eye,class2eye,class3eye,class4eye(emission),zeye,comment(string)
7,447,51877,174,132.18879,51.998145,-0.000060,-999,STARM3,1063,1063,-999,-999,0.0,-0.000060,nc
27,898,52606,367,135.66281,46.963478,-0.000020,-999,STARM1,1061,1061,-999,-999,0.0,-0.000020,nc
29,899,52620,242,136.86492,45.933967,0.000133,-999,STARM2,1062,1062,-999,-999,0.0,0.000133,nc
30,900,52637,48,141.06706,47.346877,0.000183,-999,STARM3,1063,1063,-999,-999,0.0,0.000183,nc
34,901,52641,602,145.79420,50.890481,0.000008,-999,STARM2,1062,1062,-999,-999,0.0,0.000008,nc
37,944,52614,70,155.69999,45.484867,0.000037,-999,STARM6,1066,1066,-999,-999,0.0,0.000037,nc
39,944,52614,103,155.54584,45.545329,-0.000200,-999,STARM2,1062,1062,-999,-999,0.0,-0.000200,nc
40,944,52614,110,155.50366,45.355011,0.000112,-999,STARM0,1060,1060,-999,-999,0.0,0.000112,nc
43,944,52614,422,153.86145,46.154428,-0.000080,-999,STARM4,1064,1064,-999,-999,0.0,-0.000080,nc
44,962,52620,516,160.12631,47.633250,0.000146,-999,STARM2,1062,1062,-999,-999,0.0,0.000146,lowSN


In [17]:
# Count the M dwarfs in the SEQUELS TDSS (SDSS-III) sample.
len(M_objects)

2317

In [10]:
# From the complete M-dwarf dataframe, draw 100 random objects for the
# spectral-typing practice exercise. A fixed random_state makes the draw
# reproducible, so re-running the notebook selects the same 100 objects (and
# hence writes the same query list) instead of a different sample each time.
M_100objects = M_objects.sample(n=100, random_state=0)

In [87]:
# Build the download list for the 100-object spectral-typing practice sample,
# keeping only the plate, MJD and fiber columns (renamed to upper case).
# The fiber number is left-padded with zeros until it's four characters long:
# the SkyServer query for the object spectra files will not work unless the
# fiber number has leading zeros in it.

# .copy() gives an independent frame, so the edits below never touch
# M_100objects.
M_100_DL_frame = M_100objects[['plate', 'mjd', 'fiber']].copy()
M_100_DL_frame.columns = ['PLATE', 'MJD', 'FIBER']
# '{:0>4}' right-aligns the value in a 4-character field filled with '0'.
M_100_DL_frame['FIBER'] = M_100_DL_frame['FIBER'].apply('{:0>4}'.format)
M_100_DL_frame.head(10)

Unnamed: 0,PLATE,MJD,FIBER
491,7282,56660,818
4852,7513,56780,860
2662,7391,56781,873
4174,7425,56777,675
3233,7407,56772,6
2627,7391,56781,312
908,7300,56707,829
2125,7380,56753,636
1040,7304,56745,490
4169,7425,56777,488


In [81]:
# Save the practice-sample list as a space-delimited text file, leaving out
# the dataframe index. This file will be used to query SkyServer for the
# M-dwarf spectra.

M_100_DL_frame.to_csv(
    'SEQUELS_DL_100Ms.txt',
    sep=' ',
    index=False,
)

In [84]:
# Repeat the same steps to produce the query list for the complete SEQUELS
# M-dwarf sample: keep plate/MJD/fiber, zero-pad the fiber number to four
# characters, and write a space-delimited file without the index.

SEQUELS_M_SAMPLE = M_objects[['plate', 'mjd', 'fiber']].copy()
SEQUELS_M_SAMPLE.columns = ['PLATE', 'MJD', 'FIBER']
SEQUELS_M_SAMPLE['FIBER'] = SEQUELS_M_SAMPLE['FIBER'].map('{:0>4}'.format)
SEQUELS_M_SAMPLE.to_csv('DL_SEQUELS_M_SAMPLE.txt', sep=' ', index=False)

In [85]:
# Let's do a quick check to make sure things look OK. A query list for the
# entire SEQUELS M-dwarf sample has now been written to the current directory.
# You can upload it using the CrossID tool on the SDSS website and download
# your spectra!


SEQUELS_M_SAMPLE.head()

Unnamed: 0,PLATE,MJD,FIBER
7,447,51877,174
27,898,52606,367
29,899,52620,242
30,900,52637,48
34,901,52641,602


Now, with your query .txt files in hand, you can go to the SDSS DR7 CrossID page at:
http://skyserver.sdss.org/dr7/en/tools/crossid/crossid.asp
