# RRISA Quickstart

All of the IGRINS data is stored using Box. We've generated all of the download links you'll need to access both raw and reduced IGRINS data products.

In this tutorial we walk through how to import the RRISA files into pandas DataFrame objects, how to manipulate DataFrames in various ways to find data subsets, and how to download IGRINS data products using Python.

In [152]:
#only need one package
import pandas as pd

### Reading in a RRISA file

pandas has a handy function (read_csv) that can easily interpret .csv files and turn them into a DataFrame object. DataFrames have a similar format to a spreadsheet, but can be parsed using Python.

In [153]:
#read in the cross matched superlog
#note that the path is relative, so it depends on the directory this code is run from
xmatch_superlog = pd.read_csv('../RRISA_XMatch/xmatch_log.csv')

In [154]:
#display the cross matched superlog DataFrame
# .head() just shows the first 5 rows in the DataFrame
xmatch_superlog.head()

Unnamed: 0,NAME,OBJNAME_super,OBJNAME_recipe,RA.2000,DEC.2000,FILENAME,FILES,SNRH,SNRK,URLS_H,...,APOGEE2_teff,APOGEE2_logg,APOGEE2_Vsini,APOGEE2_[M/H],APOGEE2_[a/M],APOGEE2_[Fe/H],PASTEL_Teff,PASTEL_logg,PASTEL_[Fe/H],PASTEL Flag
0,* mu.02 Her,LHS 3325,LHS 3325,266.771375,27.727028,SDCH_20140707_0095.fits,95 96 97 98,425.0,441.0,{'20140707/SDCH_20140707_0095.sn.fits': 'https...,...,,,,,,,,,,
1,* alf Lyr,vega,vega,279.393667,38.804667,SDCH_20140707_0099.fits,99 100 101 102 103 104 105 106 107 108 109 110,982.0,881.0,{'20140707/SDCH_20140707_0099.sn.fits': 'https...,...,,,,,,,9692.0,3.94,-0.73,
2,HD 164595B,NLTT45791,NLTT45791,270.353292,29.581639,SDCH_20140707_0111.fits,111 112 113 114,109.0,130.0,{'20140707/SDCH_20140707_0111.sn.fits': 'https...,...,,,,,,,,,-0.07,
3,BD+31 3330B,NLTT46858,NLTT46858,280.389208,31.558806,SDCH_20140707_0116.fits,116 117 118 119,226.0,217.0,{'20140707/SDCH_20140707_0116.sn.fits': 'https...,...,,,,,,,,,,
4,L 1288-4,GJ797B,GJ797B,310.365,19.958444,SDCH_20140707_0124.fits,124 125 126 127 128 129 130 131,193.0,186.0,{'20140707/SDCH_20140707_0124.sn.fits': 'https...,...,,,,,,,,,-0.07,


### DataFrame Manipulation

Now we can use the DataFrame to look for specific targets! We can start by looking at all of the columns available to us:

In [155]:
#list the name of every column in the DataFrame
xmatch_superlog.columns

Index(['NAME', 'OBJNAME_super', 'OBJNAME_recipe', 'RA.2000', 'DEC.2000',
       'FILENAME', 'FILES', 'SNRH', 'SNRK', 'URLS_H', 'URLS_K', 'CIVIL',
       'FILENUMBER', 'JD', 'OBJTYPE', 'EXPTIME', 'ROTPA', 'BVC', 'FACILITY',
       'NIGHT_URLS', 'OTHER_URLS', 'PAPER_LOGS', 'MAIN_ID', 'RA_s', 'DEC_s',
       'IDS', 'OTYPE', 'SP_TYPE', 'SP_QUAL', 'PMRA', 'PMDEC', 'RV_VALUE',
       'PLX_VALUE', 'FLUX_U', 'FLUX_B', 'FLUX_V', 'FLUX_R', 'FLUX_G', 'FLUX_I',
       'FLUX_J', 'FLUX_H', 'FLUX_K', 'Teff', 'logg', 'Fe/H', '2MASS_ID',
       '2MASS_J', '2MASS_H', '2MASS_K', '2MASS_Flag', 'GaiaDR2_source',
       'GaiaDR2_plx', 'GaiaDR2_pmra', 'GaiaDR2_pmdec', 'GaiaDR2_gmag',
       'GaiaDR2_bprp', 'GaiaDR2_ebprp', 'GaiaDR2_rv', 'GaiaDR2_teff',
       'GaiaDR2_ag', 'GaiaDR2_radius', 'GaiaDR2_luminosity', 'GaiaDR2_Flag',
       'GaiaEDR3_source', 'GaiaEDR3_parallax', 'GaiaEDR3_bprp',
       'GaiaEDR3_ebprp', 'GaiaEDR3_gmag', 'GaiaEDR3 Flag', 'APOGEE2_HRV',
       'APOGEE2_teff', 'APOGEE2_logg', 'APOGE

Say we only want the highest H band signal-to-noise spectrum for each object. We can start by sorting the DataFrame using the 'SNRH' column from highest to lowest.

In [156]:
#order the rows by the 'SNRH' column, highest value first, modifying the DataFrame in place
xmatch_superlog.sort_values('SNRH', ascending=False, inplace=True)
#preview the first 5 rows of the sorted DataFrame
xmatch_superlog.head(5)

Unnamed: 0,NAME,OBJNAME_super,OBJNAME_recipe,RA.2000,DEC.2000,FILENAME,FILES,SNRH,SNRK,URLS_H,...,APOGEE2_teff,APOGEE2_logg,APOGEE2_Vsini,APOGEE2_[M/H],APOGEE2_[a/M],APOGEE2_[Fe/H],PASTEL_Teff,PASTEL_logg,PASTEL_[Fe/H],PASTEL Flag
3355,* rho Ser,HR 5899,HR 5899,237.992167,20.924472,SDCH_20160716_0059.fits,59 60 61 62,1826.0,2008.0,{'20160716/SDCH_20160716_0059.sn.fits': 'https...,...,,,,,,,3920.0,1.68,-0.17,
3360,HD 198345,HR 7969,HR 7969,,,SDCH_20160716_0099.fits,99 100 101 102 103 104 105 106,1787.0,1800.0,{'20160716/SDCH_20160716_0099.sn.fits': 'https...,...,,,,,,,4010.0,1.78,-0.23,
3349,HD 194193,HR 7800,HR 7800,,,SDCH_20160714_0079.fits,79 80 81 82 83 84 85 86,1697.0,1720.0,{'20160714/SDCH_20160714_0079.sn.fits': 'https...,...,,,,,,,,,,
10760,L 98-59,L 98-59,L 98-59,124.5313,-68.31565,SDCH_20210210_0099.fits,99 100 101 102 103 104 105 106 107 108 109 11...,1657.0,1723.0,{'20210210/SDCH_20210210_0099.sn.fits': 'https...,...,,,,,,,,,,
3361,V* T Cyg,HR 7956,HR 7956,311.950667,34.423806,SDCH_20160716_0107.fits,107 108 109 110 111 112 113 114,1609.0,1604.0,{'20160716/SDCH_20160716_0107.sn.fits': 'https...,...,,,,,,,4190.0,2.12,-0.12,


Then we can drop all of the rows that have a repeated name. If we keep only the first occurrence of each name, we will get the highest SNR H band spectrum for each object!

In [157]:
#keep only the first row for each MAIN_ID; because the DataFrame is already sorted, that is the highest SNRH row
#NOTE(review): rows with a missing MAIN_ID presumably count as duplicates of each other, so only one of them would be kept -- confirm this is intended
xmatch_superlog.drop_duplicates(subset='MAIN_ID', keep='first', inplace=True)
#display the de-duplicated DataFrame
xmatch_superlog

Unnamed: 0,NAME,OBJNAME_super,OBJNAME_recipe,RA.2000,DEC.2000,FILENAME,FILES,SNRH,SNRK,URLS_H,...,APOGEE2_teff,APOGEE2_logg,APOGEE2_Vsini,APOGEE2_[M/H],APOGEE2_[a/M],APOGEE2_[Fe/H],PASTEL_Teff,PASTEL_logg,PASTEL_[Fe/H],PASTEL Flag
3355,* rho Ser,HR 5899,HR 5899,237.992167,20.924472,SDCH_20160716_0059.fits,59 60 61 62,1826.0,2008.0,{'20160716/SDCH_20160716_0059.sn.fits': 'https...,...,,,,,,,3920.0,1.68,-0.17,
3360,HD 198345,HR 7969,HR 7969,,,SDCH_20160716_0099.fits,99 100 101 102 103 104 105 106,1787.0,1800.0,{'20160716/SDCH_20160716_0099.sn.fits': 'https...,...,,,,,,,4010.0,1.78,-0.23,
3349,HD 194193,HR 7800,HR 7800,,,SDCH_20160714_0079.fits,79 80 81 82 83 84 85 86,1697.0,1720.0,{'20160714/SDCH_20160714_0079.sn.fits': 'https...,...,,,,,,,,,,
10760,L 98-59,L 98-59,L 98-59,124.531300,-68.315650,SDCH_20210210_0099.fits,99 100 101 102 103 104 105 106 107 108 109 11...,1657.0,1723.0,{'20210210/SDCH_20210210_0099.sn.fits': 'https...,...,,,,,,,,,,
3361,V* T Cyg,HR 7956,HR 7956,311.950667,34.423806,SDCH_20160716_0107.fits,107 108 109 110 111 112 113 114,1609.0,1604.0,{'20160716/SDCH_20160716_0107.sn.fits': 'https...,...,,,,,,,4190.0,2.12,-0.12,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,2MASS J22385287+7511235,L1251B IRS2,L1251B IRS2,340.006458,75.259944,SDCH_20140713_0068.fits,68 69,0.0,1.0,{'20140713/SDCH_20140713_0068.sn.fits': 'https...,...,,,,,,,,,,
225,IRAS 22266+6845,IRAS 22266+6845,IRAS 22266+6845,337.124333,69.100389,SDCH_20141114_0086.fits,86 87 88 89,0.0,2.0,{'20141114/SDCH_20141114_0086.sn.fits': 'https...,...,,,,,,,,,,
2956,2MASS J18300136-0210256,IRAS18274-0212,IRAS18274-0212,277.710208,-2.151333,SDCH_20160429_0117.fits,117 118 119 120 121 122 123 124,0.0,7.0,{'20160429/SDCH_20160429_0117.sn.fits': 'https...,...,,,,,,,,,,
1656,LDN 1157,L1157,L1157,309.810292,68.081167,SDCH_20150925_0069.fits,69 70 71 72 73 74 75 76,-1.0,0.0,{'20150925/SDCH_20150925_0069.sn.fits': 'https...,...,,,,,,,,,,


If we require the SNR to be higher than a specific value, we can implement that too

In [158]:
#select the rows whose 'SNRH' value is greater than or equal to 150
subset = xmatch_superlog.loc[xmatch_superlog['SNRH'].ge(150.)]
#display the selected rows
subset

Unnamed: 0,NAME,OBJNAME_super,OBJNAME_recipe,RA.2000,DEC.2000,FILENAME,FILES,SNRH,SNRK,URLS_H,...,APOGEE2_teff,APOGEE2_logg,APOGEE2_Vsini,APOGEE2_[M/H],APOGEE2_[a/M],APOGEE2_[Fe/H],PASTEL_Teff,PASTEL_logg,PASTEL_[Fe/H],PASTEL Flag
3355,* rho Ser,HR 5899,HR 5899,237.992167,20.924472,SDCH_20160716_0059.fits,59 60 61 62,1826.0,2008.0,{'20160716/SDCH_20160716_0059.sn.fits': 'https...,...,,,,,,,3920.0,1.68,-0.17,
3360,HD 198345,HR 7969,HR 7969,,,SDCH_20160716_0099.fits,99 100 101 102 103 104 105 106,1787.0,1800.0,{'20160716/SDCH_20160716_0099.sn.fits': 'https...,...,,,,,,,4010.0,1.78,-0.23,
3349,HD 194193,HR 7800,HR 7800,,,SDCH_20160714_0079.fits,79 80 81 82 83 84 85 86,1697.0,1720.0,{'20160714/SDCH_20160714_0079.sn.fits': 'https...,...,,,,,,,,,,
10760,L 98-59,L 98-59,L 98-59,124.531300,-68.315650,SDCH_20210210_0099.fits,99 100 101 102 103 104 105 106 107 108 109 11...,1657.0,1723.0,{'20210210/SDCH_20210210_0099.sn.fits': 'https...,...,,,,,,,,,,
3361,V* T Cyg,HR 7956,HR 7956,311.950667,34.423806,SDCH_20160716_0107.fits,107 108 109 110 111 112 113 114,1609.0,1604.0,{'20160716/SDCH_20160716_0107.sn.fits': 'https...,...,,,,,,,4190.0,2.12,-0.12,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1716,PM J02159-0929,2MASS J02155892-0929121,2MASS J02155892-0929121,34.196792,-9.417333,SDCH_20150929_0077.fits,77 78 79 80,150.0,134.0,{'20150929/SDCH_20150929_0077.sn.fits': 'https...,...,,,,,,,3350.0,5.00,-0.25,
1192,G 258-33,GJ 4053,GJ 4053,274.770417,66.194833,SDCH_20150424_0186.fits,186 187 188 189 190 191,150.0,152.0,{'20150424/SDCH_20150424_0186.sn.fits': 'https...,...,,,,,,,,,,
1406,HD 91702,HD 91702,HD 91702,159.180208,36.843417,SDCH_20150608_0049.fits,49 50 51 52 53 54 55 56,150.0,132.0,{'20150608/SDCH_20150608_0049.sn.fits': 'https...,...,,,,,,,5628.0,,,
4113,LSPM J0310+1338,h673_p2_k9.1,h673_p2_k9.1,47.679708,13.644278,SDCH_20161123_0142.fits,142 143 144 145 146 147 148 149,150.0,141.0,{'20161123/SDCH_20161123_0142.sn.fits': 'https...,...,,,,,,,,,,


We can search for substrings within the possible identifiers for objects from SIMBAD to narrow our list further. Here is an example that shows how to search for the substring "Tau" within the SIMBAD identifiers.

In [159]:
#casting the Series (subset['IDS']) to strings first (.astype(str)) turns NaN values into text, so .str.contains does not raise any errors
subset_tau = subset.loc[subset['IDS'].astype(str).str.contains('Tau')]
#display the matching rows
subset_tau

Unnamed: 0,NAME,OBJNAME_super,OBJNAME_recipe,RA.2000,DEC.2000,FILENAME,FILES,SNRH,SNRK,URLS_H,...,APOGEE2_teff,APOGEE2_logg,APOGEE2_Vsini,APOGEE2_[M/H],APOGEE2_[a/M],APOGEE2_[Fe/H],PASTEL_Teff,PASTEL_logg,PASTEL_[Fe/H],PASTEL Flag
2232,* s Tau,HR1061,HR1061,,,SDCH_20151203_0057.fits,57 58 59 60 61 62 63 64,1152.0,1020.0,{'20151203/SDCH_20151203_0057.sn.fits': 'https...,...,,,,,,,,,,
2222,* chi Tau,HR 1369,HR 1369,65.887958,25.667083,SDCH_20151202_0074.fits,74 75 76 77 78 79 80 81,1000.0,890.0,{'20151202/SDCH_20151202_0074.sn.fits': 'https...,...,,,,,,,10280.0,,,
2479,* k Tau,k Tau,k Tau,,,SDCH_20160129_0058.fits,58 59 60 61 62 63 64 65,755.0,656.0,{'20160129/SDCH_20160129_0058.sn.fits': 'https...,...,,,,,,,,,,
690,* ome01 Tau,43 Tau,43 Tau,62.505250,19.648694,SDCH_20141226_0086.fits,86 87 88 89,656.0,618.0,{'20141226/SDCH_20141226_0086.sn.fits': 'https...,...,,,,,,,4750.0,2.5,0.12,
2199,* 136 Tau,136 Tau,136 Tau,88.583500,27.615889,SDCH_20151125_0062.fits,62 63 64 65,545.0,484.0,{'20151125/SDCH_20151125_0062.sn.fits': 'https...,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8471,HD 285778,TAP 35,TAP 35,66.795125,17.845972,SDCH_20181221_0138.fits,138 139 140 141,155.0,134.0,{'20181221/SDCH_20181221_0138.sn.fits': 'https...,...,,,,,,,,,,
5508,V* V928 Tau,V928 Tau,V928 Tau,68.076958,24.374278,SDCH_20170929_0049.fits,49 50,154.0,160.0,{'20170929/SDCH_20170929_0049.sn.fits': 'https...,...,3930.688,3.8736,32.47880,-0.158269,-0.070804,-0.154909,,,,
834,V* V1321 Tau,RXJ0432.8+1735,RXJ0432.8+1735,68.459375,17.626861,SDCH_20150120_0073.fits,73 74 75 76 77 78,153.0,137.0,{'20150120/SDCH_20150120_0073.sn.fits': 'https...,...,,,,,,,,,,
6291,V* V409 Tau,IRAS 04151+2512,IRAS 04151+2512,64.545583,25.334194,SDCH_20171203_0061.fits,61 62 63 64 65 66,153.0,131.0,{'20171203/SDCH_20171203_0061.sn.fits': 'https...,...,,,,,,,,,,


Finally, we can keep only the rows whose OBJTYPE contains 'TAR' (i.e. the targets):

In [160]:
#keep only the rows whose 'OBJTYPE' value contains the substring 'TAR'
targets_tau = subset_tau.loc[subset_tau['OBJTYPE'].astype(str).str.contains('TAR')]
#preview the first 5 rows
targets_tau.head(5)

Unnamed: 0,NAME,OBJNAME_super,OBJNAME_recipe,RA.2000,DEC.2000,FILENAME,FILES,SNRH,SNRK,URLS_H,...,APOGEE2_teff,APOGEE2_logg,APOGEE2_Vsini,APOGEE2_[M/H],APOGEE2_[a/M],APOGEE2_[Fe/H],PASTEL_Teff,PASTEL_logg,PASTEL_[Fe/H],PASTEL Flag
5401,V* V830 Tau,V830 Tau,V830 Tau,68.290917,24.56375,SDCH_20170915_0049.fits,49 50 51 52 53 54 55 56,467.0,412.0,{'20170915/SDCH_20170915_0049.sn.fits': 'https...,...,,,32.36682,,,,,,,
2600,* 21 Tau,HD23432,HD23432,56.695333,24.597806,SDCH_20160227_0041.fits,41 42 43 42 44 45 46 45,458.0,404.0,{'20160227/SDCH_20160227_0041.sn.fits': 'https...,...,,,,,,,11041.0,,,
10160,HD 32923,HD 32923 (RV Standard),HD 32923 (RV Standard),76.865542,18.646792,SDCH_20201223_0056.fits,56 57 58 59 60 61 62 63,457.0,449.0,{'20201223/SDCH_20201223_0056.sn.fits': 'https...,...,,,,,,,5651.0,4.05,-0.22,
4484,HD 28068,Solar Twin HD 28068,Solar Twin HD 28068,66.600708,16.850694,SDCH_20170208_0043.fits,43 44 45 46 47 48 49 50 51 52 53 54,441.0,401.0,{'20170208/SDCH_20170208_0043.sn.fits': 'https...,...,,,,,,,5305.0,,0.07,
2601,* h Tau,HD27397,HD27397,65.196667,14.076056,SDCH_20160227_0047.fits,47 48 49 50,439.0,400.0,{'20160227/SDCH_20160227_0047.sn.fits': 'https...,...,,,,,,,,,,


We can also filter by coordinates, but that process is more complicated and will be covered in another tutorial.

### Downloading Files

In [161]:
#three packages required
import requests
import ast
import os

The values of any of the URL columns are strings, but they can easily be converted into a dictionary using the ast package.

In [162]:
#the raw value stored in the URL column of the first row
raw_urls = targets_tau['URLS_H'].iloc[0]
print(f"I am a {type(raw_urls)}")
#the same value after ast has evaluated it as a Python literal
parsed_urls = ast.literal_eval(raw_urls)
print(f"I am a {type(parsed_urls)}")

I am a <class 'str'>
I am a <class 'dict'>


The dictionaries contain filename-url pairs that make downloading files convenient.

In [163]:
#convert the URL string of the first row into a dictionary
test_dict = ast.literal_eval(targets_tau['URLS_H'].iloc[0])
#display the dictionary
test_dict

{'20170915/SDCH_20170915_0049.sn.fits': 'https://utexas.box.com/shared/static/z9y6jc8vds9i3erickm9pzqsztss6jgo.fits',
 '20170915/SDCH_20170915_0049.spec.fits': 'https://utexas.box.com/shared/static/xsgwr32t6vugc441a716p5nxk853eqdi.fits',
 '20170915/SDCH_20170915_0049.spec_a0v.fits': 'https://utexas.box.com/shared/static/t4qihosr0kcyt0jt9mu4w6elq4cjgbuc.fits',
 '20170915/SDCH_20170915_0049.variance.fits': 'https://utexas.box.com/shared/static/8pgo2yyiorxoxgujgv44beyu9m3ptkaa.fits'}

Here is an example of downloading one file from the above dictionary using the requests package

In [118]:
#using a session here means that requests can fetch the information faster since the connection is reused
session = requests.Session()
#the response holds lots of information gathered from the request (session.get())
response = session.get(test_dict['20170915/SDCH_20170915_0049.spec_a0v.fits'])
#we can check the status code to make sure our link was found successfully
if response.status_code == 200:
    #open a file with the corresponding filename to dump the file content into
    #the with statement closes the file for us as soon as the indented block ends, so no f.close() is needed
    with open("SDCH_20170915_0049.spec_a0v.fits", 'wb') as f:
        #write the file
        f.write(response.content)
    #print that we downloaded the file
    print("Downloaded SDCH_20170915_0049.spec_a0v.fits")
else:
    #report the failure instead of silently doing nothing
    print(f"Download failed with status code {response.status_code}")

Downloaded SDCH_20170915_0049.spec_a0v.fits


Going back to our above example of high SNR H band spectra of targets with "Tau" in the name, we can download all of the files using the following handy function [adapted from the muler tutorial for downloading files from UT Box](https://muler.readthedocs.io/en/latest/tutorials/Download_IGRINS_data_from_Box.html). I elect to add some file organization using os.makedirs: the built-in open function will not create new directories to put files into, so we have to do it ourselves.

In [164]:
def download_files(files, civil, name, session):
    '''
    downloads files into organized directories

    each file is saved as {civil}/{name}/{last piece of the dictionary key}

    input:
        files: a dictionary of filename-url pairs
        civil: the civil date of observation
        name: the name of the object
        session: a requests Session object

    output:
        None, a line is printed for every file that is downloaded and for
        every url that does not return status code 200 (those are skipped)
    '''
    #the directory that every file from this call is saved into
    directory = f"{civil}/{name}"
    #for each filename in the dictionary
    for filename, url in files.items():
        #the key in the dictionary includes the civil directory, so keep only the piece after the last '/'
        #taking the last piece also works for keys that have no directory at all
        basename = filename.rsplit('/', 1)[-1]
        #get the url for the filename
        response = session.get(url)
        #make sure the file url was found, otherwise report it and move on to the next file
        if response.status_code != 200:
            print(f"Could not download {basename} (status code {response.status_code})")
            continue
        #create the directory for the file, if it is already created there will not be an error
        os.makedirs(directory, exist_ok=True)
        #open the file, the with statement also closes it once the content is written
        with open(f"{directory}/{basename}", 'wb') as f:
            #write the information to the file
            f.write(response.content)
        #print that the file was downloaded
        print(f"Downloaded {basename}")

In [165]:
#one shared session is passed to every download_files call
session = requests.Session()
#loop over the index labels of the first five rows in the DataFrame
for idx in targets_tau.index[:5]:
    #look up the civil date of this target
    civil = targets_tau.loc[idx, 'CIVIL']
    #remove any '*' from the MAIN_ID and join the remaining words with underscores
    name = '_'.join(targets_tau.loc[idx, 'MAIN_ID'].replace('*', '').split())
    #download the H files first and then the K files
    for url_column in ('URLS_H', 'URLS_K'):
        download_files(ast.literal_eval(targets_tau.loc[idx, url_column]), civil, name, session)

Downloaded SDCH_20170915_0049.sn.fits
Downloaded SDCH_20170915_0049.spec.fits
Downloaded SDCH_20170915_0049.spec_a0v.fits
Downloaded SDCH_20170915_0049.variance.fits
Downloaded SDCK_20170915_0049.sn.fits
Downloaded SDCK_20170915_0049.spec.fits
Downloaded SDCK_20170915_0049.spec_a0v.fits
Downloaded SDCK_20170915_0049.variance.fits
Downloaded SDCH_20160227_0041.sn.fits
Downloaded SDCH_20160227_0041.spec.fits
Downloaded SDCH_20160227_0041.spec_a0v.fits
Downloaded SDCH_20160227_0041.variance.fits
Downloaded SDCK_20160227_0041.sn.fits
Downloaded SDCK_20160227_0041.spec.fits
Downloaded SDCK_20160227_0041.spec_a0v.fits
Downloaded SDCK_20160227_0041.variance.fits
Downloaded SDCH_20201223_0056.sn.fits
Downloaded SDCH_20201223_0056.spec.fits
Downloaded SDCH_20201223_0056.spec_a0v.fits
Downloaded SDCH_20201223_0056.variance.fits
Downloaded SDCK_20201223_0056.sn.fits
Downloaded SDCK_20201223_0056.spec.fits
Downloaded SDCK_20201223_0056.spec_a0v.fits
Downloaded SDCK_20201223_0056.variance.fits
Down