# `ApJdataFrames` 
Grankin et al. 2008
---
`Title`: Results of the ROTOR-program     
`Authors`: Grankin et al.  


Data is from this paper:  
http://www.aanda.org/articles/aa/full/2008/09/aa8476-07/aa8476-07.html


In [8]:
import pandas as pd

In [9]:
from astropy.io import ascii, votable, misc

### Download Data

In [10]:
#! mkdir ../data/Grankin08

In [11]:
#! curl http://www.aanda.org/articles/aa/full/2008/09/aa8476-07/table1.tex >> ../data/Grankin08/table1_orig.tex

In [12]:
#! curl http://www.aanda.org/articles/aa/full/2008/09/aa8476-07/table3.tex >> ../data/Grankin08/table3.tex

In [13]:
! du -hs ../data/Grankin08/table1.tex

8.0K	../data/Grankin08/table1.tex


Ew, it's in $\LaTeX$ format!

## Table 1

In [14]:
#! head -n 30 ../data/Grankin08/table1.tex

### Two problems with the $\LaTeX$ source
1. Entries for binaries wrap onto a new line, which breaks row-by-row parsing.
2. The `\farcs` macro sits where the decimal point should be, so those values do not read as numbers.

To deal with these, I had to manually delete (horror!) the extra line breaks in Sublime Text, and then replace `\farcs` with a decimal point:

In [15]:
! cp ../data/Grankin08/table1.tex ../data/Grankin08/table1_mod.tex

In [16]:
# Replace every `\farcs` macro in the working copy with a plain '.', editing the file in place.
# NOTE(review): `-i ''` is the BSD/macOS spelling of in-place editing; GNU sed
# expects `-i` without the empty argument -- confirm the platform before rerunning.
! sed -i '' 's/\\farcs/./g' ../data/Grankin08/table1_mod.tex

In [17]:
# Column labels assigned to the fields of Table 1, left to right.
names_1 = [
    'Name',
    'HBC',
    'SpT',
    'JD_min_max',
    'N_seasons',
    'V_range',
    'N_obs',
    'avgB_V',
    'avgV_R',
    'mult',
    'ref',
]

In [18]:
# Parse the modified LaTeX table: fields are '&'-separated, the first 10 lines
# and the last 8 lines are skipped (presumably the LaTeX header and footer --
# confirm against the file), and the columns are labelled with `names_1`.
# `skipfooter` is the supported keyword; the old `skip_footer` alias is no
# longer accepted by pandas. Footer skipping needs the python engine.
tab1 = pd.read_csv('../data/Grankin08/table1_mod.tex', sep='&',
                   skiprows=10, names=names_1, engine='python', skipfooter=8)

In [12]:
# Show the last rows of the parsed table as a sanity check on the footer skip.
tab1.tail()

Unnamed: 0,Name,HBC,SpT,JD_min_max,N_seasons,V_range,N_obs,avgB_V,avgV_R,mult,ref
43,Wa CrA/2,678,G8 IV,48049--48100,1,10.69--10.53,28,0.85,0.74,,\\
44,Wa Oph/1,630,K2 IV,49145--53228,8,12.19--11.84,190,1.38,1.3,,\\
45,Wa Oph/2,633,K1 IV,51733--53228,5,11.77--11.60,101,1.16,1.11,,\\
46,Wa Oph/3,634,K0 IV,49145--53228,9,10.98--10.73,255,1.19,1.07,,\\
47,Wa Oph/4,652,K4,49147--52059,4,13.72--10.44,104,1.88,1.83,VB (8.7),re93 \\


In [19]:
# Save the parsed Table 1 as CSV, without writing the row index as a column.
tab1.to_csv('../data/Grankin08/table1.csv', index=False)

## Table 3

In [20]:
#! tail -n 15 ../data/Grankin08/table3.tex

In [21]:
# Column labels assigned to the fields of Table 3, left to right.
names = [
    'Name',
    'Epochs',
    'delta_V_min',
    'delta_V_max',
    'HJD0-24000000',
    'Period',
    'Ref1',
    'Ref2',
]

In [22]:
# Parse Table 3 straight from the LaTeX source: fields are '&'-separated, the
# first 10 lines and the last 5 lines are skipped, and the columns are
# labelled with `names`.
# comment='\\' (a single backslash) makes the reader discard everything from
# the first backslash on each line, which drops the trailing `\\` row ends.
# `skipfooter` is the supported keyword; the old `skip_footer` alias is no
# longer accepted by pandas. Footer skipping needs the python engine.
tab3 = pd.read_csv('../data/Grankin08/table3.tex', sep='&', comment='\\',
                   skiprows=10, names=names, engine='python', skipfooter=5)

In [23]:
# Show the last rows of the parsed table as a sanity check on the footer skip.
tab3.tail()

Unnamed: 0,Name,Epochs,delta_V_min,delta_V_max,HJD0-24000000,Period,Ref1,Ref2
31,VY Tau,1985--2001,0.11,0.31,44610.7,5.36995,gr91,gr94
32,Wa CrA/1,1990,0.32,0.32,48048.3,2.24,sh95,sh95
33,Wa CrA/2,1990,0.16,0.16,48048.3,2.79,co92,sh95
34,Wa Oph/1,1993--2004,0.1,0.3,48986.1,3.792,za93,this paper
35,Wa Oph/3,1993--2004,0.06,0.23,49144.3,1.5214,sh98,this paper


In [24]:
# Save the parsed Table 3 as CSV, without writing the row index as a column.
tab3.to_csv('../data/Grankin08/table3.csv', index=False)

## Raw data files

The list of data files was copied from VizieR and pasted into a text document:

In [19]:
! head ../data/Grankin08/grankin08_dat_files.txt

hbc263.dat.gz	08-Jan-2008 11:24	-r--r--r--	2.3K		- txt - txt.gz - fits - fits.gz - html
hbc264.dat.gz	08-Jan-2008 11:25	-r--r--r--	3.9K		- txt - txt.gz - fits - fits.gz - html
hbc366.dat.gz	08-Jan-2008 11:25	-r--r--r--	1.2K		- txt - txt.gz - fits - fits.gz - html
hbc374.dat.gz	08-Jan-2008 11:25	-r--r--r--	323		- txt - txt.gz - fits - fits.gz - html
hd283572.dat.gz	08-Jan-2008 11:25	-r--r--r--	4.9K		- txt - txt.gz - fits - fits.gz - html
lkca1.dat.gz	08-Jan-2008 11:25	-r--r--r--	1.1K		- txt - txt.gz - fits - fits.gz - html
lkca11.dat.gz	08-Jan-2008 11:25	-r--r--r--	2.3K		- txt - txt.gz - fits - fits.gz - html
lkca14.dat.gz	08-Jan-2008 11:25	-r--r--r--	742		- txt - txt.gz - fits - fits.gz - html
lkca16.dat.gz	08-Jan-2008 11:25	-r--r--r--	728		- txt - txt.gz - fits - fits.gz - html
lkca19.dat.gz	08-Jan-2008 11:25	-r--r--r--	3.6K		- txt - txt.gz - fits - fits.gz - html


In [20]:
# Read the pasted file listing and keep only its first whitespace-separated
# field, stored in a single 'filename' column.
# sep=r'\s+' is the documented replacement for `delim_whitespace=True`, which
# is deprecated in current pandas.
gr_dat = pd.read_csv('../data/Grankin08/grankin08_dat_files.txt', usecols=[0],
                     sep=r'\s+', names=['filename'])

Download the data:

In [21]:
import os

In [22]:
#! mkdir ../data/Grankin08/phot

You only need to run this once:

```python
import subprocess

# Remote directory holding the photometry files, and the local folder that
# receives them. Both are the same for every file, so they are set once.
web_addr = 'http://vizier.cfa.harvard.edu/vizier/ftp/cats/J/A+A/479/827/phot/'
phot_dir = '../data/Grankin08/phot/'

# For a quick trial run, iterate over gr_dat.filename[:3] instead.
for fn in gr_dat.filename:
    # Pass the arguments as a list so that no shell interprets the filename,
    # and let curl write the output file itself (-o) instead of relying on
    # shell redirection. --fail makes curl return a non-zero exit status on an
    # HTTP error rather than saving the server's error page as the data file.
    cmd = ['curl', '--fail', '-o', phot_dir + fn, web_addr + fn]
    print(' '.join(cmd))
    finished = subprocess.run(cmd)
    if finished.returncode != 0:
        # Keep going with the remaining files, but make the failure visible.
        print('Download failed for {} (curl exit status {})'.format(fn, finished.returncode))
```

In [23]:
#! gzip -d ../data/Grankin08/phot/*.dat.gz

In [24]:
# Name of each file after decompression: the listed filename minus its last
# three characters (the '.gz' seen in the listing).
gr_dat['dat_fn'] = gr_dat['filename'].str.slice(stop=-3)

## Match filenames with Table 1

In [25]:
# Guess each star's archive filename from its Table 1 name: lower-case it,
# remove spaces and slashes, then append the '.dat.gz' extension.
tab1['filename_guess'] = (
    tab1['Name']
    .str.lower()
    .str.replace(' ', '')
    .str.replace('/', '')
    + '.dat.gz'
)

A few sources have filenames that differ from their names in column 1 of Table 1.  Replace those guesses with the correct filenames.

In [26]:
# Strip trailing whitespace from the star names so that the exact-match
# comparisons on `Name` further down can succeed.
tab1['Name'] = tab1.Name.str.rstrip()

In [27]:
# Stars whose archive file uses an `hbcNNN` name instead of the name built
# from Table 1, mapped to the correct filename.
filename_overrides = {
    'Anon 1': 'hbc366.dat.gz',
    'Hubble 4': 'hbc374.dat.gz',
    'SR 9': 'hbc264.dat.gz',
    'SR 12': 'hbc263.dat.gz',
}

# `.loc` with a boolean mask is the supported way to overwrite the matching
# rows. It replaces `DataFrame.set_value`, which has been removed from pandas
# and was only meant for a single scalar label.
for star_name, star_file in filename_overrides.items():
    tab1.loc[tab1.Name == star_name, 'filename_guess'] = star_file

In [28]:
# Outer-join Table 1 with the file listing on the (guessed) filename, so that
# rows unmatched on either side are kept and show up with missing values.
out = tab1.merge(gr_dat, how='outer',
                 left_on='filename_guess', right_on='filename')

In [29]:
# Show the first rows of the merged table to check that filenames matched.
out.head()

Unnamed: 0,Name,HBC,SpT,JD_min_max,N_seasons,V_range,N_obs,avgB_V,avgV_R,mult,ref,filename_guess,filename,dat_fn
0,Anon 1,366.0,M0,48953--50760,6,13.58--13.36,96,1.84,1.84,,\\,hbc366.dat.gz,hbc366.dat.gz,hbc366.dat
1,HD 283572,380.0,G6 IV,48854--53300,13,9.16--8.89,425,0.77,0.7,,\\,hd283572.dat.gz,hd283572.dat.gz,hd283572.dat
2,Hubble 4,374.0,K7,49197--49251,1,12.71--12.59,27,1.62,,,\\,hbc374.dat.gz,hbc374.dat.gz,hbc374.dat
3,LkCa 1,365.0,M4,49213--50760,5,13.80--13.64,88,1.45,1.71,,\\,lkca1.dat.gz,lkca1.dat.gz,lkca1.dat
4,LkCa 2,,K7,48953--51157,7,12.37--12.19,119,1.39,1.34,,\\,lkca2.dat.gz,lkca2.dat.gz,lkca2.dat


In [30]:
#out.to_csv('../data/Grankin08/table1_plus.csv', index=False)

## Add RA, DEC, and other simbad info

In [31]:
from astroquery.simbad import Simbad

In [32]:
# Continue under a shorter name. This binds the same DataFrame object (it is
# not a copy), so columns added to `gr_t1` also appear on `out`.
gr_t1 = out

In [33]:
# Build an HBC catalogue designation by prefixing the HBC column with 'HBC'.
# NOTE(review): string concatenation needs the HBC column to hold strings; the
# merged-table preview shows float-looking values (e.g. 366.0) and a missing
# entry -- confirm the column dtype before relying on this.
gr_t1['HBC_name'] = 'HBC' + gr_t1.HBC

In [43]:
# Start the alternate identifier as the HBC designation; it is the name tried
# when a Simbad lookup by the Table 1 name fails.
gr_t1['alt_name'] = gr_t1['HBC_name']

In [54]:
# Rows 18-20 get a TAP designation as their alternate name
# (presumably because their HBC designation is unusable -- confirm).
# A single `.loc[row, column]` assignment writes into the frame itself. The
# chained form `gr_t1.alt_name[18] = ...` assigns through an intermediate
# Series, which pandas warns about and which does not update the frame when
# copy-on-write is enabled.
gr_t1.loc[18, 'alt_name'] = 'TAP 10'
gr_t1.loc[19, 'alt_name'] = 'TAP 11'
gr_t1.loc[20, 'alt_name'] = 'TAP 14'

In [53]:
# Ask Simbad to also return the 'sptype' and 'otype' fields with each query;
# the lookup loop below reads them as the SP_TYPE and OTYPE columns.
Simbad.add_votable_fields('sptype', 'otype')

In [47]:
# Create the columns that the Simbad lookup fills in, each starting out as an
# empty string in every row.
for blank_col in ('pref_name', 'RA', 'DEC', 'SpT_simbad', 'Otype_simbad'):
    gr_t1[blank_col] = ''

In [48]:
# Number of rows in the merged table, i.e. how many Simbad lookups to make.
N_sources = gr_t1.shape[0]

#### You only have to run this once:

```python
def _first_row_fields(result_table):
    """Return (RA, DEC, SpT, Otype) from the first row of a Simbad result.

    Raises TypeError when `result_table` cannot be subscripted
    (NOTE(review): presumably `query_object` returned None for an unknown
    name -- confirm against the installed astroquery version).
    """
    return (result_table['RA'].data.data[0],
            result_table['DEC'].data.data[0],
            result_table['SP_TYPE'].data.data[0],
            result_table['OTYPE'].data.data[0])


# Look up every source in Simbad, first by its Table 1 name and, if that
# fails, by its alternate name; record whichever name worked.
for i in range(N_sources):
    name = gr_t1.Name[i]
    name_alt = gr_t1.alt_name[i]
    result_table = Simbad.query_object(name)
    try:
        RA, DEC, SpT, Otype = _first_row_fields(result_table)
        print("{} was found in Simbad.".format(name))
    except TypeError:
        print("Attempt 1 did not work for {}, trying HBC name: {}...".format(name, name_alt), end='')
        # A failure of this second attempt is not caught and stops the loop.
        RA, DEC, SpT, Otype = _first_row_fields(Simbad.query_object(name_alt))
        print(' success!')
        name = name_alt
    # `.at[row, column]` sets one cell by label. It replaces
    # `DataFrame.set_value`, which has been removed from pandas.
    gr_t1.at[i, 'pref_name'] = name
    gr_t1.at[i, 'RA'] = RA
    gr_t1.at[i, 'DEC'] = DEC
    # SpT and Otype are decoded to text before being stored.
    gr_t1.at[i, 'SpT_simbad'] = SpT.decode()
    gr_t1.at[i, 'Otype_simbad'] = Otype.decode()
```

In [52]:
#gr_t1.to_csv('../data/Grankin08/table1_plus.csv', index=False)

## The end!