In [1]:
import re
import requests
from bs4 import BeautifulSoup
import numpy as np

from astropy.coordinates import SkyCoord
import astropy.units as u

In [2]:
# Base URL of the PSRPI pages; the release index lives at <url>/release.html.
url = "https://safe.nrao.edu/vlba/psrpi"

# Fetch the release index.  A timeout keeps a stalled connection from hanging
# the kernel, and raise_for_status() stops us from silently scraping an HTTP
# error page (which would just yield an empty/garbage link list).
r = requests.get("%s/release.html" % url, timeout=30)
r.raise_for_status()

# Collect every href target that starts with "release/" (relative to `url`).
# Look-behind/look-ahead keep the surrounding href="..." out of the match.
link = re.findall(r'(?<=href=")release/.+?(?=")', r.text)

In [3]:
def parse_pmline(pm_line):
    """Parse a value with asymmetric errors into ``(value, error)``.

    The expected text is ``<value>-<err_low>+<err_high>`` (the two error
    terms may also appear in the opposite order), where ``<value>`` may carry
    its own leading sign, e.g. ``"-1.23-0.04+0.05"``.  Surrounding whitespace
    is ignored.

    Parameters
    ----------
    pm_line : str
        Text holding the value followed by its lower and upper errors.

    Returns
    -------
    pm : float
        The central value.
    pm_err : float
        The larger of the two error terms, used as a symmetric error.

    Raises
    ------
    ValueError
        If either separator is missing or a field is not a valid float.
    """
    text = pm_line.strip()

    # A leading sign belongs to the value itself; start the separator search
    # after it so it is not mistaken for the start of an error term.
    start = 1 if text[:1] in ("-", "+") else 0

    # Location of - and + that introduce the two error terms
    loc_m = text.index("-", start)
    loc_p = text.index("+", start)

    # Accept either ordering of the error terms.
    first, second = sorted((loc_m, loc_p))

    pm = float(text[:first])
    err_first = float(text[first + 1:second])
    err_second = float(text[second + 1:])

    pm_err = max(err_first, err_second)

    return pm, pm_err

In [4]:
# Scrape each linked release page and write one comma-separated row per pulsar
# to ../data/psrpi.dat.

# Row format for the output table; loop-invariant, so build it once.
line_fmt = (
    "{:10s},{:7.1f},{:15.10f},{:15.10f},{:8.3f},{:8.3f},{:8.3f},"
    "{:6.3f},{:6.3f},{:6.3f},{:6.3f},{:6.3f}"
)

with open("../data/psrpi.dat", "w") as fout:
    # Two header lines: column names, then a "#"-prefixed units line.
    print(
        "psr_name, pos_epoch, ra, dec, pmra, pmdec, plx, "
        "ra_err, dec_err, pmra_err, pmdec_err, plx_err\n"
        "# , MJD, deg, deg, mas/yr, mas/yr, mas,"
        " mas, mas, mas/yr, mas/yr, mas",
        file=fout)

    for linki in link:
        # Bound the wait and fail loudly on HTTP errors rather than trying to
        # parse an error page.
        r = requests.get("%s/%s" % (url, linki), timeout=30)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "lxml")

        # Rows of the table that is the 8th child of <body>; the indices used
        # below rely on the row order name, epoch, RA, Dec, PM_RA, PM_Dec,
        # parallax.
        data = soup.select("body > table:nth-child(8) > tr")

        # PSR name
        psr_name = data[0].get_text()
        print(psr_name)

        # Position epoch
        pos_epoch = float(data[1].get_text().split("MJD")[1])

        # RA and its error.  The error is the number in parentheses, scaled by
        # the number of decimals in the value; strip first so stray whitespace
        # before "(" is not counted as a decimal digit.
        ra_line = data[2].get_text().split("RA")[1]
        ra_str, ra_err_str = ra_line.split("(")
        ra_str = ra_str.strip()
        sig_dig = len(ra_str.split(".")[1])
        ra_err = float(
            ra_err_str.split(")")[0]) * 10**(3 - sig_dig) * 15  # second -> mas

        # Decl. and its error (same parenthesised convention as RA)
        dec_line = data[3].get_text().split("Dec")[1]
        dec_str, dec_err_str = dec_line.split("(")
        dec_str = dec_str.strip()
        sig_dig = len(dec_str.split(".")[1])
        dec_err = float(
            dec_err_str.split(")")[0]) * 10**(3 - sig_dig)  # arcsec -> mas

        # A "$-$" prefix is replaced with a plain "-" so SkyCoord can parse it.
        if dec_str[:3] == "$-$":
            dec_str = "-" + dec_str[3:]

        ra_dec = SkyCoord(ra_str, dec_str, unit=(u.hourangle, u.degree))
        ra = ra_dec.ra.deg
        dec = ra_dec.dec.deg

        # Correct for the cos(decl.)
        ra_err = ra_err * np.cos(ra_dec.dec.rad)

        # PMRA
        pmra_line = data[4].get_text().split("PM_RA (mas/yr)")[1]
        pmra, pmra_err = parse_pmline(pmra_line)

        # PMDEC
        pmdec_line = data[5].get_text().split("PM_Dec (mas/yr)")[1]
        pmdec, pmdec_err = parse_pmline(pmdec_line)

        # Parallax (same value-err_low+err_high layout, so the same parser)
        plx_line = data[6].get_text().split("Parallax (mas)")[1]
        plx, plx_err = parse_pmline(plx_line)

        print(line_fmt.format(psr_name, pos_epoch, ra, dec, pmra, pmdec, plx,
                              ra_err, dec_err, pmra_err, pmdec_err, plx_err),
              file=fout)

J0040+5716
J0055+5117
J0102+6537
J0108+6608
J0147+5922
J0151-0635
J0152-1637
J0157+6212
J0323+3944
J0332+5434
J0335+4555
J0357+5236
J0406+6138
J0601-0527
J0614+2229
J0629+2415
J0729-1836
J0823+0159
J0826+2637
J1022+1001
J1136+1551
J1257-1027
J1321+8323
J1532+2745
J1543-0620
J1607-0032
J1623-0908
J1645-0317
J1650-1654
J1703-1846
J1735-0724
J1741-0840
J1754+5201
J1820-0427
J1833-0338
J1840+5640
J1901-0906
J1912+2104
J1913+1400
J1917+1353
J1919+0021
J1937+2544
J2006-0807
J2010-1323
J2046+1540
J2046-0421
J2113+2754
J2113+4644
J2145-0750
J2149+6329
J2150+5247
J2212+2933
J2225+6535
J2248-0101
J2305+3100
J2317+1439
J2317+2149
J2325+6316
J2346-0609
J2354+6155
