In [None]:
import glob
import time
import warnings

import astropy.coordinates as coord
import astropy.units as un
import numpy as np
import pandas as pd
from astropy.coordinates import SkyCoord
from astropy.time import Time
from astroquery.simbad import Simbad

warnings.filterwarnings('ignore')

In [None]:
def get_primary_beam_size(freq, D):
    '''
    Estimate the primary beam diameter as
    1.22 * wavelength / D

    Args:
    freq (float): observing frequency in Hz
    D (float): length the wavelength is divided
               by, in m (the calls in this
               notebook pass 13.5)

    Returns:
    A float of the primary beam
    diameter in degrees
    '''
    # Speed of light in m/s
    speed_of_light = 299792458
    # Frequency -> wavelength in m
    wavelength = speed_of_light/freq
    # Beam diameter in radians
    beam_rad = (1.22*wavelength)/D
    # Hand the result back in degrees
    return np.rad2deg(beam_rad)

# Find known flare objects in the FoV of MeerKAT LSP pointings

This code can be used to find which known flare type objects are in the field of view (FoV) of MeerKAT LSP pointings. It has three parts:

- Combine flare type object catalogues
- Combine MeerKAT LSP pointing information
- Find flare type objects that fall into the LSP fields of view

The results of each step are saved as pandas data frames in csv files. This is to keep track of what's in all the columns.

## Step 1: combine flare type object catalogues

Here we read and collate catalogues of flare type objects. The catalogues we use include flare stars, white dwarfs, RS CVns, unidentified transient flares and more. The catalogues we use here are:

- Lepine: bright M-dwarfs - 2011AJ....142..138L
- JGagne: ultra-cool dwarfs - jgagneastro.wordpress.com/list-of-ultracool-dwarfs
- sdss_mwds: magnetic white dwarfs from SDSS: 2013MNRAS.429.2934K
- Catalina: Transients in the Catalina Surveys Data Release 2 - 2009ApJ...696..870D
- simbad_fstars: flare stars (F*) from the SIMBAD catalogue (2000A&AS..143....9W)
- simbad_RSCVns: RS CVn variables (RS*) from the SIMBAD catalogue (2000A&AS..143....9W)
- whitedwarfs: white dwarfs from the Montreal White Dwarf Database - http://www.montrealwhitedwarfdatabase.org/references.html

We read in each catalogue using pandas, make columns for the proper motion if there are none and rename the RA and DEC columns. Then we combine the catalogues. If the source does not have an official SIMBAD name already, we check SIMBAD by radius to see if the source is known. Please note that this means that the SIMBAD names are *not* confirmed, but are a good starting point for looking into the source. Also, not all sources are identified in SIMBAD, particularly Catalina transients.

In [None]:
# Locate the catalogue files

# Directory that holds the downloaded
# catalogues
path = '/raid/driessen/Catalogues/'

# One file per catalogue listed above;
# the Catalina candidates are spread over
# several numbered files, hence the glob
lepine = path + 'Lepine_BrightMDwarfs.csv'
jgagne = path + 'List_of_UltraCool_Dwarfs.csv'
sdss_mwds = path + 'magWDs.tsv'
simbad_fstars = path + 'simbad_flarestars_pm.txt'
catalina = path + 'CRTS_all_transients.tsv'
catalinas = glob.glob(path + 'css_transientcandidates_?.tsv')
whitedwarfs = path + 'MWDD-export.csv'
simbad_RSCVns = path + 'SIMBAD_RSCVns.txt'

### Read the files

Read in the files using pandas. Each one has to be read in slightly differently because the catalogues are formatted very differently.

In [None]:
columns = ['lepine_name', 'cns3_name',
           'ra(deg)', 'dec(deg)',
           'pmra(arcsec/yr)', 'pmdec(arcsec/yr)']
# Character range of each field on a line,
# in the same order as the column names
lepine_slices = [slice(0, 17), slice(38, 55),
                 slice(56, 66), slice(68, 78),
                 slice(86, 92), slice(93, 99)]

# Lepine uses an odd fixed-width file format
# that Pandas doesn't like, so read it in
# line by line
lepine_source_info = []
with open(lepine, 'r') as fn:
    for l, line in enumerate(fn):
        # Lines 0-40 are skipped (presumably
        # the file header)
        if l > 40:
            lepine_source_info.append([line[sl].strip()
                                       for sl in lepine_slices])

# Put everything into a Pandas table. Building
# it from the list of rows also works when no
# data rows were read
lepine_table = pd.DataFrame(lepine_source_info, columns=columns)

# Remove rows that don't have known
# coordinates. The fields are still strings
# here, so a missing value is an empty string
# that dropna would not see: turn those into
# NaN first
coord_cols = ['ra(deg)', 'dec(deg)']
lepine_table[coord_cols] = lepine_table[coord_cols].replace('', np.nan)
lepine_table = lepine_table.dropna(axis='rows', subset=coord_cols)
# Make a column stating which catalogue these
# sources are from
lepine_table['Catalogue'] = 'Lepine'
# Add an empty column for SIMBAD names
lepine_table['simbad_names'] = ''

In [None]:
# Read the ultra-cool dwarf list, give the
# RA and Dec columns the names shared by
# every catalogue, and drop the rows without
# known coordinates
jgagne_table = (pd.read_csv(jgagne)
                .rename(columns={'R.A. (deg)': 'ra(deg)',
                                 'Decl. (deg)': 'dec(deg)'})
                .dropna(axis='rows', subset=['ra(deg)', 'dec(deg)']))

# Constant columns: the catalogue label, then
# empty placeholders for the proper motions
# and the SIMBAD names
jgagne_extra_cols = [('Catalogue', 'J.Gagne'),
                     ('pmra(arcsec/yr)', ''),
                     ('pmdec(arcsec/yr)', ''),
                     ('simbad_names', '')]
for col_name, col_value in jgagne_extra_cols:
    jgagne_table[col_name] = col_value

In [None]:
# The VizieR export has a two-row header
# (rows 71 and 72); the first row after it is
# skipped and the two header levels are joined
# with an underscore
vizier_mwds_table = pd.read_csv(sdss_mwds, header=[71, 72], delimiter='\t').iloc[1:]
vizier_mwds_table.columns = ['_'.join(levels)
                             for levels in vizier_mwds_table.columns]

# Give the RA and Dec columns the names shared
# by every catalogue, then drop the rows
# without known coordinates
shared_names = {'#_RAJ2000_#deg': 'ra(deg)',
                '_DEJ2000_deg': 'dec(deg)'}
vizier_mwds_table = vizier_mwds_table.rename(columns=shared_names)
vizier_mwds_table = vizier_mwds_table.dropna(axis='rows',
                                             subset=['ra(deg)', 'dec(deg)'])

# Constant columns: the catalogue label, then
# empty placeholders for the proper motions
# and the SIMBAD names
vizier_extra_cols = [('Catalogue', 'Vizier_MWDs'),
                     ('pmra(arcsec/yr)', ''),
                     ('pmdec(arcsec/yr)', ''),
                     ('simbad_names', '')]
for col_name, col_value in vizier_extra_cols:
    vizier_mwds_table[col_name] = col_value

In [None]:
def _split_simbad_coord(text):
    '''
    Split a SIMBAD 'RA +Dec' or 'RA -Dec' string
    into its two halves

    Args:
    text (str): coordinate string whose Dec part
                starts with ' +' or ' -'

    Returns:
    [ra, dec] with the sign kept on dec, or None
    when text is not a string or has no signed
    Dec part
    '''
    if not isinstance(text, str):
        return None
    for sign in ('+', '-'):
        ra_part, found, dec_part = text.partition(' ' + sign)
        if found:
            return [ra_part, sign + dec_part]
    return None


simbad_fstars_table = pd.read_csv(simbad_fstars, header=[4], delimiter='|')
# Skip the first and last rows of the export
simbad_fstars_table = simbad_fstars_table[1:-1]
simbad_fstars_table.columns = simbad_fstars_table.columns.str.strip()

# This catalogue has an odd coordinate
# format, so I first correct that
simbad_coords_orig = simbad_fstars_table['coord1 (ICRS,J2000/2000)']
simbad_coords = [_split_simbad_coord(coo) for coo in simbad_coords_orig]

# Remove rows that don't have known
# coordinates. This has to happen before the
# conversion: an entry that cannot be split
# would otherwise silently inherit the
# coordinates of the previous row
has_coords = np.array([coo is not None for coo in simbad_coords], dtype=bool)
simbad_fstars_table = simbad_fstars_table[has_coords].copy()
simbad_coords = coord.SkyCoord([coo for coo in simbad_coords if coo is not None],
                               unit=(un.hourangle, un.deg))
# Add the corrected RA and DEC columns
simbad_fstars_table['ra(deg)'] = np.array(simbad_coords.ra.deg)
simbad_fstars_table['dec(deg)'] = np.array(simbad_coords.dec.deg)

# Get the proper motions
# and put them into the correct
# format
# NOTE(review): SIMBAD usually lists proper
# motions in mas/yr while the column label
# says arcsec/yr - confirm the units of this
# export
pmra = []
pmdec = []
for pm in simbad_fstars_table['pm']:
    # '~' marks a missing proper motion; treat
    # a non-string or incomplete entry the same way
    pms = pm.split() if isinstance(pm, str) and '~' not in pm else []
    if len(pms) < 2:
        pmra.append(np.nan)
        pmdec.append(np.nan)
    else:
        pmra.append(pms[0])
        pmdec.append(pms[1])
simbad_fstars_table['pmra(arcsec/yr)'] = pmra
simbad_fstars_table['pmdec(arcsec/yr)'] = pmdec

# Change the column name that has
# the SIMBAD names in it, for
# consistency
simbad_fstars_table = simbad_fstars_table.rename(columns={'identifier':
                                                          'simbad_names'})
# Make a column stating which catalogue these
# sources are from
simbad_fstars_table['Catalogue'] = 'SimbadFlareStars'

In [None]:
def _split_simbad_coord(text):
    '''
    Split a SIMBAD 'RA +Dec' or 'RA -Dec' string
    into its two halves

    Args:
    text (str): coordinate string whose Dec part
                starts with ' +' or ' -'

    Returns:
    [ra, dec] with the sign kept on dec, or None
    when text is not a string or has no signed
    Dec part
    '''
    if not isinstance(text, str):
        return None
    for sign in ('+', '-'):
        ra_part, found, dec_part = text.partition(' ' + sign)
        if found:
            return [ra_part, sign + dec_part]
    return None


simbad_rscvns_table = pd.read_csv(simbad_RSCVns, header=[4], delimiter='|')
# Skip the first and last rows of the export
simbad_rscvns_table = simbad_rscvns_table[1:-1]
simbad_rscvns_table.columns = simbad_rscvns_table.columns.str.strip()

# This catalogue has an odd coordinate
# format, so I first correct that
simbad_coords_orig = simbad_rscvns_table['coord1 (ICRS,J2000/2000)']
simbad_coords = [_split_simbad_coord(coo) for coo in simbad_coords_orig]

# Remove rows that don't have known
# coordinates. This has to happen before the
# conversion: an entry that cannot be split
# would otherwise silently inherit the
# coordinates of the previous row
has_coords = np.array([coo is not None for coo in simbad_coords], dtype=bool)
simbad_rscvns_table = simbad_rscvns_table[has_coords].copy()
simbad_coords = coord.SkyCoord([coo for coo in simbad_coords if coo is not None],
                               unit=(un.hourangle, un.deg))
# Add the corrected RA and DEC columns
simbad_rscvns_table['ra(deg)'] = np.array(simbad_coords.ra.deg)
simbad_rscvns_table['dec(deg)'] = np.array(simbad_coords.dec.deg)

# Get the proper motions
# and put them into the correct
# format
# NOTE(review): SIMBAD usually lists proper
# motions in mas/yr while the column label
# says arcsec/yr - confirm the units of this
# export
pmra = []
pmdec = []
for pm in simbad_rscvns_table['pm']:
    # '~' marks a missing proper motion; treat
    # a non-string or incomplete entry the same way
    pms = pm.split() if isinstance(pm, str) and '~' not in pm else []
    if len(pms) < 2:
        pmra.append(np.nan)
        pmdec.append(np.nan)
    else:
        pmra.append(pms[0])
        pmdec.append(pms[1])
simbad_rscvns_table['pmra(arcsec/yr)'] = pmra
simbad_rscvns_table['pmdec(arcsec/yr)'] = pmdec
# Change the column name that has
# the SIMBAD names in it, for
# consistency
simbad_rscvns_table = simbad_rscvns_table.rename(columns={'identifier':
                                                          'simbad_names'})

# Make a column stating which catalogue these
# sources are from
simbad_rscvns_table['Catalogue'] = 'SimbadRSCVns'

In [None]:
def _read_catalina_file(filename):
    '''
    Read one tab-separated Catalina transient file

    The first line is skipped and lines with fewer
    than 10 tab-separated fields are ignored. Of
    each remaining line the first three fields and
    the last field are kept.

    Args:
    filename (str): path to the file

    Returns:
    A pandas data frame with the columns CRTS_ID,
    ra(deg), dec(deg) and classification, all
    holding strings (empty if no line qualified)
    '''
    headers = ['CRTS_ID', 'ra(deg)', 'dec(deg)', 'classification']
    rows = []
    with open(filename, 'r') as content:
        for l, line in enumerate(content):
            if l == 0:
                continue
            fields = line.split('\t')
            if len(fields) >= 10:
                rows.append([fields[0].strip(), fields[1].strip(),
                             fields[2].strip(), fields[-1].strip()])
    table = pd.DataFrame(rows, columns=headers)
    # Remove rows that don't have known
    # coordinates
    return table.dropna(axis='rows', subset=['ra(deg)', 'dec(deg)'])


# Reading in the Catalina sources is a bit more
# complicated because the sources are in
# a set of files, rather than one file.
# pd.concat is used to stack them because
# DataFrame.append no longer exists in
# pandas 2.0 and later
catalina_table = pd.concat([_read_catalina_file(cat)
                            for cat in [catalina] + catalinas])

# Drop every source whose classification
# mentions one of these substrings
subs = ['SN', 'Ast', 'AGN', 'Nothing', 'Blazar']
for sub in subs:
    has_sub = catalina_table['classification'].str.contains(sub, regex=False)
    catalina_table = catalina_table[~has_sub]

# Make a column stating which catalogue these
# sources are from
catalina_table['Catalogue'] = 'CRTS'
# Add an empty column for SIMBAD names
# and proper motions
catalina_table['pmra(arcsec/yr)'] = ''
catalina_table['pmdec(arcsec/yr)'] = ''
catalina_table['simbad_names'] = ''

In [None]:
wd_table = pd.read_csv(whitedwarfs)

# The catalogue coordinates are read as
# hour angle (RA) and degrees (Dec); store
# both in degrees
wd_coords = coord.SkyCoord(wd_table['icrsra'],
                           wd_table['icrsdec'],
                           unit=(un.hourangle, un.deg))
for deg_col, deg_values in (('ra(deg)', wd_coords.ra.deg),
                            ('dec(deg)', wd_coords.dec.deg)):
    wd_table[deg_col] = deg_values

# The wdid column plays the role of the
# name column in the other catalogues
wd_table = wd_table.rename(columns={'wdid': 'simbad_names'})

# Constant columns: the catalogue label, then
# empty placeholders for the proper motions
for col_name, col_value in (('Catalogue', 'MWDD'),
                            ('pmra(arcsec/yr)', ''),
                            ('pmdec(arcsec/yr)', '')):
    wd_table[col_name] = col_value

### Combine the dataframes

Combine the pandas dataframes for each catalogue. Use "inner" so that only the columns shared by every catalogue are kept.

In [None]:
dataframes = [lepine_table,
              jgagne_table,
              vizier_mwds_table,
              simbad_fstars_table,
              catalina_table,
              wd_table,
              simbad_rscvns_table]

# join='inner' keeps only the columns that
# every catalogue has
combined_df = pd.concat(dataframes, join='inner', ignore_index=True)
combined_df = combined_df.drop_duplicates()

# The coordinates and proper motions are a mix
# of strings and numbers at this point, so
# convert them all to numbers
# NOTE(review): downcast='float' lets pandas
# store the values as float32, which limits
# the precision of the coordinates - confirm
# that this is intended
numeric_cols = ['ra(deg)',
                'dec(deg)',
                'pmra(arcsec/yr)',
                'pmdec(arcsec/yr)']
combined_df[numeric_cols] = combined_df[numeric_cols].apply(pd.to_numeric,
                                                            downcast='float')

# Treat a missing name like an empty one, so
# that those sources are looked up in SIMBAD
# too instead of being merged into a single
# row when duplicate names are dropped later
combined_df['simbad_names'] = combined_df['simbad_names'].fillna('')

# Split the catalogue into sources that already
# have SIMBAD names, and sources that don't.
# Copies are taken because columns of these
# tables are assigned to later on
no_names = combined_df[combined_df['simbad_names'] == ''].copy()
names = combined_df[combined_df['simbad_names'] != ''].copy()

### Find SIMBAD names for sources that don't have them

In [None]:
def _simbad_main_id(value):
    '''
    Return a SIMBAD MAIN_ID table entry as a str

    Args:
    value: the entry, either bytes or
           something str() can convert

    Returns:
    The identifier as a str
    '''
    if isinstance(value, bytes):
        return value.decode('UTF-8')
    return str(value)


# Get the coordinates of the sources
# that don't have names yet
no_name_coords = coord.SkyCoord(np.array(no_names['ra(deg)'])*un.deg,
                                np.array(no_names['dec(deg)'])*un.deg,
                                pm_ra_cosdec=np.array(no_names['pmra(arcsec/yr)'])*un.arcsec/un.yr,
                                pm_dec=np.array(no_names['pmdec(arcsec/yr)'])*un.arcsec/un.yr)
no_name_names = []

# Divide the sources up into chunks,
# otherwise astroquery will chuck
# a hissy fit. The number of chunks follows
# from the number of sources, so that every
# source gets exactly one entry
chunk_size = 1000
for start in range(0, len(no_name_coords), chunk_size):
    end = start + chunk_size
    coords = no_name_coords[start:end]

    # Use the coordinates to search for nearby
    # sources for each source
    result_table = Simbad.query_region(coords, radius=2.*un.arcsec)
    # Match the results to the sources. The
    # names of a chunk are collected separately
    # so that a failure part-way through cannot
    # leave a partial chunk in the final list
    chunk_names = []
    try:
        result_coords = coord.SkyCoord(list(result_table['RA']),
                                       list(result_table['DEC']),
                                       unit=(un.hourangle, un.deg))

        for coo in coords:
            seps = coo.separation(result_coords)

            if np.nanmin(seps.deg) < 2./60./60.:
                nearest = result_table[np.nanargmin(seps.deg)]
                chunk_names.append(_simbad_main_id(nearest['MAIN_ID']))
            else:
                chunk_names.append('')
    except (TypeError, KeyError):
        # Add a space for any source that doesn't
        # have any SIMBAD matches
        print('No SIMBAD matches within 2 asec: ', start, end)
        chunk_names = [''] * len(coords)
    no_name_names.extend(chunk_names)
    # Take a break, because otherwise astroquery
    # and SIMBAD will have a different hissy fit
    time.sleep(10)

# Add the SIMBAD names you just found
# to the source without names
no_names['simbad_names'] = no_name_names

In [None]:
# Stack the named and the newly named
# sources back into one table
combined_df_names = pd.concat([names, no_names],
                              join='inner',
                              ignore_index=True)
# Flag the rows that carry a name and
# split the table on that flag
has_name = combined_df_names['simbad_names'] != ''
new_no_names = combined_df_names[~has_name]
new_names = combined_df_names[has_name]
# Keep one row per name
new_names = new_names.drop_duplicates(subset='simbad_names')
# Named sources first, unnamed ones after:
# this is the final dataframe
final_df = pd.concat([new_names, new_no_names],
                     join='inner',
                     ignore_index=True)

In [None]:
# Write the combined catalogue to disk so the
# steps above don't have to be run again
flare_catalogue_file = path + 'FlareTypeStars_Pandas_SimbadNames.csv'
final_df.to_csv(flare_catalogue_file)

## Step 2: combine MeerKAT LSP pointing information

Here we read in and reorganise the information about the MeerKAT LSP pointings. We include the RA and DEC of the phase centre of each pointing, the diameter of the primary beam, the source at the phase centre (if there is one), and the name of the LSP.

Start by reading in the information for the LSPs: MALS, ThunderKAT, Fornax, LADUMA, MHONGOOSE and MIGHTEE.

In [None]:
def _sexagesimal(triplet, unit_letter):
    '''
    Format three numbers as a sexagesimal string
    for astropy SkyCoord

    Args:
    triplet: the leading value (hours or degrees),
             the minutes and the seconds
    unit_letter (str): 'h' or 'd', written after
                       the leading value

    Returns:
    A string such as '12h34m56.7s' or '-0d12m3.4s'
    '''
    lead, minutes, seconds = triplet
    # Take the sign from the float itself: for
    # -0.0 (e.g. a declination of -00 12 34)
    # int() would return 0 and lose the sign
    sign = '-' if np.signbit(lead) else ''
    return '{0}{1}{2}{3}m{4}s'.format(sign, abs(int(lead)), unit_letter,
                                      int(minutes), seconds)


MALS_raw = np.genfromtxt('{}MALS-Lband-sample.txt'.format(path))

# Get the RA and DEC from the array
ras = MALS_raw[:, :3]
decs = MALS_raw[:, 3:6]
# Convert the RA and DEC into the
# required format for astropy SkyCoord
mals_ra = [_sexagesimal(ra, 'h') for ra in ras]
mals_dec = [_sexagesimal(dec, 'd') for dec in decs]
radec = coord.SkyCoord(mals_ra, mals_dec, frame='icrs')
# Column 7 is multiplied by 1e9 to get the
# frequency in Hz
freq = MALS_raw[:, 7]*1e9
pb = get_primary_beam_size(freq, 13.5)

mals_dict = {'RA(deg)': radec.ra.deg,
             'DEC(deg)': radec.dec.deg,
             'Freq(Hz)': freq,
             'Beam_FWHM_Diameter(deg)': pb}
mals_df = pd.DataFrame(data=mals_dict)
mals_df['LSP'] = 'MALS'

In [None]:
# LADUMA pointings, with the same constant
# frequency, label and beam size on every row
laduma_df = pd.read_csv(path + 'LADUMA_coordinates.csv').assign(
    **{'Freq(Hz)': 1.4e9,
       'LSP': 'LADUMA',
       'Beam_FWHM_Diameter(deg)': get_primary_beam_size(1.4e9, 13.5)})

In [None]:
# ThunderKAT pointings, with the same constant
# frequency, label and beam size on every row
tkt_df = pd.read_csv(path + 'ThunderKAT_coordinates.csv').assign(
    **{'Freq(Hz)': 1.4e9,
       'LSP': 'ThunderKAT',
       'Beam_FWHM_Diameter(deg)': get_primary_beam_size(1.4e9, 13.5)})

In [None]:
# Read the MHONGOOSE candidates as text;
# columns 3 and 4 hold the coordinates
mhon_raw = np.genfromtxt(path + 'mhongoose_candidates.txt', dtype=str)
mhon_ra_col = mhon_raw[:, 3]
mhon_dec_col = mhon_raw[:, 4]
mhon_coords = coord.SkyCoord(mhon_ra_col, mhon_dec_col, frame='icrs')

mhon_dict = {'RA(deg)': mhon_coords.ra.deg,
             'DEC(deg)': mhon_coords.dec.deg}
# Same constant frequency, label and beam
# size on every row
mhon_df = pd.DataFrame(data=mhon_dict).assign(
    **{'Freq(Hz)': 1.4e9,
       'LSP': 'MHONGOOSE',
       'Beam_FWHM_Diameter(deg)': get_primary_beam_size(1.4e9, 13.5)})

In [None]:
# Fornax pointing centres, read as an array
# and passed to SkyCoord in degrees
fornax_file = path + 'Fornax_mfs_pointingcentres_2018_radec.txt'
fornax_raw = np.genfromtxt(fornax_file)
fornax_coords = coord.SkyCoord(fornax_raw, unit=(un.deg, un.deg))

fornax_dict = {'RA(deg)': fornax_coords.ra.deg,
               'DEC(deg)': fornax_coords.dec.deg}
# Same constant frequency, label and beam
# size on every row
fornax_df = pd.DataFrame(data=fornax_dict).assign(
    **{'Freq(Hz)': 1.4e9,
       'LSP': 'Fornax',
       'Beam_FWHM_Diameter(deg)': get_primary_beam_size(1.4e9, 13.5)})

In [None]:
mightee_raw = np.genfromtxt(path + 'MIGHTEE_pointings.tsv', dtype=str)

# Every pointing gets a 1.4e9 Hz entry; fields
# with 'CDFS' in their name also get entries
# at 0.85e9 and 2.125e9 Hz
mightee_info = []
for row in mightee_raw:
    ra_deg = float(row[1])
    dec_deg = float(row[2])
    if 'CDFS' in row[0]:
        bands = (1.4e9, 0.85e9, 2.125e9)
    else:
        bands = (1.4e9,)
    for band in bands:
        mightee_info.append([ra_deg, dec_deg, band])
mightee_info = np.array(mightee_info)

# Columns: RA, Dec, frequency
mightee_ra = mightee_info[:, 0].astype(float)*un.deg
mightee_dec = mightee_info[:, 1].astype(float)*un.deg
mightee_coords = coord.SkyCoord(mightee_ra, mightee_dec)

freq = mightee_info[:, 2]
pb = get_primary_beam_size(freq, 13.5)

mightee_dict = {'RA(deg)': mightee_coords.ra.deg,
                'DEC(deg)': mightee_coords.dec.deg,
                'Freq(Hz)': freq,
                'Beam_FWHM_Diameter(deg)': pb}
mightee_df = pd.DataFrame(data=mightee_dict)
mightee_df['LSP'] = 'MIGHTEE'

In [None]:
# The known pulsars stand in for the MeerTIME
# pointing positions; the first two columns
# of the file are used as RA and Dec in degrees
meerTime_raw = np.genfromtxt(path + 'formatted_ATNF_psrs.csv',
                             delimiter=',', dtype=str)

meertime_ra = meerTime_raw[:, 0].astype(float)*un.deg
meertime_dec = meerTime_raw[:, 1].astype(float)*un.deg
meertime_coords = coord.SkyCoord(meertime_ra, meertime_dec)

meertime_dict = {'RA(deg)': meertime_coords.ra.deg,
                 'DEC(deg)': meertime_coords.dec.deg}
# Same constant frequency, label and beam
# size on every row
meertime_df = pd.DataFrame(data=meertime_dict).assign(
    **{'Freq(Hz)': 1.4e9,
       'LSP': 'pulsars',
       'Beam_FWHM_Diameter(deg)': get_primary_beam_size(1.4e9, 13.5)})

### Combine the data frames for all of the LSPs

In [None]:
# One table per LSP, stacked on the columns
# that all of them have
dataframes = [
    mals_df,
    laduma_df,
    tkt_df,
    mhon_df,
    fornax_df,
    meertime_df,
    mightee_df,
]
combined_df = pd.concat(dataframes,
                        join='inner',
                        ignore_index=True)

Get the name of the source at the phase centre of each pointing (if there is one). This is just some nice extra information to have.

In [None]:
def _simbad_main_id(value):
    '''
    Return a SIMBAD MAIN_ID table entry as a str

    Args:
    value: the entry, either bytes or
           something str() can convert

    Returns:
    The identifier as a str
    '''
    if isinstance(value, bytes):
        return value.decode('UTF-8')
    return str(value)


# Plain arrays are used for the coordinates,
# as is done for the flare type objects in
# step 1
all_coords = coord.SkyCoord(np.array(combined_df['RA(deg)'])*un.deg,
                            np.array(combined_df['DEC(deg)'])*un.deg)
centre_source = []

# Divide the sources up into chunks,
# otherwise astroquery will chuck
# a hissy fit. The number of chunks follows
# from the number of pointings, so that every
# pointing gets exactly one entry
chunk_size = 1000
for start in range(0, len(all_coords), chunk_size):
    end = start + chunk_size
    coords = all_coords[start:end]

    # Use the coordinates to search for nearby
    # sources for each source
    result_table = Simbad.query_region(coords, radius=2.*un.arcsec)
    # Match the results to the sources. The
    # names of a chunk are collected separately
    # so that a failure part-way through cannot
    # leave a partial chunk in the final list
    chunk_names = []
    try:
        result_coords = coord.SkyCoord(list(result_table['RA']),
                                       list(result_table['DEC']),
                                       unit=(un.hourangle, un.deg))

        for coo in coords:
            seps = coo.separation(result_coords)

            if np.nanmin(seps.deg) < 2./60./60.:
                nearest = result_table[np.nanargmin(seps.deg)]
                chunk_names.append(_simbad_main_id(nearest['MAIN_ID']))
            else:
                chunk_names.append('')
    except (TypeError, KeyError):
        # Add a space for any source that doesn't
        # have any SIMBAD matches
        print('No SIMBAD matches within 2 asec: ', start, end)
        chunk_names = [''] * len(coords)
    centre_source.extend(chunk_names)
combined_df['PhaseCentreSource'] = centre_source

Save the data frame

In [None]:
# Write the pointing table to disk, without
# the index column
lsp_pointings_file = path + 'MeerKAT_LSP_AllPointings.csv'
combined_df.to_csv(lsp_pointings_file, index=False)

## Step 3: find flare type objects that fall into the LSP fields of view

Check whether any flare type objects are in the field of view for MeerKAT LSPs.

In the final data frame, there is a row for every flare star that is within the primary beam of an LSP pointing. That means that some flare stars are counted more than once, if an LSP has overlapping beams and the flare star falls in several of them. You can easily remove stars that are counted more than once by using drop_duplicates; I'll demonstrate this near the bottom of the notebook.

In [None]:
# Read in the flare objects file. It was saved
# with its index, which comes back as the
# 'Unnamed: 0' column and is dropped here
flarestars = pd.read_csv(('{}FlareTypeStars_'
                          'Pandas_'
                          'SimbadNames.csv').format(path)).drop(['Unnamed: 0'],
                                                                axis=1)
# Get the coordinates of each source. The
# proper motions are given the unit named in
# the column labels (arcsec/yr), as in step 1
# NOTE(review): no space motion is applied
# further down, so the proper motions and
# obstime do not change the separations
fstar_coords = SkyCoord(flarestars['ra(deg)'].astype(float)*un.degree,
                        flarestars['dec(deg)'].astype(float)*un.degree,
                        pm_ra_cosdec=(np.array(flarestars['pmra(arcsec/yr)']).astype(float)*un.arcsec/un.yr),
                        pm_dec=(np.array(flarestars['pmdec(arcsec/yr)']).astype(float)*un.arcsec/un.yr),
                        obstime=Time('2019-01-01T00:00:00.00'))

# Read in the LSP catalogue from the same
# directory that it was saved to in step 2
lsps = pd.read_csv('{}MeerKAT_LSP_AllPointings.csv'.format(path))
# Remove MeerTIME, since these coordinates
# are educated guesses, and the integration
# time per pointing is very short
lsps_nomeertime = lsps[lsps['LSP'] != 'pulsars']
# Get the LSP coordinates
lsp_coords = SkyCoord(lsps_nomeertime['RA(deg)'].astype(float)*un.degree,
                      lsps_nomeertime['DEC(deg)'].astype(float)*un.degree,
                      obstime=Time('2019-01-01T00:00:00.00'))

In [None]:
# Set up the columns for your final
# data frame by combining the flare star
# (FSTAR) and LSP (LSP) file
# columns
cols = ['FSTAR '+col for col in flarestars.columns]
for col in lsps.columns:
    if '#' in col:
        cols.append('LSP index')
    elif 'LSP' in col:
        cols.append('LSP')
    else:
        cols.append('LSP '+col.strip())
cols.append('min_sep(deg)')

# The same epoch is attached to every pointing
pointing_epoch = Time('2019-01-01T00:00:00.00')

matched_sources = []
# For each LSP pointing, check which
# stars are within the primary beam
# at the centre of the observing band
for index, row in lsps_nomeertime.iterrows():
    lsp_coord = SkyCoord(row['RA(deg)']*un.degree,
                         row['DEC(deg)']*un.degree,
                         obstime=pointing_epoch)
    beam_size = float(row['Beam_FWHM_Diameter(deg)'])
    seps = lsp_coord.separation(fstar_coords)
    # A star is in the beam when it is closer to
    # the phase centre than half the beam diameter
    in_beam = np.where(seps.deg < 0.5*beam_size)[0]

    for b in in_beam:
        # Look the star up once, not once per column
        fstar_b = flarestars.iloc[b]
        # Star columns, then pointing columns, then
        # the separation - all stored as strings
        source_info = [str(fstar_b[col]) for col in flarestars.columns]
        source_info.extend(str(row[col]) for col in lsps.columns)
        source_info.append(str(seps.deg[b]))
        matched_sources.append(source_info)

# Take your matched sources and put
# them in a data frame. Building it from the
# list of rows gives an empty table with the
# right columns when there are no matches
fstar_lsps = pd.DataFrame(matched_sources, columns=cols)

# Save that data frame
fstar_lsps.to_csv('{}FStar_LSP_Matches.csv'.format(path), index=False)

fstar_lsps is the final data frame with all of the matches. But maybe you want just the unique stars that are in at least one LSP. Then you can use drop_duplicates to get rid of copies of the same star.

In [None]:
# A star is identified by its position; keep
# the first match of each one
position_cols = ['FSTAR ra(deg)', 'FSTAR dec(deg)']
unique_fstars = fstar_lsps.drop_duplicates(subset=position_cols, keep='first')

Now you can, for example, have a look at just the SIMBAD RS CVns.

In [None]:
# Show the unique stars that come from the
# SIMBAD RS CVn catalogue
unique_fstars.loc[unique_fstars['FSTAR Catalogue'] == 'SimbadRSCVns']

Or you could have a look at all the matches for a single LSP, such as ThunderKAT.

In [None]:
# Show every match for ThunderKAT. The full
# table is used here: in unique_fstars a star
# that is also in the field of another LSP
# may only be listed under that other LSP
fstar_lsps[fstar_lsps['LSP'] == 'ThunderKAT']