In [139]:
## SETTINGS

# Angular matching radius used throughout the notebook.
# Unit: degrees (3600 arcseconds per degree, so this is one arcsecond).
search_radius = 1.0 / 3600.0

In [140]:
import math

import astropy
import astropy.units as u
import astroquery
import numpy as np
import pandas as pd
from astropy.coordinates import SkyCoord
from astroquery.ipac.ned import Ned

def query_ned_by_name(name):
    """Query NED for a single object by its name.

    Never raises: any error from the lookup is captured and handed back.

    Args:
        name: Object name passed straight to ``Ned.query_object``.

    Returns:
        A ``(result_table, success, msg)`` tuple.  On success this is
        ``(table, True, "Match Found!")`` where ``table`` is an
        ``astropy.table.Table``; on failure it is ``(None, False, error)``
        where ``error`` is the exception that was raised.
    """
    try:
        ned_table = Ned.query_object(name)  # astropy.table.Table
    except (astroquery.exceptions.RemoteServiceError, Exception) as error:
        # Remote-service failures and every other error are reported the same way.
        return None, False, error
    return ned_table, True, "Match Found!"


def query_ned_by_coords(pass_ra, pass_dec, pass_radius): # uses deg
    """Query NED for all objects within a radius of a sky position.

    Relies on ``SkyCoord`` and ``u`` (``astropy.units``) being imported at the
    top of the notebook.  Because every exception is captured here, a missing
    import would otherwise be reported as an ordinary failed query instead of
    raising.

    Args:
        pass_ra: Right ascension of the search centre, in degrees (ICRS).
        pass_dec: Declination of the search centre, in degrees (ICRS).
        pass_radius: Search radius, in degrees.

    Returns:
        A ``(result_table, success, msg)`` tuple.  On success this is
        ``(table, True, "Match Found!")``; on failure it is
        ``(None, False, error)`` where ``error`` is the exception raised.
    """
    try:
        position = SkyCoord(ra=pass_ra, dec=pass_dec, unit=(u.deg, u.deg), frame='icrs')
        result_table = Ned.query_region(position, radius=pass_radius * u.deg)
    except (astroquery.exceptions.RemoteServiceError, Exception) as error:
        # Remote-service failures and every other error are reported the same way.
        return None, False, error
    return result_table, True, "Match Found!"



def is_within_rad(ra_1, ra_2, dec_1, dec_2, radius_deg=None):
    """Tell whether two sky positions lie closer together than a radius.

    Uses the flat-sky (small-angle) approximation: the RA difference is scaled
    by ``cos(dec_1)`` and combined with the Dec difference in quadrature.

    Args:
        ra_1, ra_2: Right ascensions of the two positions, in degrees.
        dec_1, dec_2: Declinations of the two positions, in degrees.
        radius_deg: Matching radius in degrees.  Defaults to the notebook-wide
            ``search_radius`` setting, which is already expressed in degrees.

    Returns:
        True when the approximate separation is strictly smaller than the radius.
    """
    if radius_deg is None:
        # The setting is named search_radius and is in degrees, so it must not
        # be divided by 3600 again.
        radius_deg = search_radius

    # Wrap the RA difference into [-180, 180) so that positions on either side
    # of RA = 0/360 are recognised as neighbours.
    delta_ra = (ra_1 - ra_2 + 180.0) % 360.0 - 180.0
    delta_dec = dec_1 - dec_2

    separation_deg = math.hypot(delta_ra * math.cos(math.radians(dec_1)), delta_dec)
    return radius_deg > separation_deg



# Input catalogues (paths are relative to the notebook's working directory).
all_surveyed_path = './data/ConstantinResearchGroup.mcpikeej.allsurveyed_noduplicates_rudmingb.csv'
masers_path = './data/MCPMaser2020_rudmingb.csv'

# Read each catalogue into its own DataFrame.
all_surveyed = pd.read_csv(filepath_or_buffer=all_surveyed_path)
masers = pd.read_csv(filepath_or_buffer=masers_path)





## All Surveyed Data Sample: 

In [141]:
# Preview the all-surveyed catalogue to see its column headers and row count.
print(all_surveyed)

      count2019_01 source_name_01_y       ra_01     dec_01
0                1   RXSJ00001+0523    0.049083   5.388170
1                2      KUG2358+330    0.242083  33.343900
2                3  0001233+4733537    0.347083  47.564900
3                5         NGC-7806    0.375250  31.441900
4                6  0001383+2329011    0.409667  23.483600
...            ...              ...         ...        ...
4213          6348    MCG-03.01.002  359.199000 -16.509600
4214          6350        UGC-12864  359.350000  30.992200
4215          6351    235744+003919  359.432000   0.655194
4216          6352         UGC12879  359.755000  18.834200
4217          6353           IC1524  359.795000  -4.126920

[4218 rows x 4 columns]


## Masers Data Sample: 

In [142]:
# Preview the masers catalogue to see its column headers and row count.
print(masers)

     count2019    Source_Name  RA_(J2000)  Dec_(J2000) Vsys_(km/s)   Lum Class
0            1         NGC_23    2.473333    25.923060        4565   180     ?
1            2         NGC_17    2.777083   -12.107220        5931   <10     ?
2            3     J0011-0054    2.938333    -0.908611       14384   527     ?
3            4     J0027+4544    6.855417    45.740830       12003   507     ?
4            5           IC10    5.074584    59.308610        -350  0.02    SF
..         ...            ...         ...          ...         ...   ...   ...
175        176       NGC_7479  346.236100    12.322890        2381    19     ?
176        177        IC_1481  349.854600     5.906111        6120   320     ?
177        178  MCG+05-55-041  354.318500    31.800720        9356    93     ?
178        179       NGC_7738  356.008300     0.516667        6762   468     ?
179        180    CGCG498-038  358.934200    30.212220        9240   268     ?

[180 rows x 7 columns]


## Column Identifiers
Use the data samples above to record the relevant column headers for each table.

In [143]:
## create a key-lookup table
# Each dict maps a generic role (ra, dec, uid, name) to the actual column
# header used by that table, so downstream code can be table-agnostic.

# Column identifiers for all_surveyed
all_surveyed_cols = dict(
    ra='ra_01',
    dec='dec_01',
    uid='count2019_01',
    name='source_name_01_y',
)

# Column identifiers for masers
# The masers table has its own id/name headers (count2019, Source_Name);
# it does not share the all_surveyed ones.
masers_cols = dict(
    ra='RA_(J2000)',
    dec='Dec_(J2000)',
    uid='count2019',
    name='Source_Name',
)

# print(masers_cols["ra"])

## Cleansing the data

First, we want to make sure that the data is clean. We will use NED (the NASA/IPAC Extragalactic Database) to verify, as best we can, that each object appears only once in its table.

In [155]:
from astropy.coordinates import SkyCoord  # High-level coordinates
from astropy.coordinates import ICRS, Galactic, FK4, FK5  # Low-level frames
from astroquery.ipac.ned import Ned
from astropy.table import Table

def _find_close_pairs(coords, seplimit):
    """Return (idx1, idx2, sep2d) for each unordered pair of distinct rows within seplimit."""
    idx1, idx2, sep2d, _ = astropy.coordinates.search_around_sky(coords, coords, seplimit)
    # Matching a catalogue against itself yields every row paired with itself
    # and every real pair twice, as (a, b) and (b, a).  Keeping idx1 < idx2
    # removes both, without comparing floating-point separations and without
    # discarding distinct rows that share exactly the same coordinates.
    keep = idx1 < idx2
    return idx1[keep], idx2[keep], sep2d[keep]


def _ned_names_match(name_1, name_2):
    """Return True when NED resolves both names to the same primary object name."""
    table_1, _, _ = query_ned_by_name(name_1)
    table_2, _, _ = query_ned_by_name(name_2)
    if not (isinstance(table_1, Table) and isinstance(table_2, Table)):
        return False
    if len(table_1) == 0 or len(table_2) == 0:
        return False
    return bool(table_1['Object Name'][0] == table_2['Object Name'][0])


def _print_object_card(label, record, uid_key, name_key, ra_key, dec_key):
    """Print the identifier, name and coordinates of one record of a candidate pair."""
    print("        #")
    print(f"        #    {label}:")
    print(f"        #        Name: [{record[uid_key]}] {record[name_key]}")
    print(f"        #        RA  : {record[ra_key]}")
    print(f"        #        DEC : {record[dec_key]}")
    print("        #")
    print("        #############################################################")


def _ask_user_if_duplicate(obj1_data, obj2_data, uid_key, name_key, ra_key, dec_key, radius_deg):
    """Show both records plus a NED region search around object 1, then ask the user."""
    result_table_rad, _, msg = query_ned_by_coords(obj1_data[ra_key], obj1_data[dec_key], radius_deg)

    print("        #############################################################")
    _print_object_card("Object 1", obj1_data, uid_key, name_key, ra_key, dec_key)
    _print_object_card("Object 2", obj2_data, uid_key, name_key, ra_key, dec_key)
    print("        #")
    print("        #   The Following are results from a NED search around object 1's position:")
    if result_table_rad is None:
        # The region query failed; report why instead of crashing on None.pprint().
        print(f"        #   (NED region search failed: {msg})")
    else:
        result_table_rad.pprint(max_lines=-1, max_width=-1)
    print("        #")
    print("        #")
    print("        #    Are these two objects duplicates of each other?")
    userinput = input("        #   Enter yes or no: ")
    print(f"        #        USER ENTERED : {userinput}")
    return userinput.strip().lower() in ('yes', 'y')


def cleanse_data(data, keydict, radius_deg=None):
    """Find records that lie within the search radius of each other and decide,
    with the help of NED and the user, which of them are true duplicates.

    For every close pair, both names are looked up in NED.  If NED resolves
    them to the same primary object name the pair is accepted as a duplicate
    automatically; otherwise both records and a NED region search around the
    first one are printed and the user is asked to decide.

    Args:
        data: DataFrame holding the catalogue.  It is not modified.
        keydict: Mapping with the keys 'ra', 'dec', 'uid' and 'name' giving the
            corresponding column headers of ``data``.
        radius_deg: Matching radius in degrees.  Defaults to the notebook-wide
            ``search_radius`` setting.

    Returns:
        A copy of ``data`` in which the second record of every confirmed
        duplicate pair has been dropped.
        NOTE(review): keeping the first record of a pair is a choice made here;
        confirm it matches the intended cleaning policy.
    """
    if radius_deg is None:
        radius_deg = search_radius

    ra_key = keydict['ra']
    dec_key = keydict['dec']
    uid_key = keydict['uid']
    name_key = keydict['name']

    # first, get objects to compare; find sets of close objects
    coords = SkyCoord(data[ra_key].values * u.deg, data[dec_key].values * u.deg)
    seplimit = radius_deg * u.deg
    dupidx, dupidx2, _ = _find_close_pairs(coords, seplimit)

    print(str(len(dupidx)) + ' "duplicates" found.\nBeginning Analysis ...')

    # Positional mask over the rows of ``data``; confirmed duplicates are cleared.
    keep_row = np.ones(len(data), dtype=bool)

    for first, second in zip(dupidx, dupidx2):
        idxindata1 = int(first)
        idxindata2 = int(second)

        obj1_data = data.iloc[idxindata1]
        obj2_data = data.iloc[idxindata2]

        obj1_name = obj1_data[name_key]
        obj2_name = obj2_data[name_key]

        print('###########################\n\n')
        print(f'Comparing record [{idxindata1}] ({obj1_name}) against record [{idxindata2}] ({obj2_name})...')
        print("Performing NED Name Search...")

        # A candidate pair is assumed NOT to be a duplicate until NED or the
        # user confirms that it is.
        if _ned_names_match(obj1_name, obj2_name):
            print('Name Match Successful, this is a duplicate!\n\n')
            is_dup = True
        else:
            print('Name Match Unsuccessful; Human Intervention Required.\n')
            is_dup = _ask_user_if_duplicate(
                obj1_data, obj2_data, uid_key, name_key, ra_key, dec_key, radius_deg
            )

        if is_dup:
            keep_row[idxindata2] = False

    # Open question: if the NED region search returns exactly 1 object, is it
    # safe to assume the pair is a duplicate without asking the user?
    return data[keep_row].copy()








        

        

    
# Run the duplicate check on the all-surveyed catalogue.  This queries NED and
# prompts for a yes/no answer whenever the NED name lookup is inconclusive.
cleanse_data(all_surveyed,all_surveyed_cols)


31 "duplicates" found.
Beginning Analysis ...
###########################


Comparing record [5] (0001419+2329452) against record [6] (UGC12915)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.

        #############################################################
        #
        #    Object 1:
        #        Name: [7] 0001419+2329452
        #        RA  : 0.424583
        #        DEC : 23.4958
        #
        #############################################################
        #
        #    Object 2:
        #        Name: [8] UGC12915
        #        RA  : 0.424667
        #        DEC : 23.4959
        #
        #############################################################
        #
        #   The Following are results from a NED search around object 1's position:
No. Object Name     RA        DEC     Type  Velocity   Redshift  Redshift Flag Magnitude and Filter Separation References Notes Photometry Points Positions Redshift Points 

##  

In [None]:
# from astropy.coordinates import SkyCoord
# from astropy import units as u

# # Define the coordinates for searching
# ra = [10.0, 20.0, 30.0, 40.0,10.0005,10.0000001]
# dec = [0.0, 0.5, 1.0, 0.5,0.0005,0.0000001]
# coords = SkyCoord(ra*u.deg, dec*u.deg)

# # Set the search radius
# seplimit = 0.5/3600.0 * u.deg

# # Perform the search within the same dataset
# idx1, idx2, sep2d, dist3d = astropy.coordinates.search_around_sky(coords, coords, seplimit)

# sep2d = np.round(sep2d.value,decimals=15) * u.deg
# # Filter out self-matching results
# mask = sep2d != 0.0 * u.deg
# idx1 = idx1[mask]
# idx2 = idx2[mask]
# sep2d = sep2d[mask]

# dupidx = []
# dupidx2 = []
# dup_sep2d = []

# dupidx = idx1
# dupidx2 = idx2
# dup_sep2d = sep2d

# # for i in range(len(idx1)):
# #     if not(sep2d[i] in dup_sep2d):
# #         dupidx2.insert(len(dupidx2)+1,idx2[i])
# #         dupidx.insert(len(dupidx)+1,idx1[i])
# #         dup_sep2d.insert(len(dup_sep2d)+1,sep2d[i])


# for i in range(len(dupidx)):
#     print(f"Duplicate found: idx1={dupidx[i]}, idx2={dupidx2[i]}, sep2d={dup_sep2d[i]}")
# print(dupidx2)

# Duplicate found: idx1=0, idx2=4, sep2d=0.0007071067811824923 deg, dist3d=0.0
# Duplicate found: idx1=0, idx2=5, sep2d=0.0014142135623368037 deg, dist3d=1.2341341494735253e-05
# Duplicate found: idx1=4, idx2=0, sep2d=0.0007071067811824923 deg, dist3d=2.4682682988508734e-05
# Duplicate found: idx1=4, idx2=5, sep2d=0.0007071067811543116 deg, dist3d=0.0
# Duplicate found: idx1=5, idx2=0, sep2d=0.001414213562336804 deg, dist3d=0.0
# Duplicate found: idx1=5, idx2=4, sep2d=0.0007071067811543117 deg, dist3d=0.0



from astroquery.ipac.ned import Ned
import astropy.units as u
from astropy.coordinates import SkyCoord
# Manual check of a NED region search at one fixed position, using the
# notebook-wide search radius (degrees).
position = SkyCoord(40.66963 * u.deg, -0.01328 * u.deg, frame='icrs')
result_table = Ned.query_region(position, radius=u.deg * search_radius)
print(result_table)


No.       Object Name           RA     ... Diameter Points Associations
                             degrees   ...                             
--- ----------------------- ---------- ... --------------- ------------
  1  NGC 1068:[GPA2005] NE4   40.66958 ...               0            0
  2  NGC 1068:[GPA2005] NE3   40.66958 ...               0            0
  3 2MASS J02424070-0000480   40.66958 ...               0            0
  4  NGC 1068:[GPA2005] NE1   40.66958 ...               0            0
  5             MESSIER 077   40.66963 ...              14            0
