In [52]:
!pip install ipywidgets
!pip install pandas
!pip install astropy
!pip install numpy
!pip install jupyter_ui_poll

In [53]:
## SETTINGS

search_radius = 1/3600.0 #in degrees

In [54]:
import math
import numpy as np
import pandas as pd
import astropy


import astroquery
from astroquery.ipac.ned import Ned
import pandas as pd

def query_ned_by_name(name):
    result_table = None
    success = False
    msg = "Match Found!"
    try:
        result_table = Ned.query_object(name) #astropy.table.Table
        success = True
    except astroquery.exceptions.RemoteServiceError as error:
        msg = error
    except Exception as error:
        msg = error
    return result_table, success, msg


def query_ned_by_coords(pass_ra, pass_dec, pass_radius): # uses deg
    result_table = None
    success = False
    msg = "Match Found!"
    try:

        position = SkyCoord(ra=pass_ra, dec=pass_dec, unit=(u.deg, u.deg), frame='icrs')
        result_table = Ned.query_region(position, radius=pass_radius * u.deg)
        success = True
    except astroquery.exceptions.RemoteServiceError as error:
        msg = error
    except Exception as error:
        msg = error
    return result_table, success, msg



def is_within_rad(ra_1, ra_2, dec_1, dec_2):
    return (search_rad / 3600.0) > (math.sqrt((math.pow((((ra_1 * (math.pi / 180.0)) - (ra_2 * (math.pi / 180.0))) * (math.cos((dec_1 * (math.pi / 180.0))))), 2)) + math.pow(((dec_1 * (math.pi / 180.0)) - (dec_2 * (math.pi / 180.0))), 2))) * (180.0 / math.pi)



# load csv files
all_surveyed_path = './data/ConstantinResearchGroup.mcpikeej.allsurveyed_noduplicates_rudmingb.csv'
masers_path = './data/MCPMaser2020_rudmingb.csv'

all_surveyed = pd.read_csv(all_surveyed_path)
masers = pd.read_csv(masers_path)





## All Surveyed Data Sample: 

In [55]:
print(all_surveyed)

      count2019_01 source_name_01_y       ra_01     dec_01
0                1   RXSJ00001+0523    0.049083   5.388170
1                2      KUG2358+330    0.242083  33.343900
2                3  0001233+4733537    0.347083  47.564900
3                5         NGC-7806    0.375250  31.441900
4                6  0001383+2329011    0.409667  23.483600
...            ...              ...         ...        ...
4213          6348    MCG-03.01.002  359.199000 -16.509600
4214          6350        UGC-12864  359.350000  30.992200
4215          6351    235744+003919  359.432000   0.655194
4216          6352         UGC12879  359.755000  18.834200
4217          6353           IC1524  359.795000  -4.126920

[4218 rows x 4 columns]


## Masers Data Sample: 

In [56]:
print(masers)

     count2019    Source_Name  RA_(J2000)  Dec_(J2000) Vsys_(km/s)   Lum Class
0            1         NGC_23    2.473333    25.923060        4565   180     ?
1            2         NGC_17    2.777083   -12.107220        5931   <10     ?
2            3     J0011-0054    2.938333    -0.908611       14384   527     ?
3            4     J0027+4544    6.855417    45.740830       12003   507     ?
4            5           IC10    5.074584    59.308610        -350  0.02    SF
..         ...            ...         ...          ...         ...   ...   ...
175        176       NGC_7479  346.236100    12.322890        2381    19     ?
176        177        IC_1481  349.854600     5.906111        6120   320     ?
177        178  MCG+05-55-041  354.318500    31.800720        9356    93     ?
178        179       NGC_7738  356.008300     0.516667        6762   468     ?
179        180    CGCG498-038  358.934200    30.212220        9240   268     ?

[180 rows x 7 columns]


## Column Identifiers
Use the data sample to record the relevant column headers 

In [57]:
## create a key-lookup table
# Column identifiers for all_surveyed
all_surveyed_cols = dict(
ra = 'ra_01',
dec = 'dec_01',
uid = 'count2019_01',
name = 'source_name_01_y')

# Column identifiers for masers
masers_cols = dict(
ra = 'RA_(J2000)',
dec = 'Dec_(J2000)',
uid = 'count2019_01',
name = 'source_name_01_y')

# print(masers_cols["ra"])

## Cleansing the data

First, we want to make sure that the data is clean. We will use NED to make sure that each object is in fact unique in its table, as well as we can.
If the data has already been cleansed, or if you want to skip cleansing the data, set cleansed_path to the relative path of the data you want to use and set Cleanse_data to False

In [58]:
Cleanse_data = True # if true, saved cleansed data will be overwitten. If False, saved data will be loaded and the cleansing process skipped.
                    # data is only overrwitten at the end of the cleansing processes, so feel free to abort before then.
cleansed_path = './cleansed_data/ConstantinResearchGroup.mcpikeej.allsurveyed_noduplicates_rudmingb.csv'


In [72]:
from astropy.coordinates import SkyCoord  # High-level coordinates
from astropy.coordinates import ICRS, Galactic, FK4, FK5  # Low-level frames
from astroquery.ipac.ned import Ned
from astropy.table import Table
from astropy import units as u
from jupyter_ui_poll import ui_events
import sys

import ipywidgets as widgets
from ipywidgets import Button
from IPython.display import display
from astropy.table import Table

def cleanse_data(data,keydict):

    
    ra_key = keydict['ra']
    dec_key = keydict['dec']
    uid_key = keydict['uid']
    name_key = keydict['name']

    # first, get objects to compare; find sets of close objects

    ra = data[ra_key].values
    dec = data[dec_key].values

    coords = SkyCoord(ra*u.deg, dec*u.deg)

    # Set the search radius
    seplimit = search_radius * u.deg   

    # Perform the search within the same dataset
    idx1, idx2, sep2d, dist3d = astropy.coordinates.search_around_sky(coords, coords, seplimit)

    sep2d = np.round(sep2d.value,decimals=13) * u.deg #the distance calculations gives different digits at around 15 decimals. We will use separation to eliminate double entries, so some rounding is needed
    # Filter out self-matching results
    mask = sep2d != 0.0 * u.deg
    idx1 = idx1[mask]
    idx2 = idx2[mask]
    sep2d = sep2d[mask]

    dupidx = []
    dupidx2 = []
    dup_sep2d = []

    # filter out one result of each double (which has the exact same separation)
    for i in range(len(idx1)):
        if not(sep2d[i] in dup_sep2d):
            dupidx2.insert(len(dupidx2)+1,idx2[i])
            dupidx.insert(len(dupidx)+1,idx1[i])
            dup_sep2d.insert(len(dup_sep2d)+1,sep2d[i])

    print(dupidx)

    # for i in range(len(dupidx)):
    #     print(f"Duplicate found: idx1={dupidx[i]}, idx2={dupidx2[i]}, sep2d={dup_sep2d[i]}")
    print(str(len(dupidx)+1) + ' "duplicates" found.\nBeginning Analysis ...')

    sep2d = dup_sep2d
    ## Now I have a list of duplicates ... idx1 is the first match of the duplicate, idx2 is the second match index of the duplicate, sep2d is the angular separation of the duplicates.
    ## Next, I need to determine whether each duplicate is in fact a duplicate or not using NED

    is_dup = [] ## this is a flag determining whether I think the match is a dup based on NED data
    for i in range(len(dupidx)): 
        is_dup.insert(i,False) # init with false, we will start with the assumption that a potential duplicate is not a duplicate. If Ned confirms it is, then let it be a duplicate.
        
        ## step 1: search NED for the two object names. If there is a definite match, then the object is a duplicate. If there is not an obvious match then we will require manual intervention.
        idxindata1 = dupidx[i]
        idxindata2 = dupidx2[i]


        obj1_data = data.iloc[idxindata1]
        obj2_data = data.iloc[idxindata2]

        obj1_name = obj1_data[name_key]
        obj2_name = obj2_data[name_key]

        print('###########################\n\n')
        print(f'Comparing record [{idxindata1}] ({obj1_name}) against record [{idxindata2}] ({obj2_name})...')
        print("Performing NED Name Search...")

        result_table1, success, msg = query_ned_by_name(obj1_name)
        result_table2, success, msg = query_ned_by_name(obj2_name)
        finished = False
        if type(result_table1)==Table and type(result_table2)==Table:
            if len(result_table1) > 0 and len(result_table2) > 0:
                if result_table1['Object Name'][0] == result_table2['Object Name'][0]:
                    print('Name Match Successful, this is a duplicate!\n\n')
                    is_dup[i] = True
                    finished = True
        if not(finished):
            print('Name Match Unsuccessful; Human Intervention Required.\n')

            result_table_rad_obj1, success, msg = query_ned_by_coords(obj1_data[ra_key],obj1_data[dec_key], search_radius)
            result_table_rad_obj2, success, msg = query_ned_by_coords(obj2_data[ra_key],obj2_data[dec_key], search_radius)

            
            #########   My UI    ###########
            # print("        #############################################################")
            # print("        #")
            # print("        #    Object 1:")
            # print(f"        #        Name: [{obj1_data[uid_key]}] {obj1_name}")
            # print(f"        #        RA  : {obj1_data[ra_key]}")
            # print(f"        #        DEC : {obj1_data[dec_key]}")
            # print("        #")
            # print("        #############################################################")
            # print("        #")
            # print("        #    Object 2:")
            # print(f"        #        Name: [{obj2_data[uid_key]}] {obj2_name}")
            # print(f"        #        RA  : {obj2_data[ra_key]}")
            # print(f"        #        DEC : {obj2_data[dec_key]}")
            # print("        #")
            # print("        #############################################################")
            # print("        #")
            # print("        #   The Following are results from a NED search around object 1's position:")
            # result_table_rad.pprint(max_lines=-1, max_width=-1)
            # print("        #")
            # print("        #")
            # print("        #    Are these two objects duplicates of each other?")
            # userinput = input("        #   Enter yes or no or quit: ")
            # print(f"        #        USER ENTERED : {userinput}")
            # if userinput == 'yes':
            #     is_dup[i] = True
            # if userinput == 'quit':
            #     sys.exit("Program terminated by user")
            #########   End My UI    ###########

            ########  AI Generated User Interface   #########

            user_input = ''

            # Create GUI widgets
            total_obj = str(len(dupidx)+1)
            this_obj = str(i+1)
            title_widget = widgets.HTML(value=f"<h1>Object Comparison {this_obj} / {total_obj} </h1>")
            obj1_widget = widgets.HTML()
            obj1_rad_search = widgets.HTML()
            obj2_widget = widgets.HTML()
            obj2_rad_search = widgets.HTML()
            question_widget = widgets.HTML(value="<h3>Are these two objects duplicates of each other?</h3>")
            button_yes = widgets.Button(description='Yes')
            button_no = widgets.Button(description='No')
            button_quit = widgets.Button(description='Quit')
            waiting_widget = widgets.HTML(value="<h3>\</h3>")


            # Create layout for the widgets
            container = widgets.VBox([
                title_widget,
                obj1_widget,
                obj1_rad_search,
                obj2_widget,
                obj2_rad_search,
                question_widget,
                widgets.HBox([button_yes, button_no, button_quit]),
                waiting_widget
            ])

            # Display the GUI
            display(container)

            # Event handlers for button clicks
            def on_yes_clicked(b):
                nonlocal user_input
                user_input = 'yes'
                b.desciption = 'yes 👍'

            def on_no_clicked(b):
                nonlocal user_input
                user_input = 'no'
                b.desciption = 'no 👍'

            def on_quit_clicked(b):
                global user_input
                user_input = 'quit'

            # Assign event handlers to buttons
            button_yes.on_click(on_yes_clicked)
            button_no.on_click(on_no_clicked)
            button_quit.on_click(on_quit_clicked)

            # Update the widgets with the information
            obj1_widget.value = f"<h3>Object 1 (from our Data table):<br>Name: [{obj1_data[uid_key]}] {obj1_name}<br>RA: {obj1_data[ra_key]}<br>DEC: {obj1_data[dec_key]}</p>"
            obj1_rad_search.value = f"<p>The Following are results from a NED search around object 1's position with radius of the search_radius:</p>{result_table_rad_obj1._repr_html_()}"

            obj2_widget.value = f"<h3>Object 2:<br>Name: [{obj2_data[uid_key]}] {obj2_name}<br>RA: {obj2_data[ra_key]}<br>DEC: {obj2_data[dec_key]}</p>"
            obj2_rad_search.value = f"<p>The Following are results from a NED search around object 2's position with radius of the search_radius:</p>{result_table_rad_obj2._repr_html_()}"

            waiting_icons = ['-','\\','|','/']
            j = 0
            # # Disable execution until a button is clicked
            with ui_events() as poll:
                while user_input == '':
                    poll(10)          # React to UI events (upto 10 at a time)
                    if user_input == '':
                        waiting_widget.value = "<h3>"+ waiting_icons[j]+"</h3>"
                        j+=1
                        if j==4:
                            j=0
                        time.sleep(0.1)


            if user_input == 'yes':
                is_dup[i] = True
            if user_input == 'quit':
                sys.exit("Program terminated by user")
            button_yes.disabled = True
            button_no.disabled = True
            button_quit.disabled = True
            waiting_widget.value = 'User entered:'+ user_input



            ########  End AI Generated User Interface   #########

    
    # Still need to save and return the cleaned data. Also, if there is exactly 1 object returned by the NED search, is it safe to assume it is a duplicate?

    # create a new cleansed dataset

    new_rows = []

    for idxofdata, row in data.iterrows():
        insert_data = False
        if not(idxofdata in dupidx) and not(idxofdata in dupidx2):
            insert_data = True
        else:
            if idxofdata in dupidx:
                idxofduprecord = dupidx.index(idxofdata)
            if idxofdata in dupidx2:
                idxofduprecord = dupidx2.index(idxofdata)
            dup_record_is_dup = is_dup[idxofduprecord]
            if dup_record_is_dup and idxofdata in dupidx:
                insert_data = True
            if not(dup_record_is_dup):
                insert_data = True
        if insert_data:
            new_rows.append(row)
    
    cleansed_data = pd.DataFrame(new_rows)

    return cleansed_data



 

        
if Cleanse_data:
    cleansed_data = cleanse_data(all_surveyed,all_surveyed_cols)
    cleansed_data.to_csv(cleansed_path, index=False)
    print('\n\nSaved cleansed data to '+cleansed_path)
    print('Be sure to change Cleanse_Data to False next time!')
else:
    cleansed_data = pd.read_csv(cleansed_path)
    print('Cleansing Skipped, and Cleansed data loaded from '+cleansed_path)

    

# else


[5, 33, 37, 51, 92, 106, 158, 200, 204, 216, 220, 258, 276, 299, 374, 432, 450, 452, 544, 601, 659, 673, 686, 748, 792, 824, 932, 1301, 2337, 3194, 3231]
32 "duplicates" found.
Beginning Analysis ...
###########################


Comparing record [5] (0001419+2329452) against record [6] (UGC12915)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 1 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value=''…

###########################


Comparing record [33] (NGC-21) against record [34] (NGC-29)...
Performing NED Name Search...
Name Match Successful, this is a duplicate!


###########################


Comparing record [37] (J0011-0054) against record [38] (001145-005430)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 3 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value=''…

###########################


Comparing record [51] (IRAS00160-0719) against record [52] (2MASXJ00183589-)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 4 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value=''…

###########################


Comparing record [92] (003443-000226) against record [93] (FGC0061)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 5 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value=''…

###########################


Comparing record [106] (Mrk955) against record [107] (003736+001650)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 6 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value=''…

###########################


Comparing record [158] (005329-084604) against record [159] (NGC291)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 7 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value=''…

###########################


Comparing record [200] (HS0106+1304) against record [201] (3C033)...
Performing NED Name Search...
Name Match Successful, this is a duplicate!


###########################


Comparing record [204] (J0109-0332) against record [205] (2MASXJ01094510-)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 9 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value=''…

###########################


Comparing record [216] (MARK975) against record [217] (UGC774)...
Performing NED Name Search...
Name Match Successful, this is a duplicate!


###########################


Comparing record [220] (011429+001254) against record [221] (2MASXJ01142985+)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 11 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [258] (NGC526a) against record [259] (NGC0526A)...
Performing NED Name Search...
Name Match Successful, this is a duplicate!


###########################


Comparing record [276] (012601-041756) against record [277] (J0126-0417)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 13 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [299] (013402-010432) against record [300] (UGC-1120)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 14 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [374] (NGC0708) against record [375] (NGC708)...
Performing NED Name Search...
Name Match Successful, this is a duplicate!


###########################


Comparing record [432] (NGC0833) against record [433] (020920-100759)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 16 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [450] (J0214-0016) against record [451] (021405-001637)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 17 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [452] (Mrk590) against record [453] (021433-004600)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 18 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [544] (024240-000048) against record [546] (NGC1068test)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 19 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [601] (NGC1144) against record [602] (NGC1142)...
Performing NED Name Search...
Name Match Successful, this is a duplicate!


###########################


Comparing record [659] (031702+011518) against record [660] (UGC02638)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 21 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [673] (3C84) against record [674] (NGC1275)...
Performing NED Name Search...
Name Match Successful, this is a duplicate!


###########################


Comparing record [686] (032258+000315) against record [687] (032258.0+000315)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 23 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [748] (034545-071526) against record [749] (034545-071527)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 24 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [792] (0414502+0146013) against record [793] (0414502)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 25 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [824] (J0437+2456) against record [825] (04370366)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 26 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [932] (EXO055620-3820.) against record [933] (J0558-3820)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 27 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [1301] (J0847-0022) against record [1302] (2MASXJ08474769-)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 28 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [2337] (120057+064823) against record [2338] (CGCG041-020)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 29 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [3194] (NGC5765b) against record [3195] (NGC5765b-off1)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 30 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

###########################


Comparing record [3231] (150000+015328) against record [3232] (NGC5806)...
Performing NED Name Search...
Name Match Unsuccessful; Human Intervention Required.



VBox(children=(HTML(value='<h1>Object Comparison 31 / 32 </h1>'), HTML(value=''), HTML(value=''), HTML(value='…

##  