In [140]:
import sys
import os 
from pathlib import Path
import pandas as pd
import json

# NOTE: "__file__" is a quoted string literal here, not the module attribute,
# so realpath() resolves it against the current working directory.
# project_path is therefore the PARENT of the directory the kernel runs in.
# NOTE(review): `sys` is imported above but never used in the visible cells; if
# `scripts` lives under project_path, a `sys.path` tweak may be missing - confirm.
project_path = Path(os.path.dirname(os.path.realpath("__file__"))).parent

from scripts.utils import log_df
from scripts.catalog_utils import (
format_ngc_catalog, 
merge_type_constellation, 
catalog_columns,
decToHMS
)

In [267]:
# Directory roots shared by the path constants below.
raw_data_dir = project_path / 'raw_data'
openngc_db_dir = raw_data_dir / 'OpenNGC' / 'database_files'
draft_dir = project_path / 'data' / 'draft'
catalogs_dir = project_path / 'data' / 'catalogs'

# --- raw inputs -------------------------------------------------------------
# (the "orginal" spelling is kept so existing references keep working)
orginal_openngc_path = openngc_db_dir / 'NGC.csv'
orginal_openngc_addendum_path = openngc_db_dir / 'addendum.csv'

openngc_path = raw_data_dir / 'OpenNGC.csv'

openngc_types_path = raw_data_dir / 'OpenNGC_types.csv'
mike_path = raw_data_dir / 'mike_camilleri_list.csv'

constellation_path = raw_data_dir / 'constellations.csv'
solar_system_path = raw_data_dir / 'moon_planets.csv'
hyg_path = raw_data_dir / 'HYG-Database' / 'hyg' / 'v3' / 'hyg_v35.csv'

# --- intermediate drafts ----------------------------------------------------
ngc_15_draft_path = draft_dir / 'openngc_15min_catalog_draft.csv'
ngc_draft_path = draft_dir / 'openngc_catalog_draft.csv'

# --- published catalogs -----------------------------------------------------
solar_system_catalog_path = catalogs_dir / 'moon_planets.csv'
solar_system_catalog_json_path = catalogs_dir / 'moon_planets.json'

stars_catalog_path = catalogs_dir / 'hyg_stars.csv'
dso_catalog_path = catalogs_dir / 'openngc_dso.csv'
dso_catalog_json_path = catalogs_dir / 'dso_catalog.json'

demo_path = draft_dir / 'demo_catalog.json'

In [142]:
# Columns requested (via `usecols`) when reading OpenNGC.csv.
openngc_columns = [
    'Name',
    'Type',
    'RA',
    'Dec',
    'Const',
    'MajAx',
    'MinAx',
    'V-Mag',
    'M',
    'NGC',
    'IC',
    'Common names',
]

# Columns requested (via `usecols`) when reading mike_camilleri_list.csv.
mike_columns = [
    'Catalog',
    'Name',
    'Name (no leading zero)',
    'Name (final)',
    'Common Name',
    'NGC/ID',
    "Width (')",
    "Height (')",
    'Mag',
    'Type',
    'RA',
    'Dec',
]

# Columns requested (via `usecols`) when reading the HYG star database.
hyg_columns = [
    'hip',
    'hd',
    'hr',
    'proper',
    'ra',
    'dec',
    'mag',
    'con',
]


## Create DSO Draft Catalog

In [210]:
# Read only the OpenNGC columns this notebook uses. 'M' gets the nullable
# integer dtype so blank cells become <NA> instead of forcing a float column.
ngc_df = pd.read_csv(openngc_path, usecols=openngc_columns, dtype={'M': 'Int64'})

# Discard the rows OpenNGC tags with Type 'Dup'.
not_duplicate = ngc_df['Type'] != 'Dup'
ngc_df = ngc_df[not_duplicate]

# Manual override of the common name for the row whose Messier number is 65.
is_m65 = ngc_df['M'] == 65
ngc_df.loc[is_m65, 'Common names'] = 'Leo Triplet'

log_df(ngc_df)
# (13340, 12)

(13340, 12)


Unnamed: 0,Name,Type,RA,Dec,Const,MajAx,MinAx,V-Mag,M,NGC,IC,Common names
0,IC0001,**,00:08:27.05,+27:43:03.6,Peg,,,,,,,
1,IC0002,G,00:11:00.88,-12:49:22.3,Cet,0.98,0.32,,,,,
2,IC0003,G,00:12:06.09,-00:24:54.8,Psc,0.93,0.67,,,,,
3,IC0004,G,00:13:26.94,+17:29:11.2,Peg,1.17,0.84,,,,,
4,IC0005,G,00:17:34.93,-09:32:36.1,Cet,0.99,0.66,,,,,


In [211]:
# Load mike_camilleri_list.csv, restricted to the columns named in mike_columns.
mike_df = pd.read_csv(mike_path, usecols=mike_columns)

# log_df is a project helper; judging by the rendered output it reports the
# frame's shape and shows the first rows.
log_df(mike_df)
# (180, 12)

(180, 12)


Unnamed: 0,Catalog,Name,Name (no leading zero),Name (final),NGC/ID,Type,Common Name,Mag,RA,Dec,Width ('),Height (')
0,Caldwell,C 102,C 102,C 102,I2602,Oc,Theta Car Cluster,1.9,10:43:12,-64.4,50.0,50.0
1,Caldwell,C 103,C 103,C 103,2070,Bn,Tarantula Nebula,,05:38:42,-69.1,40.0,25.0
2,Caldwell,C 106,C 106,C 106,104,Gc,47 Tucanae,4.0,00:24:06,-72.08,31.0,31.0
3,Caldwell,C 108,C 108,C 108,4372,Gc,,7.8,12:25:48,-72.67,19.0,19.0
4,Caldwell,C 014,C 14,C 14,869/884,Oc,"Double Cluster, h & chi Persei",4.3,02:20:00,57.13,30.0,30.0


In [212]:
# get rid of typos
mike_df.loc[(mike_df['NGC/ID'] == '-'), 'NGC/ID']  = pd.NA
mike_df.loc[(mike_df['NGC/ID'] == '2237-9'), 'NGC/ID']  = '2237'

# change names of objects that OpenNGC marks as duplicates
mike_df.loc[(mike_df['NGC/ID'] == '869/884'), 'NGC/ID']  = pd.NA # C 014
mike_df.loc[(mike_df['NGC/ID'] == '6992/5'), 'NGC/ID']  = '6992' # C 033
mike_df.loc[(mike_df['NGC/ID'] == '2244'), 'NGC/ID']  = '2239' # C 050
mike_df.loc[(mike_df['NGC/ID'] == 'Sh2-155'), 'NGC/ID']  = pd.NA # C 009
mike_df.loc[(mike_df['NGC/ID'] == 'I2118'), 'Name (no leading zero)']  = 'NGC 1909' # IC 2118
mike_df.loc[(mike_df['NGC/ID'] == 'I2118'), 'NGC/ID']  = '1909' # IC 2118
mike_df.loc[(mike_df['NGC/ID'] == 'I2169'), 'Name (no leading zero)']  = 'IC 447' # IC 2169
mike_df.loc[(mike_df['NGC/ID'] == 'I2169'), 'NGC/ID']  = 'I0447' # IC 2169
mike_df.loc[(mike_df['NGC/ID'] == '2527'), 'Name (no leading zero)']  = 'NGC 2520' # NGC 2527
mike_df.loc[(mike_df['NGC/ID'] == '2527'), 'NGC/ID']  = '2520' # NGC 2527
mike_df.loc[(mike_df['NGC/ID'] == '6383'), 'Name (no leading zero)']  = 'NGC 6374' # NGC 6383
mike_df.loc[(mike_df['NGC/ID'] == '6383'), 'NGC/ID']  = '6374' # NGC 6383

# add IC columns
mike_df.loc[(mike_df['NGC/ID'].notna()) & (mike_df['NGC/ID'].str.startswith('I')), 'IC']  = mike_df['NGC/ID']
mike_df['IC'] = mike_df['IC'].str.replace('I', '')
mike_df['IC name'] = 'IC ' + mike_df['IC'].str.strip()

# add NGC columns
mike_df.loc[(mike_df['NGC/ID'].notna()) & (mike_df['NGC/ID'].str.match('^(?![I])')), 'NGC']  = mike_df['NGC/ID']
mike_df['NGC name'] = 'NGC ' + mike_df['NGC'].str.strip()

mike_df['Name normalized']  = mike_df['Name (no leading zero)']

# manually set name for Messier, IC and NGC objects
mike_df.loc[(mike_df['Name (final)'] == 'IC 4715 = M 24'), 'Name normalized']  = 'M 24'
mike_df.loc[(mike_df['Name (final)'] == 'IC 4725 = M 25'), 'Name normalized']  = 'M 25'
mike_df.loc[(mike_df['Name (final)'] == 'NGC 2682 = M 67'), 'Name normalized']  = 'M 67'
mike_df.loc[(mike_df['Name (final)'] == 'NGC 6611 = M 16'), 'Name normalized']  = 'M 16'
mike_df.loc[(mike_df['Name (final)'] == 'NGC 6618 = M 17'), 'Name normalized']  = 'M 17'
mike_df.loc[(mike_df['Catalog'] == 'Caldwell') & (mike_df['IC'].notna()), 'Name normalized']  = mike_df['IC name']
mike_df.loc[(mike_df['Catalog'] == 'Caldwell') & (mike_df['NGC'].notna()), 'Name normalized']  =  mike_df['NGC name']

mike_df['mike'] = True

log_df(mike_df)
# (180, 18)

(180, 18)


Unnamed: 0,Catalog,Name,Name (no leading zero),Name (final),NGC/ID,Type,Common Name,Mag,RA,Dec,Width ('),Height ('),IC,IC name,NGC,NGC name,Name normalized,mike
0,Caldwell,C 102,C 102,C 102,I2602,Oc,Theta Car Cluster,1.9,10:43:12,-64.4,50.0,50.0,2602.0,IC 2602,,,IC 2602,True
1,Caldwell,C 103,C 103,C 103,2070,Bn,Tarantula Nebula,,05:38:42,-69.1,40.0,25.0,,,2070.0,NGC 2070,NGC 2070,True
2,Caldwell,C 106,C 106,C 106,104,Gc,47 Tucanae,4.0,00:24:06,-72.08,31.0,31.0,,,104.0,NGC 104,NGC 104,True
3,Caldwell,C 108,C 108,C 108,4372,Gc,,7.8,12:25:48,-72.67,19.0,19.0,,,4372.0,NGC 4372,NGC 4372,True
4,Caldwell,C 014,C 14,C 14,,Oc,"Double Cluster, h & chi Persei",4.3,02:20:00,57.13,30.0,30.0,,,,,C 14,True


In [213]:
# format_ngc_catalog is a project helper (scripts.catalog_utils). Judging by the
# logged output it adds 'Name catalog', 'Name number (with zeros)',
# 'Name normalized', 'IC name', 'M name' and 'Name number' - see the helper
# for the exact rules.
df = format_ngc_catalog(ngc_df)

log_df(df)
# (13340, 18)

(13340, 18)


Unnamed: 0,Name,Type,RA,Dec,Const,MajAx,MinAx,V-Mag,M,NGC,IC,Common names,Name catalog,Name number (with zeros),Name normalized,IC name,M name,Name number
0,IC0001,**,00:08:27.05,+27:43:03.6,Peg,,,,,,,,IC,1,IC 1,IC 1,,1
1,IC0002,G,00:11:00.88,-12:49:22.3,Cet,0.98,0.32,,,,,,IC,2,IC 2,IC 2,,2
2,IC0003,G,00:12:06.09,-00:24:54.8,Psc,0.93,0.67,,,,,,IC,3,IC 3,IC 3,,3
3,IC0004,G,00:13:26.94,+17:29:11.2,Peg,1.17,0.84,,,,,,IC,4,IC 4,IC 4,,4
4,IC0005,G,00:17:34.93,-09:32:36.1,Cet,0.99,0.66,,,,,,IC,5,IC 5,IC 5,,5


In [214]:
# Spot-check the name columns for an entry whose identifier has a letter suffix.
df.loc[
    df['Name'] == 'IC0359A',
    ['Name', 'M', 'Name catalog', 'Name number (with zeros)', 'Name number'],
]

Unnamed: 0,Name,M,Name catalog,Name number (with zeros),Name number
371,IC0359A,,IC,0359A,359


In [215]:
# Spot-check the name columns for an IC entry that also has a Messier number.
df.loc[
    df['Name'] == 'IC4715',
    ['Name', 'M', 'Name catalog', 'Name number (with zeros)', 'Name number'],
]

Unnamed: 0,Name,M,Name catalog,Name number (with zeros),Name number
4903,IC4715,24,M,4715,24


In [216]:
# Outer join on the shared 'Name normalized' key: keeps every OpenNGC row and
# every Mike row. Clashing Mike columns get the '_MIKE' suffix, OpenNGC's keep
# their names.
merge_df = df.merge(
    mike_df,
    on='Name normalized',
    how='outer',
    suffixes=(None, '_MIKE'),
)

# Back to the nullable integer dtype after the outer join.
merge_df['Name number'] = merge_df['Name number'].astype('Int64')
log_df(merge_df)
# (13349, 35)

(13349, 35)


Unnamed: 0,Name,Type,RA,Dec,Const,MajAx,MinAx,V-Mag,M,NGC,...,Mag,RA_MIKE,Dec_MIKE,Width ('),Height ('),IC_MIKE,IC name_MIKE,NGC_MIKE,NGC name,mike
0,IC0001,**,00:08:27.05,+27:43:03.6,Peg,,,,,,...,,,,,,,,,,
1,IC0002,G,00:11:00.88,-12:49:22.3,Cet,0.98,0.32,,,,...,,,,,,,,,,
2,IC0003,G,00:12:06.09,-00:24:54.8,Psc,0.93,0.67,,,,...,,,,,,,,,,
3,IC0004,G,00:13:26.94,+17:29:11.2,Peg,1.17,0.84,,,,...,,,,,,,,,,
4,IC0005,G,00:17:34.93,-09:32:36.1,Cet,0.99,0.66,,,,...,,,,,,,,,,


In [217]:
# Spot-check: how many merged rows does IC4715 produce?
merge_df.loc[merge_df['Name'] == 'IC4715', ['Name', 'Name number']]

Unnamed: 0,Name,Name number
4582,IC4715,24
4583,IC4715,24


In [218]:
size_limit = 15
mag_limit = 10

# An object qualifies by size when either axis reaches the limit
# (a missing axis compares as False).
size_bool = (merge_df['MajAx'] >= size_limit) | (merge_df['MinAx'] >= size_limit)
on_mike_list = merge_df['mike'] == True
# Objects with no V-Mag are kept as well as those within the magnitude limit.
bright_or_unknown = (merge_df['V-Mag'] <= mag_limit) | merge_df['V-Mag'].isna()

filter_df = merge_df[(size_bool | on_mike_list) & bright_or_unknown].copy()
filter_df['Notes'] = 'large_dso'

log_df(filter_df)
# Row counts seen with different settings (the column count is 36 once
# 'Notes' is added):
# 210  size 15 + NA, mag 10, mike
# 137  size 15 + NA, mag 10
# 122  size 15,      mag 10, mike
#  66  size 15,      mag 10
# 201  size 15 + NA, mag 9,  mike
(210, 36)


Unnamed: 0,Name,Type,RA,Dec,Const,MajAx,MinAx,V-Mag,M,NGC,...,RA_MIKE,Dec_MIKE,Width ('),Height ('),IC_MIKE,IC name_MIKE,NGC_MIKE,NGC name,mike,Notes
333,IC0341,Neb,03:40:55.69,+21:57:36.7,Tau,134.9,,,,,...,,,,,,,,,,large_dso
334,IC0342,G,03:46:48.50,+68:05:46.9,Cam,19.77,18.79,,,,...,03:46:48,68.1,18.0,17.0,342.0,IC 342,,,True,large_dso
341,IC0349,RfN,03:46:20.11,+23:56:23.3,Tau,25.7,,,,,...,,,,,,,,,,large_dso
345,IC0353,Neb,03:53:01.07,+25:50:52.8,Tau,181.97,30.2,,,,...,03:55:00,25.48,180.0,180.0,353.0,IC 353,,,True,large_dso
346,IC0354,Neb,03:53:57.91,+23:08:49.2,Tau,128.82,,,,,...,,,,,,,,,,large_dso


In [219]:
size_limit = 1
mag_limit = 10

# Either axis strictly above the limit (a missing axis compares as False).
size2_bool = (merge_df['MajAx'] > size_limit) | (merge_df['MinAx'] > size_limit)
not_on_mike_list = merge_df['mike'].isna()
has_common_name = merge_df['Common names'].notna()
# Unlike the large-DSO filter, a missing V-Mag does NOT pass here.
within_mag_limit = merge_df['V-Mag'] <= mag_limit

filter2_df = merge_df[
    size2_bool & not_on_mike_list & has_common_name & within_mag_limit
].copy()
filter2_df['Notes'] = 'small_dso'

log_df(filter2_df)
# (34, 36)

(34, 36)


Unnamed: 0,Name,Type,RA,Dec,Const,MajAx,MinAx,V-Mag,M,NGC,...,RA_MIKE,Dec_MIKE,Width ('),Height ('),IC_MIKE,IC name_MIKE,NGC_MIKE,NGC name,mike,Notes
4570,IC4703,Neb,18:18:56.22,-13:50:43.4,Se2,5.05,5.05,6.0,,,...,,,,,,,,,,small_dso
4991,IC5146,Cl+N,21:53:28.76,+47:16:00.9,Cyg,10.0,10.0,7.2,,,...,,,,,,,,,,small_dso
5523,NGC0292,G,00:52:44.78,-72:49:43.0,Tuc,299.92,179.89,2.3,,,...,,,,,,,,,,small_dso
5699,NGC0457,OCl,01:19:32.65,+58:17:26.5,Cas,7.8,,6.4,,,...,,,,,,,,,,small_dso
6577,NGC1316,G,03:22:41.72,-37:12:29.6,For,13.46,7.71,8.48,,,...,,,,,,,,,,small_dso


In [220]:
combine_df = pd.concat([filter_df, filter2_df])

# Compare on every column except 'Notes', so an object picked up by both
# filters is kept once, with the tag of its first occurrence.
cols = combine_df.columns[combine_df.columns != 'Notes'].tolist()
combine_df = combine_df.drop_duplicates(subset=cols, keep='first')

log_df(combine_df)
# (239, 36)

(239, 36)


Unnamed: 0,Name,Type,RA,Dec,Const,MajAx,MinAx,V-Mag,M,NGC,...,RA_MIKE,Dec_MIKE,Width ('),Height ('),IC_MIKE,IC name_MIKE,NGC_MIKE,NGC name,mike,Notes
333,IC0341,Neb,03:40:55.69,+21:57:36.7,Tau,134.9,,,,,...,,,,,,,,,,large_dso
334,IC0342,G,03:46:48.50,+68:05:46.9,Cam,19.77,18.79,,,,...,03:46:48,68.1,18.0,17.0,342.0,IC 342,,,True,large_dso
341,IC0349,RfN,03:46:20.11,+23:56:23.3,Tau,25.7,,,,,...,,,,,,,,,,large_dso
345,IC0353,Neb,03:53:01.07,+25:50:52.8,Tau,181.97,30.2,,,,...,03:55:00,25.48,180.0,180.0,353.0,IC 353,,,True,large_dso
346,IC0354,Neb,03:53:57.91,+23:08:49.2,Tau,128.82,,,,,...,,,,,,,,,,large_dso


In [221]:
# Persist the draft without the row index; the next section reloads it from ngc_draft_path.
combine_df.to_csv(ngc_draft_path, index=False)

## Create DSO Catalog

In [222]:
# Reload the draft CSV. 'Name number' is read with the nullable integer dtype
# so rows without a number do not turn the column into floats.
draft_df = pd.read_csv(ngc_draft_path, dtype={'Name number': pd.Int64Dtype()})
log_df(draft_df)
# (239, 36)

(239, 36)


Unnamed: 0,Name,Type,RA,Dec,Const,MajAx,MinAx,V-Mag,M,NGC,...,RA_MIKE,Dec_MIKE,Width ('),Height ('),IC_MIKE,IC name_MIKE,NGC_MIKE,NGC name,mike,Notes
0,IC0341,Neb,03:40:55.69,+21:57:36.7,Tau,134.9,,,,,...,,,,,,,,,,large_dso
1,IC0342,G,03:46:48.50,+68:05:46.9,Cam,19.77,18.79,,,,...,03:46:48,68.1,18.0,17.0,342.0,IC 342,,,True,large_dso
2,IC0349,RfN,03:46:20.11,+23:56:23.3,Tau,25.7,,,,,...,,,,,,,,,,large_dso
3,IC0353,Neb,03:53:01.07,+25:50:52.8,Tau,181.97,30.2,,,,...,03:55:00,25.48,180.0,180.0,353.0,IC 353,,,True,large_dso
4,IC0354,Neb,03:53:57.91,+23:08:49.2,Tau,128.82,,,,,...,,,,,,,,,,large_dso


In [223]:
# List the merged columns; names ending in '_MIKE' come from the Mike side of
# the merge (suffixes=[None, '_MIKE']).
draft_df.columns

Index(['Name', 'Type', 'RA', 'Dec', 'Const', 'MajAx', 'MinAx', 'V-Mag', 'M',
       'NGC', 'IC', 'Common names', 'Name catalog', 'Name number (with zeros)',
       'Name normalized', 'IC name', 'M name', 'Name number', 'Catalog',
       'Name_MIKE', 'Name (no leading zero)', 'Name (final)', 'NGC/ID',
       'Type_MIKE', 'Common Name', 'Mag', 'RA_MIKE', 'Dec_MIKE', 'Width (')',
       'Height (')', 'IC_MIKE', 'IC name_MIKE', 'NGC_MIKE', 'NGC name', 'mike',
       'Notes'],
      dtype='object')

Add `Names` and `Common Names` columns that combine the names from OpenNGC and from Mike's list.

In [224]:
def _join_unique(values):
    """Return the distinct non-missing entries of ``values`` as one string.

    Missing values (NaN / pd.NA / None) are skipped, duplicates collapse to a
    single entry, and the survivors are sorted and joined with ', '.
    An input with no usable entries yields ''.
    """
    return ', '.join(sorted({value for value in values if pd.notna(value)}))


# Every column that may carry a catalogue name for the object
# (OpenNGC side first, then the Mike side).
name_columns = [
    'Name normalized', 'IC name', 'M name',
    'Name (no leading zero)', 'IC name_MIKE', 'NGC name',
]
# Common-name column of OpenNGC and of Mike's list, respectively.
common_name_columns = ['Common names', 'Common Name']

# Rows without an OpenNGC 'Name' are dropped.
tmp_df = draft_df.dropna(subset=['Name']).copy()

# Build each column in one pass instead of writing into tmp_df while iterating
# over it with iterrows(), which the pandas documentation warns against.
tmp_df['Names'] = [
    _join_unique(row_values)
    for row_values in tmp_df[name_columns].itertuples(index=False, name=None)
]
tmp_df['Common Names'] = [
    _join_unique(row_values)
    for row_values in tmp_df[common_name_columns].itertuples(index=False, name=None)
]

log_df(tmp_df)
# (238, 38)

(238, 38)


Unnamed: 0,Name,Type,RA,Dec,Const,MajAx,MinAx,V-Mag,M,NGC,...,Width ('),Height ('),IC_MIKE,IC name_MIKE,NGC_MIKE,NGC name,mike,Notes,Names,Common Names
0,IC0341,Neb,03:40:55.69,+21:57:36.7,Tau,134.9,,,,,...,,,,,,,,large_dso,IC 341,
1,IC0342,G,03:46:48.50,+68:05:46.9,Cam,19.77,18.79,,,,...,18.0,17.0,342.0,IC 342,,,True,large_dso,"C 5, IC 342",
2,IC0349,RfN,03:46:20.11,+23:56:23.3,Tau,25.7,,,,,...,,,,,,,,large_dso,IC 349,Barnard's Merope Nebula
3,IC0353,Neb,03:53:01.07,+25:50:52.8,Tau,181.97,30.2,,,,...,180.0,180.0,353.0,IC 353,,,True,large_dso,IC 353,
4,IC0354,Neb,03:53:57.91,+23:08:49.2,Tau,128.82,,,,,...,,,,,,,,large_dso,IC 354,


In [225]:
# Columns carried forward into the catalogue, in output order.
kept_columns = [
    'Name normalized', 'Names', 'Common Names',
    'Type', 'RA', 'Dec', 'Const', 'MajAx', 'MinAx', 'V-Mag',
    'Name catalog', 'Name number', "Width (')", "Height (')", 'Notes',
]

# Select, expose the normalized name as 'Name', and order by catalogue and
# then by number within it.
tmp_df = (
    tmp_df.loc[:, kept_columns]
    .rename(columns={'Name normalized': 'Name'})
    .sort_values(['Name catalog', 'Name number'])
)

log_df(tmp_df)
# (238, 15)

(238, 15)


Unnamed: 0,Name,Names,Common Names,Type,RA,Dec,Const,MajAx,MinAx,V-Mag,Name catalog,Name number,Width ('),Height ('),Notes
200,C 9,C 9,Cave Nebula,HII,22:57:54.0,+62:31:06,Cep,50.0,30.0,,C,9,50.0,10.0,large_dso
201,C 14,C 14,"Double Cluster, h & chi Persei",*Ass,02:20:42.0,+57:08:15,Per,50.0,50.0,,C,14,30.0,30.0,large_dso
202,C 41,C 41,Hyades,OCl,04:26:54.0,+15:52:00,Tau,329.0,,,C,41,330.0,330.0,large_dso
203,C 99,C 99,Coalsack Nebula,DrkN,12:31:19.0,-63:44:36,Cru,,,,C,99,400.0,300.0,large_dso
204,Cl 399,Cl 399,"Brocchi's Cluster, Al Sufi's Cluster, Coathang...",*Ass,19:25:24.0,+20:11:00,Vul,70.0,,3.6,Cl,399,,,large_dso


Delete rows with duplicate names.

In [226]:
# Names that occur more than once (duplicated() flags the 2nd and later occurrences).
is_repeat = tmp_df.duplicated(subset=['Name'])
dups = tmp_df.loc[is_repeat, 'Name'].values

In [227]:
# Every row (first occurrence included) whose Name is duplicated, for manual review.
dup_df = tmp_df.loc[
    tmp_df['Name'].isin(dups),
    ['Name', 'Names', 'Common Names', 'Type'],
]
dup_df

Unnamed: 0,Name,Names,Common Names,Type
7,IC 405,"C 31, IC 405",Flaming Star Nebula,Neb
8,IC 405,IC 405,"Flaming Star Nebula, Flaming Star nebula",Neb
29,IC 2391,"C 85, IC 2391","Omicron Vel Cluster, omi Vel Cluster",OCl
30,IC 2391,IC 2391,omi Vel Cluster,OCl
32,IC 2602,"C 102, IC 2602","Theta Car Cluster, tet Car Cluster",OCl
33,IC 2602,IC 2602,"Southern Pleiades, tet Car Cluster",OCl
46,M 24,"IC 4715, M 24",Small Sgr Star Cloud,*Ass
47,M 24,"IC 4715, M 24","Sagittarius Star Cloud, Small Sgr Star Cloud",*Ass
48,M 25,"IC 4725, M 25",M 25,OCl
49,M 25,"IC 4725, IC C4725, M 25",,OCl


In [228]:
# Work on a copy: rows are dropped in place below, while tmp_df/dup_df keep
# the original rows and index labels that the lookup helpers rely on.
ngc_catalog_df = tmp_df.copy()

def get_index_by_name(name, frame=None):
    """Return the index label of the first row whose 'Names' equals ``name``.

    Parameters
    ----------
    name : str
        Exact value to look for in the 'Names' column.
    frame : pandas.DataFrame, optional
        Frame to search. Defaults to the notebook-level ``dup_df``.

    Returns
    -------
    The index label of the first matching row, or ``None`` (after printing
    "<name> not found") when no row matches.

    Only the "no match" case is handled; genuine errors such as a missing
    'Names' column or an undefined ``dup_df`` now propagate instead of being
    swallowed by a bare ``except``.
    """
    if frame is None:
        frame = dup_df
    matches = frame[frame['Names'] == name].index
    if matches.empty:
        print(name, 'not found')
        return None
    return matches[0]
 

def get_index_by_common_name(name, frame=None):
    """Return the index label of the first row whose 'Common Names' equals ``name``.

    Parameters
    ----------
    name : str
        Exact value to look for in the 'Common Names' column.
    frame : pandas.DataFrame, optional
        Frame to search. Defaults to the notebook-level ``dup_df``.

    Returns
    -------
    The index label of the first matching row, or ``None`` (after printing
    "<name> not found..") when no row matches.

    Only the "no match" case is handled; genuine errors such as a missing
    'Common Names' column or an undefined ``dup_df`` now propagate instead of
    being swallowed by a bare ``except``.
    """
    if frame is None:
        frame = dup_df
    matches = frame[frame['Common Names'] == name].index
    if matches.empty:
        print(name, 'not found..')
        return None
    return matches[0]
 
# Rows to delete, identified by their exact 'Names' value in dup_df.
names = ['IC 405', 'IC 2391', 'IC 2602', 'IC 4725, IC C4725, M 25', 'NGC 6960', 'NGC 6992']
indexes = [get_index_by_name(name) for name in names]
missing = [name for name, label in zip(names, indexes) if label is None]

# Rows to delete, identified by their exact 'Common Names' value in dup_df.
names = ['Small Sgr Star Cloud', 'M  67']
indexes2 = [get_index_by_common_name(name) for name in names]
missing += [name for name, label in zip(names, indexes2) if label is None]

# The lookup helpers return None for a miss. Fail with a clear message rather
# than hand None to .drop(), which raises a KeyError that names no object.
if missing:
    raise KeyError(f'duplicate rows not found in dup_df: {missing}')

ngc_catalog_df.drop(indexes + indexes2, inplace=True)

# Hand-curated 'Common Names' for two of the surviving rows.
common_name_patches = [
    ('C 102, IC 2602', 'Theta Car Cluster, Southern Pleiades'),
    ('IC 4725, M 25', ""),
]
for names_value, common_names in common_name_patches:
    row_label = get_index_by_name(names_value)
    # .at with an unknown label would silently append a new row labelled None.
    if row_label is None:
        raise KeyError(f'row to patch not found in dup_df: {names_value!r}')
    ngc_catalog_df.at[row_label, 'Common Names'] = common_names

log_df(ngc_catalog_df)
# (230, 15)

(230, 15)


Unnamed: 0,Name,Names,Common Names,Type,RA,Dec,Const,MajAx,MinAx,V-Mag,Name catalog,Name number,Width ('),Height ('),Notes
200,C 9,C 9,Cave Nebula,HII,22:57:54.0,+62:31:06,Cep,50.0,30.0,,C,9,50.0,10.0,large_dso
201,C 14,C 14,"Double Cluster, h & chi Persei",*Ass,02:20:42.0,+57:08:15,Per,50.0,50.0,,C,14,30.0,30.0,large_dso
202,C 41,C 41,Hyades,OCl,04:26:54.0,+15:52:00,Tau,329.0,,,C,41,330.0,330.0,large_dso
203,C 99,C 99,Coalsack Nebula,DrkN,12:31:19.0,-63:44:36,Cru,,,,C,99,400.0,300.0,large_dso
204,Cl 399,Cl 399,"Brocchi's Cluster, Al Sufi's Cluster, Coathang...",*Ass,19:25:24.0,+20:11:00,Vul,70.0,,3.6,Cl,399,,,large_dso


In [229]:
# Sanity check: this should display an empty array once the duplicates are gone.
still_duplicated = ngc_catalog_df.duplicated(subset=['Name'])
ngc_catalog_df.loc[still_duplicated, 'Name'].values

array([], dtype=object)

In [230]:
# merge_type_constellation is a project helper (scripts.catalog_utils). Judging
# by the logged output it renames the columns to the published schema and adds
# 'Type Category' and the full constellation name - see the helper for details.
# NOTE: this rebinds merge_df, which until now held the OpenNGC/Mike merge;
# re-running the filter cells after this one would read this frame instead.
merge_df = merge_type_constellation(ngc_catalog_df, openngc_types_path, constellation_path)

log_df(merge_df)
# (230, 16)

(230, 16)


Unnamed: 0,Catalogue Entry,Alternative Entries,Familiar Name,Right Ascension,Declination,Major Axis,Minor Axis,Magnitude,Name catalog,Name number,Type,Type Category,Constellation,Width ('),Height ('),Notes
0,C 9,C 9,Cave Nebula,22:57:54.0,+62:31:06,50.0,30.0,,C,9,HII Ionized region,nebulae,Cepheus,50.0,10.0,large_dso
1,C 14,C 14,"Double Cluster, h & chi Persei",02:20:42.0,+57:08:15,50.0,50.0,,C,14,Association of stars,stars,Perseus,30.0,30.0,large_dso
2,C 41,C 41,Hyades,04:26:54.0,+15:52:00,329.0,,,C,41,Open Cluster,clusters,Taurus,330.0,330.0,large_dso
3,C 99,C 99,Coalsack Nebula,12:31:19.0,-63:44:36,,,,C,99,Dark Nebula,nebulae,Crux,400.0,300.0,large_dso
4,Cl 399,Cl 399,"Brocchi's Cluster, Al Sufi's Cluster, Coathang...",19:25:24.0,+20:11:00,70.0,,3.6,Cl,399,Association of stars,stars,Vulpecula,,,large_dso


In [231]:
# Write the DSO catalogue CSV without the row index; later sections reload it from dso_catalog_path.
merge_df.to_csv(dso_catalog_path, index=False)

## create HYG stars catalogue

In [159]:
# The three catalogue-number columns use the nullable integer dtype, so stars
# missing one of the identifiers keep integer values in the others.
types = {id_column: pd.Int64Dtype() for id_column in ('hip', 'hd', 'hr')}
df = pd.read_csv(hyg_path, usecols=hyg_columns, dtype=types)
log_df(df)

(119614, 8)


Unnamed: 0,hip,hd,hr,proper,ra,dec,mag,con
0,,,,Sol,0.0,0.0,-26.7,
1,1.0,224700.0,,,6e-05,1.089009,9.1,Psc
2,2.0,224690.0,,,0.000283,-19.49884,9.27,Cet
3,3.0,224699.0,,,0.000335,38.859279,6.61,And
4,4.0,224707.0,,,0.000569,-51.893546,8.06,Phe


In [160]:
# Lookup table: IAU abbreviation -> full constellation name.
cons_df = (
    pd.read_csv(constellation_path, usecols=['Abbreviations IAU', 'name'])
    .rename(columns={'name': 'Constellation'})
)
log_df(cons_df)

(88, 2)


Unnamed: 0,Abbreviations IAU,Constellation
0,And,Andromeda
1,Ant,Antlia
2,Aps,Apus
3,Aqr,Aquarius
4,Aql,Aquila


In [161]:
# Keep stars that have a proper name and are at least as bright as this magnitude.
star_mag_limit = 2

has_proper_name = df['proper'].notna()
is_bright = df['mag'] <= star_mag_limit
# The Sun ('Sol', the first HYG row) passes both tests but is not a catalogue
# target. Exclude it by name rather than by the magic index label 0, which
# raises KeyError as soon as that row is absent or filtered out.
is_sun = df['proper'] == 'Sol'

filter_df = df[has_proper_name & is_bright & ~is_sun].copy()

log_df(filter_df)
# (48, 8)

(48, 8)


Unnamed: 0,hip,hd,hr,proper,ra,dec,mag,con
7574,7588,10144,472,Achernar,1.628556,-57.236757,0.45,Eri
11734,11767,8890,424,Polaris,2.52975,89.264109,1.97,UMi
15824,15863,20902,1017,Mirfak,3.405378,49.86118,1.79,Per
21368,21421,29139,1457,Aldebaran,4.598677,16.509301,0.87,Tau
24378,24436,34085,1713,Rigel,5.242298,-8.20164,0.18,Ori


In [162]:
# Attach the full constellation name. This is merge()'s default inner join, so
# a star whose 'con' abbreviation is missing from cons_df would be dropped
# silently; the logged row count (48) matches the input, so none is lost here.
# NOTE: this rebinds merge_df, which is also used in the DSO section above.
merge_df = filter_df.merge(cons_df, left_on='con', right_on='Abbreviations IAU')

log_df(merge_df)
# (48, 10)

(48, 10)


Unnamed: 0,hip,hd,hr,proper,ra,dec,mag,con,Abbreviations IAU,Constellation
0,7588,10144,472,Achernar,1.628556,-57.236757,0.45,Eri,Eri,Eridanus
1,11767,8890,424,Polaris,2.52975,89.264109,1.97,UMi,UMi,Ursa Minor
2,15863,20902,1017,Mirfak,3.405378,49.86118,1.79,Per,Per,Perseus
3,21421,29139,1457,Aldebaran,4.598677,16.509301,0.87,Tau,Tau,Taurus
4,25428,35497,1791,Elnath,5.438198,28.60745,1.65,Tau,Tau,Taurus


In [163]:
cat_df = merge_df.copy()

# Identification columns: the HIP number is the primary entry, HD and HR are
# listed as alternatives.
hip_entry = 'HIP ' + cat_df['hip'].astype(str)
cat_df['Catalogue Entry'] = hip_entry
cat_df['Name catalog'] = 'HIP'
cat_df['Alternative Entries'] = (
    hip_entry
    + ', HD ' + cat_df['hd'].astype(str)
    + ', HR ' + cat_df['hr'].astype(str)
)

# Convert the decimal coordinates to sexagesimal strings with the project helper.
# NOTE(review): the second positional argument (True) appears to request a
# signed result for declination, judging by the rendered output - confirm in
# scripts.catalog_utils.decToHMS.
cat_df['ra'] = cat_df['ra'].apply(decToHMS)
cat_df['dec'] = cat_df['dec'].apply(lambda value: decToHMS(value, True))

cat_df = cat_df.rename(columns={
    'proper': 'Familiar Name',
    'ra': 'Right Ascension',
    'dec': 'Declination',
    'hip': 'Name number',
    'mag': 'Magnitude',
})

# Columns that do not apply to stars are left empty
# ('Surface Brightness' used to be assigned twice).
for empty_column in ['Major Axis', 'Minor Axis', 'Surface Brightness',
                     "Width (')", "Height (')"]:
    cat_df[empty_column] = pd.NA

cat_df['Type'] = 'Star'
cat_df['Type Category'] = 'stars'
cat_df['Notes'] = 'bright_named_stars'

# Keep only the shared catalogue schema, in its canonical order.
cat_df = cat_df[catalog_columns]

log_df(cat_df)
# (48, 16)

(48, 16)


Unnamed: 0,Catalogue Entry,Alternative Entries,Familiar Name,Right Ascension,Declination,Major Axis,Minor Axis,Magnitude,Name catalog,Name number,Type,Type Category,Constellation,Width ('),Height ('),Notes
0,HIP 7588,"HIP 7588, HD 10144, HR 472",Achernar,01:37:42.80,-57:14:12.33,,,0.45,HIP,7588,Star,stars,Eridanus,,,bright_named_stars
1,HIP 11767,"HIP 11767, HD 8890, HR 424",Polaris,02:31:47.10,+89:15:50.79,,,1.97,HIP,11767,Star,stars,Ursa Minor,,,bright_named_stars
2,HIP 15863,"HIP 15863, HD 20902, HR 1017",Mirfak,03:24:19.36,+49:51:40.25,,,1.79,HIP,15863,Star,stars,Perseus,,,bright_named_stars
3,HIP 21421,"HIP 21421, HD 29139, HR 1457",Aldebaran,04:35:55.24,+16:30:33.48,,,0.87,HIP,21421,Star,stars,Taurus,,,bright_named_stars
4,HIP 25428,"HIP 25428, HD 35497, HR 1791",Elnath,05:26:17.51,+28:36:26.82,,,1.65,HIP,25428,Star,stars,Taurus,,,bright_named_stars


In [164]:
# Write the stars catalogue CSV without the row index.
cat_df.to_csv(stars_catalog_path, index=False)

## create solar system catalogue

In [148]:
# Load the Moon/planets table (all columns). NOTE: rebinds df, which earlier
# sections used for the OpenNGC and HYG frames.
df = pd.read_csv(solar_system_path)

log_df(df)

(8, 9)


Unnamed: 0,Apparent magnitude (V),Object,Name,Seen from...,Notes,type,category,catalog,order
0,-3.69,Moon,Moon,"lit by earthlight, reflecting earthshine seen ...",,Moon,moon_planets,-,0
1,0.23,planet Mercury,Mercury,seen from Earth,mean brightness[32],Planet,moon_planets,-,1
2,-4.14,planet Venus,Venus,seen from Earth,mean brightness[32],Planet,moon_planets,-,2
3,0.71,planet Mars,Mars,seen from Earth,mean brightness[32],Planet,moon_planets,-,4
4,-2.2,planet Jupiter,Jupiter,seen from Earth,mean brightness[32],Planet,moon_planets,-,5


In [149]:
# Map the source columns onto the shared catalogue schema
# (rename() returns a new frame, so df itself is left untouched).
cat_df = df.rename(columns={
    'Name': 'Catalogue Entry',
    'Apparent magnitude (V)': 'Magnitude',
    'catalog': 'Name catalog',
    'order': 'Name number',
    'type': 'Type',
    'category': 'Type Category',
})

cat_df['Alternative Entries'] = cat_df['Catalogue Entry']

# Schema columns with no value for solar-system bodies.
for empty_column in ['Familiar Name', 'Right Ascension', 'Declination',
                     'Major Axis', 'Minor Axis', 'Constellation',
                     "Width (')", "Height (')"]:
    cat_df[empty_column] = pd.NA

# The former `cat_df['notes'] = pd.NA` created a lower-case column that the
# catalog_columns selection below discarded, so it had no effect and is gone.
# NOTE(review): the source file's own 'Notes' column (e.g. 'mean brightness[32]')
# is therefore exported as-is - confirm whether it was meant to be blanked.
cat_df = cat_df[catalog_columns]

log_df(cat_df)

(8, 16)


Unnamed: 0,Catalogue Entry,Alternative Entries,Familiar Name,Right Ascension,Declination,Major Axis,Minor Axis,Magnitude,Name catalog,Name number,Type,Type Category,Constellation,Width ('),Height ('),Notes
0,Moon,Moon,,,,,,-3.69,-,0,Moon,moon_planets,,,,
1,Mercury,Mercury,,,,,,0.23,-,1,Planet,moon_planets,,,,mean brightness[32]
2,Venus,Venus,,,,,,-4.14,-,2,Planet,moon_planets,,,,mean brightness[32]
3,Mars,Mars,,,,,,0.71,-,4,Planet,moon_planets,,,,mean brightness[32]
4,Jupiter,Jupiter,,,,,,-2.2,-,5,Planet,moon_planets,,,,mean brightness[32]


In [150]:
# Write the Moon/planets catalogue as CSV without the row index.
cat_df.to_csv(solar_system_catalog_path, index=False)

In [151]:
# Same catalogue as JSON; orient='records' emits a list with one object per row.
cat_df.to_json(solar_system_catalog_json_path, orient='records')

## Create combined DSO and stars JSON catalogue

In [232]:
# Reload the DSO catalogue CSV written earlier. No dtype is forced here, so
# pandas infers the type of 'Name number' (the later Messier section forces Int64).
dso_df = pd.read_csv(dso_catalog_path)
log_df(dso_df)
# (230, 16)

(230, 16)


Unnamed: 0,Catalogue Entry,Alternative Entries,Familiar Name,Right Ascension,Declination,Major Axis,Minor Axis,Magnitude,Name catalog,Name number,Type,Type Category,Constellation,Width ('),Height ('),Notes
0,C 9,C 9,Cave Nebula,22:57:54.0,+62:31:06,50.0,30.0,,C,9,HII Ionized region,nebulae,Cepheus,50.0,10.0,large_dso
1,C 14,C 14,"Double Cluster, h & chi Persei",02:20:42.0,+57:08:15,50.0,50.0,,C,14,Association of stars,stars,Perseus,30.0,30.0,large_dso
2,C 41,C 41,Hyades,04:26:54.0,+15:52:00,329.0,,,C,41,Open Cluster,clusters,Taurus,330.0,330.0,large_dso
3,C 99,C 99,Coalsack Nebula,12:31:19.0,-63:44:36,,,,C,99,Dark Nebula,nebulae,Crux,400.0,300.0,large_dso
4,Cl 399,Cl 399,"Brocchi's Cluster, Al Sufi's Cluster, Coathang...",19:25:24.0,+20:11:00,70.0,,3.6,Cl,399,Association of stars,stars,Vulpecula,,,large_dso


In [233]:

# Reload the stars catalogue CSV written earlier.
stars_df = pd.read_csv(stars_catalog_path)
log_df(stars_df)
# (48, 16)

(48, 16)


Unnamed: 0,Catalogue Entry,Alternative Entries,Familiar Name,Right Ascension,Declination,Major Axis,Minor Axis,Magnitude,Name catalog,Name number,Type,Type Category,Constellation,Width ('),Height ('),Notes
0,HIP 7588,"HIP 7588, HD 10144, HR 472",Achernar,01:37:42.80,-57:14:12.33,,,0.45,HIP,7588,Star,stars,Eridanus,,,bright_named_stars
1,HIP 11767,"HIP 11767, HD 8890, HR 424",Polaris,02:31:47.10,+89:15:50.79,,,1.97,HIP,11767,Star,stars,Ursa Minor,,,bright_named_stars
2,HIP 15863,"HIP 15863, HD 20902, HR 1017",Mirfak,03:24:19.36,+49:51:40.25,,,1.79,HIP,15863,Star,stars,Perseus,,,bright_named_stars
3,HIP 21421,"HIP 21421, HD 29139, HR 1457",Aldebaran,04:35:55.24,+16:30:33.48,,,0.87,HIP,21421,Star,stars,Taurus,,,bright_named_stars
4,HIP 25428,"HIP 25428, HD 35497, HR 1791",Elnath,05:26:17.51,+28:36:26.82,,,1.65,HIP,25428,Star,stars,Taurus,,,bright_named_stars


In [234]:
# Element-wise check that both catalogues share the same columns in the same
# order before they are concatenated (expect all True).
dso_df.columns == stars_df.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True])

In [235]:
# Stack the DSO and star catalogues and drop rows that are identical in every column.
combine_df = pd.concat([dso_df, stars_df]).drop_duplicates()
log_df(combine_df)
# (278, 16)

(278, 16)


Unnamed: 0,Catalogue Entry,Alternative Entries,Familiar Name,Right Ascension,Declination,Major Axis,Minor Axis,Magnitude,Name catalog,Name number,Type,Type Category,Constellation,Width ('),Height ('),Notes
0,C 9,C 9,Cave Nebula,22:57:54.0,+62:31:06,50.0,30.0,,C,9,HII Ionized region,nebulae,Cepheus,50.0,10.0,large_dso
1,C 14,C 14,"Double Cluster, h & chi Persei",02:20:42.0,+57:08:15,50.0,50.0,,C,14,Association of stars,stars,Perseus,30.0,30.0,large_dso
2,C 41,C 41,Hyades,04:26:54.0,+15:52:00,329.0,,,C,41,Open Cluster,clusters,Taurus,330.0,330.0,large_dso
3,C 99,C 99,Coalsack Nebula,12:31:19.0,-63:44:36,,,,C,99,Dark Nebula,nebulae,Crux,400.0,300.0,large_dso
4,Cl 399,Cl 399,"Brocchi's Cluster, Al Sufi's Cluster, Coathang...",19:25:24.0,+20:11:00,70.0,,3.6,Cl,399,Association of stars,stars,Vulpecula,,,large_dso


In [236]:
# Order by catalogue, then by number within each catalogue.
combine_df = combine_df.sort_values(by=['Name catalog', 'Name number'])

In [237]:
# Export the combined catalogue; orient='records' emits a list with one object
# per row, so the (non-unique) row index left by the concat is not written.
combine_df.to_json(dso_catalog_json_path, orient='records')

## Create Messier demo catalog

In [239]:
# Reload the DSO catalogue with 'Name number' as a nullable integer.
dso_df = pd.read_csv(dso_catalog_path, dtype={'Name number': pd.Int64Dtype()})
log_df(dso_df)
# (230, 16)

(230, 16)


Unnamed: 0,Catalogue Entry,Alternative Entries,Familiar Name,Right Ascension,Declination,Major Axis,Minor Axis,Magnitude,Name catalog,Name number,Type,Type Category,Constellation,Width ('),Height ('),Notes
0,C 9,C 9,Cave Nebula,22:57:54.0,+62:31:06,50.0,30.0,,C,9,HII Ionized region,nebulae,Cepheus,50.0,10.0,large_dso
1,C 14,C 14,"Double Cluster, h & chi Persei",02:20:42.0,+57:08:15,50.0,50.0,,C,14,Association of stars,stars,Perseus,30.0,30.0,large_dso
2,C 41,C 41,Hyades,04:26:54.0,+15:52:00,329.0,,,C,41,Open Cluster,clusters,Taurus,330.0,330.0,large_dso
3,C 99,C 99,Coalsack Nebula,12:31:19.0,-63:44:36,,,,C,99,Dark Nebula,nebulae,Crux,400.0,300.0,large_dso
4,Cl 399,Cl 399,"Brocchi's Cluster, Al Sufi's Cluster, Coathang...",19:25:24.0,+20:11:00,70.0,,3.6,Cl,399,Association of stars,stars,Vulpecula,,,large_dso


In [260]:
# Demo subset: Messier entries that have a familiar name and a major axis below 17.
is_messier = dso_df['Name catalog'] == 'M'
has_familiar_name = dso_df['Familiar Name'].notna()
below_size_cap = dso_df['Major Axis'] < 17

df = dso_df[is_messier & has_familiar_name & below_size_cap]
log_df(df)

(18, 16)


Unnamed: 0,Catalogue Entry,Alternative Entries,Familiar Name,Right Ascension,Declination,Major Axis,Minor Axis,Magnitude,Name catalog,Name number,Type,Type Category,Constellation,Width ('),Height ('),Notes
57,M 1,M 1,Crab Nebula,05:34:31.97,+22:00:52.1,8.0,4.0,8.4,M,1,Supernova remnant,nebulae,Taurus,,,small_dso
61,M 6,"M 6, NGC 6405",Butterfly Cluster,17:40:20.75,-32:15:15.0,15.6,,4.2,M,6,Open Cluster,clusters,Scorpius,25.0,25.0,large_dso
64,M 11,M 11,"Amas de l'Ecu de Sobieski,Wild Duck Cluster",18:51:05.99,-06:16:12.1,9.0,,5.8,M,11,Open Cluster,clusters,Scutum,,,small_dso
65,M 13,"M 13, NGC 6205","Great Hercules Globular, Hercules Globular Clu...",16:41:41.63,+36:27:40.7,16.5,,5.8,M,13,Globular Cluster,clusters,Hercules,16.6,16.6,large_dso
67,M 17,"M 17, NGC 6618","Checkmark Nebula,Lobster Nebula,Swan Nebula,om...",18:20:47.11,-16:10:17.5,12.6,,7.0,M,17,Nebula,nebulae,Sagittarius,46.0,46.0,large_dso


In [268]:
# Write the demo subset as JSON; orient='records' emits a list with one object per row.
df.to_json(demo_path, orient='records')