# The Capstone Deliverables

### Elasticsearch Synonym Filter

In [6]:
# Location of the Elasticsearch synonym filter deliverable
filter_file = './deliverables/astrononimcal_synonyms_112923.txt'

# Load the complete synonym list into memory; the context manager closes
# the handle as soon as the read finishes
with open(filter_file, mode='r') as synonyms_file:
    file_content = synonyms_file.read()

# Preview only the first 1000 characters rather than dumping the whole file
print(f'{file_content[:1000]} .... ')


messier 1, sh 2 244, crab, sharpless 244, m 1, crab neb, crab nebula, taurus a, ngc 1952
messier 2, ngc 7089, m 2, gcl 121
ngc 5272, messier 3, gcl 25, m 3
spider globular, messier 4, gcl 41, ngc 6121, m 4
ngc 5904, messier 5, m 5, rose cluster, gcl 34
m 6, ngc 6405, butterfly cluster, messier 6
ngc 6475, ptolemy's cluster, m 7, messier 7
gum 72, ngc 6523, sharpless 25, messier 8, m 8, rcw 146, sh 2 25, lagoon nebula
gcl 60, messier 9, ngc 6333, m 9
m 10, messier 10, ngc 6254
ngc 6705, wild duck cluster, m 11, messier 11
m 12, ngc 6218, messier 12
m 13, hercules globular cluster, great hercules cluster, messier 13, ngc 6205
messier 14, gcl 72, ngc 6402, m 14
ngc 7078, great pegasus cluster, messier 15, gcl 120, m 15
star queen, eagle nebula, lbn 67, ngc 6611, ocl 54, ic 4703, messier 16, sh 2 49, gum 83, sharpless 49, m 16
horseshoe nebula, omega swan horseshoe lobster or checkmark nebula, m 17, sh 2 45, lbn 60, sharpless 45, swan nebula, omega nebula, gum 81, messier 17, lobster nebul

### Image Title Classification Model

In [2]:
# This deliverable requires the following imports:
# MODEL IMPORT
import joblib
# CLEANING METHOD IMPORT
import classes_and_methods.soam_class as soam #contains a text cleaning method
# CLASS CLASSIFICATION METHOD IMPORT
import classes_and_methods.title_classification as classifier

# Single source of truth for the model location: the load below reads from
# this path, so the path shown here is the path that is actually used.
model_file = './deliverables/full_model.joblib'

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
imported_model = joblib.load(model_file)

In [3]:
# Demonstrate the classifier on a handful of unseen image titles
new_titles = [
    "A mountain sky with a comet",
    "Milky Way in the Mountains",
    "Pluto the Dog!",
    "Pluto the Planet!",
    "The CPC1100 Ready... To... Go!",
    "Cygnus Loop Nebula",
]

# Last expression in the cell, so its return value is rendered as the cell output
classifier.subject_type_classifications(
    new_text=new_titles,
    top_n=3,  # ... if you want to return the top 3 results
    cleaning_method=soam.provided_cleaning_method,
    imported_model=imported_model,
    values_only=False,  # set True if you do not want a dictionary returned
)

{'A mountain sky with a comet': ['WIDE_FIELD', 'NOCTILUCENT_CLOUDS', 'OTHER'],
 'Milky Way in the Mountains': ['WIDE_FIELD', 'OTHER', 'DEEP_SKY'],
 'Pluto the Dog!': ['DEEP_SKY', 'SOLAR_SYSTEM', 'OTHER'],
 'Pluto the Planet!': ['SOLAR_SYSTEM', 'WIDE_FIELD', 'STAR_TRAILS'],
 'The CPC1100 Ready... To... Go!': ['GEAR', 'WIDE_FIELD', 'STAR_TRAILS'],
 'Cygnus Loop Nebula': ['DEEP_SKY', 'WIDE_FIELD', 'OTHER']}

# BONUS Deliverables

### The SOAM Data Structure

In [12]:
# Imports needed for this section
# SOAM class module
import classes_and_methods.soam_class as soam

# Instantiate the Soam class, then load the pre-built SOAM export into it
imported_soam = soam.Soam()
imported_soam.import_soam(
    file_location="./deliverables/",
    file_name="soam_cleaned_bulk_export",
)

# Printing the object shows its string summary
print(f'The IMPORTED SOAM: {imported_soam}')

SOAM Started ---------------- 
SOAM Cleaning Method Test -- 
original test string: "   #][!,@ ^&*NGc224-.99+9abc. ...   "
cleaned test string: "ngc 224 99 9 abc"
The IMPORTED SOAM: 25642 names / aliases mapped across 4264 objects.


In [13]:
# EXAMPLE: Get all M31 aliases
# (the recorded output below is a set of lower-cased alias strings)
imported_soam.get_aliases("M31")

{'and nebula',
 'andromeda',
 'andromeda galaxy',
 'andromeda nebula',
 'da 21',
 'gin 801',
 'hd 3969',
 'k 79 1 c',
 'leda 2557',
 'm 31',
 'ngc 224',
 'plx 124',
 'ppm 43228',
 'rafgl 104',
 'ugc 454'}

In [16]:
# EXAMPLE: Replace each space-object name or alias found in a sentence with a
# single standard name for that object (in the recorded output below,
# "M 1" becomes "crab nebula" and "ngc 224" becomes "andromeda galaxy")
test_sentence = "M 1 is a much better target than M 8 or ngc 224"
imported_soam.switch_in_standard_names(test_sentence)

'crab nebula is a much better target than lagoon nebula or andromeda galaxy'

### The SIMBAD Query Helper Method

In [17]:
# The following imports are required
# IMPORT simbad_alias_search
import classes_and_methods.simbad_alias_search as simbad

In [19]:
# Example query: search for the aliases of every name in the queue.
# The recorded output below is a dict with a 'queue' key (the names searched)
# and an 'associations' key (one entry per searched name).
# NOTE(review): the function name suggests a live SIMBAD lookup that needs
# network access — confirm before running offline.
test_queue = ['ngc 224', 'm 31', 'Crab Nebula', 'Hello World']
simbad.online_alias_search(test_queue)

---- STARTING QUERY ----
... 1 queries completed...




---- QUERY END ----


{'queue': ['ngc 224', 'm 31', 'Crab Nebula', 'Hello World'],
 'associations': [{'searched': 'ngc 224',
   'found_associations': ['PPM 43228',
    'HD 3969',
    'PLX 124',
    '2C 56',
    'DA 21',
    'GIN 801',
    'K79 1C',
    'LEDA 2557',
    'Andromeda',
    'Andromeda Galaxy',
    'NGC 224',
    'RAFGL 104',
    'UGC 454',
    'Z 535-17',
    'And Nebula',
    'Andromeda Nebula']},
  {'searched': 'm 31',
   'found_associations': ['PPM 43228',
    'HD 3969',
    'PLX 124',
    '2C 56',
    'DA 21',
    'GIN 801',
    'K79 1C',
    'LEDA 2557',
    'Andromeda',
    'Andromeda Galaxy',
    'NGC 224',
    'RAFGL 104',
    'UGC 454',
    'Z 535-17',
    'And Nebula',
    'Andromeda Nebula']},
  {'searched': 'Crab Nebula',
   'found_associations': ['2C 481',
    '2E 1309',
    '3C 144',
    '3CR 144',
    'AJG 1',
    'CTA 36',
    'CTB 18',
    'DA 179',
    'DB 38',
    'LBN 833',
    'CRAB NEB',
    'Crab',
    'Crab Nebula',
    'Tau A',
    'Taurus A',
    'NGC 1952',
    'NRAO 2