# Using the lxml parser for Matt's `uranus_occs_obs_syst_to_spreadsheet.py` script:

Note: I've renamed Matt's output from `uranus_occs_info_auto.csv` to `uranus_occs_info_auto_matt.csv`; I use it below to check that my code produces the same result.

In [None]:
from bs4 import BeautifulSoup as bs # Import BeautifulSoup4 (not BeautifulSoup3)
from astropy.table import Table

infile = 'obs_syst-templates-2021-03-22a_matt.txt'
outfile = 'uranus_occs_info_auto_mia.csv'

# Observing system whose components are left out of the table entirely.
# Gotcha: The HST does not have a <type>="Telescope" (only "Host" and
# "Instrument"), so including it would leave the telescope columns one entry
# short of the facility/instrument columns.
skipped_system_name = "Hubble Space Telescope FOS"

# Read the XML file in one go and hand it to BeautifulSoup with the lxml
# parser. Tag names are looked up in lower case below because that parser
# lower-cases them.
with open(infile, "r") as fin:
    content = fin.read()
bs_content = bs(content, "lxml")

# Initialise lists for storage purposes:
facility_names = []
telescope_names = []
instrument_names = []
facility_lids = []
telescope_lids = []
instrument_lids = []

# Map each component <type> value onto the (names, lids) lists it fills.
columns_by_type = {
    "Host": (facility_names, facility_lids),
    "Telescope": (telescope_names, telescope_lids),
    "Instrument": (instrument_names, instrument_lids),
}

for obsyscomp in bs_content.find_all('observing_system_component'):
    type_tag = obsyscomp.find("type")
    if type_tag is None:
        # A component without <type> cannot be assigned to a column.
        print("Observing system component has no <type>, therefore skipping")
        continue

    # Strip surrounding whitespace so pretty-printed XML still matches.
    component_type = type_tag.get_text(strip=True)
    destination = columns_by_type.get(component_type)
    if destination is None:
        print("Unrecognised component type", repr(component_type), "- therefore skipping")
        continue

    # The first <name> under the parent element is taken as the name of the
    # observing system this component belongs to.
    if obsyscomp.parent.find('name').text == skipped_system_name:
        print("Hubble Space Telescope has no type = Telescope, therefore skipping")
        continue

    names, lids = destination
    names.append(obsyscomp.find("name").text)
    lids.append(obsyscomp.find("lid_reference").text)

# All six lists must have equal length for the Table constructor to accept
# them; `names` fixes the column order in the output file.
column_order = ('facility_lid', 'telescope_lid', 'instrument_lid',
                'facility_name', 'telescope_name', 'instrument_name')
table = Table({'facility_lid': facility_lids,
               'telescope_lid': telescope_lids,
               'instrument_lid': instrument_lids,
               'facility_name': facility_names,
               'telescope_name': telescope_names,
               'instrument_name': instrument_names},
              names=column_order)

# Write a comma-delimited file with a header row, replacing any previous run.
table.write(outfile, format='ascii', delimiter=",", overwrite=True)

In [None]:
# IPython shell escape: compare Matt's reference CSV with the CSV this notebook writes.
!diff ./uranus_occs_info_auto_matt.csv ./uranus_occs_info_auto_mia.csv # The only difference is that I gave each column a header

In [None]:
# Display the assembled table inline in the notebook for a visual check.
table.show_in_notebook()