In [1]:
# An example of reading data from a .xml file with Python, using the "lxml"
# library.
# First, you'll need to pip install the lxml library:
# https://pypi.org/project/lxml/
# A helpful tutorial can be found here: https://lxml.de/tutorial.html
# The data used here is an instance of
# https://api.stlouisfed.org/fred/series/observations?series_id=U6RATE& \
# api_key=YOUR_API_KEY_HERE

In [2]:
# specify the "chapter" of the `lxml` library you want to import,
# in this case, `etree`, which stands for "ElementTree"
from lxml import etree

# import the `csv` library, to create our output file
import csv

In [3]:
# # UNCOMMENT BELOW TO USE WITH GOOGLE COLAB
# # Import PyDrive and associated libraries.
# # This only needs to be done once per notebook.
# # Documentation found here: https://colab.research.google.com/notebooks/io.ipynb#scrollTo=7taylj9wpsA2
# from pydrive.auth import GoogleAuth
# from pydrive.drive import GoogleDrive
# from google.colab import auth
# from oauth2client.client import GoogleCredentials

# # Authenticate and create the PyDrive client.
# # This only needs to be done once per notebook.
# auth.authenticate_user()
# gauth = GoogleAuth()
# gauth.credentials = GoogleCredentials.get_application_default()
# drive = GoogleDrive(gauth)

In [4]:
# # UNCOMMENT BELOW TO USE WITH GOOGLE COLAB
# # Link to data file stored in Drive: https://drive.google.com/file/d/17Gh6YeNbnXEk40GKeGIogsF1jzMhdYKK/view?usp=sharing
# file_id = '17Gh6YeNbnXEk40GKeGIogsF1jzMhdYKK' # notice where this string comes from in link above

# imported_file = drive.CreateFile({'id': file_id}) # creating an accessible copy of the shared data file
# print(imported_file['title'])  # it should print the title of desired file
# imported_file.GetContentFile(imported_file['title']) # refer to it in this notebook by the same name as it has in Drive

In [5]:
# base name (without extension) of the data file; the same name is reused
# later when building the output .csv file's name
filename = "U6_FRED_data"

# open "<filename>.xml" for reading as raw bytes ("rb" = read + binary)
# NOTE: this file handle is not closed anywhere in the cells shown here
xml_source_file = open(f"{filename}.xml", mode="rb")

In [6]:
# pass our xml_source_file as an ingredient to the `lxml` library's
# `etree.parse()` method and store the result in a variable called `xml_doc`
# (this is the parsed document as a whole; we get its root element from it
# with `.getroot()`)
xml_doc = etree.parse(xml_source_file)

In [7]:
# start by getting the current xml document's "root" element, i.e. the
# outermost element that contains all the others (for this data file,
# that is the `<observations>` element)
document_root = xml_doc.getroot()

In [8]:
# let's print it out to see what it looks like
# (`etree.tostring()` gives us bytes rather than a regular string, which is
# why the printed output starts with b'...' and shows "\n" for line breaks)
print(etree.tostring(document_root))

b'<observations realtime_start="2020-12-29" realtime_end="2020-12-29" observation_start="1600-01-01" observation_end="9999-12-31" units="lin" output_type="1" file_type="xml" order_by="observation_date" sort_order="asc" count="323" offset="0" limit="100000">\n  <observation realtime_start="2020-12-29" realtime_end="2020-12-29" date="1994-01-01" value="11.7"/>\n  <observation realtime_start="2020-12-29" realtime_end="2020-12-29" date="1994-02-01" value="11.4"/>\n  <observation realtime_start="2020-12-29" realtime_end="2020-12-29" date="1994-03-01" value="11.5"/>\n  <observation realtime_start="2020-12-29" realtime_end="2020-12-29" date="1994-04-01" value="11.3"/>\n  <observation realtime_start="2020-12-29" realtime_end="2020-12-29" date="1994-05-01" value="10.9"/>\n  <observation realtime_start="2020-12-29" realtime_end="2020-12-29" date="1994-06-01" value="11.0"/>\n  <observation realtime_start="2020-12-29" realtime_end="2020-12-29" date="1994-07-01" value="10.8"/>\n  <observation realt

In [9]:
# Export the XML data to a .csv file: one header row built from the attribute
# names of the first child element, then one row of attribute values for
# every child element of `document_root`.

# confirm that `document_root` is an XML element object before we use it
if etree.iselement(document_root):

    # create our output file, naming it "xml_"+filename+".csv"
    # - the `with` statement closes the file for us automatically, even if
    #   an error happens part-way through writing
    # - `newline=""` is what the `csv` library's documentation asks for, so
    #   that the writer controls line endings itself (without it, extra
    #   blank rows can show up on Windows)
    with open("xml_"+filename+".csv", "w", newline="") as output_file:

        # use the `csv` library's "writer" recipe to easily write rows of data
        # to `output_file`, instead of reading data *from* it
        output_writer = csv.writer(output_file)

        # grab the first element of our xml document (using `document_root[0]`)
        # and write its attribute keys as column headers to our output file;
        # if the document has no child elements at all, there is nothing to
        # use as a header, so we skip it rather than crash with an IndexError
        if len(document_root) > 0:
            output_writer.writerow(document_root[0].attrib.keys())

        # now, we need to loop through every element in our XML file
        for child in document_root:

            # now we'll use the `.values()` method to get each element's values
            # as a list, and then use that directly with the `writerow` recipe
            output_writer.writerow(child.attrib.values())

    # no explicit `.close()` needed: leaving the `with` block closed the file

In [10]:
# # UNCOMMENT BELOW TO USE WITH GOOGLE COLAB
# from google.colab import files

# files.download("xml_"+filename+".csv")