In [71]:
%matplotlib inline
import datetime
import io
import json
import os
import urllib.request as urllib2
import xml.etree.ElementTree as et
from xml.parsers.expat import ExpatError

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

In [72]:
#---- Scrape XML From Treasury Website ----#
# Download one XML feed per calendar year and save it as
# <data_dir>/<year>.xml. The loop covers `year` up to and including 2023.
year  = 2023
curpath = os.path.dirname(os.path.realpath("./yield"))
data_dir = os.path.join(curpath, 'data', 'xml')
# open(..., "wb") does not create parent folders, so make sure the target exists.
os.makedirs(data_dir, exist_ok=True)
while (year <=2023):
    filename = os.path.join(data_dir, '%s.xml' %year )
    url = 'http://data.treasury.gov/feed.svc/DailyTreasuryYieldCurveRateData?$filter=year(NEW_DATE)%20eq%20'+ str(year)
    # The context manager closes the HTTP response even if read() fails.
    with urllib2.urlopen(url) as resp:
        xml = resp.read()
    with open(filename, "wb") as file:
        file.write(xml)
    print("Pulled file: %s" % filename)
    year += 1

Pulled file: /Users/fujiokaken/Docs/Github/yieldcurvedata/data/xml/2023.xml


In [73]:
#-------- Retrieve XML Files --------#
# Base folder: the parent of the absolute form of "./yield/".
curpath = os.path.split(os.path.abspath("./yield/"))[0]

# Data folder below the base folder; the trailing slash is part of the value.
data_dir = ''.join([curpath, '/data/'])
def traverse_directory(directory):
    """Return the paths of the regular files directly inside ``directory``.

    Sub-directories are not descended into and are left out of the result.
    Each path is built with ``os.path.join``, so ``directory`` may be given
    with or without a trailing separator. Names are sorted so the order of
    the result does not depend on what ``os.listdir`` happens to return.

    Parameters:
        directory: path of the folder to list.

    Returns:
        list of str: ``directory`` joined with each file name.

    Raises:
        OSError: propagated from ``os.listdir`` (e.g. the folder is missing).
    """
    candidates = (os.path.join(directory, name) for name in sorted(os.listdir(directory)))
    return [path for path in candidates if os.path.isfile(path)]

In [74]:
# ----Prepend Namespaces for Retrieval ----#
def prepend_atom(s):
    """Return ``s`` prefixed with the brace-wrapped Atom namespace URI."""
    namespace = '{http://www.w3.org/2005/Atom}'
    return namespace + s

def prepend_schema(s):
    """Return ``s`` prefixed with the brace-wrapped data-services metadata namespace URI."""
    template = '{http://schemas.microsoft.com/ado/2007/08/dataservices/metadata}%s'
    return template % s

def prepend_ds(s):
    """Return ``s`` prefixed with the brace-wrapped data-services namespace URI."""
    template = '{http://schemas.microsoft.com/ado/2007/08/dataservices}%s'
    return template % s

In [75]:
# ----Build a Dictionary of All Entries from 1990 to Present  ----#
def _ds_text(properties, tag):
    """Return the text of the data-services child ``tag``, or None when the child is absent."""
    node = properties.find(prepend_ds(tag))
    return None if node is None else node.text


def build_entries_dict(count, dentries, root):
    """Add one record per Atom ``entry`` element under ``root`` to ``dentries``.

    Each record is a dict with the keys 'date', '1m', '3m', '6m', '1y', '2y',
    '3y', '5y', '7y', '10y' and '30y', holding the raw element text. A value
    is None when the element is empty or missing from the entry. Records are
    stored under the text of the entry's ``Id`` element.

    Parameters:
        count: accepted for backward compatibility only; it is not returned,
            so it has no effect visible to the caller.
        dentries: dict that is updated in place.
        root: parsed feed root element whose direct children are the entries.

    Returns:
        The same ``dentries`` object that was passed in.

    Raises:
        AttributeError: if an entry lacks its content, properties or Id
            element (there is no key to store the record under).
    """
    # Output key -> element name; the order fixes the key order of each record.
    fields = (
        ('date', 'NEW_DATE'),
        ('1m', 'BC_1MONTH'),
        ('3m', 'BC_3MONTH'),
        ('6m', 'BC_6MONTH'),
        ('1y', 'BC_1YEAR'),
        ('2y', 'BC_2YEAR'),
        ('3y', 'BC_3YEAR'),
        ('5y', 'BC_5YEAR'),
        ('7y', 'BC_7YEAR'),
        ('10y', 'BC_10YEAR'),
        ('30y', 'BC_30YEAR'),
    )
    for elem in root.findall(prepend_atom('entry')):
        # Resolve the properties node once instead of once per field.
        properties = elem.find(prepend_atom('content')).find(prepend_schema('properties'))
        iid = properties.find(prepend_ds('Id')).text
        # BC_30YEARDISPLAY was previously read but never stored, so it is no
        # longer looked up; an entry without it does not abort the run.
        dentries[iid] = {key: _ds_text(properties, tag) for key, tag in fields}
    return dentries

In [76]:
# ---- Parse the XML to create a Dict of All Data ---#
def parse_xml(filename, count, dentries):
    """Parse one XML file and merge its entries into ``dentries``.

    Parameters:
        filename: path of the XML file to parse.
        count: passed through unchanged to ``build_entries_dict``.
        dentries: dict that receives the parsed records.

    Returns:
        The dict returned by ``build_entries_dict``.

    Raises:
        xml.etree.ElementTree.ParseError: the file is not well-formed XML.
        IOError: the file could not be opened or read.
        Both are re-raised after a diagnostic line is printed.
    """
    try:
        #Parse the given XML file:
        tree = et.parse(filename)
    except et.ParseError as e:
        # ElementTree reports malformed input as ParseError (with a
        # (line, column) ``position``), not as expat's ExpatError, so this is
        # the handler that actually fires for a bad file.
        line, column = getattr(e, 'position', (-1, -1))
        print ("[XML] Error in %s (line %d): %d" % (filename, line, getattr(e, 'code', -1)))
        print ("[XML] Offset: %d" % column)
        raise
    except IOError as e:
        # errno/strerror may be None on some OSError instances, hence %s.
        print ("[XML] I/O Error %s: %s" % (e.errno, e.strerror))
        raise
    root = tree.getroot()
    return build_entries_dict(count, dentries, root)

In [77]:
#-------- Main --------#
# `count` is handed to parse_xml as-is; `dentries` collects every parsed record.
count = 0
dentries = {}
curpath = os.path.dirname( os.path.abspath("./yield"))
data_dir = curpath + '/data/xml/'
# Only hand feed files to the parser: anything else that ends up in the folder
# (e.g. a macOS .DS_Store) is not XML and would abort the whole run.
# Sorting makes the processing order reproducible.
xml_files = sorted(path for path in traverse_directory(data_dir) if path.lower().endswith('.xml'))


In [78]:
# Fold the records of each listed file into the shared `dentries` dict.
for xml_path in xml_files:
    dentries = parse_xml(xml_path, count, dentries)

ParseError: not well-formed (invalid token): line 6, column 45 (<string>)

In [66]:
# ---- Load JSON Into Pandas DataFrame ----#
# Serialise the collected `dentries` dict to a JSON string.
json_entries = json.dumps(obj=dentries)


In [68]:
# Create a pandas DataFrame; orient='index' makes each top-level JSON key one row.
# read_json expects a path or file-like object: passing the JSON text itself
# is deprecated, so the string is wrapped in an in-memory buffer.
df = pd.read_json(io.StringIO(json_entries), convert_dates=True, convert_axes=True, orient='index')


In [70]:
# Keep only the date and maturity columns, in this left-to-right order.
df = df.loc[:, ['date','1m', '3m', '6m', '1y', '2y', '3y', '5y', '7y', '10y', '30y']]


KeyError: "None of [Index(['date', '1m', '3m', '6m', '1y', '2y', '3y', '5y', '7y', '10y', '30y'], dtype='object')] are in the [columns]"

In [None]:
# Add one ratio column per row below: (new column, numerator, denominator).
for ratio_name, numerator, denominator in (
    ('10y/6m', '10y', '6m'),
    ('10y/1y', '10y', '1y'),
    ('10y/2y', '10y', '2y'),
    ('10y/3y', '10y', '3y'),
    ('10y/5y', '10y', '5y'),
    ('10y/7y', '10y', '7y'),
    ('30y/10y', '30y', '10y'),
):
    df[ratio_name] = df[numerator]/df[denominator]
df.head()