In [1]:
import pandas as pd
import requests
from xml.etree import ElementTree as ET

# 01-Investment Company Series and Class Information.ipynb
***R Persichitte***

***7/14/2020***

## Purpose
This section of code imports a list of all investment companies. Investment companies are now required to file Form N-PORT, so this list will be useful for finding company filings.

Information about this dataset and how to use it can be found at the [SEC website](https://www.sec.gov/open/datasets-investment_company.html).

## Method

1. Download information from the [SEC listing of investment companies](https://www.sec.gov/files/investment/data/other/investment-company-series-and-class-information/investment_company_series_class_2019.xml).
2. Use ElementTree to parse the results into a DataFrame.
3. Use the `to_pickle()` method to save the results for future use.

In [2]:
# Location of the SEC's 2019 investment company series/class XML listing.
# Adjacent string literals are concatenated into one URL.
link = (
    'https://www.sec.gov/files/investment/data/other/'
    'investment-company-series-and-class-information/'
    'investment_company_series_class_2019.xml'
)

def download_data(link, timeout=30, headers=None):
    '''Download an XML document and return its parsed root element.

    Parameters
    ----------
    link : str
        URL of the XML document to fetch.
    timeout : float or tuple, optional
        Seconds to wait for the server (passed to ``requests.get``).
        Defaults to 30 so a stalled connection cannot hang the notebook.
    headers : dict, optional
        Extra HTTP headers to send, e.g. a ``User-Agent`` for servers
        that require one. Defaults to None (requests' default headers).

    Returns
    -------
    xml.etree.ElementTree.Element
        The root element of the parsed document.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    xml.etree.ElementTree.ParseError
        If the response body is not well-formed XML.
    '''
    r = requests.get(link, timeout=timeout, headers=headers)
    # Explicit status check instead of `assert`: assertions are stripped when
    # Python runs with -O, which would let an error page reach the parser.
    r.raise_for_status()
    # Parse the raw bytes so the XML parser honours the encoding declared in
    # the document prolog instead of the charset requests guesses for r.text.
    return ET.fromstring(r.content)

In [3]:
# Fetch the document at `link`; `xml` holds whatever download_data returns
# from ET.fromstring, i.e. the root element of the parsed document.
xml = download_data(link)

In [4]:
# Turn each child of the root into a {tag: text} mapping of its own direct
# children (a repeated tag keeps only its last text), then build one
# DataFrame with a row per record.
all_companies = [
    {child.tag: child.text for child in record}
    for record in xml
]
df = pd.DataFrame(all_companies)

In [5]:
# Save the DataFrame to disk for future use (reloadable with pd.read_pickle).
# NOTE: pickle files can execute code when loaded; only load trusted ones.
df.to_pickle('Investment Company Series and Class Information.p')