In [114]:
import xml.etree.ElementTree as ET
import requests
import pandas as pd
import glob

In [34]:
def parse_element(element, parsed=None):
    """ Collect {key:attribute} and {tag:text} from thie XML
     element and all its children into a single dictionary of strings."""
    if parsed is None:
        parsed = dict()

    for key in element.keys():
        if key not in parsed:
            parsed[key] = element.attrib.get(key)
        if element.text:
            parsed[element.tag] = element.text                
        else:
            continue     

    """ Apply recursion"""
    for child in list(element):
        parse_element(child, parsed)
    return parsed

def parse_root(root):
    """Return a list of dictionaries from the text and attributes of the
    children under this XML root."""
    return [parse_element(child) for child in root.getchildren()]

## SEC Investment Adviser Report

In [None]:
file_path = 'IA_FIRM_SEC_Feed_03_04_2020.xml' # You can change the file

In [None]:
xroot  = ET.parse(file_path).getroot()

In [None]:
structure_data = parse_root(xroot.getchildren()[0])

In [103]:
structure_data = pd.DataFrame(structure_data)

In [104]:
structure_data

Unnamed: 0,SECRgnCD,FirmCrdNb,SECNb,BusNm,LegalNm,UmbrRgstn,Strt1,Strt2,City,State,...,Q5D2D,Q5D2E,Q5D2F,Q5D2G,Q5D2H,Q5D2I,Q5D2J,Q5D2K,Q5D2L,Q5D2M
0,BRO,133693,801-73527,H/2 CAPITAL PARTNERS,H/2 CREDIT MANAGER LP,Y,680 WASHINGTON BOULEVARD,SEVENTH FLOOR,STAMFORD,CT,...,,,,,,,,,,
1,DRO,145800,801-106933,FREESTATE ADVISORS LLC,FREESTATE ADVISORS LLC,N,4400 COLLEGE BLVD,SUITE 125,OVERLAND PARK,KS,...,,,,,,,,,,
2,NYRO,285650,801-108550,"MORSE ASSET MANAGEMENT, INC.","MORSE ASSET MANAGEMENT, INC.",N,"805 THIRD AVENUE,",SUITE 1120,NEW YORK,NY,...,,,,,,,,,,
3,DRO,105957,801-18270,ALBION FINANCIAL GROUP,ALBION MANAGEMENT GROUP,N,812 EAST 2100 SOUTH,,SALT LAKE CITY,UT,...,,,,,,,,,,
4,BRO,159529,801-73571,"ALTUS CAPITAL PARTNERS, INC.","ALTUS CAPITAL PARTNERS, INC.",N,10 WESTPORT ROAD SUITE C 204,,WILTON,CT,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17828,MIRO,306458,802-118208,ORLANDO OPPORTUNITY FUND MANAGEMENT LLC,ORLANDO OPPORTUNITY FUND MANAGEMENT LLC,,496 DELANEY AVENUE,SUITE 408B,ORLANDO,FL,...,,,,,,,,,,
17829,CHRO,306449,801-117914,CRESSET PARTNERS LLC,CRESSET PARTNERS LLC,Y,444 W. LAKE STREET,SUITE 4700,CHICAGO,IL,...,,,,,,,,,,
17830,HQ,306443,801-117831,XY INVESTMENTS,XY INVESTMENTS (HK) LTD,N,1 HOI WAN STREET,"20/F, UNIT 2051, QUARRY BAY",HONG KONG,,...,,,,,,,,,,
17831,NYRO,306454,801-117811,CEDAR LEGACY LLC,CEDAR LEGACY LLC,N,30 EAST 23RD STREET,11TH FLOOR,NEW YORK,NY,...,,,,,,,,,,


In [107]:
structure_data.to_csv(r'SEC_Investment_Adviser_Report.csv', index = False)

## State Investment Adviser Report

In [108]:
xroot  = ET.parse(file_path).getroot()

In [None]:
structure_data = parse_root(xroot.getchildren()[0])

In [110]:
structure_data = pd.DataFrame(structure_data)

#### structure_data

In [112]:
structure_data.to_csv(r'State_Investment_Adviser_Report.csv', index = False)

## Investment Adviser Representatives Report

In [115]:
broker_feed_list = glob.glob('./IA_Indvl_Feeds*.xml')
#broker_pdf_list = glob.glob('/home/ubuntu/Broker/*.pdf')

In [None]:
indvl_advisor_reprens_report = pd.DataFrame()
for file in broker_feed_list:
    print(file)
    xroot  = ET.parse(file).getroot()
    structure_data = parse_root(xroot.getchildren()[0])
    indvl_advisor_reprens_report = pd.concat([indvl_advisor_reprens_report,pd.DataFrame(structure_data)])

In [118]:
indvl_advisor_reprens_report.shape

(360427, 39)

In [121]:
indvl_advisor_reprens_report.head()

Unnamed: 0,CrntEmp,PrevRgstn,actvAGReg,city,cntry,desc,dsgntnNm,exmCd,exmDt,exmNm,...,regBeginDt,regCat,regEndDt,st,stDt,state,str1,str2,sufNm,toDt
0,\n,\n,Y,BOSTON,United States,1. 03/25/2011: SILVER WEALTH MANAGEMENT - AT R...,,S63,1996-02-06,Uniform Securities Agent State Law Examination,...,2004-12-17,RA,2005-07-06,APPROVED,2011-03-28,MA,"75 STATE STREET, 22ND FLOOR",,,
1,\n,,Y,NEW YORK,United States,"Bishop Hendricken High School, Not investment ...",,S63,1996-01-24,Uniform Securities Agent State Law Examination,...,,RA,,APPROVED,2011-09-07,NY,1290 AVENUE OF THE AMERICAS,,,11/2010
2,\n,,Y,MINNEAPOLIS,United States,Independent Insurance Brokering; Blue Cross Bl...,,S63,1996-02-16,Uniform Securities Agent State Law Examination,...,,RA,,APPROVED,2010-10-11,MN,707 2ND AVENUE SOUTH,Ste 500,,
3,\n,,N,HAMILTON,United States,"I AM A MEMBER OF CAMERON-ELMWOOD REALTY, LLC. ...",,S65,1995-12-26,Uniform Investment Adviser Law Examination,...,,RA,,APPROVED,2012-03-28,MA,205 WILLOW ST,,,
4,\n,\n,Y,JERSEY CITY,United States,"Insurance Broker: Health and Group Benefits,...",,S65,2005-06-16,Uniform Investment Adviser Law Examination,...,2005-06-29,RA,2018-11-02,APPROVED,2018-11-02,NJ,10 EXCHANGE PLACE,SUITE 1410,,11/2018


In [124]:
indvl_advisor_reprens_report.to_csv(r'Investment_Adviser_Representatives_Report.csv', index = False)