In [9]:
import pandas as pd
import requests
from xml.etree import ElementTree as ET
from io import StringIO
from bs4 import BeautifulSoup

This notebook takes the URL of the XML document for an N-PORT filing and extracts information from it.

<hr>
The following block of code imports the pickle file from 01 and then selects a sample. The sample is selected randomly, but because the seed is set, the sample will be the same each time this block of code is run.

In [2]:
# Load the series/class table pickled by notebook 01, then draw a single row.
# A fixed random_state makes the draw repeatable across runs.
df = pd.read_pickle('Investment Company Series and Class Information.p')
sample = df.sample(n=1, random_state=35)
sample

Unnamed: 0,reporting_file_number,cik,entity_name,entity_org_type,series_id,series_name,class_id,class_name,class_ticker,address_1,city,zip_code,state,address_2
15962,811-04347,772129,GMO TRUST,30,S000005487,GMO Global Asset Allocation Fund,C000213377,Class I,[NULL],40 ROWES WHARF,BOSTON,2110,MA,[NULL]


I manually found an example N-PORT filing for this company: the [NPORT monthly filing from November 30, 2019](https://www.sec.gov/Archives/edgar/data/772129/000175272420015077/0001752724-20-015077-index.htm). The following block of code downloads that filing's `primary_doc.xml` and parses it into an ElementTree.

In [21]:
link = 'https://www.sec.gov/Archives/edgar/data/772129/000175272420015077/primary_doc.xml'

# NOTE(review): SEC EDGAR reportedly rejects requests that lack a descriptive
# User-Agent header -- confirm, and pass headers=... here if this call starts failing.
r = requests.get(link)
# Fail loudly on a 4xx/5xx response. Unlike `assert`, this check is not
# stripped when Python runs with -O.
r.raise_for_status()

# Parse the body once, removing the '{namespace}' prefix from every tag so that
# later lookups can use bare names such as 'formData'.
events = ET.iterparse(StringIO(r.text))
for _, el in events:
    _, _, el.tag = el.tag.rpartition('}')  # strip ns
# iterparse exposes the finished tree's root only after the loop has consumed it.
xml = events.root

In [19]:
# Show the top-level sections that sit directly under <formData>.
[section for section in xml.find('formData')]

[<Element 'genInfo' at 0x000002374144E728>,
 <Element 'fundInfo' at 0x000002374144BD18>,
 <Element 'invstOrSecs' at 0x000002374147A2C8>,
 <Element 'explntrNotes' at 0x00000237414CCA98>,
 <Element 'signature' at 0x00000237414CCC28>]

In this block of code, I build the portion of the function that accepts the XML ElementTree and then extracts information about the investments (`invstOrSecs`) that the fund holds.

In [22]:

# Each child of <invstOrSecs> is one holding; flatten it into a {tag: text} row.
secs = xml.find('formData').find('invstOrSecs')
all_secs = [{field.tag: field.text for field in holding} for holding in secs]
res = pd.DataFrame(all_secs)

# Every value arrives as text, so cast the numeric columns to float.
num_cols = ['valUSD', 'balance', 'pctVal']
res = res.astype(dict.fromkeys(num_cols, float))

res.head()

Unnamed: 0,name,lei,title,cusip,identifiers,balance,units,currencyConditional,valUSD,pctVal,payoffProfile,assetCat,issuerCat,invCountry,isRestrictedSec,fairValLevel,securityLending,curCd,issuerConditional,derivativeInfo
0,Ebara Corp,353800UV8U3SOMCDTI49,Ebara Corp,000000000,\n,35900.0,NS,,1080297.91,0.915719,Long,EC,CORP,JP,N,2,\n,,,
1,Bakkafrost P/F,2138007LH7OP4V112978,Bakkafrost P/F,000000000,\n,704.0,NS,,47197.02,0.040007,Long,EC,CORP,FO,N,2,\n,,,
2,METAWATER Co Ltd,,METAWATER Co Ltd,000000000,\n,4300.0,NS,,156886.67,0.132986,Long,EC,CORP,JP,N,2,\n,,,
3,Tongaat Hulett Ltd,,Tongaat Hulett Ltd,000000000,\n,19722.0,NS,,8891.42,0.007537,Long,EC,CORP,ZA,N,3,\n,,,
4,Exelon Corp,3SOUA6IRML7435B56G12,Exelon Corp,30161N101,\n,26500.0,NS,,1176600.0,0.99735,Long,EC,CORP,US,N,1,\n,USD,,


To test, I pulled information from the fund info section and manually compared it with the HTML form.

In [53]:
# Map each direct child of <fundInfo> to its text (None when the element has no text).
fund_info = {field.tag: field.text for field in xml.find('formData').find('fundInfo')}

print(fund_info)

{'totAssets': '118169763.36', 'totLiabs': '197117.91', 'netAssets': '117972645.45', 'assetsAttrMiscSec': '0.00000000', 'assetsInvested': '0.00000000', 'amtPayOneYrBanksBorr': '0.00000000', 'amtPayOneYrCtrldComp': '0.00000000', 'amtPayOneYrOthAffil': '0.00000000', 'amtPayOneYrOther': '0.00000000', 'amtPayAftOneYrBanksBorr': '0.00000000', 'amtPayAftOneYrCtrldComp': '0.00000000', 'amtPayAftOneYrOthAffil': '0.00000000', 'amtPayAftOneYrOther': '0.00000000', 'delayDeliv': '0.00000000', 'standByCommit': '0.00000000', 'liquidPref': '0.00000000', 'cshNotRptdInCorD': '113313.02000000', 'isNonCashCollateral': 'N', 'returnInfo': '\n        ', 'mon1Flow': None, 'mon2Flow': None, 'mon3Flow': None}


To test completeness, I tied out the securities held to the total assets of the fund.

First, to understand what share of the fund is held in investments, I sum the reported percentages (`pctVal`) and convert the total to a fraction.
Second, I multiply that fraction by the fund's net assets (`netAssets`).
Third, I compare that figure with the sum of the reported investment values (`valUSD`).

In [70]:
# Sum the per-holding percentages and divide by 100 to get a fraction.
pct_invested = res['pctVal'].sum() / 100
print('{:.2%} of the fund is invested.'.format(pct_invested))

# Estimate the invested amount from the fund-level net assets figure.
net_assets = float(fund_info['netAssets'])
total_invested = net_assets * pct_invested
print('${:,.2f} is invested based on netAssets * pct_invested.'.format(total_invested))

# Independent figure: add up the value reported for each holding.
summed_value = res['valUSD'].sum()
print('${:,.2f} is invested based on adding investments.'.format(summed_value))

99.96% of the fund is invested.
$117,928,094.40 is invested based on netAssets * pct_invested.
$117,928,094.40 is invested based on adding investments.


In [67]:
# Display the parsed holdings table (the notebook truncates long frames in the output).
res

Unnamed: 0,name,lei,title,cusip,identifiers,balance,units,currencyConditional,valUSD,pctVal,payoffProfile,assetCat,issuerCat,invCountry,isRestrictedSec,fairValLevel,securityLending,curCd,issuerConditional,derivativeInfo
0,Ebara Corp,353800UV8U3SOMCDTI49,Ebara Corp,000000000,\n,35900.00,NS,,1080297.91,0.915719,Long,EC,CORP,JP,N,2,\n,,,
1,Bakkafrost P/F,2138007LH7OP4V112978,Bakkafrost P/F,000000000,\n,704.00,NS,,47197.02,0.040007,Long,EC,CORP,FO,N,2,\n,,,
2,METAWATER Co Ltd,,METAWATER Co Ltd,000000000,\n,4300.00,NS,,156886.67,0.132986,Long,EC,CORP,JP,N,2,\n,,,
3,Tongaat Hulett Ltd,,Tongaat Hulett Ltd,000000000,\n,19722.00,NS,,8891.42,0.007537,Long,EC,CORP,ZA,N,3,\n,,,
4,Exelon Corp,3SOUA6IRML7435B56G12,Exelon Corp,30161N101,\n,26500.00,NS,,1176600.00,0.997350,Long,EC,CORP,US,N,1,\n,USD,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,Renesas Electronics Corp,5299000EPC47VJQOFB81,Renesas Electronics Corp,000000000,\n,89700.00,NS,,585127.01,0.495985,Long,EC,CORP,JP,N,2,\n,,,
124,Arcadis NV,7245000UZH70GO047N03,Arcadis NV,000000000,\n,46972.00,NS,,955399.71,0.809849,Long,EC,CORP,NL,N,2,\n,,,
125,K+S AG,529900YURAYD4IJX2J91,K+S AG,000000000,\n,39459.00,NS,,445220.39,0.377393,Long,EC,CORP,DE,N,2,\n,,,
126,GMO US Treasury Fund,0NIBQB0NDRTFX04R2J69,GMO US Treasury Fund,362013369,\n,115414.97,NS,,577074.85,0.489160,Long,STIV,RF,US,N,1,\n,USD,,


The import appears to have succeeded. I will use the code above to build a reusable class.

<hr>

The following block of code declares a class for future use.

In [72]:
class download_NPORT:
    """Download one N-PORT XML document and expose its contents.

    After construction, ``self.xml`` holds the root element of the parsed
    document with namespace prefixes removed from every tag.
    """

    # Columns of the securities table that are cast from text to float.
    _NUM_COLS = ('valUSD', 'balance', 'pctVal')

    def __init__(self, link, headers=None, timeout=None):
        """Fetch ``link`` and parse the response body into ``self.xml``.

        Parameters
        ----------
        link : str
            URL of the XML document to download.
        headers : dict, optional
            Extra HTTP headers forwarded to ``requests.get``.
            NOTE(review): SEC EDGAR reportedly requires a descriptive
            User-Agent header -- confirm and supply one here if requests fail.
        timeout : float or tuple, optional
            Timeout forwarded to ``requests.get``; ``None`` waits indefinitely.

        Raises
        ------
        requests.HTTPError
            If the server responds with a 4xx or 5xx status.
        """
        r = requests.get(link, headers=headers, timeout=timeout)
        # Explicit status check: an `assert` would be stripped under `python -O`.
        r.raise_for_status()
        self.xml = self._parse_xml(r.text)

    @staticmethod
    def _parse_xml(text):
        """Parse ``text`` once and return the root with '{namespace}' removed from tags."""
        parser = ET.iterparse(StringIO(text))
        for _, el in parser:
            # '{uri}tag' -> 'tag'; tags that carry no namespace are left as they are.
            _, _, el.tag = el.tag.rpartition('}')
        # The root is only available once the iterator has been exhausted.
        return parser.root

    def _section(self, name):
        """Return the ``formData/<name>`` element, or an empty list when it is absent."""
        form_data = self.xml.find('formData')
        # Compare with None explicitly: an Element without children is falsy.
        section = None if form_data is None else form_data.find(name)
        return [] if section is None else section

    def parseSecurities(self):
        """Return a DataFrame with one row per child of ``formData/invstOrSecs``.

        Each row maps the holding's direct child tags to their text. The
        columns named in ``_NUM_COLS`` are cast to float when present. Child
        elements that only contain nested elements keep whatever text they
        have (typically whitespace). An absent or empty section yields an
        empty DataFrame.
        """
        all_secs = [{e.tag: e.text for e in s} for s in self._section('invstOrSecs')]
        res = pd.DataFrame(all_secs)

        # Extra data cleaning to convert strings to numbers; skip columns the
        # filing did not provide rather than raising KeyError.
        for c in self._NUM_COLS:
            if c in res.columns:
                res[c] = res[c].astype(float)

        return res

    def parseFundInfo(self):
        """Return ``{tag: text}`` for each direct child of ``formData/fundInfo``.

        Values are left as the raw text from the document (``None`` for
        elements without text). An absent section yields an empty dict.
        """
        return {e.tag: e.text for e in self._section('fundInfo')}

Test the newly declared class.

In [74]:
# Build the downloader from the same filing URL and preview the holdings it parses.
test = download_NPORT(link)

holdings = test.parseSecurities()
holdings.head()

Unnamed: 0,name,lei,title,cusip,identifiers,balance,units,currencyConditional,valUSD,pctVal,payoffProfile,assetCat,issuerCat,invCountry,isRestrictedSec,fairValLevel,securityLending,curCd,issuerConditional,derivativeInfo
0,Ebara Corp,353800UV8U3SOMCDTI49,Ebara Corp,000000000,\n,35900.0,NS,,1080297.91,0.915719,Long,EC,CORP,JP,N,2,\n,,,
1,Bakkafrost P/F,2138007LH7OP4V112978,Bakkafrost P/F,000000000,\n,704.0,NS,,47197.02,0.040007,Long,EC,CORP,FO,N,2,\n,,,
2,METAWATER Co Ltd,,METAWATER Co Ltd,000000000,\n,4300.0,NS,,156886.67,0.132986,Long,EC,CORP,JP,N,2,\n,,,
3,Tongaat Hulett Ltd,,Tongaat Hulett Ltd,000000000,\n,19722.0,NS,,8891.42,0.007537,Long,EC,CORP,ZA,N,3,\n,,,
4,Exelon Corp,3SOUA6IRML7435B56G12,Exelon Corp,30161N101,\n,26500.0,NS,,1176600.0,0.99735,Long,EC,CORP,US,N,1,\n,USD,,


In [75]:
# Show the fund-level fields returned by the class.
test.parseFundInfo()

{'totAssets': '118169763.36',
 'totLiabs': '197117.91',
 'netAssets': '117972645.45',
 'assetsAttrMiscSec': '0.00000000',
 'assetsInvested': '0.00000000',
 'amtPayOneYrBanksBorr': '0.00000000',
 'amtPayOneYrCtrldComp': '0.00000000',
 'amtPayOneYrOthAffil': '0.00000000',
 'amtPayOneYrOther': '0.00000000',
 'amtPayAftOneYrBanksBorr': '0.00000000',
 'amtPayAftOneYrCtrldComp': '0.00000000',
 'amtPayAftOneYrOthAffil': '0.00000000',
 'amtPayAftOneYrOther': '0.00000000',
 'delayDeliv': '0.00000000',
 'standByCommit': '0.00000000',
 'liquidPref': '0.00000000',
 'cshNotRptdInCorD': '113313.02000000',
 'isNonCashCollateral': 'N',
 'returnInfo': '\n        ',
 'mon1Flow': None,
 'mon2Flow': None,
 'mon3Flow': None}