# Working with filing data from the SEC's EDGAR service

In [21]:
import warnings
warnings.filterwarnings('ignore')

In [22]:
%matplotlib inline

from pathlib import Path
from datetime import date
import json
from io import BytesIO
from zipfile import ZipFile, BadZipFile
from tqdm import tqdm
import requests

import pandas_datareader.data as web
import pandas as pd

from pprint import pprint

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker

In [23]:
sns.set_style('whitegrid')

In [40]:
# store data in this directory since we won't use it in other chapters
data_path = Path('data') # perhaps set to external harddrive to accomodate large amount of data
if not data_path.exists():
    data_path.mkdir()

## Download FS & Notes Data

The following code downloads and extracts all historical filings contained in the [Financial Statement and Notes](https://www.sec.gov/dera/data/financial-statement-and-notes-data-set.html) (FSN) datasets from Q1/2014 through Q3/2020. 

> The SEC has moved to a monthly cadence after Q3/2020; feel free to extend the code by creating the correpsonding file names (see linked website) and download those as well.

**Downloads over 40GB of data!**

In [36]:
SEC_URL = 'https://www.sec.gov/'
FSN_PATH = 'files/dera/data/financial-statement-and-notes-data-sets/'

In [38]:
filing_periods = [(d.year, d.quarter) for d in pd.date_range('2014', '2024-01-30', freq='Q')]
filing_periods

[(2014, 1),
 (2014, 2),
 (2014, 3),
 (2014, 4),
 (2015, 1),
 (2015, 2),
 (2015, 3),
 (2015, 4),
 (2016, 1),
 (2016, 2),
 (2016, 3),
 (2016, 4),
 (2017, 1),
 (2017, 2),
 (2017, 3),
 (2017, 4),
 (2018, 1),
 (2018, 2),
 (2018, 3),
 (2018, 4),
 (2019, 1),
 (2019, 2),
 (2019, 3),
 (2019, 4),
 (2020, 1),
 (2020, 2),
 (2020, 3),
 (2020, 4),
 (2021, 1),
 (2021, 2),
 (2021, 3),
 (2021, 4),
 (2022, 1),
 (2022, 2),
 (2022, 3),
 (2022, 4),
 (2023, 1),
 (2023, 2),
 (2023, 3),
 (2023, 4)]

In [46]:
from requests.exceptions import RequestException
from zipfile import BadZipFile, ZipFile
from io import BytesIO

for yr, qtr in tqdm(filing_periods):
    # set (and create) directory
    path = data_path / f'{yr}_{qtr}' / 'source'
    if not path.exists():
        path.mkdir(parents=True)
    
    # define url and get file
    filing = f'{yr}q{qtr}_notes.zip'
    url = SEC_URL + FSN_PATH + filing
    
    try:
        response = requests.get(url)
        if response.status_code != 200:
            print(f"Failed to download: {url}, Status Code: {response.status_code}")
            continue

        # decompress and save
        with ZipFile(BytesIO(response.content)) as zip_file:
            for file in zip_file.namelist():
                local_file = path / file
                if local_file.exists():
                    continue
                with local_file.open('wb') as output:
                    for line in zip_file.open(file).readlines():
                        output.write(line)

    except RequestException as e:
        print(f"Request failed: {e}")
    except BadZipFile:
        print(f'\nBad zip file: {yr} {qtr}\n')


  0%|          | 0/40 [00:00<?, ?it/s]

https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2014q1_notes.zip


  5%|▌         | 2/40 [00:01<00:17,  2.21it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2014q1_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2014q2_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2014q2_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2014q3_notes.zip


 10%|█         | 4/40 [00:01<00:09,  3.87it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2014q3_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2014q4_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2014q4_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2015q1_notes.zip


 15%|█▌        | 6/40 [00:01<00:07,  4.84it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2015q1_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2015q2_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2015q2_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2015q3_notes.zip


 20%|██        | 8/40 [00:01<00:05,  5.49it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2015q3_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2015q4_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2015q4_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2016q1_notes.zip


 25%|██▌       | 10/40 [00:02<00:04,  6.02it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2016q1_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2016q2_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2016q2_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2016q3_notes.zip


 30%|███       | 12/40 [00:02<00:04,  6.58it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2016q3_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2016q4_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2016q4_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2017q1_notes.zip


 35%|███▌      | 14/40 [00:02<00:03,  6.80it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2017q1_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2017q2_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2017q2_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2017q3_notes.zip


 40%|████      | 16/40 [00:03<00:03,  6.70it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2017q3_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2017q4_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2017q4_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2018q1_notes.zip


 45%|████▌     | 18/40 [00:03<00:03,  7.08it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2018q1_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2018q2_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2018q2_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2018q3_notes.zip


 50%|█████     | 20/40 [00:03<00:02,  6.67it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2018q3_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2018q4_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2018q4_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2019q1_notes.zip


 55%|█████▌    | 22/40 [00:04<00:02,  6.66it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2019q1_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2019q2_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2019q2_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2019q3_notes.zip


 60%|██████    | 24/40 [00:04<00:02,  6.69it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2019q3_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2019q4_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2019q4_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2020q1_notes.zip


 65%|██████▌   | 26/40 [00:04<00:02,  6.83it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2020q1_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2020q2_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2020q2_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2020q3_notes.zip


 70%|███████   | 28/40 [00:04<00:01,  6.57it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2020q3_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2020q4_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2020q4_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2021q1_notes.zip


 75%|███████▌  | 30/40 [00:05<00:01,  6.69it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2021q1_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2021q2_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2021q2_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2021q3_notes.zip


 80%|████████  | 32/40 [00:05<00:01,  6.60it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2021q3_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2021q4_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2021q4_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2022q1_notes.zip


 85%|████████▌ | 34/40 [00:05<00:00,  6.78it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2022q1_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2022q2_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2022q2_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2022q3_notes.zip


 90%|█████████ | 36/40 [00:06<00:00,  6.85it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2022q3_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2022q4_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2022q4_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2023q1_notes.zip


 92%|█████████▎| 37/40 [00:06<00:00,  6.34it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2023q1_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2023q2_notes.zip


 98%|█████████▊| 39/40 [00:06<00:00,  6.13it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2023q2_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2023q3_notes.zip
Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2023q3_notes.zip, Status Code: 403
https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2023q4_notes.zip


100%|██████████| 40/40 [00:06<00:00,  5.90it/s]

Failed to download: https://www.sec.gov/files/dera/data/financial-statement-and-notes-data-sets/2023q4_notes.zip, Status Code: 403





## Save to parquet

The data is fairly large and to enable faster access than the original text files permit, it is better to convert the text files to binary, columnar parquet format (see Section 'Efficient data storage with pandas' in chapter 2 for a performance comparison of various data-storage options compatible with pandas DataFrames):

> Some fo the `txt.tsv` source files contain a small number of faulty lines; the code below drops those lines but indicates the line numbers where you can find the errors if you would like to investigate further. 

In [47]:
from pathlib import Path
tsv_files = list(data_path.glob('**/*.tsv'))
print(tsv_files)  # Check if this list is empty


[PosixPath('data/2014_2/source/2014q2_notes/ren.tsv'), PosixPath('data/2014_2/source/2014q2_notes/pre.tsv'), PosixPath('data/2014_2/source/2014q2_notes/sub.tsv'), PosixPath('data/2014_2/source/2014q2_notes/txt.tsv'), PosixPath('data/2014_2/source/2014q2_notes/cal.tsv'), PosixPath('data/2014_2/source/2014q2_notes/tag.tsv'), PosixPath('data/2014_2/source/2014q2_notes/dim.tsv'), PosixPath('data/2014_2/source/2014q2_notes/num.tsv'), PosixPath('data/2014_1/source/2014q1_notes/ren.tsv'), PosixPath('data/2014_1/source/2014q1_notes/pre.tsv'), PosixPath('data/2014_1/source/2014q1_notes/sub.tsv'), PosixPath('data/2014_1/source/2014q1_notes/txt.tsv'), PosixPath('data/2014_1/source/2014q1_notes/cal.tsv'), PosixPath('data/2014_1/source/2014q1_notes/tag.tsv'), PosixPath('data/2014_1/source/2014q1_notes/dim.tsv'), PosixPath('data/2014_1/source/2014q1_notes/num.tsv'), PosixPath('data/2018_3/source/2018q3_notes/ren.tsv'), PosixPath('data/2018_3/source/2018q3_notes/pre.tsv'), PosixPath('data/2018_3/sour

In [52]:
for f in tqdm(sorted(list(data_path.glob('**/*.tsv')))):
    # set (and create) directory
    parquet_path = f.parent.parent / 'parquet'
    if not parquet_path.exists():
        parquet_path.mkdir(parents=True)    

    # write content to .parquet
    file_name = f.stem  + '.parquet'
    if not (parquet_path / file_name).exists():
        try:
            df = pd.read_csv(f, sep='\t', encoding='latin1', low_memory=False, on_bad_lines='skip')
            df.to_parquet(parquet_path / file_name)
        except Exception as e:
            print(e, ' | ', f)
        # optional: uncomment to delete original .tsv
    #else:
     #   f.unlink

100%|██████████| 24/24 [09:02<00:00, 22.59s/it]


## Metadata json

In [54]:
file = data_path / '2018_3' / 'source' / '2018q3_notes-metadata.json'
with file.open() as f:
    data = json.load(f)

pprint(data)

{'@context': 'http://www.w3.org/ns/csvw',
 'dialect': {'delimiter': '\t', 'header': True, 'headerRowCount': 1},
 'tables': [{'tableSchema': {'aboutUrl': 'readme.htm',
                             'columns': [{'datatype': {'base': 'string',
                                                       'maxLength': 20,
                                                       'minLength': 20},
                                          'dc:description': 'Accession Number. '
                                                            'The 20-character '
                                                            'string formed '
                                                            'from the 18-digit '
                                                            'number assigned '
                                                            'by the Commission '
                                                            'to each EDGAR '
                                                           

## Data Organization

For each quarter, the FSN data is organized into eight file sets that contain information about submissions, numbers, taxonomy tags, presentation, and more. Each dataset consists of rows and fields and is provided as a tab-delimited text file:

| File | Dataset      | Description                                                 |
|------|--------------|-------------------------------------------------------------|
| SUB  | Submission   | Identifies each XBRL submission by company, form, date, etc |
| TAG  | Tag          | Defines and explains each taxonomy tag                      |
| DIM  | Dimension    | Adds detail to numeric and plain text data                  |
| NUM  | Numeric      | One row for each distinct data point in filing              |
| TXT  | Plain Text   | Contains all non-numeric XBRL fields                        |
| REN  | Rendering    | Information for rendering on SEC website                    |
| PRE  | Presentation | Detail on tag and number presentation in primary statements |
| CAL  | Calculation  | Shows arithmetic relationships among tags                   |

## Submission Data

The latest submission file contains around 6,500 entries.

In [56]:
sub = pd.read_parquet(data_path / '2018_3' / 'parquet' / 'sub.parquet')
sub.info()

FileNotFoundError: [Errno 2] No such file or directory: 'data/2018_3/parquet/sub.parquet'

### Get AAPL submission

The submission dataset contains the unique identifiers required to retrieve the filings: the Central Index Key (CIK) and the Accession Number (adsh). The following shows some of the information about Apple's 2018Q1 10-Q filing:

In [None]:
name = 'APPLE INC'
apple = sub[sub.name == name].T.dropna().squeeze()
key_cols = ['name', 'adsh', 'cik', 'name', 'sic', 'countryba', 'stprba',
            'cityba', 'zipba', 'bas1', 'form', 'period', 'fy', 'fp', 'filed']
apple.loc[key_cols]

## Build AAPL fundamentals dataset

Using the central index key, we can identify all historical quarterly filings available for Apple, and combine this information to obtain 26 Forms 10-Q and nine annual Forms 10-K.

### Get filings

In [None]:
aapl_subs = pd.DataFrame()
for sub in data_path.glob('**/sub.parquet'):
    sub = pd.read_parquet(sub)
    aapl_sub = sub[(sub.cik.astype(int) == apple.cik) & (sub.form.isin(['10-Q', '10-K']))]
    aapl_subs = pd.concat([aapl_subs, aapl_sub])

We find 15 quarterly 10-Q and 4 annual 10-K reports:

In [None]:
aapl_subs.form.value_counts()

### Get numerical filing data

With the Accession Number for each filing, we can now rely on the taxonomies to select the appropriate XBRL tags (listed in the TAG file) from the NUM and TXT files to obtain the numerical or textual/footnote data points of interest.

First, let's extract all numerical data available from the 19 Apple filings:

In [None]:
aapl_nums = pd.DataFrame()
for num in data_path.glob('**/num.parquet'):
    num = pd.read_parquet(num).drop('dimh', axis=1)
    aapl_num = num[num.adsh.isin(aapl_subs.adsh)]
    print(len(aapl_num))
    aapl_nums = pd.concat([aapl_nums, aapl_num])
aapl_nums.ddate = pd.to_datetime(aapl_nums.ddate, format='%Y%m%d')   
aapl_nums.to_parquet(data_path / 'aapl_nums.parquet')

In total, the nine years of filing history provide us with over 18,000 numerical values for AAPL.

In [None]:
aapl_nums.info()

## Create P/E Ratio from EPS and stock price data

We can select a useful field, such as Earnings per Diluted Share (EPS), that we can combine with market data to calculate the popular Price/Earnings (P/E) valuation ratio.

In [None]:
stock_split = 7
split_date = pd.to_datetime('20140604')
split_date

We do need to take into account, however, that Apple split its stock 7:1 on June 4, 2014, and Adjusted Earnings per Share before the split to make earnings comparable, as illustrated in the following code block:

In [None]:
# Filter by tag; keep only values measuring 1 quarter
eps = aapl_nums[(aapl_nums.tag == 'EarningsPerShareDiluted')
                & (aapl_nums.qtrs == 1)].drop('tag', axis=1)

# Keep only most recent data point from each filing
eps = eps.groupby('adsh').apply(lambda x: x.nlargest(n=1, columns=['ddate']))

# Adjust earnings prior to stock split downward
eps.loc[eps.ddate < split_date,'value'] = eps.loc[eps.ddate < split_date, 'value'].div(7)
eps = eps[['ddate', 'value']].set_index('ddate').squeeze().sort_index()
eps = eps.rolling(4,min_periods=4).sum().dropna()

In [None]:
eps.plot(lw=2, figsize=(14, 6), title='Diluted Earnings per Share')
plt.xlabel('')
plt.savefig('diluted eps', dps=300);

In [None]:
symbol = 'AAPL.US'

aapl_stock = (web.
              DataReader(symbol, 'quandl', start=eps.index.min())
              .resample('D')
              .last()
             .loc['2014':eps.index.max()])
aapl_stock.info()

In [None]:
pe = aapl_stock.AdjClose.to_frame('price').join(eps.to_frame('eps'))
pe = pe.fillna(method='ffill').dropna()
pe['P/E Ratio'] = pe.price.div(pe.eps)
pe['P/E Ratio'].plot(lw=2, figsize=(14, 6), title='TTM P/E Ratio');

In [None]:
pe.info()

In [None]:
axes = pe.plot(subplots=True, figsize=(16,8), legend=False, lw=2)
axes[0].set_title('Adj. Close Price')
axes[1].set_title('Diluted Earnings per Share')
axes[2].set_title('Trailing P/E Ratio')
plt.tight_layout();

## Explore Additional Fields

The field `tag` references values defined in the taxonomy:

In [None]:
aapl_nums.tag.value_counts()

We can select values of interest and track their value or use them as inputs to compute fundamental metrics like the Dividend/Share ratio.

### Dividends per Share

In [None]:
fields = ['EarningsPerShareDiluted',
          'PaymentsOfDividendsCommonStock',
          'WeightedAverageNumberOfDilutedSharesOutstanding',
          'OperatingIncomeLoss',
          'NetIncomeLoss',
          'GrossProfit']

In [None]:
dividends = (aapl_nums
             .loc[aapl_nums.tag == 'PaymentsOfDividendsCommonStock', ['ddate', 'value']]
             .groupby('ddate')
             .mean())
shares = (aapl_nums
          .loc[aapl_nums.tag == 'WeightedAverageNumberOfDilutedSharesOutstanding', ['ddate', 'value']]
          .drop_duplicates()
          .groupby('ddate')
          .mean())
df = dividends.div(shares).dropna()
ax = df.plot.bar(figsize=(14, 5), title='Dividends per Share', legend=False)
ax.xaxis.set_major_formatter(mticker.FixedFormatter(df.index.strftime('%Y-%m')))

## Bonus: Textual Information

In [None]:
txt = pd.read_parquet(data_path / '2016_2' / 'parquet' /  'txt.parquet')

AAPL's adsh is not avaialble in the txt file but you can obtain notes from the financial statements here:

In [None]:
txt.head()