https://www.gresearch.co.uk/blog/article/faster-string-processing-in-pandas/

https://gist.github.com/anshoomehra/ead8925ea291e233a5aa2dcaa2dc61b2

In [1]:
import pandas as pd
import cython
import os
import re
import json
from bs4 import BeautifulSoup
from multiprocessing import Pool

In [2]:
os.chdir('/mnt/d/workspace/8-2/Financial-Statements-Text-Analysis/')

In [3]:
# params
with open('config.json', 'r') as f:
    c = json.load(f)
input_dir = os.path.join(c['DATA_DIR'], '10k_raw')
destination_dir = os.path.join(c['DATA_DIR'], '10k_clean')

In [4]:
metadata = pd.read_csv(os.path.join(c['DATA_DIR'], 'metadata.csv'))
metadata_legacy = pd.read_csv(os.path.join(c['DATA_DIR'], 'metadata_2017.csv'))

# only download the data from russell 3000 today
metadata = metadata_legacy[metadata_legacy['TICKER'].isin(metadata['ticker'])]

In [7]:
def parse_filing(text):
    regex_10k = re.search(r"(?s)(?m)<TYPE>10-K.*?(</TEXT>)", text)
    regex_items = re.compile(r"(> +Item|>Item|^Item|ITEM)((\s|&#160;|&nbsp;)(1A|1B|7A|7|8))")
    
    try: 
        extracted_text = regex_10k.group(0)
    except:
        print('❌ No 10-K match for file')
    
    item_matches = regex_items.finditer(extracted_text)
    matches_df = pd.DataFrame([(x.group(), x.start()) for x in item_matches])
    matches_df.columns = ['item', 'start']
    matches_df['item'] = matches_df['item'].str.lower().replace('>|\.| |&#160;|&nbsp|;', '', regex=True)
    matches_df = matches_df.sort_values('start').drop_duplicates(subset=['item'], keep='last')
    matches_df['end'] = matches_df['start'].shift(-1).fillna(0).astype(int)
    matches_df = matches_df[matches_df['item'].isin(['item1a', 'item1b', 'item7', 'item7a'])]
    if len(matches_df) != 4:
        print('❌ Only {sections} sections found for file'.format(sections=len(matches_df)))
    matches_df['text'] = matches_df\
        .apply(lambda row: BeautifulSoup(extracted_text[row['start']:row['end']], 'lxml').get_text('\n\n'), axis=1)
    
    return matches_df

def process_filing(i):
    ticker = metadata.iloc[i]['TICKER']
    leaf = metadata.iloc[i]['EDGAR_LINK'].split("/")[-1]
    
    input_file_name = os.path.join(input_dir,
                             ticker, leaf)
    with open(input_file_name, 'r', encoding='utf-8') as f:
        file = f.read()
    
    try:
        clean_df = parse_filing(file)
    
        destination_file = os.path.join(destination_dir, ticker, leaf)

        destination_subdir = os.path.dirname(destination_file)

        if not os.path.isdir(destination_subdir):
            os.makedirs(destination_subdir)

        clean_df.to_csv(destination_file)
        print('Successfully parsed for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        return clean_df
        
    except Exception as e:
        print('Failed parse for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        print(e)
        print('\n')

In [8]:
%load_ext line_profiler

In [8]:
%lprun -f parse_filing process_filing(10)

UsageError: Cell magic `%%lprun` not found (But line magic `%lprun` exists, did you mean that instead?).


In [10]:
%lprun -f process_filing process_filing(10)

Successfully parsed for file data/10k_raw/AEE/0001445305-13-000414.txt and ticker AEE


# Just doing it as a for loop

In [9]:
%%timeit
for i in range(10):
    ticker = metadata.iloc[i]['TICKER']
    leaf = metadata.iloc[i]['EDGAR_LINK'].split("/")[-1]
    
    input_file_name = os.path.join(input_dir,
                             ticker, leaf)
    with open(input_file_name, 'r', encoding='utf-8') as f:
        file = f.read()
    
    try:
        clean_df = parse_filing(file)
    
        destination_file = os.path.join(destination_dir, ticker, leaf)

        destination_subdir = os.path.dirname(destination_file)

        if not os.path.isdir(destination_subdir):
            os.makedirs(destination_subdir)

        clean_df.to_csv(destination_file)
        print('Successfully parsed for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        
    except Exception as e:
        print('Failed parse for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        print(e)
        print('\n')

Successfully parsed for file data/10k_raw/HSIC/0001000228-13-000010.txt and ticker HSIC
❌ Only 3 sections found for file
Successfully parsed for file data/10k_raw/NVAX/0001144204-13-014545.txt and ticker NVAX
Successfully parsed for file data/10k_raw/WAT/0001193125-13-076804.txt and ticker WAT
Successfully parsed for file data/10k_raw/NSP/0001140361-13-005779.txt and ticker NSP
Successfully parsed for file data/10k_raw/DISH/0001104659-13-011967.txt and ticker DISH
Successfully parsed for file data/10k_raw/SGMO/0001193125-13-076887.txt and ticker SGMO
❌ Only 2 sections found for file
Successfully parsed for file data/10k_raw/TGTX/0001144204-13-016691.txt and ticker TGTX
Successfully parsed for file data/10k_raw/NWPX/0001193125-13-112587.txt and ticker NWPX
Successfully parsed for file data/10k_raw/SCCO/0001104659-13-015720.txt and ticker SCCO
Successfully parsed for file data/10k_raw/INSG/0001144204-13-013543.txt and ticker INSG
Successfully parsed for file data/10k_raw/HSIC/0001000228-

# Multiprocessing with Pool

In [10]:
%%timeit
with Pool(processes=4) as pool:
    pool.map(process_filing, range(10))

❌ Only 3 sections found for file
Successfully parsed for file data/10k_raw/NVAX/0001144204-13-014545.txt and ticker NVAX
Successfully parsed for file data/10k_raw/WAT/0001193125-13-076804.txt and ticker WAT
Successfully parsed for file data/10k_raw/NSP/0001140361-13-005779.txt and ticker NSP
❌ Only 2 sections found for file
Successfully parsed for file data/10k_raw/TGTX/0001144204-13-016691.txt and ticker TGTX
Successfully parsed for file data/10k_raw/HSIC/0001000228-13-000010.txt and ticker HSIC
Successfully parsed for file data/10k_raw/SGMO/0001193125-13-076887.txt and ticker SGMO
Successfully parsed for file data/10k_raw/INSG/0001144204-13-013543.txt and ticker INSG
Successfully parsed for file data/10k_raw/NWPX/0001193125-13-112587.txt and ticker NWPX
Successfully parsed for file data/10k_raw/DISH/0001104659-13-011967.txt and ticker DISH
Successfully parsed for file data/10k_raw/SCCO/0001104659-13-015720.txt and ticker SCCO
❌ Only 3 sections found for file
Successfully parsed for f

# Cythonized

In [1]:
%load_ext Cython

In [10]:
%%cython

import pandas as pd
import cython
import os
import re
import json
from bs4 import BeautifulSoup
from multiprocessing import Pool

os.chdir('/mnt/d/workspace/8-2/Financial-Statements-Text-Analysis/')

# params
with open('config.json', 'r') as f:
    c = json.load(f)
input_dir = os.path.join(c['DATA_DIR'], '10k_raw')
destination_dir = os.path.join(c['DATA_DIR'], '10k_clean')

metadata = pd.read_csv(os.path.join(c['DATA_DIR'], 'metadata.csv'))
metadata_legacy = pd.read_csv(os.path.join(c['DATA_DIR'], 'metadata_2017.csv'))

# only download the data from russell 3000 today
metadata = metadata_legacy[metadata_legacy['TICKER'].isin(metadata['ticker'])]

def parse_filing(text):
    regex_10k = re.search(r"(?s)(?m)<TYPE>10-K.*?(</TEXT>)", text)
    regex_items = re.compile(r"(> +Item|>Item|^Item|ITEM)((\s|&#160;|&nbsp;)(1A|1B|7A|7|8))")
    
    try: 
        extracted_text = regex_10k.group(0)
    except:
        print('❌ No 10-K match for file')
    
    item_matches = regex_items.finditer(extracted_text)
    matches_df = pd.DataFrame([(x.group(), x.start()) for x in item_matches])
    matches_df.columns = ['item', 'start']
    matches_df['item'] = matches_df['item'].str.lower().replace('>|\.| |&#160;|&nbsp|;', '', regex=True)
    matches_df = matches_df.sort_values('start').drop_duplicates(subset=['item'], keep='last')
    matches_df['end'] = matches_df['start'].shift(-1).fillna(0).astype(int)
    matches_df = matches_df[matches_df['item'].isin(['item1a', 'item1b', 'item7', 'item7a'])]
    if len(matches_df) != 4:
        print('❌ Only {sections} sections found for file'.format(sections=len(matches_df)))
    matches_df['text'] = matches_df\
        .apply(lambda row: BeautifulSoup(extracted_text[row['start']:row['end']], 'lxml').get_text('\n\n'), axis=1)
    
    return matches_df

def process_filing(i):
    ticker = metadata.iloc[i]['TICKER']
    leaf = metadata.iloc[i]['EDGAR_LINK'].split("/")[-1]
    
    input_file_name = os.path.join(input_dir,
                             ticker, leaf)
    with open(input_file_name, 'r', encoding='utf-8') as f:
        file = f.read()
    
    try:
        clean_df = parse_filing(file)
    
        destination_file = os.path.join(destination_dir, ticker, leaf)

        destination_subdir = os.path.dirname(destination_file)

        if not os.path.isdir(destination_subdir):
            os.makedirs(destination_subdir)

        clean_df.to_csv(destination_file)
        print('Successfully parsed for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        
    except Exception as e:
        print('Failed parse for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        print(e)
        print('\n')

In [6]:
%%timeit 
with Pool(processes=4) as pool:
    pool.map(process_filing, range(10))

❌ Only 3 sections found for file
Successfully parsed for file data/10k_raw/NVAX/0001144204-13-014545.txt and ticker NVAX
Successfully parsed for file data/10k_raw/NSP/0001140361-13-005779.txt and ticker NSP
Successfully parsed for file data/10k_raw/WAT/0001193125-13-076804.txt and ticker WAT
Successfully parsed for file data/10k_raw/HSIC/0001000228-13-000010.txt and ticker HSIC
❌ Only 2 sections found for file
Successfully parsed for file data/10k_raw/SGMO/0001193125-13-076887.txt and ticker SGMO
Successfully parsed for file data/10k_raw/TGTX/0001144204-13-016691.txt and ticker TGTX
Successfully parsed for file data/10k_raw/NWPX/0001193125-13-112587.txt and ticker NWPX
Successfully parsed for file data/10k_raw/INSG/0001144204-13-013543.txt and ticker INSG
Successfully parsed for file data/10k_raw/DISH/0001104659-13-011967.txt and ticker DISH
Successfully parsed for file data/10k_raw/SCCO/0001104659-13-015720.txt and ticker SCCO
❌ Only 3 sections found for file
Successfully parsed for f

In [7]:
%%timeit
for i in range(10):
    ticker = metadata.iloc[i]['TICKER']
    leaf = metadata.iloc[i]['EDGAR_LINK'].split("/")[-1]
    
    input_file_name = os.path.join(input_dir,
                             ticker, leaf)
    with open(input_file_name, 'r', encoding='utf-8') as f:
        file = f.read()
    
    try:
        clean_df = parse_filing(file)
    
        destination_file = os.path.join(destination_dir, ticker, leaf)

        destination_subdir = os.path.dirname(destination_file)

        if not os.path.isdir(destination_subdir):
            os.makedirs(destination_subdir)

        clean_df.to_csv(destination_file)
        print('Successfully parsed for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        
    except Exception as e:
        print('Failed parse for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        print(e)
        print('\n')

Successfully parsed for file data/10k_raw/HSIC/0001000228-13-000010.txt and ticker HSIC
❌ Only 3 sections found for file
Successfully parsed for file data/10k_raw/NVAX/0001144204-13-014545.txt and ticker NVAX
Successfully parsed for file data/10k_raw/WAT/0001193125-13-076804.txt and ticker WAT
Successfully parsed for file data/10k_raw/NSP/0001140361-13-005779.txt and ticker NSP
Successfully parsed for file data/10k_raw/DISH/0001104659-13-011967.txt and ticker DISH
Successfully parsed for file data/10k_raw/SGMO/0001193125-13-076887.txt and ticker SGMO
❌ Only 2 sections found for file
Successfully parsed for file data/10k_raw/TGTX/0001144204-13-016691.txt and ticker TGTX
Successfully parsed for file data/10k_raw/NWPX/0001193125-13-112587.txt and ticker NWPX
Successfully parsed for file data/10k_raw/SCCO/0001104659-13-015720.txt and ticker SCCO
Successfully parsed for file data/10k_raw/INSG/0001144204-13-013543.txt and ticker INSG
Successfully parsed for file data/10k_raw/HSIC/0001000228-

# Cython with declarations

In [17]:
%%cython

import pandas as pd
import cython
import os
import re
import json
from bs4 import BeautifulSoup
from multiprocessing import Pool

os.chdir('/mnt/d/workspace/8-2/Financial-Statements-Text-Analysis/')

# params
with open('config.json', 'r') as f:
    c = json.load(f)
input_dir = os.path.join(c['DATA_DIR'], '10k_raw')
destination_dir = os.path.join(c['DATA_DIR'], '10k_clean')

metadata = pd.read_csv(os.path.join(c['DATA_DIR'], 'metadata.csv'))
metadata_legacy = pd.read_csv(os.path.join(c['DATA_DIR'], 'metadata_2017.csv'))

# only download the data from russell 3000 today
metadata = metadata_legacy[metadata_legacy['TICKER'].isin(metadata['ticker'])]

@cython.boundscheck(False)
@cython.wraparound(False)
def parse_filing_cython(text):
    cdef str extracted_text
    cdef int start, end, sections
    cdef Py_ssize_t i, n
    
    regex_10k = re.compile(r"(?s)(?m)<TYPE>10-K.*?(</TEXT>)")
    regex_items = re.compile(r"(> +Item|>Item|^Item|ITEM)((\s|&#160;|&nbsp;)(1A|1B|7A|7|8))")
    
    try:
        extracted_text = regex_10k.search(text).group(0)
    except:
        print('❌ No 10-K match for file')
    
    item_matches = regex_items.finditer(extracted_text)
    matches_df = pd.DataFrame([(x.group(), x.start()) for x in item_matches])
    matches_df.columns = ['item', 'start']
    matches_df['item'] = matches_df['item'].str.lower().replace('>|\.| |&#160;|&nbsp|;', '', regex=True)
    matches_df = matches_df.sort_values('start').drop_duplicates(subset=['item'], keep='last')
    matches_df['end'] = matches_df['start'].shift(-1).fillna(0).astype(int)
    matches_df = matches_df[matches_df['item'].isin(['item1a', 'item1b', 'item7', 'item7a'])]
    n = len(matches_df)
    if n != 4:
        print('❌ Only {sections} sections found for file'.format(sections=n))
    for i in range(n):
        start = matches_df.iloc[i]['start']
        end = matches_df.iloc[i+1]['start'] if i+1<n else 0
        matches_df.at[i, 'text'] = BeautifulSoup(extracted_text[start:end], 'lxml').get_text('\n\n')
    
    return matches_df

def process_filing(i):
    ticker = metadata.iloc[i]['TICKER']
    leaf = metadata.iloc[i]['EDGAR_LINK'].split("/")[-1]
    
    input_file_name = os.path.join(input_dir,
                             ticker, leaf)
    with open(input_file_name, 'r', encoding='utf-8') as f:
        file = f.read()
    
    try:
        clean_df = parse_filing_cython(file)
    
        destination_file = os.path.join(destination_dir, ticker, leaf)

        destination_subdir = os.path.dirname(destination_file)

        if not os.path.isdir(destination_subdir):
            os.makedirs(destination_subdir)

        clean_df.to_csv(destination_file)
        print('Successfully parsed for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        
    except Exception as e:
        print('Failed parse for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        print(e)
        print('\n')
        


In [16]:
%%timeit
with Pool(processes=4) as pool:
    pool.map(process_filing, range(10))

❌ Only 3 sections found for file
Successfully parsed for file data/10k_raw/NVAX/0001144204-13-014545.txt and ticker NVAX
Successfully parsed for file data/10k_raw/NSP/0001140361-13-005779.txt and ticker NSP
Successfully parsed for file data/10k_raw/WAT/0001193125-13-076804.txt and ticker WAT
Successfully parsed for file data/10k_raw/HSIC/0001000228-13-000010.txt and ticker HSIC
❌ Only 2 sections found for file
Successfully parsed for file data/10k_raw/SGMO/0001193125-13-076887.txt and ticker SGMO
Successfully parsed for file data/10k_raw/TGTX/0001144204-13-016691.txt and ticker TGTX
Successfully parsed for file data/10k_raw/NWPX/0001193125-13-112587.txt and ticker NWPX
Successfully parsed for file data/10k_raw/INSG/0001144204-13-013543.txt and ticker INSG
Successfully parsed for file data/10k_raw/DISH/0001104659-13-011967.txt and ticker DISH
Successfully parsed for file data/10k_raw/SCCO/0001104659-13-015720.txt and ticker SCCO
❌ Only 3 sections found for file
Successfully parsed for f

# Batch all files to process

In [None]:
for i in range(len(metadata)):
    ticker = metadata.iloc[i]['TICKER']
    leaf = metadata.iloc[i]['EDGAR_LINK'].split("/")[-1]
    
    input_file_name = os.path.join(input_dir,
                             ticker, leaf)
    with open(input_file_name, 'r', encoding='utf-8') as f:
        file = f.read()
    
    try:
        clean_df = parse_filing(file)
    
        destination_file = os.path.join(destination_dir, ticker, leaf)

        destination_subdir = os.path.dirname(destination_file)

        if not os.path.isdir(destination_subdir):
            os.makedirs(destination_subdir)

        clean_df.to_csv(destination_file, index=False)
        print('Successfully parsed for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        
    except Exception as e:
        print('Failed parse for file {} and ticker {}'.format(input_file_name, metadata.iloc[i]['TICKER']))
        print(e)
        print('\n')

Successfully parsed for file data/10k_raw/HSIC/0001000228-13-000010.txt and ticker HSIC
❌ Only 3 sections found for file
Successfully parsed for file data/10k_raw/NVAX/0001144204-13-014545.txt and ticker NVAX
Successfully parsed for file data/10k_raw/WAT/0001193125-13-076804.txt and ticker WAT
Successfully parsed for file data/10k_raw/NSP/0001140361-13-005779.txt and ticker NSP
Successfully parsed for file data/10k_raw/DISH/0001104659-13-011967.txt and ticker DISH
Successfully parsed for file data/10k_raw/SGMO/0001193125-13-076887.txt and ticker SGMO
❌ Only 2 sections found for file
Successfully parsed for file data/10k_raw/TGTX/0001144204-13-016691.txt and ticker TGTX
Successfully parsed for file data/10k_raw/NWPX/0001193125-13-112587.txt and ticker NWPX
Successfully parsed for file data/10k_raw/SCCO/0001104659-13-015720.txt and ticker SCCO
Successfully parsed for file data/10k_raw/INSG/0001144204-13-013543.txt and ticker INSG
Successfully parsed for file data/10k_raw/AEE/0001445305-1

Successfully parsed for file data/10k_raw/MBWM/0001193125-13-106327.txt and ticker MBWM
Successfully parsed for file data/10k_raw/PDM/0001042776-13-000009.txt and ticker PDM
Successfully parsed for file data/10k_raw/DRQ/0001193125-13-081240.txt and ticker DRQ
Successfully parsed for file data/10k_raw/NLY/0001157523-13-001038.txt and ticker NLY
Successfully parsed for file data/10k_raw/CHRW/0001043277-13-000004.txt and ticker CHRW
Successfully parsed for file data/10k_raw/SRI/0001144204-13-014041.txt and ticker SRI
Successfully parsed for file data/10k_raw/SAH/0001193125-13-127041.txt and ticker SAH
Successfully parsed for file data/10k_raw/JNPR/0001043604-13-000022.txt and ticker JNPR
Successfully parsed for file data/10k_raw/EPR/0001045450-13-000019.txt and ticker EPR
Successfully parsed for file data/10k_raw/BAX/0001193125-13-069609.txt and ticker BAX
Successfully parsed for file data/10k_raw/NVDA/0001045810-13-000008.txt and ticker NVDA
Successfully parsed for file data/10k_raw/HFWA

Successfully parsed for file data/10k_raw/LAMR/0001193125-13-084458.txt and ticker LAMR
Successfully parsed for file data/10k_raw/UPS/0001090727-13-000005.txt and ticker UPS
Successfully parsed for file data/10k_raw/CHTR/0001091667-13-000020.txt and ticker CHTR
Successfully parsed for file data/10k_raw/CIR/0001091883-13-000005.txt and ticker CIR
Successfully parsed for file data/10k_raw/WWE/0001445305-13-000441.txt and ticker WWE
Successfully parsed for file data/10k_raw/SPSC/0001193125-13-093885.txt and ticker SPSC
Successfully parsed for file data/10k_raw/DXCM/0001193125-13-069590.txt and ticker DXCM
Successfully parsed for file data/10k_raw/ZION/0000109380-13-000081.txt and ticker ZION
Successfully parsed for file data/10k_raw/TDY/0001094285-13-000064.txt and ticker TDY
Successfully parsed for file data/10k_raw/BGCP/0001193125-13-101758.txt and ticker BGCP
Successfully parsed for file data/10k_raw/RE/0001095073-13-000015.txt and ticker RE
Successfully parsed for file data/10k_raw/HS

Successfully parsed for file data/10k_raw/VRNT/0001166388-13-000056.txt and ticker VRNT
Successfully parsed for file data/10k_raw/CMCSA/0001193125-13-067658.txt and ticker CMCSA
Successfully parsed for file data/10k_raw/WTBA/0001166928-13-000019.txt and ticker WTBA
Successfully parsed for file data/10k_raw/NE/0001193125-13-074360.txt and ticker NE
Successfully parsed for file data/10k_raw/CPSI/0001193125-13-101203.txt and ticker CPSI
Successfully parsed for file data/10k_raw/BANC/0001193125-13-132678.txt and ticker BANC
Successfully parsed for file data/10k_raw/DCT/0001193125-13-069475.txt and ticker DCT
Successfully parsed for file data/10k_raw/SAFT/0001047469-13-002988.txt and ticker SAFT
Successfully parsed for file data/10k_raw/HA/0001047469-13-000781.txt and ticker HA
Successfully parsed for file data/10k_raw/CEVA/0001193125-13-109922.txt and ticker CEVA
Successfully parsed for file data/10k_raw/HY/0001173514-13-000022.txt and ticker HY
❌ Only 1 sections found for file
Successfull

❌ Only 2 sections found for file
Successfully parsed for file data/10k_raw/DUK/0001326160-13-000009.txt and ticker DUK
Successfully parsed for file data/10k_raw/WDAY/0001193125-13-122351.txt and ticker WDAY
Successfully parsed for file data/10k_raw/BCC/0001328581-13-000015.txt and ticker BCC
Successfully parsed for file data/10k_raw/HOMB/0001193125-13-089830.txt and ticker HOMB
Successfully parsed for file data/10k_raw/BKD/0001332349-13-000012.txt and ticker BKD
Successfully parsed for file data/10k_raw/CROX/0001193125-13-072829.txt and ticker CROX
Successfully parsed for file data/10k_raw/Z/0001193125-13-071010.txt and ticker Z
Successfully parsed for file data/10k_raw/CCO/0001334978-13-000006.txt and ticker CCO
Successfully parsed for file data/10k_raw/LYV/0001193125-13-077102.txt and ticker LYV
❌ Only 3 sections found for file
Successfully parsed for file data/10k_raw/APP/0001336545-13-000009.txt and ticker APP
Successfully parsed for file data/10k_raw/LDOS/0001193125-13-127240.txt 

Successfully parsed for file data/10k_raw/APO/0001193125-13-087670.txt and ticker APO
Successfully parsed for file data/10k_raw/MOFG/0001412665-13-000031.txt and ticker MOFG
Successfully parsed for file data/10k_raw/PM/0001413329-13-000019.txt and ticker PM
Successfully parsed for file data/10k_raw/SATS/0001104659-13-011985.txt and ticker SATS
❌ Only 3 sections found for file
Successfully parsed for file data/10k_raw/CRWD/0001144204-13-002844.txt and ticker CRWD
Successfully parsed for file data/10k_raw/APLE/0001185185-13-000452.txt and ticker APLE
Successfully parsed for file data/10k_raw/IRDM/0001144204-13-012838.txt and ticker IRDM
Successfully parsed for file data/10k_raw/IPI/0001421461-13-000007.txt and ticker IPI
Successfully parsed for file data/10k_raw/ERII/0001437749-13-002691.txt and ticker ERII
Successfully parsed for file data/10k_raw/AGNC/0001423689-13-000007.txt and ticker AGNC
Successfully parsed for file data/10k_raw/BMY/0001193125-13-061678.txt and ticker BMY
Successfu

Successfully parsed for file data/10k_raw/ALEX/0001545654-13-000010.txt and ticker ALEX
Successfully parsed for file data/10k_raw/BLMN/0001546417-13-000038.txt and ticker BLMN
Successfully parsed for file data/10k_raw/SSTK/0001047469-13-002091.txt and ticker SSTK
Successfully parsed for file data/10k_raw/ABBV/0001047469-13-002827.txt and ticker ABBV
Successfully parsed for file data/10k_raw/ETN/0001551182-13-000005.txt and ticker ETN
Successfully parsed for file data/10k_raw/ZTS/0001555280-13-000008.txt and ticker ZTS
Successfully parsed for file data/10k_raw/PRTA/0001193125-13-133056.txt and ticker PRTA
Successfully parsed for file data/10k_raw/MTZ/0000015615-13-000028.txt and ticker MTZ
Successfully parsed for file data/10k_raw/TPH/0001193125-13-132645.txt and ticker TPH
Successfully parsed for file data/10k_raw/INBK/0001144204-13-018060.txt and ticker INBK
Successfully parsed for file data/10k_raw/ABT/0001047469-13-001180.txt and ticker ABT
Successfully parsed for file data/10k_raw/

Successfully parsed for file data/10k_raw/CTBI/0000350852-13-000041.txt and ticker CTBI
Successfully parsed for file data/10k_raw/SEIC/0000350894-13-000006.txt and ticker SEIC
Successfully parsed for file data/10k_raw/ABCB/0001193125-13-087270.txt and ticker ABCB
Successfully parsed for file data/10k_raw/STKL/0001062993-13-001081.txt and ticker STKL
Successfully parsed for file data/10k_raw/LNT/0000352541-13-000007.txt and ticker LNT
Successfully parsed for file data/10k_raw/UHS/0001193125-13-084679.txt and ticker UHS
Successfully parsed for file data/10k_raw/QDEL/0001193125-13-071645.txt and ticker QDEL
Successfully parsed for file data/10k_raw/AJG/0001193125-13-046126.txt and ticker AJG
Successfully parsed for file data/10k_raw/CVBF/0001193125-13-087822.txt and ticker CVBF
Successfully parsed for file data/10k_raw/HE/0001104659-13-011461.txt and ticker HE
Successfully parsed for file data/10k_raw/HD/0000354950-13-000008.txt and ticker HD
Successfully parsed for file data/10k_raw/SHEN

Successfully parsed for file data/10k_raw/ALE/0000066756-13-000030.txt and ticker ALE
Successfully parsed for file data/10k_raw/APA/0001193125-13-085083.txt and ticker APA
Successfully parsed for file data/10k_raw/MDU/0000067716-13-000021.txt and ticker MDU
Successfully parsed for file data/10k_raw/MSI/0001445305-13-000205.txt and ticker MSI
Successfully parsed for file data/10k_raw/MYE/0000069488-13-000004.txt and ticker MYE
Successfully parsed for file data/10k_raw/FULT/0000700564-13-000012.txt and ticker FULT
Successfully parsed for file data/10k_raw/FMBH/0000700565-13-000007.txt and ticker FMBH
Successfully parsed for file data/10k_raw/MYRG/0001047469-13-002282.txt and ticker MYRG
Successfully parsed for file data/10k_raw/CI/0001047469-13-001925.txt and ticker CI
Successfully parsed for file data/10k_raw/ATRI/0001157523-13-001345.txt and ticker ATRI
Successfully parsed for file data/10k_raw/CPF/0001140361-13-009841.txt and ticker CPF
Successfully parsed for file data/10k_raw/NSC/00

Successfully parsed for file data/10k_raw/NEE/0000753308-13-000023.txt and ticker NEE
Successfully parsed for file data/10k_raw/PCAR/0001193125-13-080215.txt and ticker PCAR
Successfully parsed for file data/10k_raw/ARW/0001526508-13-000004.txt and ticker ARW
Successfully parsed for file data/10k_raw/PCG/0001004980-13-000013.txt and ticker PCG
Successfully parsed for file data/10k_raw/UTL/0001193125-13-029141.txt and ticker UTL
Successfully parsed for file data/10k_raw/PKG/0001193125-13-083569.txt and ticker PKG
❌ Only 3 sections found for file
Successfully parsed for file data/10k_raw/BANF/0001193125-13-112436.txt and ticker BANF
Successfully parsed for file data/10k_raw/BPOP/0001193125-13-084900.txt and ticker BPOP
Successfully parsed for file data/10k_raw/CLF/0000764065-13-000003.txt and ticker CLF
Successfully parsed for file data/10k_raw/MO/0000764180-13-000024.txt and ticker MO
Successfully parsed for file data/10k_raw/PNW/0001104659-13-012982.txt and ticker PNW
Successfully pars

Successfully parsed for file data/10k_raw/SASR/0001144204-13-015676.txt and ticker SASR
Successfully parsed for file data/10k_raw/DELL/0000826083-13-000005.txt and ticker DELL
Successfully parsed for file data/10k_raw/ORRF/0001193125-13-093432.txt and ticker ORRF
Successfully parsed for file data/10k_raw/DX/0000826675-13-000006.txt and ticker DX
Successfully parsed for file data/10k_raw/EIX/0000827052-13-000024.txt and ticker EIX
Successfully parsed for file data/10k_raw/RBC/0000082811-13-000008.txt and ticker RBC
Successfully parsed for file data/10k_raw/WSFS/0001193125-13-112559.txt and ticker WSFS
Failed parse for file data/10k_raw/C/0001206774-13-000852.txt and ticker C
Length mismatch: Expected axis has 0 elements, new values have 2 elements


❌ Only 2 sections found for file
Successfully parsed for file data/10k_raw/FCX/0000831259-13-000014.txt and ticker FCX
Successfully parsed for file data/10k_raw/IEX/0001193125-13-069436.txt and ticker IEX
Successfully parsed for file data/10

Successfully parsed for file data/10k_raw/ICUI/0000883984-13-000016.txt and ticker ICUI
Successfully parsed for file data/10k_raw/VVI/0001193125-13-101356.txt and ticker VVI
Successfully parsed for file data/10k_raw/OFIX/0001193125-13-087561.txt and ticker OFIX
Successfully parsed for file data/10k_raw/ACT/0001193125-13-082059.txt and ticker ACT
Successfully parsed for file data/10k_raw/RCL/0001047469-13-001567.txt and ticker RCL
Successfully parsed for file data/10k_raw/CACC/0000885550-13-000018.txt and ticker CACC
Successfully parsed for file data/10k_raw/KSS/0000885639-13-000004.txt and ticker KSS
Successfully parsed for file data/10k_raw/BSX/0000885725-13-000007.txt and ticker BSX
Successfully parsed for file data/10k_raw/USPH/0001193125-13-102877.txt and ticker USPH
Successfully parsed for file data/10k_raw/FCEL/0000886128-13-000002.txt and ticker FCEL
Successfully parsed for file data/10k_raw/LGND/0000886163-13-000025.txt and ticker LGND
Successfully parsed for file data/10k_raw/

Successfully parsed for file data/10k_raw/EMN/0000915389-13-000010.txt and ticker EMN
Successfully parsed for file data/10k_raw/KEY/0001193125-13-076993.txt and ticker KEY
❌ Only 1 sections found for file
Successfully parsed for file data/10k_raw/AVB/0001047469-13-001516.txt and ticker AVB
Successfully parsed for file data/10k_raw/ALB/0001193125-13-062728.txt and ticker ALB
Successfully parsed for file data/10k_raw/MLM/0001193125-13-070711.txt and ticker MLM
❌ Only 3 sections found for file
Successfully parsed for file data/10k_raw/TSCO/0000916365-13-000006.txt and ticker TSCO
Successfully parsed for file data/10k_raw/DAR/0000916540-13-000006.txt and ticker DAR
Failed parse for file data/10k_raw/ADC/0001144204-13-014094.txt and ticker ADC
Length mismatch: Expected axis has 0 elements, new values have 2 elements


Successfully parsed for file data/10k_raw/RMBS/0000917273-13-000009.txt and ticker RMBS
Successfully parsed for file data/10k_raw/FARO/0001193125-13-080082.txt and ticker FARO

Successfully parsed for file data/10k_raw/TGI/0001021162-13-000018.txt and ticker TGI
Successfully parsed for file data/10k_raw/PLUS/0001022408-13-000025.txt and ticker PLUS
Successfully parsed for file data/10k_raw/EGHT/0001136261-13-000259.txt and ticker EGHT
Successfully parsed for file data/10k_raw/BFLY/0001133796-13-000038.txt and ticker BFLY
Successfully parsed for file data/10k_raw/RL/0001037038-13-000009.txt and ticker RL
Successfully parsed for file data/10k_raw/PETS/0001188112-13-001701.txt and ticker PETS
Successfully parsed for file data/10k_raw/SCS/0001050825-13-000083.txt and ticker SCS
Successfully parsed for file data/10k_raw/FBP/0001193125-13-136825.txt and ticker FBP
Successfully parsed for file data/10k_raw/NTCT/0001193125-13-235038.txt and ticker NTCT
Successfully parsed for file data/10k_raw/AMEH/0001144204-13-025748.txt and ticker AMEH
❌ Only 2 sections found for file
Successfully parsed for file data/10k_raw/WRLD/0000108385-13-000022.txt and ticker WRLD
Successfu

Successfully parsed for file data/10k_raw/URBN/0001193125-13-136738.txt and ticker URBN
Successfully parsed for file data/10k_raw/SJM/0001193125-13-266625.txt and ticker SJM
Successfully parsed for file data/10k_raw/HELE/0001104659-13-034243.txt and ticker HELE
Successfully parsed for file data/10k_raw/EXP/0001193125-13-235755.txt and ticker EXP
Successfully parsed for file data/10k_raw/MCK/0001561787-13-000010.txt and ticker MCK
Successfully parsed for file data/10k_raw/TTWO/0001047469-13-006006.txt and ticker TTWO
Successfully parsed for file data/10k_raw/TDW/0001193125-13-229622.txt and ticker TDW
Successfully parsed for file data/10k_raw/TRNS/0001171843-13-002603.txt and ticker TRNS
Successfully parsed for file data/10k_raw/EL/0001104659-13-065820.txt and ticker EL
Successfully parsed for file data/10k_raw/COTY/0000930413-13-004596.txt and ticker COTY
Successfully parsed for file data/10k_raw/OSIS/0001047469-13-008515.txt and ticker OSIS
Successfully parsed for file data/10k_raw/FD