In [29]:
import requests
from edgar_functions import *
from lxml import etree

ticker = "wsm"

In [9]:
accession_num_series = get_10K_accessionNumbers_for_ticker(ticker)

In [10]:
filing = get_specific_filing_using_accessionNumber(ticker, accession_num_series[0])

In [11]:
filing = pd.read_html(filing)

In [67]:
def filter_accession_number_index_page(df, column_name, search_term):
    fitered_df = df[df[column_name].str.contains(search_term, case=False, na=False)]
    filtered_list = fitered_df[column_name].tolist()
    return filtered_list

In [66]:
def rename_statement(statement, label_dictionary):
    statement.index = statement.index.map(lambda x: label_dictionary.get(x, x))
    return statement

In [72]:
def rename_with_all_statement_labels(statement, ticker):
    accession_num_series = get_10K_accessionNumbers_for_ticker(ticker)
    for accession_num in accession_num_series:
        label_dict = get_label_dictionary(ticker, accession_num)
        if label_dict == None:
            break
        statement = rename_statement(statement, label_dict)
        print("Renamed with accession number: " + accession_num)
    return statement

In [64]:
def get_schema_document_links(ticker, accession_number):
    cik = get_cik_matching_ticker(ticker)
    headers = {"User-Agent": "russ@sunriseanalysis.com"}
    base_url = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accession_number}"
    requested_content = requests.get(base_url, headers=headers).content.decode("utf-8")
    filing = pd.read_html(requested_content)
    defenition_link = filter_accession_number_index_page(filing[0], "Name", "_def.xml")
    presentation_link = filter_accession_number_index_page(
        filing[0], "Name", "_pre.xml"
    )
    label_link = filter_accession_number_index_page(filing[0], "Name", "_lab.xml")
    defenition_link = f"{base_url}/{defenition_link[0]}"
    presentation_link = f"{base_url}/{presentation_link[0]}"
    label_link = f"{base_url}/{label_link[0]}"
    return defenition_link, presentation_link, label_link

In [65]:
def get_label_dictionary(ticker, accession_number):
    cik = get_cik_matching_ticker(ticker)
    headers = {"User-Agent": "russ@sunriseanalysis.com"}
    base_url = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accession_number}"
    requested_content = requests.get(base_url, headers=headers).content.decode("utf-8")
    filing = pd.read_html(requested_content)
    try:
        label_link = filter_accession_number_index_page(filing[0], "Name", "_lab.xml")
    except:
        return None
    label_link = f"{base_url}/{label_link[0]}"
    label_content = requests.get(label_link, headers=headers).content
    tree_lab = etree.fromstring(label_content)
    namespaces = tree_lab.nsmap
    label_dictionary = {}
    for label in tree_lab.xpath("//link:label", namespaces=namespaces):
        gaap_fact = label.get("{http://www.w3.org/1999/xlink}label", None).split(
            "_", 1
        )[-1]
        human_readable_label = label.text
        label_dictionary[gaap_fact] = human_readable_label
    return label_dictionary

In [33]:
def_link, pre_link, lab_link = get_schema_document_links(
    ticker, accession_num_series[0]
)
print(def_link)
print(lab_link)

https://www.sec.gov/Archives/edgar/data/0000719955/000162828023009175/wsm-20230129_def.xml
https://www.sec.gov/Archives/edgar/data/0000719955/000162828023009175/wsm-20230129_lab.xml


In [58]:
label_dict = get_label_dictionary(ticker, accession_num_series[0])

In [68]:
balance_sheet = pd.read_csv("quarterly&annual_results/wsm/balance_sheet_annual.csv")

In [69]:
balance_sheet = balance_sheet.set_index("Unnamed: 0")

In [73]:
balance_sheet = rename_with_all_statement_labels(balance_sheet, ticker)

Renamed with accession number: 000162828023009175
Renamed with accession number: 000162828022007494


TypeError: empty namespace prefix is not supported in XPath

In [61]:
balance_sheet_single_rename = rename_statement(balance_sheet, label_dict)

In [74]:
balance_sheet

Unnamed: 0_level_0,2023-01-29,2022-01-30,2021-01-31,2020-02-02,2019-02-03,2018-01-28,2017-01-29,2016-01-31,2015-02-01,2014-02-02,2013-02-03,2012-01-29,2011-01-30
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
"Assets, Current [Abstract]",,,,,,,,,,,,,
"Cash and Cash Equivalents, at Carrying Value",367344.0,850338.0,1200337.0,432162.0,338954.0,390136.0,213713.0,193647.0,222927.0,330121.0,424555.0,502757.0,628403000.0
"Receivables, Net, Current",115685.0,131683.0,143728.0,111737.0,107102.0,90119.0,88803.0,79304.0,67465.0,60330.0,62985.0,45961.0,41565000.0
"Inventory, Net",1456123.0,1246372.0,1006299.0,1100544.0,1124992.0,1061593.0,977505.0,978138.0,887701.0,813160.0,640024.0,553461.0,513381000.0
"Prepaid Expense, Current",64961.0,69252.0,93822.0,90426.0,101356.0,62204.0,52882.0,44654.0,36265.0,35309.0,26339.0,24188.0,21120000.0
"Other Assets, Current",31967.0,26249.0,22894.0,20766.0,21939.0,11876.0,10652.0,11438.0,13005.0,10852.0,9819.0,9229.0,8176000.0
"Assets, Current",2036080.0,2323894.0,2467080.0,1755635.0,1694343.0,1636445.0,1367180.0,1336100.0,1391923.0,1419103.0,1316772.0,1276366.0,1347594000.0
"Property, Plant and Equipment, Net",1065381.0,920773.0,873894.0,929038.0,929635.0,932283.0,923283.0,886813.0,883012.0,849293.0,812037.0,734672.0,730556000.0
"Operating Lease, Right-of-Use Asset",1286452.0,1132764.0,1086009.0,1166383.0,,,,,,,,,
"Deferred Income Tax Assets, Net",81389.0,56585.0,61854.0,47977.0,,,,,,,,,


In [25]:
label_content = requests.get(lab_link, headers=headers).content.decode("utf-8")

In [28]:
BeautifulSoup(label_content, "lxml")



<?xml version="1.0" encoding="UTF-8"?><!--XBRL Document Created with Wdesk from Workiva--><!--Copyright 2023 Workiva--><!--r:d3661e13-e911-471b-ab8c-d5ce438d064d,g:cc687c39-ccbe-4c39-8d80-2202045fd707--><html><body><link:linkbase xmlns:link="http://www.xbrl.org/2003/linkbase" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemalocation="http://www.xbrl.org/2003/linkbase http://www.xbrl.org/2003/xbrl-linkbase-2003-12-31.xsd">
<link:roleref roleuri="http://www.xbrl.org/2009/role/negatedPeriodStartLabel" xlink:href="http://www.xbrl.org/lrr/role/negated-2009-12-16.xsd#negatedPeriodStartLabel" xlink:type="simple"></link:roleref>
<link:roleref roleuri="http://www.xbrl.org/2009/role/netLabel" xlink:href="http://www.xbrl.org/lrr/role/net-2009-12-16.xsd#netLabel" xlink:type="simple"></link:roleref>
<link:roleref roleuri="http://www.xbrl.org/2009/role/negatedNetLabel" xlink:href="http://www.xbrl.org/lrr/role/negated-2009-12-16.xsd#negatedNet