In [1]:
# Run this cell so that the library's log statements become visible in the
# Jupyter cell output: the root logger is switched to level INFO.
import logging

logger = logging.getLogger()
logger.setLevel("INFO")

In [2]:
import pandas as pd

# Display settings: show every column, never cut off the content of a column,
# and allow output lines of up to 1000 characters.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None
pd.options.display.width = 1000

In [24]:
# Run the secfsdstools update process before any data is read.
# NOTE(review): judging by the log output, this checks for new report zip
# files, transforms them to parquet and indexes them - confirm against the
# library documentation.
from secfsdstools.update import update

update()

2024-08-07 06:34:30,159 [INFO] configmgt  reading configuration from C:\Users\hansj\.secfsdstools.cfg
2024-08-07 06:34:30,173 [INFO] updateprocess  Check if new report zip files are available...
2024-08-07 06:34:30,228 [INFO] updateprocess  check if there are new files to download from sec.gov ...
2024-08-07 06:34:30,925 [INFO] updateprocess  start to transform to parquet format ...
2024-08-07 06:34:30,938 [INFO] updateprocess  start to index parquet files ...


No rapid-api-key is set: 
If you are interested in daily updates, please have a look at https://rapidapi.com/hansjoerg.wingeier/api/daily-sec-financial-statement-dataset


In [3]:
from secfsdstools.e_collector.reportcollecting import SingleReportCollector
from secfsdstools.e_filter.rawfiltering import ReportPeriodAndPreviousPeriodRawFilter
from secfsdstools.e_presenter.presenting import StandardStatementPresenter

# unique identifier (adsh) of Apple's 10-Q report for Q2 2024
apple_10q_q2_2024_adsh = "0000320193-24-000069"

# Use a collector to grab the data of exactly this 10-Q report.
# No filter is applied at this point; filtering is done step by step below.
collector: SingleReportCollector = SingleReportCollector.get_report_by_adsh(adsh=apple_10q_q2_2024_adsh)

# collect() loads the data from the disk
rawdatabag = collector.collect()

2025-02-10 16:30:03,763 [INFO] configmgt  reading configuration from C:\Users\hansj\.secfsdstools.cfg
2025-02-10 16:30:04,435 [INFO] updateprocess  Launching data update process ...
2025-02-10 16:30:04,463 [INFO] task_framework  Starting process SecDownloadingProcess
2025-02-10 16:30:04,465 [INFO] secdownloading_process  reading table in main page: https://www.sec.gov/dera/data/financial-statement-data-sets.html
2025-02-10 16:30:06,060 [INFO] task_framework  Starting process ToParquetTransformerProcess
2025-02-10 16:30:06,065 [INFO] task_framework  Starting process ReportParquetIndexerProcess
2025-02-10 16:30:06,104 [INFO] configmgt  reading configuration from C:\Users\hansj\.secfsdstools.cfg


In [4]:
# pull the three dataframes (sub, num, pre) out of the raw data bag
sub_df, num_df, pre_df = rawdatabag.sub_df, rawdatabag.num_df, rawdatabag.pre_df

In [5]:
# Restrict all three dataframes to the rows of the selected report.
# The adsh constant defined together with the collector is reused instead of
# repeating the literal, so the report only has to be changed in one place.
# .copy() makes each result an independent frame, so later column assignments
# do not raise a SettingWithCopyWarning.
sub_df = sub_df[sub_df.adsh == apple_10q_q2_2024_adsh].copy()
num_df = num_df[num_df.adsh == apple_10q_q2_2024_adsh].copy()
pre_df = pre_df[pre_df.adsh == apple_10q_q2_2024_adsh].copy()

In [6]:
# (rows, columns) of sub_df, num_df and pre_df, one per line
print(sub_df.shape, num_df.shape, pre_df.shape, sep="\n")

(1, 36)
(433, 10)
(95, 10)


In [7]:
# Normalize the co-registrant column instead of wiping it: empty strings are
# turned into missing values while real co-registrant entries are preserved,
# so that a subsequent isna() filter on coreg can actually drop the rows that
# belong to a co-registrant. Setting the whole column to None would make such
# a filter a no-op.
# NOTE(review): assumes an empty string means "no co-registrant" - confirm
# against the data format in use.
# assign() returns a new frame, which avoids assigning on a filtered slice.
num_df = num_df.assign(coreg=num_df.coreg.mask(num_df.coreg == ""))

In [8]:
# keep only the rows that have no co-registrant entry
num_df = num_df.loc[num_df["coreg"].isna()]

In [9]:
# how often does each unit of measure (uom) occur?
num_df["uom"].value_counts()

USD       419
shares     14
Name: uom, dtype: int64

In [14]:
# A row is kept if its unit of measure is clearly not a currency code,
# or if the currency is USD.

# not written entirely in upper case -> keep
mask_has_lower = ~num_df["uom"].str.isupper()

# a currency always has 3 letters, so every other length is kept as well
mask_is_none_currency = num_df["uom"].str.len().ne(3)

# USD is the only currency that is kept
mask_usd_only = num_df["uom"].eq("USD")

num_df = num_df.loc[mask_has_lower | mask_is_none_currency | mask_usd_only]

In [15]:
# (rows, columns) of num_df after the unit-of-measure filter
print(num_df.shape)

(344, 10)


In [16]:
import calendar


def _previous_year_period(current) -> int:
    """Return the YYYYMMDD number that lies one year before ``current``.

    ``current`` is expected to be a number in the form YYYYMMDD. Normally the
    result is simply ``current - 10000``. Period ends on the last days of
    February are snapped to the last day of February of the previous year to
    account for leap years: 20240229 becomes 20230228 and 20250228 becomes
    20240229.
    """
    year, month_day = divmod(int(current), 10000)
    month, day = divmod(month_day, 100)
    previous_year = year - 1
    if month == 2 and day >= 28:
        # monthrange() returns (weekday of first day, number of days in month)
        day = calendar.monthrange(previous_year, 2)[1]
    return previous_year * 10000 + month * 100 + day


# get the value of the "period" column for the entry in sub_df
# (there is only one entry left, since filtered for a certain adsh)
period = sub_df.iloc[0].period

# mask the datapoints for the current period
mask_current = num_df.ddate == period

# period and ddate are numbers in the form of YYYYMMDD; the same period end
# of the previous year is computed leap-year aware (see helper above)
mask_previous = num_df.ddate == _previous_year_period(period)

num_df = num_df[mask_current | mask_previous]

In [17]:
# (rows, columns) of num_df after the current/previous period filter
print(num_df.shape)

(344, 10)


In [18]:
# keep only the datapoints whose qtrs value is 1 or 2
# (presumably the single-quarter and the year-to-date figures of this Q2 report)
num_df = num_df.loc[num_df["qtrs"].isin([1, 2])]

In [19]:
# inspect the remaining datapoints of the revenue tag
num_df.loc[num_df["tag"].eq("RevenueFromContractWithCustomerExcludingAssessedTax")]

Unnamed: 0,adsh,tag,version,ddate,qtrs,uom,segments,coreg,value,footnote
2,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,20240331,1,USD,BusinessSegments=GreaterChinaSegment;,,16372000000.0,
15,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,20240331,2,USD,,,210328000000.0,
39,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,20240331,2,USD,BusinessSegments=RestOfAsiaPacificSegment;,,16885000000.0,
65,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,20230331,1,USD,,,94836000000.0,
70,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,20230331,1,USD,ProductOrService=IPad;,,6670000000.0,
77,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,20240331,1,USD,ProductOrService=IPad;,,5559000000.0,
81,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,20230331,2,USD,ProductOrService=Mac;,,14903000000.0,
98,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,20240331,1,USD,,,90753000000.0,
100,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,20230331,2,USD,ProductOrService=Service;,,41673000000.0,
107,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,20240331,2,USD,BusinessSegments=GreaterChinaSegment;,,37191000000.0,


In [20]:
# (rows, columns) of num_df after the qtrs filter
print(num_df.shape)

(263, 10)


In [21]:
# keep only the presentation rows whose stmt is 'IS'
# (presumably the income statement)
pre_df = pre_df.loc[pre_df["stmt"].eq('IS')]

In [22]:
# (rows, columns) of pre_df after keeping only stmt == 'IS'
print(pre_df.shape)

(15, 10)


In [23]:
# join the numeric values with their presentation information;
# a row pair matches when adsh, tag and version are equal (inner join)
pre_num_df = num_df.merge(pre_df, how='inner', on=['adsh', 'tag', 'version'])

In [24]:
# (rows, columns) of the merged num/pre dataframe
print(pre_num_df.shape)

(144, 17)


In [25]:
# flip the sign of the value column for all entries that have the negating flag set
pre_num_df.loc[pre_num_df["negating"].eq(1), 'value'] *= -1

In [26]:
# reduce the merged frame to the columns needed for the pivot
pre_num_df = pre_num_df.loc[
    :,
    ['tag', 'line', 'report', 'segments', 'uom', 'value', 'ddate', 'qtrs'],
]

In [27]:
# one row per tag/report/line/segments/uom, one value column per (qtrs, ddate) pair
pivot_df = pd.pivot_table(
    pre_num_df,
    values='value',
    index=['tag', 'report', 'line', 'segments', 'uom'],
    columns=['qtrs', 'ddate'],
)

In [28]:
# order the rows by report and, within a report, by line
sort_df = pivot_df.sort_values(by=['report', 'line'])

In [37]:
# display the pivoted frame, sorted by report and line
sort_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,qtrs,1,1,2,2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,ddate,20230331,20240331,20230331,20240331
tag,report,line,segments,uom,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,,USD,94836000000.0,90753000000.0,211990000000.0,210328000000.0
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,BusinessSegments=AmericasSegment;,USD,37784000000.0,37273000000.0,87062000000.0,87703000000.0
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,BusinessSegments=EuropeSegment;,USD,23945000000.0,24123000000.0,51626000000.0,54520000000.0
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,BusinessSegments=GreaterChinaSegment;,USD,17812000000.0,16372000000.0,41717000000.0,37191000000.0
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,BusinessSegments=JapanSegment;,USD,7176000000.0,6262000000.0,13931000000.0,14029000000.0
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,BusinessSegments=RestOfAsiaPacificSegment;,USD,8119000000.0,6723000000.0,17654000000.0,16885000000.0
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,ProductOrService=IPad;,USD,6670000000.0,5559000000.0,16066000000.0,12582000000.0
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,ProductOrService=IPhone;,USD,51334000000.0,45963000000.0,117109000000.0,115665000000.0
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,ProductOrService=Mac;,USD,7168000000.0,7451000000.0,14903000000.0,15231000000.0
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,ProductOrService=Product;,USD,73929000000.0,66886000000.0,170317000000.0,163344000000.0


In [47]:
# show only the rows without segment information plus the Product/Service split
sort_df.loc[
    sort_df.index.isin(
        ['', 'ProductOrService=Service;', 'ProductOrService=Product;'],
        level='segments',
    )
]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,qtrs,1,1,2,2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,ddate,20230331,20240331,20230331,20240331
tag,report,line,segments,uom,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,,USD,94836000000.0,90753000000.0,211990000000.0,210328000000.0
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,ProductOrService=Product;,USD,73929000000.0,66886000000.0,170317000000.0,163344000000.0
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,ProductOrService=Service;,USD,20907000000.0,23867000000.0,41673000000.0,46984000000.0
CostOfGoodsAndServicesSold,2,8,,USD,52860000000.0,48482000000.0,119682000000.0,113202000000.0
CostOfGoodsAndServicesSold,2,8,ProductOrService=Product;,USD,46795000000.0,42424000000.0,107560000000.0,100864000000.0
CostOfGoodsAndServicesSold,2,8,ProductOrService=Service;,USD,6065000000.0,6058000000.0,12122000000.0,12338000000.0
GrossProfit,2,9,,USD,41976000000.0,42271000000.0,92308000000.0,97126000000.0
ResearchAndDevelopmentExpense,2,11,,USD,7457000000.0,7903000000.0,15166000000.0,15599000000.0
SellingGeneralAndAdministrativeExpense,2,12,,USD,6201000000.0,6468000000.0,12808000000.0,13254000000.0
OperatingExpenses,2,13,,USD,13658000000.0,14371000000.0,27974000000.0,28853000000.0
