In [22]:
# to ensure that the logging statements are shown in the Jupyter output, run this cell
import logging

# calling getLogger() without a name returns the root logger; setting its level to INFO
# lets INFO records (e.g. the progress messages of imported libraries) reach the output
logger = logging.getLogger()
logger.setLevel("INFO")

In [23]:
import pandas as pd
# ensure that all columns are shown, that column content is not cut,
# and that wide rows are not wrapped before 1000 characters
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_colwidth', None),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

In [24]:
from secfsdstools.update import update

# Bring the local dataset up to date. According to the log output of this call, it checks
# for new report zip files, downloads new files from sec.gov, transforms them to parquet
# format and indexes the parquet files.
update()

2024-08-07 06:34:30,159 [INFO] configmgt  reading configuration from C:\Users\hansj\.secfsdstools.cfg
2024-08-07 06:34:30,173 [INFO] updateprocess  Check if new report zip files are available...
2024-08-07 06:34:30,228 [INFO] updateprocess  check if there are new files to download from sec.gov ...
2024-08-07 06:34:30,925 [INFO] updateprocess  start to transform to parquet format ...
2024-08-07 06:34:30,938 [INFO] updateprocess  start to index parquet files ...


No rapid-api-key is set: 
If you are interested in daily updates, please have a look at https://rapidapi.com/hansjoerg.wingeier/api/daily-sec-financial-statement-dataset


In [25]:
from secfsdstools.e_collector.reportcollecting import SingleReportCollector
from secfsdstools.e_filter.rawfiltering import ReportPeriodAndPreviousPeriodRawFilter
from secfsdstools.e_presenter.presenting import StandardStatementPresenter

# adsh: the unique identifier of Apple's 10-Q report for Q2 2024
apple_10q_q2_2024_adsh = "0000320193-24-000069"

# create a collector for exactly this one 10-Q report;
# no statement filter is applied at this point, the whole report is collected
collector: SingleReportCollector = (
    SingleReportCollector.get_report_by_adsh(adsh=apple_10q_q2_2024_adsh)
)

# load the data of the report from the disk
rawdatabag = collector.collect()

2024-08-07 06:34:31,028 [INFO] configmgt  reading configuration from C:\Users\hansj\.secfsdstools.cfg


In [26]:
# pull the three dataframes out of the collected data bag in one step
sub_df, num_df, pre_df = rawdatabag.sub_df, rawdatabag.num_df, rawdatabag.pre_df

In [27]:
# Restrict every dataframe to the rows of the Apple 10-Q report.
# The adsh comes from `apple_10q_q2_2024_adsh` (defined together with the collector) instead of
# repeating the literal three times, so the identifier only has to be changed in one place.
# `.copy()` turns each filtered result into an independent dataframe, so that assigning to
# a column in a later cell does not raise pandas' SettingWithCopyWarning.
sub_df = sub_df[sub_df.adsh == apple_10q_q2_2024_adsh].copy()
num_df = num_df[num_df.adsh == apple_10q_q2_2024_adsh].copy()
pre_df = pre_df[pre_df.adsh == apple_10q_q2_2024_adsh].copy()

In [28]:
# (rows, columns) of the sub, num and pre dataframes, one per line
print(sub_df.shape, num_df.shape, pre_df.shape, sep="\n")

(1, 36)
(271, 9)
(171, 10)


In [29]:
# Normalize the co-registrant column instead of overwriting it.
# Assigning None to the whole column would erase every co-registrant name, so a following
# `coreg.isna()` filter would keep ALL rows, including those of co-registrants.
# Here only blank strings are converted to missing values (NaN); real co-registrant
# names are kept, so filtering on `isna()` afterwards selects the main registrant only.
# `assign` returns a new dataframe, so no slice of another dataframe is modified in place.
num_df = num_df.assign(coreg=num_df.coreg.mask(num_df.coreg == ""))

In [30]:
# keep only the rows whose co-registrant entry is missing
rows_without_coreg = num_df["coreg"].isna()
num_df = num_df.loc[rows_without_coreg]

In [31]:
# how many rows exist per unit of measure (uom)
num_df["uom"].value_counts()

USD       251
shares     20
Name: uom, dtype: int64

In [32]:
uom_series = num_df["uom"]

# keep every uom that is not written entirely in upper case (e.g. "shares")
mask_has_lower = ~uom_series.str.isupper()

# a currency code always has 3 letters, so anything with a different length is not a currency
mask_is_none_currency = uom_series.str.len().ne(3)

# of the currencies, keep only USD
mask_usd_only = uom_series.eq("USD")

# a row survives if at least one of the three conditions holds
num_df = num_df.loc[mask_has_lower | mask_is_none_currency | mask_usd_only]

In [33]:
# sanity check: (rows, columns) of num_df after the unit-of-measure filter
print(num_df.shape)

(271, 9)


In [34]:
# get the value of the "period" column for the entry in sub_df
# (there is only one entry left, since filtered for a certain adsh)
period = sub_df.iloc[0].period

# mask the datapoints for the current period
mask_current = num_df.ddate == period

# period and ddate are numbers in the form of YYYYMMDD, so subtracting 10'000
# gives the same month and day one year earlier
previous_period = period - 10000
previous_dates = [previous_period]

# Leap-year handling for periods that end on the last day of February:
# the month end is not the same day in both years, so the plain subtraction would
# point to a date that is not the month end of the previous year.
month_and_day = period % 10000
if month_and_day == 229:
    # e.g. 20240229 -> the previous year's February ends on 20230228
    previous_dates.append(previous_period - 1)
elif month_and_day == 228:
    # e.g. 20250228 -> the previous year's February may end on 20240229
    previous_dates.append(previous_period + 1)

# mask the datapoints of the same period one year earlier
mask_previous = num_df.ddate.isin(previous_dates)

num_df = num_df[mask_current | mask_previous]

In [35]:
# sanity check: (rows, columns) of num_df after keeping the current and the previous-year period
print(num_df.shape)

(222, 9)


In [36]:
# keep only the datapoints with qtrs 1 or 2
# (presumably: values covering one quarter and two quarters - TODO confirm the meaning of qtrs)
num_df = num_df.loc[num_df["qtrs"].isin([1, 2])]

In [50]:
# inspect the remaining datapoints of a single tag (revenue) as a spot check
num_df.loc[num_df["tag"].eq("RevenueFromContractWithCustomerExcludingAssessedTax")]

Unnamed: 0,adsh,tag,version,coreg,ddate,qtrs,uom,value,footnote
241,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,,20240331,1,USD,90753000000.0,
242,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,,20230331,1,USD,94836000000.0,
243,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,,20240331,2,USD,210328000000.0,
244,0000320193-24-000069,RevenueFromContractWithCustomerExcludingAssessedTax,us-gaap/2023,,20230331,2,USD,211990000000.0,


In [37]:
# sanity check: (rows, columns) of num_df after the qtrs filter
print(num_df.shape)

(172, 9)


In [38]:
# keep only the presentation rows whose statement type is 'IS'
# (presumably the income statement - TODO confirm)
pre_df = pre_df.loc[pre_df["stmt"].eq('IS')]

In [39]:
# sanity check: (rows, columns) of pre_df after the statement filter
print(pre_df.shape)

(24, 10)


In [40]:
# attach the presentation information to every datapoint;
# rows are matched on report id, tag and tag version, unmatched rows are dropped (inner join)
pre_num_df = num_df.merge(pre_df, how='inner', on=['adsh', 'tag', 'version'])

In [41]:
# sanity check: (rows, columns) of the merged num/pre dataframe
print(pre_num_df.shape)

(60, 16)


In [42]:
# mask the entries with the negating flag set and inverse the value column
pre_num_df.loc[pre_num_df.negating == 1, 'value'] = -pre_num_df.value

In [43]:
# reduce the merged dataframe to the columns needed for the pivot table
columns_to_keep = ['tag', 'line', 'report', 'uom', 'value', 'ddate', 'qtrs']
pre_num_df = pre_num_df[columns_to_keep]

In [46]:
# one row per tag/report/line/uom, one value column per (qtrs, ddate) combination;
# should several values share the same row and column, pivot_table combines them
# with its default aggregation
pivot_df = pd.pivot_table(
    pre_num_df,
    values='value',
    index=['tag', 'report', 'line', 'uom'],
    columns=['qtrs', 'ddate'],
)

In [47]:
# order the rows by the index levels report and line
sort_df = pivot_df.sort_values(by=['report', 'line'])

In [48]:
# display the pivoted values, sorted by report and line
sort_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,qtrs,1,1,2,2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,ddate,20230331,20240331,20230331,20240331
tag,report,line,uom,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
RevenueFromContractWithCustomerExcludingAssessedTax,2,7,USD,94836000000.0,90753000000.0,211990000000.0,210328000000.0
CostOfGoodsAndServicesSold,2,8,USD,52860000000.0,48482000000.0,119682000000.0,113202000000.0
GrossProfit,2,9,USD,41976000000.0,42271000000.0,92308000000.0,97126000000.0
ResearchAndDevelopmentExpense,2,11,USD,7457000000.0,7903000000.0,15166000000.0,15599000000.0
SellingGeneralAndAdministrativeExpense,2,12,USD,6201000000.0,6468000000.0,12808000000.0,13254000000.0
OperatingExpenses,2,13,USD,13658000000.0,14371000000.0,27974000000.0,28853000000.0
OperatingIncomeLoss,2,14,USD,28318000000.0,27900000000.0,64334000000.0,68273000000.0
NonoperatingIncomeExpense,2,15,USD,64000000.0,158000000.0,-329000000.0,108000000.0
IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest,2,16,USD,28382000000.0,28058000000.0,64005000000.0,68381000000.0
IncomeTaxExpenseBenefit,2,17,USD,4222000000.0,4422000000.0,9847000000.0,10829000000.0
