In [2]:
import pandas as pd


In [100]:
# Read the company facts JSON export into a DataFrame.
# The file name embeds the company's zero-padded CIK.
jsonPath = "CIK0000320193.json"
df = pd.read_json(jsonPath)

In [101]:
# Preview the first rows of the DataFrame. As the last expression of the cell,
# the result is rendered by the notebook rather than printed.
df.head()

Unnamed: 0,cik,entityName,facts
dei,320193,Apple Inc.,{'EntityCommonStockSharesOutstanding': {'label...
us-gaap,320193,Apple Inc.,{'AccountsPayable': {'label': 'Accounts Payabl...


In [102]:
# Pull out the company's name and CIK

# The entity-level fields are repeated on every row of df, so the first row is
# enough. Columns are selected by name rather than by position so the lookup
# does not depend on the key order of the JSON file.
entityName = df['entityName'].iloc[0]
# The CIK is parsed as a number; keep it as a string so it can be concatenated
# and stored as an identifier.
entityCIK = str(df['cik'].iloc[0])
print('CIK: ' + entityCIK)
print('Company: ' + entityName)

CIK: 320193
Company: Apple Inc.


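The `facts` column of the JSON file is split into two main partitions, one per XBRL taxonomy (TODO: confirm exactly what each partition covers):

- DEI (Document and Entity Information)

- US-GAAP (US Generally Accepted Accounting Principles)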

General Structure of the JSON file:

- [Partition]
    - Heading Name
        - Label (more verbose heading)
        - Description
        - Units
            - Unit of Measurement
                - ...
                - Val
                - Form
                - Filed (date)
                - ...
    

## DEI Partition

In [129]:
partition = 'DEI'
# Select the partition by its row/column labels rather than by position, so the
# cell does not depend on the order of the partitions inside the JSON file.
dei = df.loc['dei', 'facts']
subGroups = dei.items()

# Columns of the flattened table, in output order
columnNames = ['company', 'CIK', 'headingFull', 'units', 'value',
               'filedDate', 'endDate', 'fy', 'qtr', 'frame']

# A heading/unit series is skipped when its most recent fiscal year is older
# than this threshold.
minLatestYear = 2024

# Rows are collected as plain dicts and turned into a dataframe once at the
# end; growing a dataframe one row at a time re-allocates it on every insert.
rows = []

# DEI is broken down into subgroups
for heading, details in subGroups:
    # Each subgroup has a heading, a more verbose label, and a 'units' mapping
    label = details['label']

    # The units component contains the actual measurement and the associated metadata (in the 'records' list)
    for unit, records in details['units'].items():
        # Filter out series where the latest year of data is less than the threshold.
        # Records with a null 'fy' are ignored here because int() cannot convert them;
        # an empty series falls back to 0 and is therefore skipped.
        maxYear = max(
            (int(record['fy']) for record in records if record['fy'] is not None),
            default=0,
        )
        if maxYear < minLatestYear:
            continue

        # We have a record for every filing date; keep only 10-Q records that carry a 'frame'
        for record in records:
            if record['form'] == '10-Q' and 'frame' in record:
                # Build a fresh dict per record so the collected rows never share state
                rows.append({
                    'company': entityName,
                    'CIK': entityCIK,
                    'headingFull': label,
                    'units': unit,
                    'value': round(float(record['val']), 2),
                    'filedDate': record['filed'],
                    'endDate': record['end'],
                    'fy': record['fy'],
                    'qtr': record['fp'],
                    'frame': record['frame'],
                })

# Passing columns= keeps the expected layout even when no record qualified
masterDf = pd.DataFrame(rows, columns=columnNames)

masterDf

Unnamed: 0,company,CIK,headingFull,units,value,filedDate,endDate,fy,qtr,frame
0,Apple Inc.,320193,"Entity Common Stock, Shares Outstanding",shares,895816800.0,2009-07-22,2009-06-27,2009,Q3,CY2009Q2I
1,Apple Inc.,320193,"Entity Common Stock, Shares Outstanding",shares,906794600.0,2010-01-25,2010-01-15,2010,Q1,CY2009Q4I
2,Apple Inc.,320193,"Entity Common Stock, Shares Outstanding",shares,909938400.0,2010-04-21,2010-04-09,2010,Q2,CY2010Q1I
3,Apple Inc.,320193,"Entity Common Stock, Shares Outstanding",shares,913562900.0,2010-07-21,2010-07-09,2010,Q3,CY2010Q2I
4,Apple Inc.,320193,"Entity Common Stock, Shares Outstanding",shares,921278000.0,2011-01-19,2011-01-07,2011,Q1,CY2010Q4I
5,Apple Inc.,320193,"Entity Common Stock, Shares Outstanding",shares,924754600.0,2011-04-21,2011-04-08,2011,Q2,CY2011Q1I
6,Apple Inc.,320193,"Entity Common Stock, Shares Outstanding",shares,927090900.0,2011-07-20,2011-07-08,2011,Q3,CY2011Q2I
7,Apple Inc.,320193,"Entity Common Stock, Shares Outstanding",shares,932370000.0,2012-01-25,2012-01-13,2012,Q1,CY2011Q4I
8,Apple Inc.,320193,"Entity Common Stock, Shares Outstanding",shares,935062000.0,2012-04-25,2012-04-13,2012,Q2,CY2012Q1I
9,Apple Inc.,320193,"Entity Common Stock, Shares Outstanding",shares,937406000.0,2012-07-25,2012-07-13,2012,Q3,CY2012Q2I


## US-GAAP Partition

In [126]:
partition = 'US-GAAP'
# Select the partition by its row/column labels rather than by position, so the
# cell does not depend on the order of the partitions inside the JSON file.
usGaap = df.loc['us-gaap', 'facts']
subGroups = usGaap.items()

# Columns of the flattened table, in output order
columnNames = ['company', 'CIK', 'headingFull', 'units',
               'value', 'filedDate', 'endDate', 'fy', 'qtr', 'form', 'frame']

# A heading/unit series is skipped when its most recent fiscal year is older
# than this threshold.
minLatestYear = 2024

# Rows are collected as plain dicts and turned into a dataframe once at the
# end; growing a dataframe one row at a time re-allocates it on every insert.
rows = []

# US-GAAP is broken down into subgroups
for heading, details in subGroups:
    # Each subgroup has a heading, a more verbose label, and a 'units' mapping
    label = details['label']

    # Skip headings whose label marks them as deprecated.
    # The truthiness check guards against an empty or missing label value.
    if label and ('deprecated' in label.lower()):
        continue

    # The units component contains the actual measurement and the associated metadata (in the 'records' list)
    for unit, records in details['units'].items():
        # Filter out series where the latest year of data is less than the threshold.
        # Records with a null 'fy' are ignored here because int() cannot convert them;
        # an empty series falls back to 0 and is therefore skipped.
        maxYear = max(
            (int(record['fy']) for record in records if record['fy'] is not None),
            default=0,
        )
        if maxYear < minLatestYear:
            continue

        # We have a record for every filing date; keep only 10-Q records that carry a 'frame'
        for record in records:
            if record['form'] == '10-Q' and 'frame' in record:
                # Build a fresh dict per record so the collected rows never share state
                rows.append({
                    'company': entityName,
                    'CIK': entityCIK,
                    'headingFull': label,
                    'units': unit,
                    'value': record['val'],
                    'filedDate': record['filed'],
                    'endDate': record['end'],
                    'fy': record['fy'],
                    'qtr': record['fp'],
                    'form': record['form'],
                    'frame': record['frame'],
                })

# Passing columns= keeps the expected layout even when no record qualified
masterDf = pd.DataFrame(rows, columns=columnNames)

masterDf

Unnamed: 0,company,CIK,headingFull,units,value,filedDate,endDate,fy,qtr,form,frame
0,Apple Inc.,320193,"Accounts Payable, Current",USD,6.511000e+09,2010-01-25,2009-12-26,2010,Q1,10-Q,CY2009Q4I
1,Apple Inc.,320193,"Accounts Payable, Current",USD,5.666000e+09,2010-04-21,2010-03-27,2010,Q2,10-Q,CY2010Q1I
2,Apple Inc.,320193,"Accounts Payable, Current",USD,8.469000e+09,2010-07-21,2010-06-26,2010,Q3,10-Q,CY2010Q2I
3,Apple Inc.,320193,"Accounts Payable, Current",USD,1.430100e+10,2011-01-19,2010-12-25,2011,Q1,10-Q,CY2010Q4I
4,Apple Inc.,320193,"Accounts Payable, Current",USD,1.371400e+10,2011-04-21,2011-03-26,2011,Q2,10-Q,CY2011Q1I
...,...,...,...,...,...,...,...,...,...,...,...
2522,Apple Inc.,320193,"Hedged Liability, Fair Value Hedge",USD,1.798600e+10,2023-08-04,2023-07-01,2023,Q3,10-Q,CY2023Q2I
2523,Apple Inc.,320193,"Hedged Liability, Fair Value Hedge",USD,1.824700e+10,2024-02-02,2023-09-30,2024,Q1,10-Q,CY2023Q3I
2524,Apple Inc.,320193,"Hedged Liability, Fair Value Hedge",USD,1.866100e+10,2024-02-02,2023-12-30,2024,Q1,10-Q,CY2023Q4I
2525,Apple Inc.,320193,Incremental Common Shares Attributable to Dilu...,shares,6.299500e+07,2024-02-02,2022-12-31,2024,Q1,10-Q,CY2022Q4


In [128]:
# Order the rows by heading, then by reporting frame, and write them to CSV
# without the dataframe index.
sortKeys = ['headingFull', 'frame']
masterDf = masterDf.sort_values(by=sortKeys)
masterDf.to_csv('Apple Financials.csv', index=False)