In [3]:
import pandas as pd
import os
import openpyxl
import numpy as np
import re
from datetime import datetime
import requests

In [4]:
class HelperFunctions():
    def getYears(self,quarters): #go through each file name and find the year of filing
        return [int(re.findall("\d{4}",quarter)[0]) for quarter in quarters]
    def getUniqueYears(self,quarters):
        return set(self.getYears(quarters))
    
    def getExcelSheetsPostDate(self,Excel): #use IncomeExcel dictionary to get all dates and only keep where date is later than year specified 
        quarters = Excel.keys()
        fileDates = self.getYears(quarters) 
        files = [file for file, date in zip(Excel, fileDates) if date>self.fromDate or (date==self.fromDate and "Year Ended" in file)]
        Excel = {file:Excel[file] for file in files}
        return Excel
        
    def reorderQuarters(self):
        quarters = self.getQuarters()
        years = self.getUniqueYears(quarters)
        cols = [q + " " + str(year) for year in years for q in ["Q1","Q2","Q3","Q4","Year Ended"] if q + " " + str(year) in quarters]
        self.compiledStatement = self.compiledStatement[["label"] + cols]
        
    def getQuarters(self):
        return list(self.compiledStatement.columns[1:])
    
    
    def cleanup_label(self, label):
        new_label = label.lower()
        new_label = new_label.replace("gain","loss").replace("decrease","increase")
        new_label = re.sub("\s\([\w\W]+?\)","",new_label).replace("  "," ").replace("—"," - ").strip()
        new_label = re.sub(" \$|\s*\d+\,*", "", new_label) 
        return new_label
    

In [193]:
class Income(HelperFunctions):
    def __init__(self,ticker,fromDate):
        
        self.ticker = ticker
        self.fromDate = fromDate
        self.compiledStatement = pd.DataFrame(columns=["label"])
        
        self.IncomeExcel = self.readIncomeExcel()
        self.IncomeExcel = self.getExcelSheetsPostDate(self.IncomeExcel)
        
        self.compiledStatement = self.addData()
#         self.performIncomeMath()
#         self.reorderQuarters()
        
    def readIncomeExcel(self):
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        file = os.path.join(self.path, f"Income Statements All-{self.ticker}.xlsx")
        IncomeExcel = pd.read_excel(file,sheet_name=None)
        return IncomeExcel
    
    def addData(self):
        compiledStatement = pd.DataFrame(columns=["label","value"])
        for quarter,sheet in list(self.IncomeExcel.items()):
            print(quarter)
            quarter =  re.sub("\s\([\w\W]+?\)","",quarter) #remove the parenthesis ie. (2021) from the quarter
            sheet.columns = ["label",quarter]
            sheet.label = sheet.label.apply(self.cleanup_label)
            
            new_sheet = sheet.reset_index()
            new_sheet["index 0"] = sheet.index
            new_sheet["index 1"] = sheet.index+1
            new_sheet["index 2"] = sheet.index-1
#             new_sheet["index 3"] = sheet.index+2
#             new_sheet["index 4"] = sheet.index
            
            new_sheet = pd.melt(new_sheet, id_vars=["label", quarter, "index"], value_vars=["index 0", "index 1", "index 2"])
            new_sheet = new_sheet.drop(columns="variable")

            compiledStatement = pd.merge(compiledStatement, new_sheet, on=["value","label"], how="left")

            for i, row in sheet.iterrows():
                if i not in list(compiledStatement["index"]):
                    row["value"] = i
                    compiledStatement = pd.concat([compiledStatement, pd.DataFrame(row).T],axis=0)

            compiledStatement = compiledStatement.drop(columns="index")
        
        compiledStatement = compiledStatement.sort_values(by="value")
        return compiledStatement
            
    def performIncomeMath(self):
        quarters = self.getQuarters()
        years = self.getUniqueYears(quarters)
        
        for year in years:
            year = str(year)
            if "Year Ended "+year in quarters and "Q3 "+year in quarters: 
                
                self.compiledStatement["Q4 "+year] = self.compiledStatement["Year Ended "+year] - self.compiledStatement["Q3 "+year]
                self.compiledStatement.loc[self.compiledStatement.label.str.startswith("weighted-average"),"Q4 "+year] = self.compiledStatement.loc[self.compiledStatement.label.str.startswith("weighted-average"),"Year Ended "+year]


            

In [195]:
income = Income("AAPL",2015)
finalIncome = income.compiledStatement
finalIncome.to_excel("try.xlsx")
income.compiledStatement

Year Ended 2017
Q1 2018 (2017)
Q2 2018
Q3 2018
Year Ended 2018
Q1 2019 (2018)
Q2 2019
Q3 2019
Year Ended 2019
Q1 2020 (2019)
Q2 2020
Q3 2020
Year Ended 2020
Q1 2021 (2020)
Q2 2021
Q3 2021
Year Ended 2021
Q1 2022 (2021)
Q2 2022
Q3 2022


Unnamed: 0,label,Year Ended 2017,value,Q1 2018,Q2 2018,Q3 2018,Year Ended 2018,Q1 2019,Q2 2019,Q3 2019,...,Q2 2020,Q3 2020,Year Ended 2020,Q1 2021,Q2 2021,Q3 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022
0,income statement [abstract],,0,,,,,,,,...,,,,,,,,,,
1,net sales,229234.0,1,88293.0,61137.0,53265.0,265595.0,84310.0,58015.0,53809.0,...,58313.0,59685.0,274515.0,111439.0,89584.0,81434.0,365817.0,123945.0,97278.0,82959.0
2,cost of sales,141048.0,2,54381.0,37715.0,32844.0,163756.0,52279.0,36194.0,33582.0,...,35943.0,37005.0,169559.0,67111.0,51505.0,46179.0,212981.0,69702.0,54719.0,47074.0
3,gross margin,88186.0,3,33912.0,23422.0,20421.0,101839.0,32031.0,21821.0,20227.0,...,22370.0,22680.0,104956.0,44328.0,38079.0,35255.0,152836.0,54243.0,42559.0,35885.0
4,operating expenses:,,4,,,,,,,,...,,,,,,,,,,
5,research and development,11581.0,5,3407.0,3378.0,3701.0,14236.0,3902.0,3948.0,4257.0,...,4565.0,4758.0,18752.0,5163.0,5262.0,5717.0,21914.0,6306.0,6387.0,6797.0
6,"selling, general and administrative",15261.0,6,4231.0,4150.0,4108.0,16705.0,4783.0,4458.0,4426.0,...,4952.0,4831.0,19916.0,5631.0,5314.0,5412.0,21973.0,6449.0,6193.0,6012.0
7,total operating expenses,26842.0,7,7638.0,7528.0,7809.0,30941.0,8685.0,8406.0,8683.0,...,9517.0,9589.0,38668.0,10794.0,10576.0,11129.0,43887.0,12755.0,12580.0,12809.0
8,operating income,61344.0,8,26274.0,15894.0,12612.0,70898.0,23346.0,13415.0,11544.0,...,12853.0,13091.0,66288.0,33534.0,27503.0,24126.0,108949.0,41488.0,29979.0,23076.0
9,"other income/(expense), net",2745.0,9,756.0,274.0,672.0,2005.0,560.0,378.0,367.0,...,282.0,46.0,803.0,45.0,508.0,243.0,258.0,-247.0,160.0,-10.0


In [159]:
income.c

Unnamed: 0,label,Q3 2018,value,Year Ended 2018,index
0,revenue,,0,110360.0,0.0
1,product,15114.0,1,,
2,service and other,11705.0,2,,
3,total revenue,26819.0,3,,
4,cost of revenue,,4,,
5,product,3425.0,5,,
6,service and other,5844.0,6,,
7,total cost of revenue,9269.0,7,,
8,gross margin,17550.0,8,,
9,research and development,3715.0,9,,


In [133]:
income.sheet

Unnamed: 0,label,Q1 2020
0,revenue,33055.0
1,cost of revenue,10406.0
2,gross margin,22649.0
3,research and development,4565.0
4,sales and marketing,4337.0
5,general and administrative,1061.0
6,operating income,12686.0
7,"other income, net",0.0
8,income before income taxes,12686.0
9,provision for income taxes,2008.0


In [132]:
class Balance(FinancialDataMerge, ProcessQuarters, HelperFunctions):
    def __init__(self):
        self.total_keyword = "total"
        self.finalStatement = pd.DataFrame(columns=["title","title_order","label"])
        
        self.BalanceStatements = self.getBalanceStatements()
        self.addData(self.BalanceStatements)
        
        self.rearrangeFinalStatement()
        
        self.setQuarters()
        
        self.performBalanceMath()
        
        self.reorderQuarters()
        
      
    def getBalanceStatements(self):
        FinancialReports = self.getFinancialStatements()
        BalanceStatements = [[report.getBalanceSheet().copy(), report.file] for report in FinancialReports]
        return BalanceStatements
    
    def parseQuarter(self,date):
        mo = date.split(" ")[0].strip(".")
        year = date.split(", ")[-1]
        if mo in ["Apr","May"]:
            return "Q1 "+str(int(year)+1)
        
        if mo in ["Jul","Aug"]:
            return "Q2 "+str(int(year)+1)
        
        if mo in ["Oct","Nov"]:
            return "Q3 "+str(int(year)+1)
        
        if mo in ["Jan","Feb"]:
            return "Year Ended "+str(int(year))
        
        
    def performBalanceMath(self):
        years = self.getYears()[1:]
        for year in years:
            if "Year Ended "+year not in self.getQuarterHeaders(): 
                continue
            self.finalStatement["Q4 "+year] = self.finalStatement["Year Ended "+year]
            
    def getAccumulatedDepreciation(self):
        Depreciation = {}
        quarters = []
        for sheet, file in self.BalanceStatements:
            sheet.label = [label.lower() for label in sheet.label]
            date = sheet.columns[1]
            quarter = self.parseQuarter(date) 
            quarters.append(quarter)
            label = sheet.loc[sheet.label.str.contains("property and equipment, net of accumulated depreciation"),"label"]
            if len(label)>0:
                label = label.iloc[0]
                dep = re.findall("\d+,*\d*",label)
                if len(dep) > 0:
                    dep = int(dep[0].replace(",",""))
                    Depreciation[quarter] = dep
                    continue
                   
            dep = sheet.loc[(sheet.label == "property and equipment, accumulated depreciation") | (sheet.label=="accumulated depreciation, depletion and amortization, property, plant, and equipment"), date]
            if len(dep) > 0:
                dep = dep.iloc[0]
                Depreciation[quarter] = dep
                if "Year Ended" in quarter:
                    Depreciation["Q4 "+quarter.split(" ")[-1]] = dep
                continue
        #dates = sorted(Depreciation,key=lambda date: datetime.strptime(date.replace(".",""), '%b %d, %Y'))
        years = list(set([x.split(" ")[-1] for x in quarters]))
        years.sort()
        cols = { q+" "+year : [Depreciation.get(q + " " + year, np.nan)] for year in years for q in ["Q1","Q2","Q3","Q4","Year Ended"] if q + " " + year in quarters}
        self.Depreciation = pd.DataFrame(cols)
        return self.Depreciation
            
            
        

In [133]:
balance = Balance()
balance.getAccumulatedDepreciation().to_excel("output/raw/depreciation.xlsx")


q3 2022 (2021).xlsx
q3 2019 (2018).xlsx
q3 2021 (2020).xlsx
q3 2018 (2017).xlsx
q1 2020 (2019).xlsx
q2 2022 (2021).xlsx
q2 2018 (2017).xlsx
q2 2021 (2020).xlsx
10k 2020 (2019).xlsx
q2 2019 (2018).xlsx
q1 2023 (2022).xlsx
q1 2022 (2021).xlsx
q3 2020 (2019).xlsx
q1 2021 (2020)xlsx.xlsx
q1 2018 (2017).xlsx
q1 2019 (2018).xlsx
10k 2022 (2021).xlsx
10k 2019 (2018).xlsx
q2 2020 (2019).xlsx
10k 2017 (2016).xlsx
10k 2018 (2017).xlsx
10k 2021 (2020).xlsx


In [134]:
balance = Balance()
finalBalance = balance.finalStatement
finalBalance.to_excel("output/raw/balance.xlsx")
finalBalance


q3 2022 (2021).xlsx
q3 2019 (2018).xlsx
q3 2021 (2020).xlsx
q3 2018 (2017).xlsx
q1 2020 (2019).xlsx
q2 2022 (2021).xlsx
q2 2018 (2017).xlsx
q2 2021 (2020).xlsx
10k 2020 (2019).xlsx
q2 2019 (2018).xlsx
q1 2023 (2022).xlsx
q1 2022 (2021).xlsx
q3 2020 (2019).xlsx
q1 2021 (2020)xlsx.xlsx
q1 2018 (2017).xlsx
q1 2019 (2018).xlsx
10k 2022 (2021).xlsx
10k 2019 (2018).xlsx
q2 2020 (2019).xlsx
10k 2017 (2016).xlsx
10k 2018 (2017).xlsx
10k 2021 (2020).xlsx


Unnamed: 0,title,label,Year Ended 2017,Q1 2018,Q2 2018,Q3 2018,Q4 2018,Year Ended 2018,Q1 2019,Q2 2019,...,Q2 2021,Q3 2021,Q4 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Q4 2022,Year Ended 2022,Q1 2023
0,,cash and cash equivalents,1783.0,1583.0,1609.0,1353.0,1783.0,1783.0,1210.0,1322.0,...,2188.0,2471.0,0.0,0.0,2066.0,2375.0,801.0,877.0,877.0,845.0
1,,short-term investments,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,410.0,410.0,0.0,0.0,275.0,0.0,0.0,0.0
2,,merchandise inventory,1830.0,1961.0,2051.0,2476.0,1997.0,1997.0,2035.0,2202.0,...,2242.0,2747.0,2451.0,2451.0,2370.0,2281.0,2721.0,3018.0,3018.0,3169.0
3,,other current assets,702.0,575.0,598.0,654.0,788.0,788.0,778.0,780.0,...,882.0,966.0,0.0,0.0,1091.0,1201.0,1410.0,1270.0,1270.0,991.0
4,,"available-for-sale securities, current",0.0,0.0,0.0,0.0,0.0,0.0,164.0,286.0,...,25.0,178.0,0.0,0.0,475.0,337.0,0.0,0.0,0.0,0.0
5,,"cash and cash equivalents, at carrying value",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1988.0,1988.0,0.0,0.0,0.0,0.0,0.0,0.0
6,,"other assets, current",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1159.0,1159.0,0.0,0.0,0.0,0.0,0.0,0.0
7,,total current assets,4315.0,4119.0,4258.0,4483.0,4568.0,4568.0,4187.0,4590.0,...,5337.0,6362.0,6008.0,6008.0,6002.0,6194.0,5207.0,5165.0,5165.0,5005.0
8,,"property and equipment, net of accumulated dep...",0.0,2605.0,2643.0,2686.0,0.0,0.0,2791.0,2832.0,...,2895.0,2846.0,2841.0,2841.0,2839.0,2897.0,2924.0,3037.0,3037.0,2791.0
9,,"operating lease, right-of-use asset",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4689.0,4460.0,4217.0,4217.0,4060.0,3975.0,3788.0,0.0,0.0,3587.0


In [137]:
class CashFlow(FinancialDataMerge, ProcessQuarters, HelperFunctions):
    def __init__(self):
        self.total_keyword = "net"
        self.finalStatement = pd.DataFrame(columns=["title","title_order","label"])
        
        self.CashFlowStatements = self.getCashFlowStatements()
        self.addData(self.CashFlowStatements)
        
        self.rearrangeFinalStatement()
        
        self.setQuarters()
        
        self.performCashFlowMath()
        
        self.reorderQuarters()
        
      
    def getCashFlowStatements(self):
        FinancialReports = self.getFinancialStatements()
        CashFlowStatements = [[report.getCashFlowSheet().copy(), report.file] for report in FinancialReports]
        return CashFlowStatements
    
    def parseQuarter(self,date):
        mo = date.split(" ")[0].strip(".")
        year = date.split(", ")[-1]
        if mo in ["Apr","May"]:
            return "Q1 "+str(int(year)+1)
        
        if mo in ["Jul","Aug"]:
            return "6mo "+str(int(year)+1)
        
        if mo in ["Oct","Nov"]:
            return "9mo "+str(int(year)+1)
        
        if mo in ["Jan","Feb"]:
            return "Year Ended "+str(int(year))     
    def performCashFlowMath(self):
        years = self.getYears()[1:]
        begin_cash = self.finalStatement.label.apply(lambda label: "cash" in label and "beginning of period" in label)
        data = [None, "cash at beginning of period"] + self.finalStatement.loc[begin_cash,self.getQuarterHeaders()].sum().tolist()
        self.finalStatement = self.finalStatement.drop(self.finalStatement.loc[begin_cash].index)
        self.finalStatement.loc[len(self.finalStatement)] = data

        end_cash = self.finalStatement.label.apply(lambda label: "cash" in label and "end of period" in label)
        data = [None, "cash at end of period"] + self.finalStatement.loc[end_cash,self.getQuarterHeaders()].sum().tolist()
        self.finalStatement = self.finalStatement.drop(self.finalStatement.loc[end_cash].index)
        self.finalStatement.loc[len(self.finalStatement)] = data
        
        self.finalStatement = self.finalStatement.set_index("label")
        for year in years:
            if "Year Ended "+year not in self.getQuarterHeaders(): 
                continue
            # find Q2 
            self.finalStatement["Q2 "+year] = self.finalStatement["6mo "+year] - self.finalStatement["Q1 "+year]
            self.finalStatement.loc["cash at beginning of period","Q2 "+year] = self.finalStatement.loc["cash at end of period","Q1 "+year]
            self.finalStatement.loc["cash at end of period","Q2 "+year] = self.finalStatement.loc["cash at end of period","6mo "+year]
            
            #find Q3 
            self.finalStatement["Q3 "+year] = self.finalStatement["9mo "+year] - self.finalStatement["6mo "+year]
            self.finalStatement.loc["cash at beginning of period","Q3 "+year] = self.finalStatement.loc["cash at end of period", "6mo "+year]
            self.finalStatement.loc["cash at end of period","Q3 "+year] = self.finalStatement.loc["cash at end of period","9mo "+year]
            
            
            #find Q3 
            self.finalStatement["Q4 "+year] = self.finalStatement["Year Ended "+year] - self.finalStatement["9mo "+year]
            self.finalStatement.loc["cash at beginning of period","Q4 "+year] = self.finalStatement["9mo "+year].loc["cash at end of period"]
            self.finalStatement.loc["cash at end of period","Q4 "+year] = self.finalStatement.loc["cash at end of period","Year Ended "+year]
        
        self.finalStatement = self.finalStatement.reset_index()
        self.finalStatement = self.finalStatement[["title","label"]+self.getQuarterHeaders()]
        

In [138]:
cashflow = CashFlow()
finalCashflow = cashflow.finalStatement
finalCashflow.to_excel("output/cashflow.xlsx")
finalCashflow

q3 2022 (2021).xlsx
q3 2019 (2018).xlsx
q3 2021 (2020).xlsx
q3 2018 (2017).xlsx
q1 2020 (2019).xlsx
q2 2022 (2021).xlsx
q2 2018 (2017).xlsx
q2 2021 (2020).xlsx
10k 2020 (2019).xlsx
q2 2019 (2018).xlsx
q1 2023 (2022).xlsx
q1 2022 (2021).xlsx
q3 2020 (2019).xlsx
q1 2021 (2020)xlsx.xlsx
q1 2018 (2017).xlsx
q1 2019 (2018).xlsx
10k 2022 (2021).xlsx
10k 2019 (2018).xlsx
q2 2020 (2019).xlsx
10k 2017 (2016).xlsx
10k 2018 (2017).xlsx
10k 2021 (2020).xlsx


Unnamed: 0,title,label,Year Ended 2017,Q1 2018,Q2 2018,Q3 2018,Q4 2018,Year Ended 2018,Q1 2019,Q2 2019,...,Q2 2021,Q3 2021,Q4 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Q4 2022,Year Ended 2022,Q1 2023
0,,net income,676,143,271,229,205,848,164,297,...,-62,95,234,-665,166,258,-152,-16,256,-162
1,,depreciation and amortization,593,138,141,139,141,559,140,140,...,126,125,126,507,120,124,128,132,504,130
2,,share-based compensation,76,20,22,18,27,87,21,27,...,17,20,22,77,36,36,25,42,139,1
3,,"operating lease, impairment loss",0,0,0,0,0,0,0,0,...,1,0,30,391,5,1,0,-6,0,0
4,,other asset impairment charges,107,0,0,0,28,28,0,0,...,3,0,8,135,0,1,0,-1,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,,income taxes paid,0,0,0,0,0,0,0,0,...,0,0,20,20,0,0,0,0,0,0
668,,"cash paid for income taxes during the period, ...",488,35,95,130,310,570,19,42,...,16,-45,-8,0,20,127,34,34,215,-420
669,,cash at beginning of period,1370,1783,1583,1609,1353,1783,1799,1229,...,1048,2241,2499,1381,2016,2096,2407,829,2016,902
670,,"operating lease, payments",0,0,0,0,0,0,0,0,...,0,0,1096,1096,0,0,0,0,0,0


In [848]:
finalCashflow["label"].tolist()

['cash and cash equivalents',
 'short-term investments',
 'merchandise inventory',
 'other current assets',
 'available-for-sale securities, current',
 'total current assets',
 'property and equipment, net of accumulated depreciation',
 'operating lease, right-of-use asset',
 'other long-term assets',
 'accumulated depreciation, depletion and amortization, property, plant, and equipment',
 'operating lease assets',
 'total assets',
 'line of credit facility, fair value of amount outstanding',
 'accounts payable',
 'accrued expenses and other current liabilities',
 'operating lease, liability, current',
 'income taxes payable',
 'current portion of operating lease liabilities',
 'total current liabilities',
 'long-term debt',
 'lease incentives and other long-term liabilities',
 'long-term operating lease liabilities',
 'other long-term liabilities',
 'total long-term debt',
 'total long-term liabilities',
 'operating lease, liability, noncurrent',
 'lease incentives and other long-term

In [856]:
finalCashflow

Unnamed: 0,title,label,Year Ended 2016,Q1 2017,6mo 2017,9mo 2017,Year Ended 2017,Q1 2018,6mo 2018,9mo 2018,...,9mo 2020,Year Ended 2020,Q1 2021,6mo 2021,9mo 2021,Year Ended 2021,Q1 2022,6mo 2022,9mo 2022,Year Ended 2022
0,Cash flows from operating activities,net income,920,127,252,456,676,143,414,643,...,535,351,-932,-994,-899,-665,166,424,272,256
1,Adjustments to reconcile net income to net cas...,depreciation and amortization,592,148,303,449,593,138,279,418,...,417,557,130,256,381,507,120,244,372,504
2,Adjustments to reconcile net income to net cas...,share-based compensation,76,15,36,55,76,20,42,60,...,64,68,18,35,55,77,36,72,97,139
3,Adjustments to reconcile net income to net cas...,"operating lease, impairment loss",0,0,0,0,0,0,0,0,...,1,239,360,361,361,0,5,6,6,0
4,Adjustments to reconcile net income to net cas...,other asset impairment charges,54,0,0,0,107,0,0,0,...,9,98,124,127,127,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,Supplemental disclosure of cash flow information,"cash paid for income taxes during the period, ...",452,43,143,318,488,35,130,260,...,117,176,37,53,8,20,20,147,181,215
95,Supplemental disclosure of cash flow information,cash paid for interest during the period,78,39,41,80,82,38,38,76,...,0,76,0,0,0,145,0,0,0,180
96,Supplemental disclosure of cash flow information,"operating lease, payments",0,0,0,0,0,0,0,0,...,0,1244,0,0,0,0,0,0,0,0
97,Supplemental disclosure of cash flow information,cash paid for operating lease liabilities,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1096,0,0,0,1061
