In [2]:
import pandas as pd
import os
import openpyxl
import numpy as np
import re
from datetime import datetime
import requests

In [62]:
class Income(HelperFunctions):
    def __init__(self,ticker,fromDate):
        
        self.ticker = ticker
        self.fromDate = fromDate
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        
        
        self.IncomeExcel = self.readIncomeExcel()
        self.IncomeExcel = self.getExcelSheetsPostDate(self.IncomeExcel)
        
        self.compiledStatement = self.addData(self.IncomeExcel)
        self.performIncomeMath()
        self.reorderQuarters()
        
        self.simpleCompiledStatement = self.simpleMerge(self.IncomeExcel)
        
        self.writeFinalIncome()
        
    def readIncomeExcel(self):
        file = os.path.join(self.path, f"Income Statements All-{self.ticker}.xlsx")
        Excel = pd.read_excel(file,sheet_name=None)
        return Excel
    
    def writeFinalIncome(self):
        file = os.path.join(self.path, f"Income Statement-{self.ticker}.xlsx")
        Excel = pd.ExcelWriter(file)
        
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="compiled")
        self.simpleCompiledStatement.to_excel(Income, index=False, sheet_name="simple")
        
        Excel.save()
        

        
    def performIncomeMath(self):
        quarters = self.getQuarters()
        years = self.getUniqueYears(quarters)
        
        for year in years:
            year = str(year)
            if "Year Ended "+year in quarters and "Q3 "+year in quarters: 
                
                self.compiledStatement["Q4 "+year] = self.compiledStatement["Year Ended "+year] - self.compiledStatement["Q3 "+year]
                self.compiledStatement.loc[self.compiledStatement.label.str.startswith("weighted-average"),"Q4 "+year] = self.compiledStatement.loc[self.compiledStatement.label.str.startswith("weighted-average"),"Year Ended "+year]


            

In [63]:
income = Income("GPS",2017)
finalIncome = income.compiledStatement

finalIncome


Unnamed: 0,label,Year Ended 2017,Q2 2018,Q3 2018,Q4 2018,Year Ended 2018,Q1 2019,Q2 2019,Q3 2019,Q4 2019,...,Q2 2021,Q3 2021,Q4 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Q4 2022,Year Ended 2022,Q1 2023
0,net sales,15516.0,3799.0,3838.0,12017.0,15855.0,3783.0,4085.0,4089.0,-4089.0,...,3275.0,3994.0,-3994.0,0.0,3991.0,4211.0,3943.0,16670000000.0,16670000000.0,3477.0
1,revenues,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16580.0,...,0.0,0.0,13800000000.0,13800000000.0,0.0,0.0,0.0,0.0,0.0,0.0
2,cost of goods sold and occupancy expenses,9876.0,2320.0,2313.0,7476.0,9789.0,2356.0,2458.0,2466.0,7792.0,...,2126.0,2374.0,9094998000.0,9095000000.0,2361.0,2388.0,2282.0,10033000000.0,10033000000.0,2381.0
3,gross profit,5640.0,1479.0,1525.0,4541.0,6066.0,1427.0,1627.0,1623.0,4699.0,...,1149.0,1620.0,4704998000.0,4705000000.0,1630.0,1823.0,1661.0,6636998000.0,6637000000.0,1096.0
4,operating expenses,4449.0,1028.0,1147.0,3440.0,4587.0,1198.0,1229.0,1260.0,3700.0,...,1076.0,1445.0,5566999000.0,5567000000.0,1390.0,1414.0,1508.0,5826998000.0,5827000000.0,1293.0
5,operating income,1191.0,451.0,378.0,1101.0,1479.0,229.0,398.0,363.0,999.0,...,73.0,175.0,-862000200.0,-862000000.0,240.0,409.0,153.0,809999800.0,810000000.0,-197.0
6,loss on extinguishment of debt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,58.0,0.0,58000000.0,58000000.0,0.0,0.0,325.0,324999700.0,325000000.0,0.0
7,interest expense,75.0,16.0,18.0,56.0,74.0,16.0,17.0,21.0,52.0,...,58.0,55.0,191999900.0,192000000.0,54.0,51.0,44.0,167000000.0,167000000.0,20.0
8,interest income,-8.0,-4.0,-4.0,-15.0,-19.0,-6.0,-7.0,-8.0,-25.0,...,-2.0,-1.0,-9999999.0,-10000000.0,-1.0,-1.0,-1.0,-4999999.0,-5000000.0,-1.0
9,income before income taxes,1124.0,439.0,364.0,1060.0,1424.0,219.0,388.0,350.0,972.0,...,-41.0,121.0,-1102000000.0,-1102000000.0,187.0,359.0,-215.0,323000200.0,323000000.0,-216.0


In [81]:
class HelperFunctions():
    def getYears(self,quarters): #go through each file name and find the year of filing
        return [int(re.findall("\d{4}",quarter)[0]) for quarter in quarters]
    
    def getUniqueYears(self,quarters):
        return set(self.getYears(quarters))
    
    def getExcelSheetsPostDate(self,Excel): #use IncomeExcel dictionary to get all dates and only keep where date is later than year specified 
        quarters = Excel.keys()
        fileDates = self.getYears(quarters) 
        files = [file for file, date in zip(Excel, fileDates) if date>self.fromDate or (date==self.fromDate and "Year Ended" in file)]
        Excel = {file:Excel[file] for file in files}
        return Excel
        
    def reorderQuarters(self):
        quarters = self.getQuarters()
        years = self.getUniqueYears(quarters)
        cols = [q + " " + str(year) for year in years for q in ["Q1","Q2","Q3","Q4","Year Ended"] if q + " " + str(year) in quarters]
        self.compiledStatement = self.compiledStatement[["label"] + cols]
        
    def getQuarters(self):
        return list(self.compiledStatement.columns[1:])
    
    
    def cleanup_label(self, label):
        new_label = str(label).lower()
        new_label = new_label.replace("gain","loss").replace("decrease","increase")
        new_label = re.sub("\s\([\w\W]+?\)","",new_label).replace("  "," ").replace("—"," - ").strip()
        new_label = re.sub(" \$|\s*\d+\,*", "", new_label) 
        return new_label
    
    
    def addData(self, Excel):
        compiledStatement = pd.DataFrame()
        
        for quarter,sheet in Excel.items():
            quarter =  re.sub("\s\([\w\W]+?\)","",quarter) #remove the parenthesis ie. (2021) from the quarter
            sheet.columns = ["label",quarter]
            sheet.label = sheet.label.apply(self.cleanup_label)
            
            sheet = sheet[~sheet[quarter].astype("string").str.contains("[a-zA-Z]",regex=True)]
            
            sheeti = statementi = 0
            newStatement = pd.DataFrame(columns=["label"])
            
            #merge the current finalstatement and new sheet into one statement 
            #iterate through each row, check if the label is the same and switch off adding rows
            while sheeti < len(sheet) and statementi < len(compiledStatement):
              
                sheet_label = sheet.iloc[sheeti].label
                statement_label = compiledStatement.iloc[statementi].label
                newRowStatement = compiledStatement.iloc[statementi:statementi+1].reset_index(drop=True) #grab the row and drop the index so that they merge
                newRowSheet = sheet.iloc[sheeti:sheeti+1].reset_index(drop=True)
                
                #check if the labels are the same 
                if statement_label == sheet_label:
                    #if labels are the same then create a new row by concating the rows
                    newRowSheet = newRowSheet.drop(columns="label")
                    newRow = pd.concat([newRowStatement,newRowSheet],axis=1)
                    
                    #concat the newRow to the newstatement
                    newStatement = pd.concat([newStatement, newRow])
                    #move on to the next row
                    sheeti+=1
                    statementi+=1
                    
                else:
                    #check if the sheet index is the last one so doesn't cause error
                    if sheeti+1<len(sheet):
                        #next, before I append the final statement label, I want to check if the statement label corresponds to the next label for the sheet
                        #if it corresponds, then I will move the sheet up instead
                        if statement_label == sheet.iloc[sheeti+1].label: 
                            newStatement = pd.concat([newStatement,newRowSheet]) #concat the sheet row to the new statement
                            sheeti += 1
                            continue
                    
                    #if the two labels don't match, default is to add the row of the final statment
                    newStatement = pd.concat([newStatement,newRowStatement])
                    statementi += 1
                         
                    
                    
                    
            #if either the finalstatement or the new sheet hasn't finished, then add the end of the statement to the new one
            newStatement = pd.concat([newStatement,sheet.iloc[sheeti:len(sheet)]])
            newStatement = pd.concat([newStatement,compiledStatement.iloc[statementi:len(compiledStatement)]])
            
            compiledStatement = newStatement
            
        compiledStatement = compiledStatement.reset_index(drop=True)
        
        compiledStatement = compiledStatement.fillna(0)
        compiledStatement = compiledStatement.loc[~(compiledStatement==0).all(axis=1)]
        
        return compiledStatement
    
    def simpleMerge(self, Excel):
        compiledStatement = pd.DataFrame()
        for quarter,sheet in Excel.items():
            quarter =  re.sub("\s\([\w\W]+?\)","",quarter) #remove the parenthesis ie. (2021) from the quarter
            sheet.columns = ["label",quarter]
            compiledStatement = pd.concat([compiledStatement, sheet],axis=1)
            compiledStatement = compiledStatement.reset_index(drop=True)
            
        return compiledStatement
   
    

In [86]:
class Cashflow(HelperFunctions):
    def __init__(self,ticker,fromDate):
        
        self.ticker = ticker
        self.fromDate = fromDate
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        
        
        self.CashflowExcel = self.readCashflowExcel()
        self.CashflowExcel = self.getExcelSheetsPostDate(self.CashflowExcel)
        
        self.compiledStatement = self.addData(self.CashflowExcel)
#         self.performIncomeMath()
#         self.reorderQuarters()
        
        self.simpleCompiledStatement = self.simpleMerge(self.CashflowExcel)
        
        self.writeFinalCashflow()
        
    def readCashflowExcel(self):
        file = os.path.join(self.path, f"Cashflow Statements All-{self.ticker}.xlsx")
        Excel = pd.read_excel(file,sheet_name=None)
        return Excel
    
    def writeFinalCashflow(self):
        file = os.path.join(self.path, f"Cashflow Statement-{self.ticker}.xlsx")
        Excel = pd.ExcelWriter(file)
        
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="compiled")
        self.simpleCompiledStatement.to_excel(Excel, index=False, sheet_name="simple")
        
        Excel.save()
        
        
    def performIncomeMath(self):
        quarters = self.getQuarters()
        years = self.getUniqueYears(quarters)
        
        for year in years:
            year = str(year)
            if "Year Ended "+year in quarters and "Q3 "+year in quarters: 
                
                self.compiledStatement["Q4 "+year] = self.compiledStatement["Year Ended "+year] - self.compiledStatement["Q3 "+year]
                self.compiledStatement.loc[self.compiledStatement.label.str.startswith("weighted-average"),"Q4 "+year] = self.compiledStatement.loc[self.compiledStatement.label.str.startswith("weighted-average"),"Year Ended "+year]


            

In [87]:
cashflow = Cashflow("XOM",2017)

In [76]:
cashflow.simpleCompiledStatement

Unnamed: 0,label,Year Ended 2017,label.1,Q1 2018,label.2,Q2 2018,label.3,Q3 2018,label.4,Year Ended 2018,...,label.5,Q1 2021,label.6,Q2 2021,label.7,Q3 2021,label.8,Year Ended 2021,label.9,Q1 2022
0,cash flows from operating activities,,cash flows from operating activities,,cash flows from operating activities,,cash flows from operating activities,,cash flows from operating activities,,...,cash flows from operating activities,,cash flows from operating activities,,cash flows from operating activities,,cash flows from operating activities,,cash flows from operating activities,
1,net income including noncontrolling interests,19848.0,net income including noncontrolling interests,4783.0,net income including noncontrolling interests,8769,net income including noncontrolling interests,15215,net income including noncontrolling interests,21421.0,...,net income including noncontrolling interests,2796.0,net income including noncontrolling interests,7577.0,net income including noncontrolling interests,14519.0,net income including noncontrolling interests,23598.0,net income including noncontrolling interests,5750.0
2,adjustments for noncash transactions,,depreciation and depletion,4470.0,depreciation and depletion,9059,depreciation and depletion,13717,adjustments for noncash transactions,,...,depreciation and depletion,5004.0,depreciation and depletion,9956.0,depreciation and depletion,14946.0,adjustments for noncash transactions,,depreciation and depletion,8883.0
3,depreciation and depletion,19893.0,"changes in operational working capital, exclud...",351.0,"changes in operational working capital, exclud...",-982,"changes in operational working capital, exclud...",-25,depreciation and depletion,18745.0,...,noncash inventory adjustment - lower of cost o...,0.0,"changes in operational working capital, exclud...",1573.0,"changes in operational working capital, exclud...",2232.0,depreciation and depletion,20607.0,"changes in operational working capital, exclud...",1086.0
4,deferred income tax charges/(credits),-8577.0,all other items - net,-1085.0,all other items - net,-547,all other items - net,-1500,deferred income tax charges/(credits),-60.0,...,"changes in operational working capital, exclud...",1953.0,all other items – net,-192.0,all other items – net,-692.0,deferred income tax charges/(credits),303.0,all other items – net,-931.0
5,postretirement benefits expense in excess of/(...,1135.0,net cash provided by operating activities,8519.0,net cash provided by operating activities,16299,net cash provided by operating activities,27407,postretirement benefits expense in excess of/(...,1070.0,...,all other items – net,-489.0,net cash provided by operating activities,18914.0,net cash provided by operating activities,31005.0,postretirement benefits expense in excess of/(...,754.0,net cash provided by operating activities,14788.0
6,other long-term obligation provisions in exces...,-610.0,cash flows from investing activities,,cash flows from investing activities,,cash flows from investing activities,,other long-term obligation provisions in exces...,-68.0,...,net cash provided by operating activities,9264.0,cash flows from investing activities,,cash flows from investing activities,,other long-term obligation provisions in exces...,50.0,cash flows from investing activities,
7,dividends received greater than/(less than) eq...,131.0,"additions to property, plant and equipment",-3349.0,"additions to property, plant and equipment",-8276,"additions to property, plant and equipment",-13480,dividends received greater than/(less than) eq...,-1684.0,...,cash flows from investing activities,,"additions to property, plant and equipment",-5147.0,"additions to property, plant and equipment",-7987.0,dividends received greater than/(less than) eq...,-668.0,"additions to property, plant and equipment",-3911.0
8,"changes in operational working capital, exclud...",,proceeds associated with sales of subsidiaries...,1441.0,proceeds associated with sales of subsidiaries...,1748,proceeds associated with sales of subsidiaries...,3239,"changes in operational working capital, exclud...",,...,"additions to property, plant and equipment",-2400.0,proceeds from asset sales and returns of inves...,557.0,proceeds from asset sales and returns of inves...,575.0,"changes in operational working capital, exclud...",,proceeds from asset sales and returns of inves...,293.0
9,reduction/(increase) - notes and accounts rece...,-3954.0,additional investments and advances,-138.0,additional investments and advances,-704,additional investments and advances,-1113,reduction/(increase) - notes and accounts rece...,-545.0,...,proceeds from asset sales and returns of inves...,307.0,additional investments and advances,-613.0,additional investments and advances,-1055.0,reduction/(increase) - notes and accounts rece...,-12098.0,additional investments and advances,-417.0


In [132]:
class Balance(FinancialDataMerge, ProcessQuarters, HelperFunctions):
    def __init__(self):
        self.total_keyword = "total"
        self.finalStatement = pd.DataFrame(columns=["title","title_order","label"])
        
        self.BalanceStatements = self.getBalanceStatements()
        self.addData(self.BalanceStatements)
        
        self.rearrangeFinalStatement()
        
        self.setQuarters()
        
        self.performBalanceMath()
        
        self.reorderQuarters()
        
      
    def getBalanceStatements(self):
        FinancialReports = self.getFinancialStatements()
        BalanceStatements = [[report.getBalanceSheet().copy(), report.file] for report in FinancialReports]
        return BalanceStatements
    
    def parseQuarter(self,date):
        mo = date.split(" ")[0].strip(".")
        year = date.split(", ")[-1]
        if mo in ["Apr","May"]:
            return "Q1 "+str(int(year)+1)
        
        if mo in ["Jul","Aug"]:
            return "Q2 "+str(int(year)+1)
        
        if mo in ["Oct","Nov"]:
            return "Q3 "+str(int(year)+1)
        
        if mo in ["Jan","Feb"]:
            return "Year Ended "+str(int(year))
        
        
    def performBalanceMath(self):
        years = self.getYears()[1:]
        for year in years:
            if "Year Ended "+year not in self.getQuarterHeaders(): 
                continue
            self.finalStatement["Q4 "+year] = self.finalStatement["Year Ended "+year]
            
    def getAccumulatedDepreciation(self):
        Depreciation = {}
        quarters = []
        for sheet, file in self.BalanceStatements:
            sheet.label = [label.lower() for label in sheet.label]
            date = sheet.columns[1]
            quarter = self.parseQuarter(date) 
            quarters.append(quarter)
            label = sheet.loc[sheet.label.str.contains("property and equipment, net of accumulated depreciation"),"label"]
            if len(label)>0:
                label = label.iloc[0]
                dep = re.findall("\d+,*\d*",label)
                if len(dep) > 0:
                    dep = int(dep[0].replace(",",""))
                    Depreciation[quarter] = dep
                    continue
                   
            dep = sheet.loc[(sheet.label == "property and equipment, accumulated depreciation") | (sheet.label=="accumulated depreciation, depletion and amortization, property, plant, and equipment"), date]
            if len(dep) > 0:
                dep = dep.iloc[0]
                Depreciation[quarter] = dep
                if "Year Ended" in quarter:
                    Depreciation["Q4 "+quarter.split(" ")[-1]] = dep
                continue
        #dates = sorted(Depreciation,key=lambda date: datetime.strptime(date.replace(".",""), '%b %d, %Y'))
        years = list(set([x.split(" ")[-1] for x in quarters]))
        years.sort()
        cols = { q+" "+year : [Depreciation.get(q + " " + year, np.nan)] for year in years for q in ["Q1","Q2","Q3","Q4","Year Ended"] if q + " " + year in quarters}
        self.Depreciation = pd.DataFrame(cols)
        return self.Depreciation
            
            
        

In [133]:
balance = Balance()
balance.getAccumulatedDepreciation().to_excel("output/raw/depreciation.xlsx")


q3 2022 (2021).xlsx
q3 2019 (2018).xlsx
q3 2021 (2020).xlsx
q3 2018 (2017).xlsx
q1 2020 (2019).xlsx
q2 2022 (2021).xlsx
q2 2018 (2017).xlsx
q2 2021 (2020).xlsx
10k 2020 (2019).xlsx
q2 2019 (2018).xlsx
q1 2023 (2022).xlsx
q1 2022 (2021).xlsx
q3 2020 (2019).xlsx
q1 2021 (2020)xlsx.xlsx
q1 2018 (2017).xlsx
q1 2019 (2018).xlsx
10k 2022 (2021).xlsx
10k 2019 (2018).xlsx
q2 2020 (2019).xlsx
10k 2017 (2016).xlsx
10k 2018 (2017).xlsx
10k 2021 (2020).xlsx


In [134]:
balance = Balance()
finalBalance = balance.finalStatement
finalBalance.to_excel("output/raw/balance.xlsx")
finalBalance


q3 2022 (2021).xlsx
q3 2019 (2018).xlsx
q3 2021 (2020).xlsx
q3 2018 (2017).xlsx
q1 2020 (2019).xlsx
q2 2022 (2021).xlsx
q2 2018 (2017).xlsx
q2 2021 (2020).xlsx
10k 2020 (2019).xlsx
q2 2019 (2018).xlsx
q1 2023 (2022).xlsx
q1 2022 (2021).xlsx
q3 2020 (2019).xlsx
q1 2021 (2020)xlsx.xlsx
q1 2018 (2017).xlsx
q1 2019 (2018).xlsx
10k 2022 (2021).xlsx
10k 2019 (2018).xlsx
q2 2020 (2019).xlsx
10k 2017 (2016).xlsx
10k 2018 (2017).xlsx
10k 2021 (2020).xlsx


Unnamed: 0,title,label,Year Ended 2017,Q1 2018,Q2 2018,Q3 2018,Q4 2018,Year Ended 2018,Q1 2019,Q2 2019,...,Q2 2021,Q3 2021,Q4 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Q4 2022,Year Ended 2022,Q1 2023
0,,cash and cash equivalents,1783.0,1583.0,1609.0,1353.0,1783.0,1783.0,1210.0,1322.0,...,2188.0,2471.0,0.0,0.0,2066.0,2375.0,801.0,877.0,877.0,845.0
1,,short-term investments,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,410.0,410.0,0.0,0.0,275.0,0.0,0.0,0.0
2,,merchandise inventory,1830.0,1961.0,2051.0,2476.0,1997.0,1997.0,2035.0,2202.0,...,2242.0,2747.0,2451.0,2451.0,2370.0,2281.0,2721.0,3018.0,3018.0,3169.0
3,,other current assets,702.0,575.0,598.0,654.0,788.0,788.0,778.0,780.0,...,882.0,966.0,0.0,0.0,1091.0,1201.0,1410.0,1270.0,1270.0,991.0
4,,"available-for-sale securities, current",0.0,0.0,0.0,0.0,0.0,0.0,164.0,286.0,...,25.0,178.0,0.0,0.0,475.0,337.0,0.0,0.0,0.0,0.0
5,,"cash and cash equivalents, at carrying value",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1988.0,1988.0,0.0,0.0,0.0,0.0,0.0,0.0
6,,"other assets, current",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1159.0,1159.0,0.0,0.0,0.0,0.0,0.0,0.0
7,,total current assets,4315.0,4119.0,4258.0,4483.0,4568.0,4568.0,4187.0,4590.0,...,5337.0,6362.0,6008.0,6008.0,6002.0,6194.0,5207.0,5165.0,5165.0,5005.0
8,,"property and equipment, net of accumulated dep...",0.0,2605.0,2643.0,2686.0,0.0,0.0,2791.0,2832.0,...,2895.0,2846.0,2841.0,2841.0,2839.0,2897.0,2924.0,3037.0,3037.0,2791.0
9,,"operating lease, right-of-use asset",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4689.0,4460.0,4217.0,4217.0,4060.0,3975.0,3788.0,0.0,0.0,3587.0


In [137]:
class CashFlow(FinancialDataMerge, ProcessQuarters, HelperFunctions):
    def __init__(self):
        self.total_keyword = "net"
        self.finalStatement = pd.DataFrame(columns=["title","title_order","label"])
        
        self.CashFlowStatements = self.getCashFlowStatements()
        self.addData(self.CashFlowStatements)
        
        self.rearrangeFinalStatement()
        
        self.setQuarters()
        
        self.performCashFlowMath()
        
        self.reorderQuarters()
        
      
    def getCashFlowStatements(self):
        FinancialReports = self.getFinancialStatements()
        CashFlowStatements = [[report.getCashFlowSheet().copy(), report.file] for report in FinancialReports]
        return CashFlowStatements
    
    def parseQuarter(self,date):
        mo = date.split(" ")[0].strip(".")
        year = date.split(", ")[-1]
        if mo in ["Apr","May"]:
            return "Q1 "+str(int(year)+1)
        
        if mo in ["Jul","Aug"]:
            return "6mo "+str(int(year)+1)
        
        if mo in ["Oct","Nov"]:
            return "9mo "+str(int(year)+1)
        
        if mo in ["Jan","Feb"]:
            return "Year Ended "+str(int(year))     
    def performCashFlowMath(self):
        years = self.getYears()[1:]
        begin_cash = self.finalStatement.label.apply(lambda label: "cash" in label and "beginning of period" in label)
        data = [None, "cash at beginning of period"] + self.finalStatement.loc[begin_cash,self.getQuarterHeaders()].sum().tolist()
        self.finalStatement = self.finalStatement.drop(self.finalStatement.loc[begin_cash].index)
        self.finalStatement.loc[len(self.finalStatement)] = data

        end_cash = self.finalStatement.label.apply(lambda label: "cash" in label and "end of period" in label)
        data = [None, "cash at end of period"] + self.finalStatement.loc[end_cash,self.getQuarterHeaders()].sum().tolist()
        self.finalStatement = self.finalStatement.drop(self.finalStatement.loc[end_cash].index)
        self.finalStatement.loc[len(self.finalStatement)] = data
        
        self.finalStatement = self.finalStatement.set_index("label")
        for year in years:
            if "Year Ended "+year not in self.getQuarterHeaders(): 
                continue
            # find Q2 
            self.finalStatement["Q2 "+year] = self.finalStatement["6mo "+year] - self.finalStatement["Q1 "+year]
            self.finalStatement.loc["cash at beginning of period","Q2 "+year] = self.finalStatement.loc["cash at end of period","Q1 "+year]
            self.finalStatement.loc["cash at end of period","Q2 "+year] = self.finalStatement.loc["cash at end of period","6mo "+year]
            
            #find Q3 
            self.finalStatement["Q3 "+year] = self.finalStatement["9mo "+year] - self.finalStatement["6mo "+year]
            self.finalStatement.loc["cash at beginning of period","Q3 "+year] = self.finalStatement.loc["cash at end of period", "6mo "+year]
            self.finalStatement.loc["cash at end of period","Q3 "+year] = self.finalStatement.loc["cash at end of period","9mo "+year]
            
            
            #find Q3 
            self.finalStatement["Q4 "+year] = self.finalStatement["Year Ended "+year] - self.finalStatement["9mo "+year]
            self.finalStatement.loc["cash at beginning of period","Q4 "+year] = self.finalStatement["9mo "+year].loc["cash at end of period"]
            self.finalStatement.loc["cash at end of period","Q4 "+year] = self.finalStatement.loc["cash at end of period","Year Ended "+year]
        
        self.finalStatement = self.finalStatement.reset_index()
        self.finalStatement = self.finalStatement[["title","label"]+self.getQuarterHeaders()]
        

In [138]:
cashflow = CashFlow()
finalCashflow = cashflow.finalStatement
finalCashflow.to_excel("output/cashflow.xlsx")
finalCashflow

q3 2022 (2021).xlsx
q3 2019 (2018).xlsx
q3 2021 (2020).xlsx
q3 2018 (2017).xlsx
q1 2020 (2019).xlsx
q2 2022 (2021).xlsx
q2 2018 (2017).xlsx
q2 2021 (2020).xlsx
10k 2020 (2019).xlsx
q2 2019 (2018).xlsx
q1 2023 (2022).xlsx
q1 2022 (2021).xlsx
q3 2020 (2019).xlsx
q1 2021 (2020)xlsx.xlsx
q1 2018 (2017).xlsx
q1 2019 (2018).xlsx
10k 2022 (2021).xlsx
10k 2019 (2018).xlsx
q2 2020 (2019).xlsx
10k 2017 (2016).xlsx
10k 2018 (2017).xlsx
10k 2021 (2020).xlsx


Unnamed: 0,title,label,Year Ended 2017,Q1 2018,Q2 2018,Q3 2018,Q4 2018,Year Ended 2018,Q1 2019,Q2 2019,...,Q2 2021,Q3 2021,Q4 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Q4 2022,Year Ended 2022,Q1 2023
0,,net income,676,143,271,229,205,848,164,297,...,-62,95,234,-665,166,258,-152,-16,256,-162
1,,depreciation and amortization,593,138,141,139,141,559,140,140,...,126,125,126,507,120,124,128,132,504,130
2,,share-based compensation,76,20,22,18,27,87,21,27,...,17,20,22,77,36,36,25,42,139,1
3,,"operating lease, impairment loss",0,0,0,0,0,0,0,0,...,1,0,30,391,5,1,0,-6,0,0
4,,other asset impairment charges,107,0,0,0,28,28,0,0,...,3,0,8,135,0,1,0,-1,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,,income taxes paid,0,0,0,0,0,0,0,0,...,0,0,20,20,0,0,0,0,0,0
668,,"cash paid for income taxes during the period, ...",488,35,95,130,310,570,19,42,...,16,-45,-8,0,20,127,34,34,215,-420
669,,cash at beginning of period,1370,1783,1583,1609,1353,1783,1799,1229,...,1048,2241,2499,1381,2016,2096,2407,829,2016,902
670,,"operating lease, payments",0,0,0,0,0,0,0,0,...,0,0,1096,1096,0,0,0,0,0,0


In [848]:
finalCashflow["label"].tolist()

['cash and cash equivalents',
 'short-term investments',
 'merchandise inventory',
 'other current assets',
 'available-for-sale securities, current',
 'total current assets',
 'property and equipment, net of accumulated depreciation',
 'operating lease, right-of-use asset',
 'other long-term assets',
 'accumulated depreciation, depletion and amortization, property, plant, and equipment',
 'operating lease assets',
 'total assets',
 'line of credit facility, fair value of amount outstanding',
 'accounts payable',
 'accrued expenses and other current liabilities',
 'operating lease, liability, current',
 'income taxes payable',
 'current portion of operating lease liabilities',
 'total current liabilities',
 'long-term debt',
 'lease incentives and other long-term liabilities',
 'long-term operating lease liabilities',
 'other long-term liabilities',
 'total long-term debt',
 'total long-term liabilities',
 'operating lease, liability, noncurrent',
 'lease incentives and other long-term

In [856]:
finalCashflow

Unnamed: 0,title,label,Year Ended 2016,Q1 2017,6mo 2017,9mo 2017,Year Ended 2017,Q1 2018,6mo 2018,9mo 2018,...,9mo 2020,Year Ended 2020,Q1 2021,6mo 2021,9mo 2021,Year Ended 2021,Q1 2022,6mo 2022,9mo 2022,Year Ended 2022
0,Cash flows from operating activities,net income,920,127,252,456,676,143,414,643,...,535,351,-932,-994,-899,-665,166,424,272,256
1,Adjustments to reconcile net income to net cas...,depreciation and amortization,592,148,303,449,593,138,279,418,...,417,557,130,256,381,507,120,244,372,504
2,Adjustments to reconcile net income to net cas...,share-based compensation,76,15,36,55,76,20,42,60,...,64,68,18,35,55,77,36,72,97,139
3,Adjustments to reconcile net income to net cas...,"operating lease, impairment loss",0,0,0,0,0,0,0,0,...,1,239,360,361,361,0,5,6,6,0
4,Adjustments to reconcile net income to net cas...,other asset impairment charges,54,0,0,0,107,0,0,0,...,9,98,124,127,127,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,Supplemental disclosure of cash flow information,"cash paid for income taxes during the period, ...",452,43,143,318,488,35,130,260,...,117,176,37,53,8,20,20,147,181,215
95,Supplemental disclosure of cash flow information,cash paid for interest during the period,78,39,41,80,82,38,38,76,...,0,76,0,0,0,145,0,0,0,180
96,Supplemental disclosure of cash flow information,"operating lease, payments",0,0,0,0,0,0,0,0,...,0,1244,0,0,0,0,0,0,0,0
97,Supplemental disclosure of cash flow information,cash paid for operating lease liabilities,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1096,0,0,0,1061
