In [11]:
import pandas as pd
import os
import openpyxl
import numpy as np
import re
from datetime import datetime
import requests

In [298]:
class HelperFunctions():
    def getYears(self,quarters): #go through each file name and find the year of filing
        return [int(re.findall("\d{4}",quarter)[0]) for quarter in quarters]
    
    def getUniqueYears(self,quarters):
        return set(self.getYears(quarters))
    
    def getPossibleFilingsFromYears(self, years):
        return [q + " " + str(year) for year in years for q in ["Q1","Q2","Q3","Q4","Year Ended"]]
    
    def getExcelSheetsPostDate(self,Excel):
        years = range(self.fromDate,self.endDate+1)
        names = self.getPossibleFilingsFromYears(years)
        #names = ["Year Ended "+str(self.fromDate-1)]+names
        files = [filing for name in names for filing in Excel.keys() if name in filing] 
        Excel = {file:Excel[file] for file in files}
        return Excel
    
    def reorderQuarters(self,compiledStatement):
        quarters = self.getQuarters(compiledStatement)
        years = self.getUniqueYears(quarters)
        cols = self.getPossibleFilingsFromYears(years)
        cols = [col for col in cols if col in quarters]
        compiledStatement = compiledStatement[["label"] + cols]
        return compiledStatement
        
    def getQuarters(self, compiledStatement):
        return list(compiledStatement.columns[1:])
    
    
    def cleanup_label(self, label):
        new_label = label.lower()
        new_label = re.sub("\([\w\W]+?\)","",new_label)
        new_label = re.sub(" \$|\s*\d+\,*", "", new_label) 
        new_label = new_label.replace("gain","loss").replace("decrease","increase").replace("  "," ").replace("—"," - ").replace("/","").strip()
        return new_label
    
    
    def addData(self, Excel):
        compiledStatement = pd.DataFrame(columns=["label"])
        excel = list(Excel.items())
        excel.reverse()
        for quarter,sheet in excel:
            quarter =  re.sub("\s\([\w\W]+?\)","",quarter) #remove the parenthesis ie. (2021) from the quarter
            sheet.columns = ["label",quarter]
            
            sheet = sheet.dropna(subset="label", axis=0, how="any")
            sheet = sheet[(~sheet[quarter].astype("string").str.contains("[a-zA-Z]",regex=True))|sheet[quarter].isnull()]
            
            sheet.label = sheet.label.apply(self.cleanup_label)
            
            sheeti = statementi = 0
            newStatement = pd.DataFrame()
            #merge the current finalstatement and new sheet into one statement 
            #go through each row of the two statements and keep track of the indexes of each statement
            #continue until one of either the compiled statement or sheet ends
            while sheeti < len(sheet) and statementi < len(compiledStatement):
                sheet_label = sheet.iloc[sheeti].label
                statement_label = compiledStatement.iloc[statementi].label
                newRowStatement = compiledStatement.iloc[statementi:statementi+1].reset_index(drop=True) #grab the row and drop the index so that they merge
                newRowSheet = sheet.iloc[sheeti:sheeti+1].reset_index(drop=True)

                #check if the labels are the same 
                if statement_label == sheet_label:
                    #if labels are the same then create a new row by concating the rows
                    newRowSheet = newRowSheet.drop(columns="label")
                    newRow = pd.concat([newRowStatement,newRowSheet],axis=1)
                    
                    newStatement = pd.concat([newStatement, newRow], ignore_index=True)
                    
                    #move on to the next row
                    sheeti+=1
                    statementi+=1
                    
                else:
                    maxi = min(sheeti+5,len(sheet))
                    
                    #next, before I append the final statement label, I want to check if the statement label corresponds to the next label for the sheet
                    #if it corresponds, then I will move the sheet up instead
                    if statement_label in list(sheet.iloc[sheeti:maxi].label): 
                        newStatement = pd.concat([newStatement,newRowSheet]) #concat the sheet row to the new statement
                        sheeti += 1
                        continue
                    
                    #if the two labels don't match, default is to add the row of the final statment
                    newStatement = pd.concat([newStatement,newRowStatement])
                    
                    statementi += 1
                    
                newStatement = newStatement.reset_index(drop=True)
                newStatement.index = newStatement.index+1
        
            #if either the finalstatement or the new sheet hasn't finished, then add the end of the statement to the new one
            newStatement = pd.concat([newStatement,sheet.iloc[sheeti:len(sheet)]])
            newStatement = pd.concat([newStatement,compiledStatement.iloc[statementi:len(compiledStatement)]])

            compiledStatement = newStatement
            
        compiledStatement = compiledStatement.reset_index(drop=True)
        return compiledStatement
    

In [299]:
class CompileIncomeStatement(HelperFunctions):
    def __init__(self,ticker,fromDate, endDate=2022):
        self.ticker = ticker
        self.fromDate = fromDate
        self.endDate = endDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.input = os.path.join(self.path, f"Income Statements All-{self.ticker}.xlsx")
        self.Excel = self.readExcel()
        self.Excel = self.getExcelSheetsPostDate(self.Excel)
        
        self.compiledStatement = self.addData(self.Excel)
        self.compiledStatement = self.performIncomeMath(self.compiledStatement)
        self.compiledStatement = self.reorderQuarters(self.compiledStatement)
    
    def readExcel(self):
        Excel = pd.read_excel(self.input,sheet_name=None)
        return Excel
    
    
    
    def performIncomeMath(self,compiledStatement):
        quarters = self.getQuarters(compiledStatement)
        years = self.getUniqueYears(quarters)
        
        for year in years:
            year = str(year)
            if "Year Ended "+year in quarters and "Q3 "+year in quarters: 
                compiledStatement["Q4 "+year] = compiledStatement["Year Ended "+year] - compiledStatement["Q3 "+year]
                compiledStatement.loc[compiledStatement.label.str.startswith("weighted-average"),"Q4 "+year] = compiledStatement.loc[compiledStatement.label.str.startswith("weighted-average"),"Year Ended "+year]
        return compiledStatement

            

In [300]:
class Income:
    def __init__(self,ticker,fromDate):
        self.ticker = ticker
        self.fromDate = fromDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.output = os.path.join(self.path, f"Compiled Income Statement-{self.ticker}.xlsx")
        
        self.createSeparateStatements()
        self.createStatement()
        
        self.writeExcel()
        
    def createSeparateStatements(self):
        self.separateCompiledStatement = pd.DataFrame()
        for year in range(self.fromDate,2023):
            statement = CompileIncomeStatement(self.ticker, year,year)
            self.separateCompiledStatement = pd.concat([self.separateCompiledStatement,statement.compiledStatement],axis=1)
    
    def createStatement(self):
        self.compiledStatement = CompileIncomeStatement(self.ticker, self.fromDate).compiledStatement
        
    def writeExcel(self):
        Excel = pd.ExcelWriter(self.output)
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="Compiled Statement")
        self.separateCompiledStatement.to_excel(Excel, index=False, sheet_name="Separately Compiled Statement")
        
        Excel.save()
         
    

In [301]:
income = Income("MSFT",2017)
income.compiledStatement



Unnamed: 0,label,Year Ended 2017,Q1 2018,Q2 2018,Q3 2018,Q4 2018,Year Ended 2018,Q1 2019,Q2 2019,Q3 2019,...,Q1 2021,Q2 2021,Q3 2021,Q4 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Q4 2022,Year Ended 2022
0,revenue,,,,,,110360.0,29084.0,32471.0,30571.0,...,37154.0,43076.0,41706.0,126382.0,168088.0,45317.0,51728.0,49360.0,148910.0,198270.0
1,product,57190.0,14298.0,17926.0,15114.0,,,,,,...,,,,,,,,,,
2,service and other,32760.0,10240.0,10992.0,11705.0,,,,,,...,,,,,,,,,,
3,total revenue,89950.0,24538.0,28918.0,26819.0,,,,,,...,,,,,,,,,,
4,cost of revenue,,,,,,38353.0,9905.0,12423.0,10170.0,...,11002.0,14194.0,13045.0,39187.0,52232.0,13646.0,16960.0,15615.0,47035.0,62650.0
5,product,15175.0,2980.0,5498.0,3425.0,,,,,,...,,,,,,,,,,
6,service and other,19086.0,5298.0,5566.0,5844.0,,,,,,...,,,,,,,,,,
7,total cost of revenue,34261.0,8278.0,11064.0,9269.0,,,,,,...,,,,,,,,,,
8,gross margin,55689.0,16260.0,17854.0,17550.0,54457.0,72007.0,19179.0,20048.0,20401.0,...,26152.0,28882.0,28661.0,87195.0,115856.0,31671.0,34768.0,33745.0,101875.0,135620.0
9,research and development,13037.0,3574.0,3504.0,3715.0,11011.0,14726.0,3977.0,4070.0,4316.0,...,4926.0,4899.0,5204.0,15512.0,20716.0,5599.0,5758.0,6306.0,18206.0,24512.0


In [294]:
class CompileCashflowStatement(HelperFunctions):
    def __init__(self,ticker,fromDate,endDate=2022):
        self.ticker = ticker
        self.fromDate = fromDate
        self.endDate = endDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.input = os.path.join(self.path, f"Cashflow Statements All-{self.ticker}.xlsx")
        self.output = os.path.join(self.path, f"Compiled Cashflow Statement-{self.ticker}.xlsx")
        
        
        self.Excel = self.readExcel()
        self.Excel = self.getExcelSheetsPostDate(self.Excel)
        
        self.compiledStatement = self.addData(self.Excel)

        self.compiledStatement = self.reorderQuarters(self.compiledStatement)

        
    def readExcel(self):
        Excel = pd.read_excel(self.input,sheet_name=None)
        return Excel
    
    
   
            

In [295]:
CompileCashflowStatement("GPS",2022).compiledStatement

Unnamed: 0,label,Q1 2022,Q2 2022,Q3 2022,Year Ended 2022
0,cash flows from operating activities:,,,,
1,net income,166.0,424.0,272.0,256.0
2,adjustments to reconcile net income to net cas...,,,,
3,depreciation and amortization,120.0,244.0,372.0,504.0
4,share-based compensation,36.0,72.0,97.0,139.0
...,...,...,...,...,...
72,supplemental disclosure of cash flow information:,,,,
73,cash paid for interest during the period,,,,180.0
74,"interest paid, excluding capitalized interest,...",2.0,102.0,178.0,
75,"cash paid for income taxes during the period, ...",20.0,147.0,181.0,215.0


In [296]:
class Cashflow:
    def __init__(self,ticker,fromDate):
        self.ticker = ticker
        self.fromDate = fromDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.output = os.path.join(self.path, f"Compiled Cashflow Statement-{self.ticker}.xlsx")
        
        self.createSeparateStatements()
        self.createStatement()
        
        self.writeExcel()
        
    def createSeparateStatements(self):
        self.separateCompiledStatement = pd.DataFrame()
        for year in range(self.fromDate,2023):
            statement = CompileCashflowStatement(self.ticker, year,year)
            self.separateCompiledStatement = pd.concat([self.separateCompiledStatement,statement.compiledStatement],axis=1)
    
    def createStatement(self):
        self.compiledStatement = CompileCashflowStatement(self.ticker, self.fromDate).compiledStatement
        
    def writeExcel(self):
        Excel = pd.ExcelWriter(self.output)
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="Compiled Statement")
        self.separateCompiledStatement.to_excel(Excel, index=False, sheet_name="Separately Compiled Statement")
        
        Excel.save()
         
    

In [297]:
cashflow = Cashflow("GPS",2019)
cashflow.compiledStatement



Unnamed: 0,label,Q1 2019,Q2 2019,Q3 2019,Year Ended 2019,Q1 2020,Q2 2020,Q3 2020,Year Ended 2020,Q1 2021,Q2 2021,Q3 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Year Ended 2022
0,cash flows from operating activities:,,,,,,,,,,,,,,,,
1,net income,164.0,461.0,727.0,1003.0,227,395,535.0,351.0,,,,,,,,
2,adjustments to reconcile net income to net cas...,,,,,,,,,,,,,,,,
3,depreciation and amortization,140.0,280.0,425.0,578.0,138,277,417.0,557.0,,,,,,,,
4,amortization lease incentives,,,,-61.0,,,,0.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,cash paid for operating lease liabilities,,,,,,,,,,,,,,,,1061.0
153,"interest paid, including capitalized interest,...",,,,,,,,,,,,145.0,,,,
154,income taxes paid,,,,,,,,,,,,20.0,,,,
155,"operating lease, payments",,,,,301,612,916.0,1244.0,,,,1096.0,,,,


In [212]:
class CompileBalanceStatement(HelperFunctions):
    def __init__(self,ticker,fromDate,endDate=2022):
        self.ticker = ticker
        self.fromDate = fromDate
        self.endDate = endDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.input = os.path.join(self.path, f"Balance Statements All-{self.ticker}.xlsx")
        self.output = os.path.join(self.path, f"Compiled Balance Statement-{self.ticker}.xlsx")
        
        
        self.Excel = self.readExcel()
        self.Excel = self.getExcelSheetsPostDate(self.Excel)
        
        self.compiledStatement = self.addData(self.Excel)

        self.compiledStatement = self.reorderQuarters(self.compiledStatement)

        
    def readExcel(self):
        Excel = pd.read_excel(self.input,sheet_name=None)
        return Excel
    
    
   
            

In [213]:
class Balance:
    def __init__(self,ticker,fromDate):
        self.ticker = ticker
        self.fromDate = fromDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.output = os.path.join(self.path, f"Compiled Balance Statement-{self.ticker}.xlsx")
        
        self.createSeparateStatements()
        self.createStatement()
        
        self.writeExcel()
        
    def createSeparateStatements(self):
        self.separateCompiledStatement = pd.DataFrame()
        for year in range(self.fromDate,2023):
            statement = CompileBalanceStatement(self.ticker, year,year)
            self.separateCompiledStatement = pd.concat([self.separateCompiledStatement,statement.compiledStatement],axis=1)
    
    def createStatement(self):
        self.compiledStatement = CompileBalanceStatement(self.ticker, self.fromDate).compiledStatement
        
    def writeExcel(self):
        Excel = pd.ExcelWriter(self.output)
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="Compiled Statement")
        self.separateCompiledStatement.to_excel(Excel, index=False, sheet_name="Separately Compiled Statement")
        
        Excel.save()
         
    

In [218]:
balance = Balance("MSFT",2019)
balance.compiledStatement



Year Ended 2018
Q1 2019 (2018)
Q2 2019 (2018)
Q3 2019
Year Ended 2019
Year Ended 2019
Q1 2020 (2019)
Q2 2020 (2019)
Q3 2020
Year Ended 2020
Year Ended 2020
Q1 2021 (2020)
Q2 2021 (2020)
Q3 2021
Year Ended 2021
Year Ended 2021
Q1 2022 (2021)
Q2 2022 (2021)
Q3 2022
Year Ended 2022
Year Ended 2018
Q1 2019 (2018)
Q2 2019 (2018)
Q3 2019
Year Ended 2019
Q1 2020 (2019)
Q2 2020 (2019)
Q3 2020
Year Ended 2020
Q1 2021 (2020)
Q2 2021 (2020)
Q3 2021
Year Ended 2021
Q1 2022 (2021)
Q2 2022 (2021)
Q3 2022
Year Ended 2022


Unnamed: 0,label,Year Ended 2018,Q1 2019,Q2 2019,Q3 2019,Year Ended 2019,Q1 2020,Q2 2020,Q3 2020,Year Ended 2020,Q1 2021,Q2 2021,Q3 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Year Ended 2022
0,current assets:,,,,,,,,,,,,,,,,,
1,cash and cash equivalents,11946.0,15137.0,6638.0,11212.0,11356.0,13117.0,8864.0,11710.0,13576.0,17205.0,14432.0,13702.0,14224.0,19165.0,20604.0,12498.0,13931.0
2,short-term investments,121822.0,120743.0,121024.0,120406.0,122463.0,123519.0,125389.0,125916.0,122951.0,120772.0,117536.0,111705.0,116110.0,111450.0,104765.0,92195.0,90826.0
3,"total cash, cash equivalents, and short-term i...",133768.0,135880.0,127662.0,131618.0,133819.0,136636.0,134253.0,137626.0,136527.0,137977.0,131968.0,125407.0,130334.0,130615.0,125369.0,104693.0,104757.0
4,"accounts receivable, net of allowance for doub...",26481.0,17390.0,19680.0,19269.0,29524.0,19087.0,23525.0,22699.0,32011.0,22851.0,27312.0,26322.0,38043.0,27349.0,33520.0,32613.0,44261.0
5,inventories,2662.0,3614.0,1961.0,1951.0,2063.0,2622.0,1823.0,1644.0,1895.0,2705.0,1924.0,2245.0,2636.0,3411.0,3019.0,3296.0,3742.0
6,other,6751.0,7311.0,7571.0,7049.0,10146.0,,,,,,,,,,,,
7,other current assets,,,,,,7551.0,7473.0,8536.0,11482.0,13544.0,12769.0,11640.0,13393.0,12951.0,12280.0,13320.0,16924.0
8,total current assets,169662.0,164195.0,156874.0,159887.0,175552.0,165896.0,167074.0,170505.0,181915.0,177077.0,173973.0,165614.0,184406.0,174326.0,174188.0,153922.0,169684.0
9,"property and equipment, net of accumulated dep...",29460.0,31430.0,32717.0,33648.0,36477.0,38409.0,40522.0,41221.0,44151.0,47927.0,51737.0,54945.0,59715.0,63772.0,67214.0,70298.0,74398.0
