In [100]:
import pandas as pd
import os
import openpyxl
import numpy as np
import re
from datetime import datetime
import requests

In [173]:
class HelperFunctions():
    def getYears(self,quarters): #go through each file name and find the year of filing
        return [int(re.findall("\d{4}",quarter)[0]) for quarter in quarters]
    
    def getUniqueYears(self,quarters):
        return set(self.getYears(quarters))
    
    def getPossibleFilingsFromYears(self, years):
        return [q + " " + str(year) for year in years for q in ["Q1","Q2","Q3","Q4","Year Ended"]]
    
    def getExcelSheetsPostDate(self,Excel):
        years = range(self.fromDate,self.endDate+1)
        names = self.getPossibleFilingsFromYears(years)
        names = ["Year Ended "+str(self.fromDate-1)]+names
        files = [filing for name in names for filing in Excel.keys() if name in filing] 
        Excel = {file:Excel[file] for file in files}
        return Excel
    
    def reorderQuarters(self,compiledStatement):
        quarters = self.getQuarters(compiledStatement)
        years = self.getUniqueYears(quarters)
        cols = self.getPossibleFilingsFromYears(years)
        cols = [col for col in cols if col in quarters]
        compiledStatement = compiledStatement[["label"] + cols]
        return compiledStatement
        
    def getQuarters(self, compiledStatement):
        return list(compiledStatement.columns[1:])
    
    
    def cleanup_label(self, label):
        new_label = label.lower()
        new_label = new_label.replace("gain","loss").replace("decrease","increase").replace("  "," ").replace("—"," - ").strip()
        new_label = re.sub("\s\([\w\W]+?\)","",new_label)
        new_label = re.sub(" \$|\s*\d+\,*", "", new_label) 
        return new_label
    
    
    def addData(self, Excel):
        compiledStatement = pd.DataFrame(columns=["label"])
        
        for quarter,sheet in list(Excel.items()):
            quarter =  re.sub("\s\([\w\W]+?\)","",quarter) #remove the parenthesis ie. (2021) from the quarter
            sheet.columns = ["label",quarter]
            sheet.label = sheet.label.apply(self.cleanup_label)
            
            #sheet = sheet.dropna(axis=0, how="any")
            sheet = sheet[(~sheet[quarter].astype("string").str.contains("[a-zA-Z]",regex=True))|sheet[quarter].isnull()]

            sheeti = statementi = 0
            newStatement = pd.DataFrame(columns=["label"])
            #merge the current finalstatement and new sheet into one statement 
            #go through each row of the two statements and keep track of the indexes of each statement
            #continue until one of either the compiled statement or sheet ends
            while sheeti < len(sheet) and statementi < len(compiledStatement):
              
                sheet_label = sheet.iloc[sheeti].label
                statement_label = compiledStatement.iloc[statementi].label
                newRowStatement = compiledStatement.iloc[statementi:statementi+1].reset_index(drop=True) #grab the row and drop the index so that they merge
                newRowSheet = sheet.iloc[sheeti:sheeti+1].reset_index(drop=True)
                
                #check if the labels are the same 
                if statement_label == sheet_label:
                    #if labels are the same then create a new row by concating the rows
                    newRowSheet = newRowSheet.drop(columns="label")
                    newRow = pd.concat([newRowStatement,newRowSheet],axis=1)
                    
                    #concat the newRow to the newstatement
                    newStatement = pd.concat([newStatement, newRow])
                    #move on to the next row
                    sheeti+=1
                    statementi+=1
                    
                else:
                    maxi = min(sheeti+4,len(sheet))
                    
                    #next, before I append the final statement label, I want to check if the statement label corresponds to the next label for the sheet
                    #if it corresponds, then I will move the sheet up instead
                    if statement_label in list(sheet.iloc[sheeti:maxi].label): 
                        newStatement = pd.concat([newStatement,newRowSheet]) #concat the sheet row to the new statement
                        sheeti += 1
                        continue
                    
                    #if the two labels don't match, default is to add the row of the final statment
                    newStatement = pd.concat([newStatement,newRowStatement])
                    statementi += 1
                    
            #if either the finalstatement or the new sheet hasn't finished, then add the end of the statement to the new one
            newStatement = pd.concat([newStatement,sheet.iloc[sheeti:len(sheet)]])
            newStatement = pd.concat([newStatement,compiledStatement.iloc[statementi:len(compiledStatement)]])
            
            compiledStatement = newStatement
            
        compiledStatement = compiledStatement.reset_index(drop=True)
        return compiledStatement
    

In [174]:
class CompileIncomeStatement(HelperFunctions):
    def __init__(self,ticker,fromDate, endDate=2022):
        self.ticker = ticker
        self.fromDate = fromDate
        self.endDate = endDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.input = os.path.join(self.path, f"Income Statements All-{self.ticker}.xlsx")
        self.Excel = self.readExcel()
        self.Excel = self.getExcelSheetsPostDate(self.Excel)
        
        self.compiledStatement = self.addData(self.Excel)
        self.compiledStatement = self.performIncomeMath(self.compiledStatement)
        self.compiledStatement = self.reorderQuarters(self.compiledStatement)
    
    def readExcel(self):
        Excel = pd.read_excel(self.input,sheet_name=None)
        return Excel
    
    
    
    def performIncomeMath(self,compiledStatement):
        quarters = self.getQuarters(compiledStatement)
        years = self.getUniqueYears(quarters)
        
        for year in years:
            year = str(year)
            if "Year Ended "+year in quarters and "Q3 "+year in quarters: 
                compiledStatement["Q4 "+year] = compiledStatement["Year Ended "+year] - compiledStatement["Q3 "+year]
                compiledStatement.loc[compiledStatement.label.str.startswith("weighted-average"),"Q4 "+year] = compiledStatement.loc[compiledStatement.label.str.startswith("weighted-average"),"Year Ended "+year]
        return compiledStatement

            

In [175]:
class Income:
    def __init__(self,ticker,fromDate):
        self.ticker = ticker
        self.fromDate = fromDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.output = os.path.join(self.path, f"Compiled Income Statement-{self.ticker}.xlsx")
        
        self.createSeparateStatements()
        self.createStatement()
        
        self.writeExcel()
        
    def createSeparateStatements(self):
        self.separateCompiledStatement = pd.DataFrame()
        for year in range(self.fromDate,2023):
            statement = CompileIncomeStatement(self.ticker, year,year)
            self.separateCompiledStatement = pd.concat([self.separateCompiledStatement,statement.compiledStatement],axis=1)
    
    def createStatement(self):
        self.compiledStatement = CompileIncomeStatement(self.ticker, self.fromDate).compiledStatement
        
    def writeExcel(self):
        Excel = pd.ExcelWriter(self.output)
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="Compiled Statement")
        self.separateCompiledStatement.to_excel(Excel, index=False, sheet_name="Separately Compiled Statement")
        
        Excel.save()
         
    

In [179]:
income = Income("GPS",2019)
income.compiledStatement



Unnamed: 0,label,Year Ended 2018,Q1 2019,Q2 2019,Q3 2019,Q4 2019,Year Ended 2019,Q1 2020,Q2 2020,Q3 2020,...,Q1 2021,Q2 2021,Q3 2021,Q4 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Q4 2022,Year Ended 2022
0,net sales,15855.0,3783.0,4085.0,4089.0,,,3706.0,4005.0,3998.0,...,2107.0,3275.0,3994.0,,,3991.0,4211.0,3943.0,16670000000.0,16670000000.0
1,revenues,,,,,,16580.0,,,,...,,,,,13800000000.0,,,,,
2,cost of goods sold and occupancy expenses,9789.0,2356.0,2458.0,2466.0,7792.0,10258.0,2362.0,2449.0,2439.0,...,1839.0,2126.0,2374.0,9094998000.0,9095000000.0,2361.0,2388.0,2282.0,10033000000.0,10033000000.0
3,gross profit,6066.0,1427.0,1627.0,1623.0,4699.0,6322.0,1344.0,1556.0,1559.0,...,268.0,1149.0,1620.0,4704998000.0,4705000000.0,1630.0,1823.0,1661.0,6636998000.0,6637000000.0
4,operating expenses,4587.0,1198.0,1229.0,1260.0,3700.0,4960.0,1028.0,1274.0,1338.0,...,1512.0,1076.0,1445.0,5566999000.0,5567000000.0,1390.0,1414.0,1508.0,5826998000.0,5827000000.0
5,operating income,1479.0,229.0,398.0,363.0,999.0,1362.0,316.0,282.0,221.0,...,-1244.0,73.0,175.0,-862000200.0,-862000000.0,240.0,409.0,153.0,809999800.0,810000000.0
6,loss on extinguishment of debt,,,,,,,,,,...,,58.0,0.0,58000000.0,58000000.0,,0.0,325.0,324999700.0,325000000.0
7,interest expense,74.0,16.0,17.0,21.0,52.0,73.0,20.0,19.0,19.0,...,19.0,58.0,55.0,191999900.0,192000000.0,54.0,51.0,44.0,167000000.0,167000000.0
8,interest income,-19.0,-6.0,-7.0,-8.0,-25.0,-33.0,-6.0,-8.0,-7.0,...,-4.0,-2.0,-1.0,-9999999.0,-10000000.0,-1.0,-1.0,-1.0,-4999999.0,-5000000.0
9,income before income taxes,1424.0,219.0,388.0,350.0,972.0,1322.0,302.0,271.0,209.0,...,-1259.0,-41.0,121.0,-1102000000.0,-1102000000.0,187.0,359.0,-215.0,323000200.0,323000000.0


In [188]:
class CompileCashflowStatement(HelperFunctions):
    def __init__(self,ticker,fromDate,endDate=2022):
        self.ticker = ticker
        self.fromDate = fromDate
        self.endDate = endDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.input = os.path.join(self.path, f"Cashflow Statements All-{self.ticker}.xlsx")
        self.output = os.path.join(self.path, f"Compiled Cashflow Statement-{self.ticker}.xlsx")
        
        
        self.Excel = self.readExcel()
        self.Excel = self.getExcelSheetsPostDate(self.Excel)
        
        self.compiledStatement = self.addData(self.Excel)

        self.compiledStatement = self.reorderQuarters(self.compiledStatement)

        
    def readExcel(self):
        Excel = pd.read_excel(self.input,sheet_name=None)
        return Excel
    
    
   
            

In [189]:
class Cashflow:
    def __init__(self,ticker,fromDate):
        self.ticker = ticker
        self.fromDate = fromDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.output = os.path.join(self.path, f"Compiled Cashflow Statement-{self.ticker}.xlsx")
        
        self.createSeparateStatements()
        self.createStatement()
        
        self.writeExcel()
        
    def createSeparateStatements(self):
        self.separateCompiledStatement = pd.DataFrame()
        for year in range(self.fromDate,2023):
            statement = CompileCashflowStatement(self.ticker, year,year)
            self.separateCompiledStatement = pd.concat([self.separateCompiledStatement,statement.compiledStatement],axis=1)
    
    def createStatement(self):
        self.compiledStatement = CompileCashflowStatement(self.ticker, self.fromDate).compiledStatement
        
    def writeExcel(self):
        Excel = pd.ExcelWriter(self.output)
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="Compiled Statement")
        self.separateCompiledStatement.to_excel(Excel, index=False, sheet_name="Separately Compiled Statement")
        
        Excel.save()
         
    

In [190]:
cashflow = Cashflow("WMT",2020)
cashflow.compiledStatement



Unnamed: 0,label,Year Ended 2019,Q1 2020,Q2 2020,Q3 2020,Year Ended 2020,Q1 2021,Q2 2021,Q3 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Year Ended 2022
0,cash flows from operating activities:,,,,,,,,,,,,,
1,consolidated net income,7179.0,3906.0,7586.0,10907.0,15201.0,4074.0,10513.0,15714.0,13706.0,2811.0,7175.0,10307.0,13940.0
2,adjustments to reconcile income from continuin...,,,,,,,,,,,,,
3,adjustments to reconcile consolidated net inco...,,,,,,,,,,,,,
4,depreciation and amortization,10678.0,2714.0,5436.0,8159.0,10987.0,2791.0,5562.0,8333.0,11152.0,2661.0,5302.0,7952.0,10658.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,"cash, cash equivalents and restricted cash at ...",,,,,9515.0,,,,17788.0,,,,14834.0
57,supplemental disclosure of cash flow information:,,,,,,,,,,,,,
58,income taxes paid,3982.0,,,,3616.0,,,,5271.0,,,,5918.0
59,interest paid,2348.0,,,,,,,,2216.0,,,,2237.0
