In [11]:
import pandas as pd
import os
import openpyxl
import numpy as np
import re
from datetime import datetime
import requests

In [206]:
class HelperFunctions():
    def getYears(self,quarters): #go through each file name and find the year of filing
        return [int(re.findall("\d{4}",quarter)[0]) for quarter in quarters]
    
    def getUniqueYears(self,quarters):
        return set(self.getYears(quarters))
    
    def getPossibleFilingsFromYears(self, years):
        return [q + " " + str(year) for year in years for q in ["Q1","Q2","Q3","Q4","Year Ended"]]
    
    def getExcelSheetsPostDate(self,Excel):
        years = range(self.fromDate,self.endDate+1)
        names = self.getPossibleFilingsFromYears(years)
        names = ["Year Ended "+str(self.fromDate-1)]+names
        files = [filing for name in names for filing in Excel.keys() if name in filing] 
        Excel = {file:Excel[file] for file in files}
        return Excel
    
    def reorderQuarters(self,compiledStatement):
        quarters = self.getQuarters(compiledStatement)
        years = self.getUniqueYears(quarters)
        cols = self.getPossibleFilingsFromYears(years)
        cols = [col for col in cols if col in quarters]
        compiledStatement = compiledStatement[["label"] + cols]
        return compiledStatement
        
    def getQuarters(self, compiledStatement):
        return list(compiledStatement.columns[1:])
    
    
    def cleanup_label(self, label):
        new_label = label.lower()
        new_label = re.sub("\([\w\W]+?\)","",new_label)
        new_label = re.sub(" \$|\s*\d+\,*", "", new_label) 
        new_label = new_label.replace("gain","loss").replace("decrease","increase").replace("  "," ").replace("—"," - ").replace("/","").strip()
        return new_label
    
    
    def addData(self, Excel):
        compiledStatement = pd.DataFrame(columns=["label"])
        
        for quarter,sheet in list(Excel.items()):
            quarter =  re.sub("\s\([\w\W]+?\)","",quarter) #remove the parenthesis ie. (2021) from the quarter
            sheet.columns = ["label",quarter]
            
            sheet = sheet.dropna(subset="label", axis=0, how="any")
            sheet = sheet[(~sheet[quarter].astype("string").str.contains("[a-zA-Z]",regex=True))|sheet[quarter].isnull()]
            
            sheet.label = sheet.label.apply(self.cleanup_label)
            
            sheeti = statementi = 0
            newStatement = pd.DataFrame()
            #merge the current finalstatement and new sheet into one statement 
            #go through each row of the two statements and keep track of the indexes of each statement
            #continue until one of either the compiled statement or sheet ends
            while sheeti < len(sheet) and statementi < len(compiledStatement):
                sheet_label = sheet.iloc[sheeti].label
                statement_label = compiledStatement.iloc[statementi].label
                newRowStatement = compiledStatement.iloc[statementi:statementi+1].reset_index(drop=True) #grab the row and drop the index so that they merge
                newRowSheet = sheet.iloc[sheeti:sheeti+1].reset_index(drop=True)

                #check if the labels are the same 
                if statement_label == sheet_label:
                    #if labels are the same then create a new row by concating the rows
                    newRowSheet = newRowSheet.drop(columns="label")
                    newRow = pd.concat([newRowStatement,newRowSheet],axis=1)
                    
                    newStatement = pd.concat([newStatement, newRow], ignore_index=True)
                    
                    #move on to the next row
                    sheeti+=1
                    statementi+=1
                    
                else:
                    maxi = min(sheeti+4,len(sheet))
                    
                    #next, before I append the final statement label, I want to check if the statement label corresponds to the next label for the sheet
                    #if it corresponds, then I will move the sheet up instead
                    if statement_label in list(sheet.iloc[sheeti:maxi].label): 
                        newStatement = pd.concat([newStatement,newRowSheet]) #concat the sheet row to the new statement
                        sheeti += 1
                        continue
                    
                    #if the two labels don't match, default is to add the row of the final statment
                    newStatement = pd.concat([newStatement,newRowStatement])
                    
                    statementi += 1
                    
                newStatement = newStatement.reset_index(drop=True)
                newStatement.index = newStatement.index+1
        
            #if either the finalstatement or the new sheet hasn't finished, then add the end of the statement to the new one
            newStatement = pd.concat([newStatement,sheet.iloc[sheeti:len(sheet)]])
            newStatement = pd.concat([newStatement,compiledStatement.iloc[statementi:len(compiledStatement)]])

            compiledStatement = newStatement
            
        compiledStatement = compiledStatement.reset_index(drop=True)
        return compiledStatement
    

In [207]:
class CompileIncomeStatement(HelperFunctions):
    def __init__(self,ticker,fromDate, endDate=2022):
        self.ticker = ticker
        self.fromDate = fromDate
        self.endDate = endDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.input = os.path.join(self.path, f"Income Statements All-{self.ticker}.xlsx")
        self.Excel = self.readExcel()
        self.Excel = self.getExcelSheetsPostDate(self.Excel)
        
        self.compiledStatement = self.addData(self.Excel)
        self.compiledStatement = self.performIncomeMath(self.compiledStatement)
        self.compiledStatement = self.reorderQuarters(self.compiledStatement)
    
    def readExcel(self):
        Excel = pd.read_excel(self.input,sheet_name=None)
        return Excel
    
    
    
    def performIncomeMath(self,compiledStatement):
        quarters = self.getQuarters(compiledStatement)
        years = self.getUniqueYears(quarters)
        
        for year in years:
            year = str(year)
            if "Year Ended "+year in quarters and "Q3 "+year in quarters: 
                compiledStatement["Q4 "+year] = compiledStatement["Year Ended "+year] - compiledStatement["Q3 "+year]
                compiledStatement.loc[compiledStatement.label.str.startswith("weighted-average"),"Q4 "+year] = compiledStatement.loc[compiledStatement.label.str.startswith("weighted-average"),"Year Ended "+year]
        return compiledStatement

            

In [198]:
class Income:
    def __init__(self,ticker,fromDate):
        self.ticker = ticker
        self.fromDate = fromDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.output = os.path.join(self.path, f"Compiled Income Statement-{self.ticker}.xlsx")
        
        self.createSeparateStatements()
        self.createStatement()
        
        self.writeExcel()
        
    def createSeparateStatements(self):
        self.separateCompiledStatement = pd.DataFrame()
        for year in range(self.fromDate,2023):
            statement = CompileIncomeStatement(self.ticker, year,year)
            self.separateCompiledStatement = pd.concat([self.separateCompiledStatement,statement.compiledStatement],axis=1)
    
    def createStatement(self):
        self.compiledStatement = CompileIncomeStatement(self.ticker, self.fromDate).compiledStatement
        
    def writeExcel(self):
        Excel = pd.ExcelWriter(self.output)
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="Compiled Statement")
        self.separateCompiledStatement.to_excel(Excel, index=False, sheet_name="Separately Compiled Statement")
        
        Excel.save()
         
    

In [199]:
income = Income("GPS",2017)
income.compiledStatement



Unnamed: 0,label,Year Ended 2016,Q1 2017,Q2 2017,Q3 2017,Q4 2017,Year Ended 2017,Q1 2018,Q2 2018,Q3 2018,...,Q1 2021,Q2 2021,Q3 2021,Q4 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Q4 2022,Year Ended 2022
0,net sales,15797.0,3438.0,3851.0,3798.0,11718.0,15516.0,3440.0,3799.0,3838.0,...,2107.0,3275.0,3994.0,,,3991.0,4211.0,3943.0,16670000000.0,16670000000.0
1,revenues,,,,,,,,,,...,,,,,13800000000.0,,,,,
2,cost of goods sold and occupancy expenses,10077.0,2229.0,2414.0,2305.0,7571.0,9876.0,2137.0,2320.0,2313.0,...,1839.0,2126.0,2374.0,9094998000.0,9095000000.0,2361.0,2388.0,2282.0,10033000000.0,10033000000.0
3,gross profit,5720.0,1209.0,1437.0,1493.0,4147.0,5640.0,1303.0,1479.0,1525.0,...,268.0,1149.0,1620.0,4704998000.0,4705000000.0,1630.0,1823.0,1661.0,6636998000.0,6637000000.0
4,operating expenses,4196.0,987.0,1158.0,1104.0,3345.0,4449.0,1049.0,1028.0,1147.0,...,1512.0,1076.0,1445.0,5566999000.0,5567000000.0,1390.0,1414.0,1508.0,5826998000.0,5827000000.0
5,operating income,1524.0,222.0,279.0,389.0,802.0,1191.0,254.0,451.0,378.0,...,-1244.0,73.0,175.0,-862000200.0,-862000000.0,240.0,409.0,153.0,809999800.0,810000000.0
6,loss on extinguishment of debt,,,,,,,,,,...,,58.0,0.0,58000000.0,58000000.0,,0.0,325.0,324999700.0,325000000.0
7,interest expense,59.0,19.0,18.0,20.0,55.0,75.0,19.0,16.0,18.0,...,19.0,58.0,55.0,191999900.0,192000000.0,54.0,51.0,44.0,167000000.0,167000000.0
8,interest income,-6.0,-1.0,-2.0,-3.0,-5.0,-8.0,-3.0,-4.0,-4.0,...,-4.0,-2.0,-1.0,-9999999.0,-10000000.0,-1.0,-1.0,-1.0,-4999999.0,-5000000.0
9,income before income taxes,1471.0,204.0,263.0,372.0,752.0,1124.0,238.0,439.0,364.0,...,-1259.0,-41.0,121.0,-1102000000.0,-1102000000.0,187.0,359.0,-215.0,323000200.0,323000000.0


In [208]:
class CompileCashflowStatement(HelperFunctions):
    def __init__(self,ticker,fromDate,endDate=2022):
        self.ticker = ticker
        self.fromDate = fromDate
        self.endDate = endDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.input = os.path.join(self.path, f"Cashflow Statements All-{self.ticker}.xlsx")
        self.output = os.path.join(self.path, f"Compiled Cashflow Statement-{self.ticker}.xlsx")
        
        
        self.Excel = self.readExcel()
        self.Excel = self.getExcelSheetsPostDate(self.Excel)
        
        self.compiledStatement = self.addData(self.Excel)

        self.compiledStatement = self.reorderQuarters(self.compiledStatement)

        
    def readExcel(self):
        Excel = pd.read_excel(self.input,sheet_name=None)
        return Excel
    
    
   
            

In [209]:
class Cashflow:
    def __init__(self,ticker,fromDate):
        self.ticker = ticker
        self.fromDate = fromDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.output = os.path.join(self.path, f"Compiled Cashflow Statement-{self.ticker}.xlsx")
        
        self.createSeparateStatements()
        self.createStatement()
        
        self.writeExcel()
        
    def createSeparateStatements(self):
        self.separateCompiledStatement = pd.DataFrame()
        for year in range(self.fromDate,2023):
            statement = CompileCashflowStatement(self.ticker, year,year)
            self.separateCompiledStatement = pd.concat([self.separateCompiledStatement,statement.compiledStatement],axis=1)
    
    def createStatement(self):
        self.compiledStatement = CompileCashflowStatement(self.ticker, self.fromDate).compiledStatement
        
    def writeExcel(self):
        Excel = pd.ExcelWriter(self.output)
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="Compiled Statement")
        self.separateCompiledStatement.to_excel(Excel, index=False, sheet_name="Separately Compiled Statement")
        
        Excel.save()
         
    

In [210]:
cashflow = Cashflow("XOM",2020)
cashflow.compiledStatement



Year Ended 2021
Year Ended 2021
Q1 2022
Year Ended 2021
Q1 2022


Unnamed: 0,label,Year Ended 2021,Q1 2022
0,cash flows from operating activities,,
1,net income including noncontrolling interests,23598.0,5750.0
2,adjustments for noncash transactions,,
3,depreciation and depletion,20607.0,8883.0
4,deferred income tax charges,303.0,
5,postretirement benefits expense in excess of n...,754.0,
6,other long-term obligation provisions in exces...,50.0,
7,dividends received greater than equity in curr...,-668.0,
8,"changes in operational working capital, exclud...",,1086.0
9,reduction - notes and accounts receivable,-12098.0,


In [191]:
class CompileBalanceStatement(HelperFunctions):
    def __init__(self,ticker,fromDate,endDate=2022):
        self.ticker = ticker
        self.fromDate = fromDate
        self.endDate = endDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.input = os.path.join(self.path, f"Balance Statements All-{self.ticker}.xlsx")
        self.output = os.path.join(self.path, f"Compiled Balance Statement-{self.ticker}.xlsx")
        
        
        self.Excel = self.readExcel()
        self.Excel = self.getExcelSheetsPostDate(self.Excel)
        
        self.compiledStatement = self.addData(self.Excel)

        self.compiledStatement = self.reorderQuarters(self.compiledStatement)

        
    def readExcel(self):
        Excel = pd.read_excel(self.input,sheet_name=None)
        return Excel
    
    
   
            

In [192]:
class Balance:
    def __init__(self,ticker,fromDate):
        self.ticker = ticker
        self.fromDate = fromDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.output = os.path.join(self.path, f"Compiled Balance Statement-{self.ticker}.xlsx")
        
        self.createSeparateStatements()
        self.createStatement()
        
        self.writeExcel()
        
    def createSeparateStatements(self):
        self.separateCompiledStatement = pd.DataFrame()
        for year in range(self.fromDate,2023):
            statement = CompileBalanceStatement(self.ticker, year,year)
            self.separateCompiledStatement = pd.concat([self.separateCompiledStatement,statement.compiledStatement],axis=1)
    
    def createStatement(self):
        self.compiledStatement = CompileBalanceStatement(self.ticker, self.fromDate).compiledStatement
        
    def writeExcel(self):
        Excel = pd.ExcelWriter(self.output)
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="Compiled Statement")
        self.separateCompiledStatement.to_excel(Excel, index=False, sheet_name="Separately Compiled Statement")
        
        Excel.save()
         
    

In [194]:
balance = Balance("WMT",2020)
balance.compiledStatement



Unnamed: 0,label,Year Ended 2019,Q1 2020,Q2 2020,Q3 2020,Year Ended 2020,Q1 2021,Q2 2021,Q3 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Year Ended 2022
0,current assets:,,,,,,,,,,,,,
1,cash and cash equivalents,7722.0,9255.0,9283.0,8606.0,9465.0,14930.0,16906.0,14325.0,17741.0,22846.0,22831.0,16111.0,14760.0
2,"receivables, net",6283.0,5342.0,5382.0,5612.0,6284.0,5029.0,5111.0,5770.0,6516.0,5797.0,6103.0,7349.0,8280.0
3,inventories,44269.0,44751.0,44134.0,51546.0,44435.0,41217.0,41084.0,51842.0,44949.0,46383.0,47754.0,57484.0,56511.0
4,prepaid expenses and other,3623.0,2391.0,2572.0,2148.0,1622.0,2152.0,1895.0,1665.0,20861.0,1565.0,1555.0,2020.0,1519.0
5,total current assets,61897.0,61739.0,61371.0,67912.0,61806.0,63328.0,64996.0,73602.0,90067.0,76591.0,78243.0,82964.0,81070.0
6,property and equipment:,,,,,,,,,,,,,
7,property and equipment,185810.0,,,,,,,,,,,,
8,less accumulated depreciation,-81493.0,,,,,,,,,,,,
9,"property and equipment, net",104317.0,104604.0,104674.0,104326.0,105208.0,101872.0,101182.0,102232.0,92201.0,90996.0,91621.0,92242.0,94515.0
