In [1]:
import pandas as pd
import os
import openpyxl
import numpy as np
import re
from datetime import datetime
import requests

In [73]:
class HelperFunctions():
    def getYears(self,quarters): #go through each file name and find the year of filing
        return [int(re.findall("\d{4}",quarter)[0]) for quarter in quarters]
    def getUniqueYears(self,quarters):
        return set(self.getYears(quarters))
    
    def getExcelSheetsPostDate(self,Excel): #use IncomeExcel dictionary to get all dates and only keep where date is later than year specified 
        quarters = Excel.keys()
        fileDates = self.getYears(quarters) 
        files = [file for file, date in zip(Excel, fileDates) if date>self.fromDate or (date==self.fromDate and "Year Ended" in file)]
        Excel = {file:Excel[file] for file in files}
        return Excel
        
    def reorderQuarters(self,compiledStatement):
        quarters = self.getQuarters(compiledStatement)
        years = self.getUniqueYears(quarters)
        cols = [q + " " + str(year) for year in years for q in ["Q1","Q2","Q3","Q4","Year Ended"] if q + " " + str(year) in quarters]
        compiledStatement = compiledStatement[["label"] + cols]
        return compiledStatement
        
    def getQuarters(self, compiledStatement):
        return list(compiledStatement.columns[1:])
    
    
    def cleanup_label(self, label):
        new_label = label.lower()
        new_label = new_label.replace("gain","loss").replace("decrease","increase")
        new_label = re.sub("\s\([\w\W]+?\)","",new_label).replace("  "," ").replace("—"," - ").strip()
        new_label = re.sub(" \$|\s*\d+\,*", "", new_label) 
        return new_label
    

In [78]:
class Income(HelperFunctions):
    def __init__(self,ticker,fromDate):
        self.ticker = ticker
        self.fromDate = fromDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.input = os.path.join(self.path, f"Income Statements All-{self.ticker}.xlsx")
        self.output = os.path.join(self.path, f"Compiled Income Statement-{self.ticker}.xlsx")
        
        
        self.Excel = self.readExcel()
        self.Excel = self.getExcelSheetsPostDate(self.Excel)
        
        self.compiledStatement = self.addData(self.Excel)
        self.performIncomeMath()
        self.compiledStatement = self.reorderQuarters(self.compiledStatement)
        self.writeExcel()
        
    def readExcel(self):
        Excel = pd.read_excel(self.input,sheet_name=None)
        return Excel
    
    def writeExcel(self):
        Excel = pd.ExcelWriter(self.output)
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="Compiled Statement")
        
        Excel.save()
        
    def addData(self, Excel):
        compiledStatement = pd.DataFrame(columns=["label"])
        
        for quarter,sheet in list(Excel.items()):
            quarter =  re.sub("\s\([\w\W]+?\)","",quarter) #remove the parenthesis ie. (2021) from the quarter
            sheet.columns = ["label",quarter]
            sheet.label = sheet.label.apply(self.cleanup_label)
            
            #sheet = sheet.dropna(axis=0, how="any")
           
            #sheet = sheet[~sheet[quarter].astype("string").str.contains("[a-zA-Z]",regex=True)]

            sheeti = statementi = 0
            newStatement = pd.DataFrame(columns=["label"])
            #merge the current finalstatement and new sheet into one statement 
            #go through each row of the two statements and keep track of the indexes of each statement
            #continue until one of either the compiled statement or sheet ends
            while sheeti < len(sheet) and statementi < len(compiledStatement):
              
                sheet_label = sheet.iloc[sheeti].label
                statement_label = compiledStatement.iloc[statementi].label
                newRowStatement = compiledStatement.iloc[statementi:statementi+1].reset_index(drop=True) #grab the row and drop the index so that they merge
                newRowSheet = sheet.iloc[sheeti:sheeti+1].reset_index(drop=True)
                
                #check if the labels are the same 
                if statement_label == sheet_label:
                    #if labels are the same then create a new row by concating the rows
                    newRowSheet = newRowSheet.drop(columns="label")
                    newRow = pd.concat([newRowStatement,newRowSheet],axis=1)
                    
                    #concat the newRow to the newstatement
                    newStatement = pd.concat([newStatement, newRow])
                    #move on to the next row
                    sheeti+=1
                    statementi+=1
                    
                else:
                    maxi = min(sheeti+4,len(sheet))
                    
                    #next, before I append the final statement label, I want to check if the statement label corresponds to the next label for the sheet
                    #if it corresponds, then I will move the sheet up instead
                    if statement_label in list(sheet.iloc[sheeti:maxi].label): 
                        newStatement = pd.concat([newStatement,newRowSheet]) #concat the sheet row to the new statement
                        sheeti += 1
                        continue
                    
                    #if the two labels don't match, default is to add the row of the final statment
                    newStatement = pd.concat([newStatement,newRowStatement])
                    statementi += 1
                    
            #if either the finalstatement or the new sheet hasn't finished, then add the end of the statement to the new one
            newStatement = pd.concat([newStatement,sheet.iloc[sheeti:len(sheet)]])
            newStatement = pd.concat([newStatement,compiledStatement.iloc[statementi:len(compiledStatement)]])
            
            compiledStatement = newStatement
            
        compiledStatement = compiledStatement.reset_index(drop=True)
        return compiledStatement
    
    def performIncomeMath(self):
        quarters = self.getQuarters(self.compiledStatement)
        years = self.getUniqueYears(quarters)
        
        for year in years:
            year = str(year)
            if "Year Ended "+year in quarters and "Q3 "+year in quarters: 
                self.compiledStatement["Q4 "+year] = self.compiledStatement["Year Ended "+year] - self.compiledStatement["Q3 "+year]
                self.compiledStatement.loc[self.compiledStatement.label.str.startswith("weighted-average"),"Q4 "+year] = self.compiledStatement.loc[self.compiledStatement.label.str.startswith("weighted-average"),"Year Ended "+year]


            

In [82]:
income = Income("WMT",2015)
income.compiledStatement



Unnamed: 0,label,Q3 2018,Q4 2018,Year Ended 2018,Q1 2019,Q2 2019,Q3 2019,Q4 2019,Year Ended 2019,Q1 2020,...,Q2 2021,Q3 2021,Q4 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Q4 2022,Year Ended 2022,Q1 2023
0,revenues:,,,,,,,,,,...,,,,,,,,,,
1,net sales,122136.0,373625.0,495761.0,121630.0,127059.0,123897.0,386432.0,510329.0,122949.0,...,136824.0,133752.0,421481.0,555233.0,137159.0,139871.0,139207.0,428555.0,567762.0,140288.0
2,membership and other income,1043.0,3539.0,4582.0,1060.0,969.0,997.0,3079.0,4076.0,976.0,...,918.0,956.0,2962.0,3918.0,1151.0,1177.0,1318.0,3674.0,4992.0,1281.0
3,total revenues,123179.0,377164.0,500343.0,122690.0,128028.0,124894.0,389511.0,514405.0,123925.0,...,137742.0,134708.0,424443.0,559151.0,138310.0,141048.0,140525.0,432229.0,572754.0,141569.0
4,costs and expenses:,,,,,,,,,,...,,,,,,,,,,
5,cost of sales,91547.0,281849.0,373396.0,91707.0,95571.0,93116.0,292185.0,385301.0,93034.0,...,102689.0,100339.0,319976.0,420315.0,103272.0,105183.0,105023.0,323977.0,429000.0,106847.0
6,"operating, selling, general and administrative...",26868.0,79642.0,106510.0,25829.0,26707.0,26792.0,80355.0,107147.0,25946.0,...,28994.0,28591.0,87697.0,116288.0,28129.0,28511.0,29710.0,88102.0,117812.0,29404.0
7,operating income,4764.0,15673.0,20437.0,5154.0,5750.0,4986.0,16971.0,21957.0,4945.0,...,6059.0,5778.0,16770.0,22548.0,6909.0,7354.0,5792.0,20150.0,25942.0,5318.0
8,interest:,,,,,,,,,,...,,,,,,,,,,
9,debt,502.0,1476.0,1978.0,437.0,460.0,501.0,1474.0,1975.0,588.0,...,577.0,455.0,1521.0,1976.0,481.0,437.0,408.0,1266.0,1674.0,372.0


In [88]:
class Cashflow(HelperFunctions):
    def __init__(self,ticker,fromDate):
        self.ticker = ticker
        self.fromDate = fromDate
        
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        self.input = os.path.join(self.path, f"Cashflow Statements All-{self.ticker}.xlsx")
        self.output = os.path.join(self.path, f"Compiled Cashflow Statement-{self.ticker}.xlsx")
        
        
        self.Excel = self.readExcel()
        self.Excel = self.getExcelSheetsPostDate(self.Excel)
        
        self.compiledStatement = self.addData(self.Excel)

        self.compiledStatement = self.reorderQuarters(self.compiledStatement)
        self.writeExcel()
        
    def readExcel(self):
        Excel = pd.read_excel(self.input,sheet_name=None)
        return Excel
    
    def writeExcel(self):
        Excel = pd.ExcelWriter(self.output)
        self.compiledStatement.to_excel(Excel, index=False, sheet_name="Compiled Statement")
        
        Excel.save()
        
    def addData(self, Excel):
        compiledStatement = pd.DataFrame(columns=["label"])
        
        for quarter,sheet in list(Excel.items()):
            quarter =  re.sub("\s\([\w\W]+?\)","",quarter) #remove the parenthesis ie. (2021) from the quarter
            sheet.columns = ["label",quarter]
            sheet.label = sheet.label.apply(self.cleanup_label)
            
            #sheet = sheet.dropna(axis=0, how="any")
           
            #sheet = sheet[~sheet[quarter].astype("string").str.contains("[a-zA-Z]",regex=True)]

            sheeti = statementi = 0
            newStatement = pd.DataFrame(columns=["label"])
            #merge the current finalstatement and new sheet into one statement 
            #go through each row of the two statements and keep track of the indexes of each statement
            #continue until one of either the compiled statement or sheet ends
            while sheeti < len(sheet) and statementi < len(compiledStatement):
              
                sheet_label = sheet.iloc[sheeti].label
                statement_label = compiledStatement.iloc[statementi].label
                newRowStatement = compiledStatement.iloc[statementi:statementi+1].reset_index(drop=True) #grab the row and drop the index so that they merge
                newRowSheet = sheet.iloc[sheeti:sheeti+1].reset_index(drop=True)
                
                #check if the labels are the same 
                if statement_label == sheet_label:
                    #if labels are the same then create a new row by concating the rows
                    newRowSheet = newRowSheet.drop(columns="label")
                    newRow = pd.concat([newRowStatement,newRowSheet],axis=1)
                    
                    #concat the newRow to the newstatement
                    newStatement = pd.concat([newStatement, newRow])
                    #move on to the next row
                    sheeti+=1
                    statementi+=1
                    
                else:
                    maxi = min(sheeti+4,len(sheet))
                    
                    #next, before I append the final statement label, I want to check if the statement label corresponds to the next label for the sheet
                    #if it corresponds, then I will move the sheet up instead
                    if statement_label in list(sheet.iloc[sheeti:maxi].label): 
                        newStatement = pd.concat([newStatement,newRowSheet]) #concat the sheet row to the new statement
                        sheeti += 1
                        continue
                    
                    #if the two labels don't match, default is to add the row of the final statment
                    newStatement = pd.concat([newStatement,newRowStatement])
                    statementi += 1
                    
            #if either the finalstatement or the new sheet hasn't finished, then add the end of the statement to the new one
            newStatement = pd.concat([newStatement,sheet.iloc[sheeti:len(sheet)]])
            newStatement = pd.concat([newStatement,compiledStatement.iloc[statementi:len(compiledStatement)]])
            
            compiledStatement = newStatement
            
        compiledStatement = compiledStatement.reset_index(drop=True)
        return compiledStatement
    
   
            

In [94]:
cashflow = Cashflow("WMT",2020)
cashflow.compiledStatement



Unnamed: 0,label,Year Ended 2020,Q1 2021,Q2 2021,Q3 2021,Year Ended 2021,Q1 2022,Q2 2022,Q3 2022,Year Ended 2022,Q1 2023
0,cash flows from operating activities:,,,,,,,,,,
1,consolidated net income,15201.0,4074.0,10513.0,15714.0,13706.0,2811.0,7175.0,10307.0,13940.0,2103.0
2,adjustments to reconcile income from continuin...,,,,,,,,,,
3,adjustments to reconcile consolidated net inco...,,,,,,,,,,
4,depreciation and amortization,10987.0,2791.0,5562.0,8333.0,11152.0,2661.0,5302.0,7952.0,10658.0,2680.0
5,unrealized and losses,-1886.0,-783.0,-4006.0,-6883.0,,,,,,
6,(losss) and losses for disposal of business op...,15.0,,,,,,,,,
7,net unrealized and realized and losses,,,,,-8589.0,2077.0,3019.0,1831.0,2440.0,1989.0
8,losses on disposal of business operations,,,,,8401.0,433.0,433.0,433.0,433.0,0.0
9,asda pension contribution,-1036.0,,,,0.0,,,,0.0,
