In [3]:
import pandas as pd
import os
import openpyxl
import numpy as np
import re
from datetime import datetime
import requests

In [4]:
class HelperFunctions():
    def getYears(self,quarters): #go through each file name and find the year of filing
        return [int(re.findall("\d{4}",quarter)[0]) for quarter in quarters]
    def getUniqueYears(self,quarters):
        return set(self.getYears(quarters))
    
    def getExcelSheetsPostDate(self,Excel): #use IncomeExcel dictionary to get all dates and only keep where date is later than year specified 
        quarters = Excel.keys()
        fileDates = self.getYears(quarters) 
        files = [file for file, date in zip(Excel, fileDates) if date>self.fromDate or (date==self.fromDate and "Year Ended" in file)]
        Excel = {file:Excel[file] for file in files}
        return Excel
        
    def reorderQuarters(self):
        quarters = self.getQuarters()
        years = self.getUniqueYears(quarters)
        cols = [q + " " + str(year) for year in years for q in ["Q1","Q2","Q3","Q4","Year Ended"] if q + " " + str(year) in quarters]
        self.compiledStatement = self.compiledStatement[["label"] + cols]
        
    def getQuarters(self):
        return list(self.compiledStatement.columns[1:])
    
    
    def cleanup_label(self, label):
        new_label = label.lower()
        new_label = new_label.replace("gain","loss").replace("decrease","increase")
        new_label = re.sub("\s\([\w\W]+?\)","",new_label).replace("  "," ").replace("—"," - ").strip()
        new_label = re.sub(" \$|\s*\d+\,*", "", new_label) 
        return new_label
    

In [205]:
class Income(HelperFunctions):
    def __init__(self,ticker,fromDate):
        
        self.ticker = ticker
        self.fromDate = fromDate
        self.compiledStatement = pd.DataFrame(columns=["label"])
        
        self.IncomeExcel = self.readIncomeExcel()
        self.IncomeExcel = self.getExcelSheetsPostDate(self.IncomeExcel)
        
        self.compiledStatement = self.addData()
#         self.performIncomeMath()
#         self.reorderQuarters()
        
    def readIncomeExcel(self):
        self.path = os.path.join("input",f"Financial Statement {self.ticker}")
        file = os.path.join(self.path, f"Income Statements All-{self.ticker}.xlsx")
        IncomeExcel = pd.read_excel(file,sheet_name=None)
        return IncomeExcel
    
    def addData(self):
        compiledStatement = pd.DataFrame(columns=["label","value"])
        for quarter,sheet in list(self.IncomeExcel.items()):
            print(quarter)
            quarter =  re.sub("\s\([\w\W]+?\)","",quarter) #remove the parenthesis ie. (2021) from the quarter
            sheet.columns = ["label",quarter]
            sheet.label = sheet.label.apply(self.cleanup_label)
            
            new_sheet = sheet.reset_index()
            new_sheet["index 0"] = sheet.index
#             new_sheet["index 1"] = sheet.index+1
#             new_sheet["index 2"] = sheet.index-1
#             new_sheet["index 3"] = sheet.index+2
#             new_sheet["index 4"] = sheet.index
            
            new_sheet = pd.melt(new_sheet, id_vars=["label", quarter, "index"], value_vars=["index 0"])
            new_sheet = new_sheet.drop(columns="variable")

            compiledStatement = pd.merge(compiledStatement, new_sheet, on=["value","label"], how="left")

            for i, row in sheet.iterrows():
                if i not in list(compiledStatement["index"]):
                    row["value"] = i
                    compiledStatement = pd.concat([compiledStatement, pd.DataFrame(row).T],axis=0)

            compiledStatement = compiledStatement.drop(columns="index")
        
        compiledStatement = compiledStatement.sort_values(by="value")
        compiledStatement = compiledStatement.drop_duplicates()
        return compiledStatement
            
    def performIncomeMath(self):
        quarters = self.getQuarters()
        years = self.getUniqueYears(quarters)
        
        for year in years:
            year = str(year)
            if "Year Ended "+year in quarters and "Q3 "+year in quarters: 
                
                self.compiledStatement["Q4 "+year] = self.compiledStatement["Year Ended "+year] - self.compiledStatement["Q3 "+year]
                self.compiledStatement.loc[self.compiledStatement.label.str.startswith("weighted-average"),"Q4 "+year] = self.compiledStatement.loc[self.compiledStatement.label.str.startswith("weighted-average"),"Year Ended "+year]


            

In [206]:
income = Income("TSLA",2019)
finalIncome = income.compiledStatement
finalIncome.to_excel("try.xlsx")
income.compiledStatement

Year Ended 2019
Q1 2020
Q2 2020
Q3 2020
Year Ended 2020
Q1 2021
Q2 2021
Q3 2021
Year Ended 2021
Q1 2022
Q2 2022


Unnamed: 0,label,Year Ended 2019,value,Q1 2020,Q2 2020,Q3 2020,Year Ended 2020,Q1 2021,Q2 2021,Q3 2021,Year Ended 2021,Q1 2022,Q2 2022
0,revenues,,0,,,,,,,,,,
66,total revenues,,1,,,,,10389.0,11958.0,13757.0,53823.0,18756.0,16934
1,automotive leasing,869.0,1,239.0,268.0,265.0,1052.0,,,,,,
2,total automotive revenues,20821.0,2,5132.0,5179.0,7611.0,27236.0,,,,,,
67,cost of revenues,,2,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,cost of revenues,,52,,,,,,,,3906.0,,
114,gross profit,,53,,,,,-101.0,20.0,,,,
146,total revenues,,53,,,,,,,806.0,,,
147,cost of revenues,,54,,,,,,,,,,


In [159]:
income.c

Unnamed: 0,label,Q3 2018,value,Year Ended 2018,index
0,revenue,,0,110360.0,0.0
1,product,15114.0,1,,
2,service and other,11705.0,2,,
3,total revenue,26819.0,3,,
4,cost of revenue,,4,,
5,product,3425.0,5,,
6,service and other,5844.0,6,,
7,total cost of revenue,9269.0,7,,
8,gross margin,17550.0,8,,
9,research and development,3715.0,9,,


In [133]:
income.sheet

Unnamed: 0,label,Q1 2020
0,revenue,33055.0
1,cost of revenue,10406.0
2,gross margin,22649.0
3,research and development,4565.0
4,sales and marketing,4337.0
5,general and administrative,1061.0
6,operating income,12686.0
7,"other income, net",0.0
8,income before income taxes,12686.0
9,provision for income taxes,2008.0
