In [102]:
from bs4 import BeautifulSoup
import requests


headers = {"User-agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36"}
# Obtain HTML for search page
base_url = "https://www.sec.gov/Archives/edgar/data/320193/000032019319000010/a10-qq1201912292018.htm"
edgar_resp = requests.get(base_url, headers=headers)
edgar_str = edgar_resp.text


soup = BeautifulSoup(edgar_str, 'html.parser')
s =  soup.find(['span','div'], recursive=True, string='Products and Services Performance')
t = s.find_next('table')
trs = t.find_all('tr')
table = [list(tr.stripped_strings) for tr in trs if tr.text]
table
        

[['Three Months Ended'],
 ['December\xa029,', '2018', 'December\xa030,', '2017', 'Change'],
 ['Net sales by category:'],
 ['iPhone', '(1)', '$', '51,982', '$', '61,104', '(15', ')%'],
 ['Mac', '(1)', '7,416', '6,824', '9', '%'],
 ['iPad', '(1)', '6,729', '5,755', '17', '%'],
 ['Wearables, Home and Accessories', '(1)(2)', '7,308', '5,481', '33', '%'],
 ['Services', '(3)', '10,875', '9,129', '19', '%'],
 ['Total net sales', '$', '84,310', '$', '88,293', '(5', ')%']]

In [88]:
import pandas as pd
import os
import openpyxl
import numpy as np
import re
from datetime import datetime
import requests
import shutil


In [89]:

class SECSubmissions:
    def __init__(self,ticker):
        self.ticker = ticker 
        self.CIK = self.getCIK()
        self.filings = self.getSECFilings(self.CIK)
        self.filings = self.findDate(self.filings)
        self.fiscalYear = self.findFiscalYear(self.filings)
        self.filings = self.mergeFiscalYearWithFilings(self.filings, self.fiscalYear)
        self.filings = self.createSubmissionName(self.filings)
        
    def getCIK(self): #get SEC code for company based on ticker
        headers={"User-Agent": "Mozilla/5.0"}
        symbol_to_cik = requests.get("https://www.sec.gov/files/company_tickers.json").json() #returns a json dictionary with a indexed list of all different companies 
        ciks = {info["ticker"]:info["cik_str"] for key,info in symbol_to_cik.items()} #create dictionary indexable by ticker
        return ciks[self.ticker]
    
        
    #input: CIK
    #request the data on every submission to SEC website for the company
    def getSECFilings(self,CIK):
        headers={"User-Agent": "Mozilla/5.0"}
        edgar_filings = requests.get(f"https://data.sec.gov/submissions/CIK{CIK:0>10}.json", headers=headers).json()
        filings = pd.DataFrame(edgar_filings["filings"]["recent"])
        filings = filings.loc[filings["reportDate"]>"2014-01-01"]#filings pre-2014 are unable to be downloaded due to different excel format
        filings = filings.loc[filings["form"].isin(["10-Q","10-K"])].reset_index(drop=True) #drop all filings that are not 10Q or 10K
        return filings 
    
    #input filings
    #output filings with mo and year
    def findDate(self,filings):
        filings["Month"] = pd.to_datetime(filings["reportDate"]).dt.strftime("%m").astype("int") #get Month of all filings 
        filings["Year"] = pd.to_datetime(filings["reportDate"]).dt.strftime("%Y").astype("int") #get Year of all filings
        return filings
    
    #input a list of 10k filings with mo and year
    #find one fiscal year of data via 10k, then find all possible months filed and organize to fiscal period
    def findFiscalYear(self,filings):
        #get a fiscal Year of data by finding the last 10K and the three 10Qs before it
        TenKIndex = filings.loc[filings["form"]=="10-K"].index[0] 
        fiscalYearKey = filings.loc[TenKIndex:TenKIndex+3,["Month","form"]].copy().reset_index(drop=True)
        fiscalYearKey["Period"] = ["Year Ended", "Q3","Q2","Q1"]
        
        #fiscal year data for certain period can vary between months ie. both Jan & Feb, therefore I found all possible months of filings and found appropriate period for each month
        fiscalYear = pd.DataFrame({"Month":filings.Month.unique()})
        fiscalYear.index = list(fiscalYear.Month.apply(lambda x: np.argmin(np.abs(fiscalYearKey.Month-x)))) #using fiscal year key, I found the index of the period in fiscalYearKey with closest month to each month 
        fiscalYear = pd.merge(fiscalYear,fiscalYearKey, left_index=True, right_index=True, suffixes=("","_x")) #I merged together together all of the possible months with their corresponding period based on the key
        fiscalYear = fiscalYear.drop(columns="Month_x")
        
        return fiscalYear
    
    #input filings of 10k/q
    #merge by month to find which fiscal period, if filing of 10q is later in year than 10k, then adjust year so it is a part of right fiscal year
    def mergeFiscalYearWithFilings(self, filings, fiscalYear):
        print(fiscalYear)
        #merge fiscal year labels with filings by the month that they were reported
        filings = pd.merge(filings,fiscalYear,on=["Month","form"],how="left")    
        #set Fiscal Year
        filings["Fiscal Year"] = filings["Year"]
        #get Quarters that are a part of different fiscal Year than report date
        TenKMo = fiscalYear.loc[fiscalYear["form"]=="10-K"]["Month"].iloc[0]
        filings.loc[(filings["Month"]>TenKMo)&(filings["form"]!="10-K"),"Fiscal Year"] += 1 #the 10K is reported in a month before the 10Q so therefore the 10Q is the next fiscal year 
        
        return filings
    
    

    #input filings
    #take the fiscal period and year and create a string, if different fiscal Year than filing date use parenthesis
    def createSubmissionName(self,filings):
        filings["Name"] = filings.apply(self.submissionName,axis=1)
        return filings
        
    def submissionName(self,row):
        period = row["Period"]
        fiscalYear = row["Fiscal Year"]
        Year = row["Year"]
        if fiscalYear != Year:
                name = f"{period} {fiscalYear} ({Year})"
        else: 
                name = f"{period} {fiscalYear}"
        return name
    
    
    
        

In [103]:
class WriteSECProductStatements(SECSubmissions):
    def __init__(self,ticker):
        super().__init__(ticker)
        self.path =  os.path.join("../input",f"Financial Statement {self.ticker}")
        self.file = os.path.join(self.path,"Sales By Segment")
        
        self.writeProductSalesData(self.filings)
    
    def parseStatementForSalesTable(self,edgar_str):
        
        soup = BeautifulSoup(edgar_str, 'html.parser')
        s =  soup.find(['span','div'], recursive=True, string='Products and Services Performance')
        t = s.find_next('table')
        trs = t.find_all('tr')
        table = [list(tr.stripped_strings) for tr in trs if tr.text]
        table = [[x for x in row if not bool(re.search('[$%\(]',x))] for row in table]
        df = pd.DataFrame(table[3:]).iloc[:,[0,1,3]]
        df.columns = ["Product","Q1 2020","Change"]
        return df 
    #input: filings
    #iterate through and use the accession number to grab filings from SEC, compile into excel spreadsheet
    def writeProductSalesData(self,filings):
        finalSales = pd.DataFrame(columns=["Product"])
        for i,row in self.filings.iterrows():
            name = row["Name"]
            print(name)
            accessionNum = row["accessionNumber"].replace("-","")
            doc = row["primaryDocument"]
            url = f"https://www.sec.gov/Archives/edgar/data/{self.CIK}/{accessionNum}/{doc}"
            print(url)
            req = requests.get(url,headers={"User-Agent": "Mozilla/5.0"})
            edgar_str = req.text
            sales = self.parseStatementForSalesTable(edgar_str)
            pd.merge(finalSales, sales, on="Product", how="outer")

        finalSales.to_excel(self.file)
            
        

In [104]:
WriteSECProductStatements("AAPL")

   Month  form      Period
0      9  10-K  Year Ended
1      6  10-Q          Q3
1      7  10-Q          Q3
2      3  10-Q          Q2
2      4  10-Q          Q2
3     12  10-Q          Q1
Q3 2022
https://www.sec.gov/Archives/edgar/data/320193/000032019322000070/aapl-20220625.htm
Q2 2022
https://www.sec.gov/Archives/edgar/data/320193/000032019322000059/aapl-20220326.htm
Q1 2022 (2021)
https://www.sec.gov/Archives/edgar/data/320193/000032019322000007/aapl-20211225.htm
Year Ended 2021
https://www.sec.gov/Archives/edgar/data/320193/000032019321000105/aapl-20210925.htm
Q3 2021
https://www.sec.gov/Archives/edgar/data/320193/000032019321000065/aapl-20210626.htm
Q2 2021
https://www.sec.gov/Archives/edgar/data/320193/000032019321000056/aapl-20210327.htm
Q1 2021 (2020)
https://www.sec.gov/Archives/edgar/data/320193/000032019321000010/aapl-20201226.htm
Year Ended 2020
https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.htm
Q3 2020
https://www.sec.gov/Archives/edgar/da

AttributeError: 'NoneType' object has no attribute 'find_next'

In [86]:
# x=0
# for tag in soup.find_all(['table']):
#     print(tag.name)
#     print("\n")
#     if x == 10:
#         break
#     x += 1
#     print(tag)

In [132]:
data = []
table = soup.find_all('table')[10]

for table in soup.find_all('table'):
    trs = table.find_all('tr')
    table = [list(tr.stripped_strings) for tr in trs if tr.text]

    y = ""
    for x in table: 
        for l in x:
            y+=l
    if "iPhone" in y:
        print("HERE")
        print(table)

# trs = table.find_all('tr')
# table = [list(tr.stripped_strings) for tr in trs if tr.text]

HERE
[['Three Months Ended'], ['December\xa029,', '2018', 'December\xa030,', '2017'], ['iPhone', '(1)', '$', '51,982', '$', '61,104'], ['Mac', '(1)', '7,416', '6,824'], ['iPad', '(1)', '6,729', '5,755'], ['Wearables, Home and Accessories', '(1)(2)', '7,308', '5,481'], ['Services', '(3)', '10,875', '9,129'], ['Total net sales', '(4)', '$', '84,310', '$', '88,293']]
HERE
[['Three Months Ended'], ['December\xa029,', '2018', 'December\xa030,', '2017', 'Change'], ['Net sales by category:'], ['iPhone', '(1)', '$', '51,982', '$', '61,104', '(15', ')%'], ['Mac', '(1)', '7,416', '6,824', '9', '%'], ['iPad', '(1)', '6,729', '5,755', '17', '%'], ['Wearables, Home and Accessories', '(1)(2)', '7,308', '5,481', '33', '%'], ['Services', '(3)', '10,875', '9,129', '19', '%'], ['Total net sales', '$', '84,310', '$', '88,293', '(5', ')%']]


In [117]:
data

[]

In [124]:
y

'Three Months EndedDecember\xa029,2018December\xa030,2017Net sales:Products$73,435$79,164Services10,8759,129Total net sales84,31088,293Cost of sales:Products48,23850,575Services4,0413,806Total cost of sales52,27954,381Gross margin32,03133,912Operating expenses:Research and development3,9023,407Selling, general and administrative4,7834,231Total operating expenses8,6857,638Operating income23,34626,274Other income/(expense), net560756Income before provision for income taxes23,90627,030Provision for income taxes3,9416,965Net income$19,965$20,065Earnings per share:Basic$4.22$3.92Diluted$4.18$3.89Shares used in computing earnings per share:Basic4,735,8205,112,877Diluted4,773,2525,157,787'