In [1]:
import datetime
import json
import os
import time

import pandas as pd
import requests
from dateutil.relativedelta import relativedelta
from sqlalchemy import create_engine
from sqlalchemy.types import Integer, Text, String, Float, Date

In [2]:
# Location of the S&P 500 constituent list (Symbol, Name, Sector, Quarter)
SP_fname = "../data/SP500.csv"

# "Quarter" is parsed into datetimes so that .year / .month can be read from it later
sp500 = pd.read_csv(filepath_or_buffer=SP_fname, parse_dates=["Quarter"])

# Preview the first rows
sp500.head()

Unnamed: 0,Symbol,Name,Sector,Quarter
0,A,Agilent Technologies Inc,Health Care,2019-07-31
1,AAL,American Airlines Group,Industrials,2019-06-30
2,AAP,Advance Auto Parts,Consumer Discretionary,2019-06-30
3,AAPL,Apple Inc.,Information Technology,2019-06-30
4,ABBV,AbbVie Inc.,Health Care,2019-06-30


In [3]:
# Note: The Application Key will expire on 9/27/19
# A replacement key can be supplied through the EDGAR_ONLINE_APPKEY environment variable so
# that a fresh key never has to be committed to the notebook.  The literal below is the
# original key, kept only as a backward-compatible fallback.
apiKey = os.environ.get("EDGAR_ONLINE_APPKEY", "281abf0fbdfc022faded1b1495bb6ff6")

database_name = "../data/CompanyData.sqlite"
engine = create_engine(f"sqlite:///{database_name}", echo=False)

# SQL column types for the 'findata' table; the key order is also the DataFrame column order
columns = {
    'monthend_date': Date, 'ticker': String(8), 'cash': Float, 'st_debt': Float, 'lt_debt': Float,
    'equity': Float, 'revenue': Float, 'ebit': Float, 'net_income': Float, 'basic_eps': Float,
    'diluted_eps': Float, 'shares': Float, 'qtr_date': Date,
}

# Edgar-Online field name -> 'findata' column that receives its value
FIELD_TO_COLUMN = {
    "cashandcashequivalents": "cash",
    "totalshorttermdebt": "st_debt",
    "totallongtermdebt": "lt_debt",
    "totalstockholdersequity": "equity",
    "totalrevenue": "revenue",
    "ebit": "ebit",
    "netincomeapplicabletocommon": "net_income",
    "basicepsnetincome": "basic_eps",
    "dilutedepsnetincome": "diluted_eps",
}

# Each quarter's figures are stored three times, against the month ends 2, 3 and 4 months
# after the quarter end
LAG_MONTHS = (2, 3, 4)

# Without a timeout a stalled connection would block the whole download loop forever
REQUEST_TIMEOUT_SECONDS = 30


def _month_end(anchor, offset_months=0):
    """Return the last day (at midnight) of the month `offset_months` after `anchor`'s month.

    Parameters:
        anchor: any object with `.year` and `.month` (datetime or pandas Timestamp).
        offset_months: whole months to shift by; may be negative or cross year boundaries.

    Returns:
        datetime.datetime for the final calendar day of the target month.
    """
    first_of_target = datetime.datetime(anchor.year, anchor.month, 1) + relativedelta(months=offset_months)
    # First day of the following month minus one day is the month end, December included
    return first_of_target + relativedelta(months=1) - relativedelta(days=1)


first_pass = True
ticker_list = sp500['Symbol'].tolist()
for ticker in ticker_list:

    # Consolidated balance sheet and income statement - LTM
    url = ("https://datafied.api.edgar-online.com/v2/corefinancials/ttm?Appkey=" + apiKey
           + "&fields=BalanceSheetConsolidated%2CIncomeStatementConsolidated&primarysymbols=" + ticker
           + "&numPeriods=80&activecompanies=false&deleted=false&sortby=primarysymbol%20asc")

    # Get the data from Edgar-Online for the given ticker
    # Wait for 1 second, per the documentation from Edgar-Online
    time.sleep(1)
    http_response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail with the HTTP status rather than with an obscure JSON/KeyError further down
    http_response.raise_for_status()
    response = http_response.json()

    # Find the number of quarters of data
    n_quarters = response["result"]["totalrows"]

    # Look up this ticker's row in the S&P 500 list
    company_info = sp500.loc[sp500["Symbol"].isin([ticker])]

    # "Quarter" is the date of the most recent quarterly results; force it to the month end
    current_quarter = _month_end(company_info.iloc[0]["Quarter"])

    # Quarter-end dates in ascending order, three months apart, the last one being
    # current_quarter.  Row i of the response is paired with quarterly_dates[i].
    quarterly_dates = [
        _month_end(current_quarter, -3 * (n_quarters - 1 - i))
        for i in range(n_quarters)
    ]

    # Collect plain dicts and build the DataFrame once: DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every call
    records = []
    for i in range(n_quarters):
        qtr_date = quarterly_dates[i]

        # Every financial field defaults to 0 when the response does not report it
        financials = dict.fromkeys(FIELD_TO_COLUMN.values(), 0)
        for item in response["result"]["rows"][i]["values"]:
            column_name = FIELD_TO_COLUMN.get(item["field"])
            if column_name is not None:
                financials[column_name] = item["value"]

        # Derive the share count once, after all fields are known; it is left empty when
        # basic EPS is zero or missing, since the division is undefined
        basic_eps = financials["basic_eps"]
        shares = None if basic_eps == 0 else financials["net_income"] / basic_eps

        for lag_months in LAG_MONTHS:
            records.append({
                'monthend_date': _month_end(qtr_date, lag_months),
                'ticker': ticker,
                **financials,
                'shares': shares,
                'qtr_date': qtr_date,
            })

    financial_df = pd.DataFrame(records, columns=list(columns))

    # The first ticker recreates the table; every later ticker is appended to it
    if first_pass:
        update_mode = 'replace'
        first_pass = False
    else:
        update_mode = 'append'
    financial_df.to_sql('findata', con=engine, dtype=columns, if_exists=update_mode)

In [4]:
    # Spot-check the stored rows for a single ticker (XOM)
    xom_query = "SELECT ticker, monthend_date, qtr_date, basic_eps FROM findata WHERE ticker='XOM'"
    xom_cursor = engine.execute(xom_query)
    xom_cursor.fetchall()

[('XOM', '2014-11-30', '2014-09-30', 7.95),
 ('XOM', '2014-12-31', '2014-09-30', 7.95),
 ('XOM', '2015-01-31', '2014-09-30', 7.95),
 ('XOM', '2015-02-28', '2014-12-31', 7.6),
 ('XOM', '2015-03-31', '2014-12-31', 7.6),
 ('XOM', '2015-04-30', '2014-12-31', 7.6),
 ('XOM', '2015-05-31', '2015-03-31', 6.67),
 ('XOM', '2015-06-30', '2015-03-31', 6.67),
 ('XOM', '2015-07-31', '2015-03-31', 6.67),
 ('XOM', '2015-08-31', '2015-06-30', 5.62),
 ('XOM', '2015-09-30', '2015-06-30', 5.62),
 ('XOM', '2015-10-31', '2015-06-30', 5.62),
 ('XOM', '2015-11-30', '2015-09-30', 4.74),
 ('XOM', '2015-12-31', '2015-09-30', 4.74),
 ('XOM', '2016-01-31', '2015-09-30', 4.74),
 ('XOM', '2016-02-29', '2015-12-31', 3.85),
 ('XOM', '2016-03-31', '2015-12-31', 3.85),
 ('XOM', '2016-04-30', '2015-12-31', 3.85),
 ('XOM', '2016-05-31', '2016-03-31', 3.11),
 ('XOM', '2016-06-30', '2016-03-31', 3.11),
 ('XOM', '2016-07-31', '2016-03-31', 3.11),
 ('XOM', '2016-08-31', '2016-06-30', 2.52),
 ('XOM', '2016-09-30', '2016-06-30'