# Stock Market Database

In [1]:
import numpy as np
import pandas as pd
import json
from datetime import datetime
import requests

import matplotlib.pyplot as plt
import seaborn as sns

import os
import warnings

import src
from env import key
warnings.filterwarnings("ignore")

# Acquire

In [2]:
# Create a global variable to hold the apikey path.
# The f-prefix is required: without it the literal text "{key}" is sent to the
# API instead of the key imported from env.
API_KEY_PATH = f'&apikey={key}'

In [3]:
def base_url():
    '''
    Return the root query URL of the alphavantage REST API.

    Parameters
    ----------
    None

    Returns
    -------
    url : str
        Query URL ending in '?', ready to have query parameters appended.
    '''
    return 'https://www.alphavantage.co/query?'

In [4]:
# Sanity check: display the URL returned by base_url().
base_url()

'https://www.alphavantage.co/query?'

In [5]:
def response_endpoint(data='INCOME_STATEMENT', ticker='IBM', timeout=30):
    '''
    Request a report for one ticker from the alphavantage REST API.

    Parameters
    ----------
    data : str, default 'INCOME_STATEMENT'
        Value sent as the `function` query parameter.

    ticker : str, default 'IBM'
        Value sent as the `symbol` query parameter.

    timeout : float or tuple, default 30
        Seconds to wait for the server, forwarded to `requests.get`.
        Without a timeout the request can block indefinitely.

    Returns
    -------
    get_request : requests.models.Response object
        Response object with data from a specified endpoint.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not respond within `timeout` seconds.
    '''
    function = f'function={data}'
    symbol = f'&symbol={ticker}'
    # NOTE(review): callers in this notebook decode the response with .json(),
    # so this csv hint is presumably ignored for these reports - confirm
    # against the API documentation before relying on it.
    file_type = '&datatype=csv'

    # API_KEY_PATH is only read here, so no `global` declaration is needed.
    endpoint = function + symbol + API_KEY_PATH + file_type

    get_request = requests.get(base_url() + endpoint, timeout=timeout)
    return get_request


In [6]:
# Fetch the default report (INCOME_STATEMENT for IBM) and display the HTTP
# status code of the response.
data = response_endpoint()
data.status_code

200

In [7]:
# Decode the response body as JSON and list its top-level keys.
report = data.json()
report.keys()

dict_keys(['symbol', 'annualReports', 'quarterlyReports'])

In [8]:
# Ticker symbol stored in the payload under the 'symbol' key.
report.get('symbol')

'IBM'

In [9]:
# Pull the two report lists out of the decoded payload.
annual_data = report.get('annualReports')
quarterly_data = report.get('quarterlyReports')

# One frame per reporting period, ordered by fiscal period end date,
# with a fresh 0..n-1 index.
df_annual = (
    pd.DataFrame(annual_data)
    .sort_values('fiscalDateEnding')
    .reset_index(drop=True)
)
df_quarterly = (
    pd.DataFrame(quarterly_data)
    .sort_values('fiscalDateEnding')
    .reset_index(drop=True)
)

In [10]:
# Preview the first rows of the annual frame.
df_annual.head()

Unnamed: 0,fiscalDateEnding,reportedCurrency,grossProfit,totalRevenue,costOfRevenue,costofGoodsAndServicesSold,operatingIncome,sellingGeneralAndAdministrative,researchAndDevelopment,operatingExpenses,...,depreciation,depreciationAndAmortization,incomeBeforeTax,incomeTaxExpense,interestAndDebtExpense,netIncomeFromContinuingOperations,comprehensiveIncomeNetOfTax,ebit,ebitda,netIncome
0,2016-12-31,USD,38516000000,79919000000,41403000000,40580000000,10377000000,20869000000,5726000000,28139000000,...,2837000000,1544000000,12321000000,449000000,630000000,11881000000,12081000000,10377000000,11921000000,11872000000
1,2017-12-31,USD,36943000000,79139000000,42196000000,41703000000,10153000000,19680000000,5590000000,26790000000,...,3021000000,1520000000,11395000000,5642000000,615000000,5758000000,8559000000,10153000000,11673000000,5753000000
2,2018-12-31,USD,36936000000,79591000000,42655000000,42655000000,10838000000,19366000000,5379000000,26098000000,...,3127000000,1353000000,11347000000,2619000000,723000000,8723000000,8252000000,10838000000,12191000000,8728000000
3,2019-12-31,USD,36488000000,77147000000,40659000000,591000000,9004000000,20604000000,5989000000,27484000000,...,4209000000,1850000000,10162000000,731000000,1344000000,9435000000,10324000000,9004000000,10854000000,9431000000
4,2020-12-31,USD,35575000000,73620000000,38046000000,439000000,4609000000,23082000000,6333000000,30966000000,...,4227000000,2468000000,4726000000,-864000000,1288000000,5501000000,4850000000,4609000000,7077000000,5590000000


In [11]:
# Inspect column dtypes before any conversion; the output below lists every
# column as object.
df_annual.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 26 columns):
 #   Column                             Non-Null Count  Dtype 
---  ------                             --------------  ----- 
 0   fiscalDateEnding                   5 non-null      object
 1   reportedCurrency                   5 non-null      object
 2   grossProfit                        5 non-null      object
 3   totalRevenue                       5 non-null      object
 4   costOfRevenue                      5 non-null      object
 5   costofGoodsAndServicesSold         5 non-null      object
 6   operatingIncome                    5 non-null      object
 7   sellingGeneralAndAdministrative    5 non-null      object
 8   researchAndDevelopment             5 non-null      object
 9   operatingExpenses                  5 non-null      object
 10  investmentIncomeNet                5 non-null      object
 11  netInterestIncome                  5 non-null      object
 12  interestInco

In [12]:
# Acquire Function
# Call the acquire helper from the project's src module with its defaults.
# NOTE(review): the displayed rows look like quarterly income-statement data -
# confirm the defaults against the src module.
src.get_financial_data()

Unnamed: 0,fiscalDateEnding,reportedCurrency,grossProfit,totalRevenue,costOfRevenue,costofGoodsAndServicesSold,operatingIncome,sellingGeneralAndAdministrative,researchAndDevelopment,operatingExpenses,...,depreciation,depreciationAndAmortization,incomeBeforeTax,incomeTaxExpense,interestAndDebtExpense,netIncomeFromContinuingOperations,comprehensiveIncomeNetOfTax,ebit,ebitda,netIncome
0,2016-06-30,USD,9702000000,20238000000,10536000000,10536000000,2490000000,5349000000,1465000000,7212000000,...,705000000,398000000,3048000000,544000000,167000000,2505000000,2899000000,2490000000,2888000000,2504000000
1,2016-09-30,USD,9013000000,19226000000,10213000000,10213000000,2515000000,4732000000,1397000000,6498000000,...,724000000,403000000,3262000000,409000000,158000000,2854000000,3293000000,2515000000,2918000000,2853000000
2,2016-12-31,USD,11115000000,21770000000,10655000000,10877000000,4755000000,4776000000,1406000000,6360000000,...,731000000,396000000,4981000000,480000000,157000000,4506000000,3268000000,4755000000,5151000000,4501000000
3,2017-03-31,USD,7944000000,18155000000,10211000000,10383000000,1043000000,5027000000,1484000000,6901000000,...,709000000,390000000,1421000000,-329000000,135000000,1753000000,2438000000,1043000000,1433000000,1750000000
4,2017-06-30,USD,8968000000,19289000000,10321000000,10495000000,2111000000,5033000000,1436000000,6857000000,...,730000000,388000000,2442000000,111000000,147000000,2332000000,2852000000,2111000000,2499000000,2331000000
5,2017-09-30,USD,8981000000,19153000000,10172000000,10353000000,2701000000,4606000000,1291000000,6281000000,...,792000000,384000000,3065000000,339000000,168000000,2726000000,3394000000,2701000000,3085000000,2726000000
6,2017-12-31,USD,11049000000,22542000000,11493000000,11681000000,4298000000,5014000000,1378000000,6751000000,...,790000000,359000000,4468000000,5522000000,164000000,-1053000000,-125000000,4298000000,4657000000,-1054000000
7,2018-03-31,USD,8247000000,19072000000,10825000000,10825000000,1057000000,5445000000,1405000000,7190000000,...,774000000,340000000,1139000000,-540000000,165000000,1675000000,2109000000,1057000000,1397000000,1679000000
8,2018-06-30,USD,9199000000,20003000000,10804000000,10804000000,2636000000,4857000000,1364000000,6563000000,...,773000000,342000000,2777000000,373000000,173000000,2402000000,2697000000,2636000000,2978000000,2404000000
9,2018-09-30,USD,8803000000,18756000000,9953000000,9953000000,2841000000,4363000000,1252000000,5963000000,...,790000000,348000000,2998000000,304000000,192000000,2692000000,3164000000,2841000000,3189000000,2694000000


In [16]:
# Same acquire helper, this time requesting the BALANCE_SHEET report.
src.get_financial_data(report_name='BALANCE_SHEET')

Unnamed: 0,fiscalDateEnding,reportedCurrency,totalAssets,totalCurrentAssets,cashAndCashEquivalentsAtCarryingValue,cashAndShortTermInvestments,inventory,currentNetReceivables,totalNonCurrentAssets,propertyPlantEquipment,...,currentLongTermDebt,longTermDebtNoncurrent,shortLongTermDebtTotal,otherCurrentLiabilities,otherNonCurrentLiabilities,totalShareholderEquity,treasuryStock,retainedEarnings,commonStock,commonStockSharesOutstanding
0,2016-06-30,USD,118056000000,43524000000,10017000000,10017000000,1685000000,26547000000.0,64926000000,11092000000,...,4586000000,39638000000,44224000000,11508000000,8385000000,15733000000,157298000000,148071000000,53565000000,957400000
1,2016-09-30,USD,115606000000,41433000000,9039000000,9039000000,1729000000,25196000000.0,64851000000,11104000000,...,6561000000,35563000000,42124000000,10815000000,8138000000,17010000000,158170000000,149585000000,53759000000,954000000
2,2016-12-31,USD,117470000000,43888000000,7826000000,8527000000,1553000000,29245000000.0,36199000000,10830000000,...,6239000000,34655000000,40893000000,11035000000,7477000000,18246000000,159050000000,152759000000,53935000000,945867403
3,2017-03-31,USD,117495000000,42889000000,10096000000,10695000000,1609000000,25869000000.0,63625000000,10865000000,...,8055000000,34441000000,42498000000,12351000000,7601000000,18327000000,160359000000,153292000000,54104000000,942400000
4,2017-06-30,USD,120495000000,45013000000,11696000000,12295000000,1604000000,26958000000.0,63942000000,10903000000,...,7834000000,37612000000,45446000000,11541000000,7437000000,18419000000,161860000000,154234000000,54235000000,934900000
5,2017-09-30,USD,121636000000,44742000000,10915000000,11515000000,1711000000,27126000000.0,64832000000,11057000000,...,3892000000,41327000000,45218000000,10649000000,7723000000,19627000000,162812000000,155565000000,54395000000,929437441
6,2017-12-31,USD,125356000000,49735000000,11972000000,12580000000,1583000000,,36788000000,11116000000,...,5214000000,39837000000,45052000000,11552000000,9965000000,17594000000,163507000000,153126000000,54566000000,922179225
7,2018-03-31,USD,125285000000,49122000000,11949000000,12842000000,1753000000,29229000000.0,65516000000,11278000000,...,3758000000,40410000000,44168000000,13059000000,10250000000,18166000000,164334000000,156371000000,54712000000,920680222
8,2018-06-30,USD,121622000000,46795000000,10741000000,11707000000,1742000000,28340000000.0,67621000000,11024000000,...,5447000000,37851000000,43298000000,11752000000,10000000000,18520000000,165366000000,157349000000,54827000000,915064434
9,2018-09-30,USD,121990000000,48257000000,11563000000,14495000000,1893000000,27087000000.0,67024000000,10949000000,...,6773000000,35989000000,42762000000,10704000000,9979000000,19784000000,165995000000,158612000000,54987000000,911152848


In [17]:
# Same acquire helper, this time requesting the CASH_FLOW report.
src.get_financial_data(report_name='CASH_FLOW')

Unnamed: 0,fiscalDateEnding,reportedCurrency,operatingCashflow,paymentsForOperatingActivities,proceedsFromOperatingActivities,changeInOperatingLiabilities,changeInOperatingAssets,depreciationDepletionAndAmortization,capitalExpenditures,changeInReceivables,...,dividendPayoutCommonStock,dividendPayoutPreferredStock,proceedsFromIssuanceOfCommonStock,proceedsFromIssuanceOfLongTermDebtAndCapitalSecuritiesNet,proceedsFromIssuanceOfPreferredStock,proceedsFromRepurchaseOfEquity,proceedsFromSaleOfTreasuryStock,changeInCashAndCashEquivalents,changeInExchangeRate,netIncome
0,2016-06-30,USD,3458000000,,,,,398000000,870000000,,...,1340000000,,,178000000,,-780000000,,-4338000000,-103000000.0,2504000000
1,2016-09-30,USD,4117000000,,,,,1275000000,768000000,,...,1337000000,,,105000000,,-806000000,,-978000000,41000000.0,2853000000
2,2016-12-31,USD,3979000000,,,,,695000000,973000000,712000000.0,...,1329000000,,,764000000,,-1036000000,,-1212000000,-206000000.0,4501000000
3,2017-03-31,USD,3955000000,,,,,1099000000,740000000,,...,1321000000,,,2887000000,,-1293000000,,2270000000,100000000.0,1750000000
4,2017-06-30,USD,3466000000,,,,,388000000,685000000,,...,1403000000,,,2948000000,,-1432000000,,1600000000,447000000.0,2331000000
5,2017-09-30,USD,3570000000,,,,,1176000000,848000000,,...,1395000000,,,3520000000,,-949000000,,-781000000,328000000.0,2726000000
6,2017-12-31,USD,5733000000,,,,,1149000000,956000000,1297000000.0,...,1387000000,,,288000000,,-666000000,,1057000000,62000000.0,-1054000000
7,2018-03-31,USD,4602000000,,,,,1114000000,870000000,,...,1382000000,,,2170000000,,-777000000,,-71000000,,1679000000
8,2018-06-30,USD,2294000000,,,,,342000000,931000000,,...,1437000000,,,336000000,,-990000000,,2404000000,,2404000000
9,2018-09-30,USD,4232000000,,,,,1138000000,812000000,,...,1431000000,,,2208000000,,-626000000,,827000000,,2694000000


# Prepare

In [13]:
# Select columns by name rather than by position. With positional selection
# (columns[2:] / columns[0]) this cell breaks when re-run after 'ticker' has
# been inserted at position 0: 'reportedCurrency' would be coerced to NaN and
# 'ticker' would be parsed as a date.
date_col = 'fiscalDateEnding'
non_numeric_cols = ['ticker', date_col, 'reportedCurrency']
numeric_cols = df_annual.columns[~df_annual.columns.isin(non_numeric_cols)]

# Convert the report values to numbers; anything unparseable becomes NaN
# (errors='coerce'), and integer columns are downcast where possible.
df_annual[numeric_cols] = df_annual.loc[:, numeric_cols].apply(pd.to_numeric, downcast='integer', errors='coerce')
df_quarterly[numeric_cols] = df_quarterly.loc[:, numeric_cols].apply(pd.to_numeric, downcast='integer', errors='coerce')

# Parse the fiscal period end date into datetime64.
df_annual[date_col] = pd.to_datetime(df_annual.loc[:, date_col])
df_quarterly[date_col] = pd.to_datetime(df_quarterly.loc[:, date_col])

In [14]:
# Add the ticker symbol as the first column of both frames.
# DataFrame.insert raises ValueError when the column already exists, so guard
# the call to keep this cell safe to re-run.
ticker = report.get('symbol')
for frame in (df_annual, df_quarterly):
    if 'ticker' not in frame.columns:
        frame.insert(0, 'ticker', ticker)

In [15]:
# Re-check dtypes after the Prepare steps: the output below shows the added
# ticker column, fiscalDateEnding as datetime64[ns], and int64 report values.
df_annual.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 27 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   ticker                             5 non-null      object        
 1   fiscalDateEnding                   5 non-null      datetime64[ns]
 2   reportedCurrency                   5 non-null      object        
 3   grossProfit                        5 non-null      int64         
 4   totalRevenue                       5 non-null      int64         
 5   costOfRevenue                      5 non-null      int64         
 6   costofGoodsAndServicesSold         5 non-null      int64         
 7   operatingIncome                    5 non-null      int64         
 8   sellingGeneralAndAdministrative    5 non-null      int64         
 9   researchAndDevelopment             5 non-null      int64         
 10  operatingExpenses                  5 non-n