# Stock Market Database

In [1]:
import numpy as np
import pandas as pd
import json
from datetime import datetime
import requests

import matplotlib.pyplot as plt
import seaborn as sns

import os
import warnings

import src
from env import key
warnings.filterwarnings("ignore")

# Acquire

In [2]:
# Create a global variable to hold the apikey path
API_KEY_PATH = '&apikey={key}'

In [3]:
def base_url():
    '''
    Base url to acquire financial data.
    
    Parameters
    ----------
    None
    
    Returns
    -------
    url : str
        URL to acquire data using alphavantage REST API.
    '''
    url = 'https://www.alphavantage.co/query?'
    return url

In [4]:
base_url()

'https://www.alphavantage.co/query?'

In [5]:
def response_endpoint(data='INCOME_STATEMENT', ticker='IBM'):
    '''
    Endpoint to a acquire data using alphavantage REST API.
    
    Returns 
    Parameters
    ----------
    data : str, default 'INCOME_STATEMENT'

    ticker : str, default 'IBM'
        
    Returns
    -------
    get_request : requests.models.Response object
        Response object with data from a specified endpoint.
    '''
    global API_KEY_PATH

    function = f'function={data}'
    symbol = f'&symbol={ticker}'
    file_type = '&datatype=csv'

    endpoint = function + symbol + API_KEY_PATH + file_type

    get_request = requests.get(base_url() + endpoint)
    return get_request


In [6]:
data = response_endpoint()
data.status_code

200

In [7]:
report = data.json()
report.keys()

dict_keys(['symbol', 'annualReports', 'quarterlyReports'])

In [8]:
report.get('symbol')

'IBM'

In [9]:
quarterly_data = report.get('quarterlyReports')

df_quarterly = pd.DataFrame(quarterly_data)

df_quarterly = df_quarterly.sort_values('fiscalDateEnding').reset_index(drop=True)

In [10]:
df_quarterly.head()

Unnamed: 0,fiscalDateEnding,reportedCurrency,grossProfit,totalRevenue,costOfRevenue,costofGoodsAndServicesSold,operatingIncome,sellingGeneralAndAdministrative,researchAndDevelopment,operatingExpenses,...,depreciation,depreciationAndAmortization,incomeBeforeTax,incomeTaxExpense,interestAndDebtExpense,netIncomeFromContinuingOperations,comprehensiveIncomeNetOfTax,ebit,ebitda,netIncome
0,2016-06-30,USD,9702000000,20238000000,10536000000,10536000000,2490000000,5349000000,1465000000,7212000000,...,705000000,398000000,3048000000,544000000,167000000,2505000000,2899000000,2490000000,2888000000,2504000000
1,2016-09-30,USD,9013000000,19226000000,10213000000,10213000000,2515000000,4732000000,1397000000,6498000000,...,724000000,403000000,3262000000,409000000,158000000,2854000000,3293000000,2515000000,2918000000,2853000000
2,2016-12-31,USD,11115000000,21770000000,10655000000,10877000000,4755000000,4776000000,1406000000,6360000000,...,731000000,396000000,4981000000,480000000,157000000,4506000000,3268000000,4755000000,5151000000,4501000000
3,2017-03-31,USD,7944000000,18155000000,10211000000,10383000000,1043000000,5027000000,1484000000,6901000000,...,709000000,390000000,1421000000,-329000000,135000000,1753000000,2438000000,1043000000,1433000000,1750000000
4,2017-06-30,USD,8968000000,19289000000,10321000000,10495000000,2111000000,5033000000,1436000000,6857000000,...,730000000,388000000,2442000000,111000000,147000000,2332000000,2852000000,2111000000,2499000000,2331000000


In [11]:
df_quarterly.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 26 columns):
 #   Column                             Non-Null Count  Dtype 
---  ------                             --------------  ----- 
 0   fiscalDateEnding                   20 non-null     object
 1   reportedCurrency                   20 non-null     object
 2   grossProfit                        20 non-null     object
 3   totalRevenue                       20 non-null     object
 4   costOfRevenue                      20 non-null     object
 5   costofGoodsAndServicesSold         20 non-null     object
 6   operatingIncome                    20 non-null     object
 7   sellingGeneralAndAdministrative    20 non-null     object
 8   researchAndDevelopment             20 non-null     object
 9   operatingExpenses                  20 non-null     object
 10  investmentIncomeNet                20 non-null     object
 11  netInterestIncome                  20 non-null     object
 12  interestIn

In [12]:
df_quarterly.insert(0, 'ticker', report.get('symbol'))

## Acquire Function

In [13]:
df = src.get_financial_data()
df.head()

Unnamed: 0,ticker,fiscalDateEnding,grossProfit,totalRevenue,costOfRevenue,costofGoodsAndServicesSold,operatingIncome,sellingGeneralAndAdministrative,researchAndDevelopment,operatingExpenses,...,depreciation,depreciationAndAmortization,incomeBeforeTax,incomeTaxExpense,interestAndDebtExpense,netIncomeFromContinuingOperations,comprehensiveIncomeNetOfTax,ebit,ebitda,netIncome
0,IBM,2016-06-30,9702000000.0,20238000000.0,10536000000.0,10536000000.0,2490000000.0,5349000000.0,1465000000.0,7212000000.0,...,705000000.0,398000000.0,3048000000.0,544000000.0,167000000.0,2505000000.0,2899000000.0,2490000000.0,2888000000.0,2504000000.0
1,IBM,2016-09-30,9013000000.0,19226000000.0,10213000000.0,10213000000.0,2515000000.0,4732000000.0,1397000000.0,6498000000.0,...,724000000.0,403000000.0,3262000000.0,409000000.0,158000000.0,2854000000.0,3293000000.0,2515000000.0,2918000000.0,2853000000.0
2,IBM,2016-12-31,11115000000.0,21770000000.0,10655000000.0,10877000000.0,4755000000.0,4776000000.0,1406000000.0,6360000000.0,...,731000000.0,396000000.0,4981000000.0,480000000.0,157000000.0,4506000000.0,3268000000.0,4755000000.0,5151000000.0,4501000000.0
3,IBM,2017-03-31,7944000000.0,18155000000.0,10211000000.0,10383000000.0,1043000000.0,5027000000.0,1484000000.0,6901000000.0,...,709000000.0,390000000.0,1421000000.0,-329000000.0,135000000.0,1753000000.0,2438000000.0,1043000000.0,1433000000.0,1750000000.0
4,IBM,2017-06-30,8967999000.0,19289000000.0,10321000000.0,10495000000.0,2111000000.0,5033000000.0,1436000000.0,6857000000.0,...,730000000.0,388000000.0,2442000000.0,111000000.0,147000000.0,2332000000.0,2852000000.0,2111000000.0,2499000000.0,2331000000.0


In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 26 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   ticker                             20 non-null     object        
 1   fiscalDateEnding                   20 non-null     datetime64[ns]
 2   grossProfit                        20 non-null     float32       
 3   totalRevenue                       20 non-null     float32       
 4   costOfRevenue                      20 non-null     float32       
 5   costofGoodsAndServicesSold         20 non-null     float32       
 6   operatingIncome                    20 non-null     float32       
 7   sellingGeneralAndAdministrative    20 non-null     float32       
 8   researchAndDevelopment             20 non-null     float32       
 9   operatingExpenses                  20 non-null     float32       
 10  investmentIncomeNet                20 no

In [15]:
df.fillna(0, inplace=True)

# Prepare

In [16]:
df = df.replace(r'None', '0', regex=True)

date_col = df.columns[1]
numeric_cols = df.columns[2:]

df[date_col] = pd.to_datetime(df.loc[:,date_col])
df[numeric_cols] = df.loc[:, numeric_cols].apply(pd.to_numeric,
                                                 downcast='float',
                                                 errors='coerce')

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 26 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   ticker                             20 non-null     object        
 1   fiscalDateEnding                   20 non-null     datetime64[ns]
 2   grossProfit                        20 non-null     float32       
 3   totalRevenue                       20 non-null     float32       
 4   costOfRevenue                      20 non-null     float32       
 5   costofGoodsAndServicesSold         20 non-null     float32       
 6   operatingIncome                    20 non-null     float32       
 7   sellingGeneralAndAdministrative    20 non-null     float32       
 8   researchAndDevelopment             20 non-null     float32       
 9   operatingExpenses                  20 non-null     float32       
 10  investmentIncomeNet                20 no

In [18]:
df.head()

Unnamed: 0,ticker,fiscalDateEnding,grossProfit,totalRevenue,costOfRevenue,costofGoodsAndServicesSold,operatingIncome,sellingGeneralAndAdministrative,researchAndDevelopment,operatingExpenses,...,depreciation,depreciationAndAmortization,incomeBeforeTax,incomeTaxExpense,interestAndDebtExpense,netIncomeFromContinuingOperations,comprehensiveIncomeNetOfTax,ebit,ebitda,netIncome
0,IBM,2016-06-30,9702000000.0,20238000000.0,10536000000.0,10536000000.0,2490000000.0,5349000000.0,1465000000.0,7212000000.0,...,705000000.0,398000000.0,3048000000.0,544000000.0,167000000.0,2505000000.0,2899000000.0,2490000000.0,2888000000.0,2504000000.0
1,IBM,2016-09-30,9013000000.0,19226000000.0,10213000000.0,10213000000.0,2515000000.0,4732000000.0,1397000000.0,6498000000.0,...,724000000.0,403000000.0,3262000000.0,409000000.0,158000000.0,2854000000.0,3293000000.0,2515000000.0,2918000000.0,2853000000.0
2,IBM,2016-12-31,11115000000.0,21770000000.0,10655000000.0,10877000000.0,4755000000.0,4776000000.0,1406000000.0,6360000000.0,...,731000000.0,396000000.0,4981000000.0,480000000.0,157000000.0,4506000000.0,3268000000.0,4755000000.0,5151000000.0,4501000000.0
3,IBM,2017-03-31,7944000000.0,18155000000.0,10211000000.0,10383000000.0,1043000000.0,5027000000.0,1484000000.0,6901000000.0,...,709000000.0,390000000.0,1421000000.0,-329000000.0,135000000.0,1753000000.0,2438000000.0,1043000000.0,1433000000.0,1750000000.0
4,IBM,2017-06-30,8967999000.0,19289000000.0,10321000000.0,10495000000.0,2111000000.0,5033000000.0,1436000000.0,6857000000.0,...,730000000.0,388000000.0,2442000000.0,111000000.0,147000000.0,2332000000.0,2852000000.0,2111000000.0,2499000000.0,2331000000.0


## Prepare Function

In [19]:
def prep_financial_report(df):
    '''
    
    '''
    date_col = df.columns[1]
    numeric_cols = df.columns[2:]

    df[date_col] = pd.to_datetime(df.loc[:,date_col])
    df[numeric_cols] = df.loc[:, numeric_cols].apply(pd.to_numeric,
                                                     downcast='float',
                                                     errors='coerce')
    df.fillna(0, inplace=True)
    
    return df

In [23]:
df_acquire = src.get_financial_data()
df_prep = prep_financial_report(df_acquire)

In [24]:
df_prep.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 26 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   ticker                             20 non-null     string        
 1   fiscalDateEnding                   20 non-null     datetime64[ns]
 2   grossProfit                        20 non-null     float32       
 3   totalRevenue                       20 non-null     float32       
 4   costOfRevenue                      20 non-null     float32       
 5   costofGoodsAndServicesSold         20 non-null     float32       
 6   operatingIncome                    20 non-null     float32       
 7   sellingGeneralAndAdministrative    20 non-null     float32       
 8   researchAndDevelopment             20 non-null     float32       
 9   operatingExpenses                  20 non-null     float32       
 10  investmentIncomeNet                20 no