In [3]:
import requests
import pandas as pd
import os
import numpy as np

from os import getenv
from dotenv import load_dotenv

load_dotenv()
av_api_key = getenv("ALPHA_VANTAGE_API_KEY")
louis_api_key = getenv("STLOUSID_FED_RESV_API_KEY")


# St. Louis Federal Reserve

Data from API is of the index, which do not have the same data structure as a stock.

A call with a finance professions (Nate's brother) told us that indices are not traded in the same way as stocks are.

A dataset that would resemble the stock data better would be ETFs.

eg. the SPY ETF is made from the S&P500 index
    the INDU ETF is made from the Dow jones industrial index

In [4]:
#alpha vantage does not have the stick data we need
louis_base_url = 'https://api.stlouisfed.org/fred/series/observations'

params = {
    'frequency' : 'd',
    'series_id' : 'Not a symbol', # stock ticker symbol
    'units' : 'lin',
    'observation_start' : '2022-06-01',
    'observation_end' : '2024-05-31',
    'order_by' : "observation_date",
    'sort_order' : "asc",
    'file_type' : 'json', #json also an option
    'api_key' : louis_api_key
}

# S&P500 (^GSPC) and Dow Jones (^DJI) stock symbols
# IMB to test

# Call with Jesse, Nate's bro who is a stock profile manager
# ETF will have better data than the indexes
# The indicies are not actively traded, WTFs build from those indicies are what we're looking for
# Dow - either DOW or INDU (ETFs)
# S&P - SPY spiders mini (1/10th of 1 S&P (it's an ETF so it'll probably have volumes unlike SP500))
# yahoo or google finance for API
symbols = ['SP500', 'DJIA']

In [5]:
responses = []
for symbol in symbols:
    #these 2 values need to be updated when the param dict is updated for a new api test.
    params['series_id'] = symbol
    av_response = requests.get(louis_base_url, params=params)

    if av_response == 200:
        print(f'API response sucessful: {symbol}')
    else:
        print(av_response.status_code)

    av_response_json = av_response.json()

    responses.append(av_response_json)

200
200


In [6]:
responses[0]

{'realtime_start': '2024-06-07',
 'realtime_end': '2024-06-07',
 'observation_start': '2022-06-01',
 'observation_end': '2024-05-31',
 'units': 'lin',
 'output_type': 1,
 'file_type': 'json',
 'order_by': 'observation_date',
 'sort_order': 'asc',
 'count': 523,
 'offset': 0,
 'limit': 100000,
 'observations': [{'realtime_start': '2024-06-07',
   'realtime_end': '2024-06-07',
   'date': '2022-06-01',
   'value': '4101.23'},
  {'realtime_start': '2024-06-07',
   'realtime_end': '2024-06-07',
   'date': '2022-06-02',
   'value': '4176.82'},
  {'realtime_start': '2024-06-07',
   'realtime_end': '2024-06-07',
   'date': '2022-06-03',
   'value': '4108.54'},
  {'realtime_start': '2024-06-07',
   'realtime_end': '2024-06-07',
   'date': '2022-06-06',
   'value': '4121.43'},
  {'realtime_start': '2024-06-07',
   'realtime_end': '2024-06-07',
   'date': '2022-06-07',
   'value': '4160.68'},
  {'realtime_start': '2024-06-07',
   'realtime_end': '2024-06-07',
   'date': '2022-06-08',
   'value': 

# Alpha Vantage ETFs of S&P 500 and Dow Jones Industrial

|symbol|name|reason|
|---|---|---|
|SPY|SSgA Active Trust - SPDR S&P 500 ETF Trust|S&P500 ETF for data similarity to kaggle data|
|DOW|Dow Inc|Dow industrial investment company|
|INDU| Dow Industrials| DOW Jones Industrial index 1/100 ETF|
|IBM|IBM corp.| Alpha Ventures example stock for API calls, to check if calls were valid|

In [7]:
#alpha vantage does not have the stick data we need
av_base_url = 'https://www.alphavantage.co/query'

params = {
    'function' : 'TIME_SERIES_DAILY',
    'symbol' : 'Not a symbol', # stock ticker symbol
    #outputsize : compact returns only the latest 100 data points; full returns the full-length time series of 20+ years of historical data. 
    'outputsize' : 'full',
    'datatype' : 'json', #json also an option
    'apikey' : av_api_key
}

# S&P500 (^GSPC) and Dow Jones (^DJI) stock symbols
# IMB to test
symbols = ['SPY','DOW', 'INDU'] #, 'IBM' used for testing

In [8]:
responses = []
for symbol in symbols:
    params['symbol'] = symbol
    av_response = requests.get(av_base_url, params=params)

    if av_response.status_code == 200:
        print(f'API response sucessful: {symbol}')
    else:
        print(av_response.status_code)

    av_response_json = av_response.json()

    responses.append(av_response_json)


API response sucessful: SPY
API response sucessful: DOW
API response sucessful: INDU


In [9]:

df = pd.DataFrame.from_dict(responses[0]['Time Series (Daily)']).transpose()
 
df.rename(columns={'1. open': 'open',
                    '2. high': 'high',
                    '3. low': 'low',
                    '4. close': 'close',
                    '5. volume': 'volume'}, 
                    inplace=True)

df.index = pd.to_datetime(df.index)

for column in ['open','high','low','close','volume']:
        df[column] = df[column].astype(np.double)
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6190 entries, 2024-06-07 to 1999-11-01
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   open    6190 non-null   float64
 1   high    6190 non-null   float64
 2   low     6190 non-null   float64
 3   close   6190 non-null   float64
 4   volume  6190 non-null   float64
dtypes: float64(5)
memory usage: 290.2 KB


In [10]:

for response in responses:
    #
    meta_data = response['Meta Data']
    time_series = response['Time Series (Daily)']

    data = pd.DataFrame.from_dict(time_series).transpose()
    
    data['symbol'] = meta_data['2. Symbol']
    data.rename(columns={'1. open': 'open',
                        '2. high': 'high',
                        '3. low': 'low',
                        '4. close': 'close',
                        '5. volume': 'volume'},
                        inplace=True)
    
    data.index = pd.to_datetime(data.index)

    for column in ['open','high','low','close','volume']:
        df[column] = df[column].astype(np.double)

    path = f'./Resources/{meta_data['2. Symbol']}.csv'
    data.to_csv(path)
        


In [11]:
# kaggle dataset header
# symbol	date	open	high	low	close	volume	adjusted


In [12]:
df = pd.read_csv('Resources/DOW.csv')

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1315 entries, 0 to 1314
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  1315 non-null   object 
 1   open        1315 non-null   float64
 2   high        1315 non-null   float64
 3   low         1315 non-null   float64
 4   close       1315 non-null   float64
 5   volume      1315 non-null   int64  
 6   symbol      1315 non-null   object 
dtypes: float64(4), int64(1), object(2)
memory usage: 72.0+ KB


In [14]:
df.isnull().sum()

Unnamed: 0    0
open          0
high          0
low           0
close         0
volume        0
symbol        0
dtype: int64