In [114]:
import pandas as pd
import numpy as np

from alpha_vantage.timeseries import TimeSeries 

import os
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env

True

In [115]:
# Ticker symbols to download, kept in the original order.
symbols = (
    "AAPL MSFT AMZN FB GOOG GOOGL TSLA NVDA PYPL ASML "
    "INTC CMCSA NFLX ADBE CSCO PEP AVGO TXN TMUS"
).split()

In [116]:
# Slice identifiers "year1month1" ... "year2month12": every month of year 1,
# followed by every month of year 2 (24 entries in total).
slices = [
    f"year{year}month{month}"
    for year in (1, 2)
    for month in range(1, 13)
]

### Configuration

In [117]:
# Settings for the Alpha Vantage requests made in this notebook.
config = dict(
    alpha_vantage=dict(
        # Claim your free API key here: https://www.alphavantage.co/support/#api-key
        # The key is read from the API_KEY environment variable (None if unset).
        key=os.getenv("API_KEY"),
        symbol=None,
        interval="30min",
        slice=None,
        adjusted=True,
    ),
)

In [118]:
# Alpha Vantage client for the exploratory cells below; responses are
# requested in CSV form.
ts = TimeSeries(
    key=config["alpha_vantage"]["key"],
    output_format="csv",
)


In [119]:
# Trial download: 30-minute AAPL bars. No `slice` argument is passed, so the
# client's default slice applies.
data = ts.get_intraday_extended(
    interval="30min",
    symbol="AAPL",
)

In [120]:
# csv to dataframe
# NOTE(review): data[0] is presumably a one-shot CSV reader, so re-running this
# cell without re-running the download cell would see no rows — confirm.
csv_rows = list(data[0])

# define header row
header_row = 0

# Fail with a readable message instead of an IndexError/KeyError further down
# when the payload is empty or is not the expected CSV (no "time" column).
if not csv_rows or "time" not in csv_rows[header_row]:
    raise ValueError(f"Unexpected response, first rows: {csv_rows[:3]}")

# Build the frame from header + records directly: this avoids in-place
# mutation and does not leave the column axis named after the header row's
# label (0), which is what assigning `df.iloc[0]` to `.columns` does.
df_test = pd.DataFrame(
    [row for position, row in enumerate(csv_rows) if position != header_row],
    columns=csv_rows[header_row],
).set_index("time")

In [121]:
# Preview the first five rows of the trial download.
df_test.head(n=5)

Unnamed: 0_level_0,open,high,low,close,volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-03-04 20:00:00,163.05,163.05,162.91,162.97,17754
2022-03-04 19:30:00,163.05,163.05,162.96,163.05,30836
2022-03-04 19:00:00,163.08,163.16,163.0,163.05,25122
2022-03-04 18:30:00,163.05,163.08,163.01,163.06,6195
2022-03-04 18:00:00,163.2,163.2,163.01,163.05,9391


In [122]:
def download_data(config, symbol, time_slice):
    """Download one slice of extended intraday data for ``symbol``.

    Parameters
    ----------
    config : dict
        Must provide ``config["alpha_vantage"]["key"]`` (API key) and
        ``config["alpha_vantage"]["interval"]`` (bar interval, e.g. ``"30min"``).
    symbol : str
        Ticker symbol to request.
    time_slice : str
        Slice identifier forwarded as ``slice=`` (e.g. ``"year1month1"``).

    Returns
    -------
    pandas.DataFrame
        One row per record, indexed by the ``time`` column; the other columns
        are named after the CSV header. Cell values are kept exactly as the
        CSV reader delivers them (no dtype conversion is applied here).

    Raises
    ------
    ValueError
        If the response is empty or its first row has no ``time`` column.
        The message quotes the start of the payload so that an API-side
        message is visible instead of an opaque ``KeyError: 'time'``.
    """
    ts = TimeSeries(key=config["alpha_vantage"]["key"], output_format='csv')
    data = ts.get_intraday_extended(symbol=symbol, slice=time_slice, interval=config["alpha_vantage"]["interval"])

    # csv --> list of rows (first row is expected to be the header)
    rows = list(data[0])

    if not rows or "time" not in rows[0]:
        preview = " ".join(",".join(map(str, row)) for row in rows[:5]).strip()
        raise ValueError(
            f"Unexpected response for {symbol} / {time_slice}: "
            f"{preview or '<empty response>'}"
        )

    # Build the frame from header + records in one step; this keeps the column
    # axis unnamed (assigning `df.iloc[0]` to `.columns` would name it 0) and
    # avoids in-place mutation.
    header, *records = rows
    df = pd.DataFrame(records, columns=header).set_index("time")

    num_data_points = df.shape[0]
    print(f"{symbol} - {time_slice}: number data points", num_data_points)

    return df

In [123]:
interval = config["alpha_vantage"]["interval"]

# `to_csv` does not create missing directories; make sure the output folder
# exists before any download starts so a full symbol's worth of requests is
# not lost to a write error at the end.
os.makedirs("./data", exist_ok=True)

for symbol in symbols:

    # collect the per-slice frames and concatenate once per symbol, instead of
    # re-copying a growing DataFrame on every iteration
    slice_frames = []

    # loop for all time slices
    for time_slice in slices:
        print(symbol, time_slice)
        df = download_data(config, symbol, time_slice)
        slice_frames.append(df)

    # an empty `slices` list still yields an (empty) frame, as before
    df_all_slices = pd.concat(slice_frames) if slice_frames else pd.DataFrame()

    # write combined symbol data to csv
    df_all_slices.to_csv(f"./data/{symbol}_intraday_extended_{interval}.csv")


AAPL year1month1
AAPL - year1month1: number data points 672
AAPL year1month2
AAPL - year1month2: number data points 672
AAPL year1month3
AAPL - year1month3: number data points 639
AAPL year1month4
AAPL - year1month4: number data points 629
AAPL year1month5
AAPL - year1month5: number data points 704
AAPL year1month6
AAPL - year1month6: number data points 672
AAPL year1month7
AAPL - year1month7: number data points 640
AAPL year1month8
AAPL - year1month8: number data points 704
AAPL year1month9
AAPL - year1month9: number data points 672
AAPL year1month10
AAPL - year1month10: number data points 640
AAPL year1month11
AAPL - year1month11: number data points 672
AAPL year1month12
AAPL - year1month12: number data points 672
AAPL year2month1
AAPL - year2month1: number data points 672
AAPL year2month2
AAPL - year2month2: number data points 608
AAPL year2month3
AAPL - year2month3: number data points 629
AAPL year2month4
AAPL - year2month4: number data points 661
AAPL year2month5
AAPL - year2month

In [124]:
# Last five rows of the combined frame left over from the final loop iteration.
df_all_slices.tail(n=5)

Unnamed: 0_level_0,open,high,low,close,volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-03-16 10:00:00,78.34,78.34,74.84,76.88,310740
2020-03-16 09:30:00,76.12,79.24,76.12,77.21,1373
2020-03-16 09:00:00,77.01,77.02,77.0,77.0,744
2020-03-16 08:30:00,76.7,77.01,76.7,77.01,1338
2020-03-16 08:00:00,79.98,79.98,78.36,78.36,531
