# Query Target and Feature Data

> This notebook uses the **Alpha Vantage** API to query all the data and features needed.

In addition to the target stock, we include the following features:
- Value of the Dow Jones Index
- Value of the Nasdaq Index
- Value of the S&P 500 Index
- Price of the two-year Treasury bond
- Price of the five-year Treasury bond
- Price of the ten-year Treasury bond
- Price of gold
- Price of crude oil


In [1]:
import requests
from datetime import datetime, timedelta
import pandas as pd
import os

# Alpha Vantage API key. Set the ALPHAVANTAGE_API_KEY environment variable to
# supply your own key without editing this file; the literal is kept only as a
# backward-compatible fallback.
# NOTE(review): a key committed to source should be rotated and the fallback
# removed once every user of this notebook sets the environment variable.
api_key = os.environ.get("ALPHAVANTAGE_API_KEY", "GZTM1C89JRLUEWI4")

In [2]:
def get_15min_stock_data(symbol, month):
    """
    Retrieve 15-minute intraday price data for one symbol and month from Alpha Vantage.

    Args:
        symbol: Ticker symbol to query.
        month: Month to query, passed to the API's ``month`` parameter
            (callers in this file format it as ``"YYYY-MM"``).

    Returns:
        The value stored under ``'Time Series (15min)'`` in the JSON response.

    Raises:
        Exception: If the HTTP status code is not 200.
        KeyError: If the response body has no ``'Time Series (15min)'`` entry.
            The message includes the returned payload so the reason the API
            gave is visible.
    """
    series_key = 'Time Series (15min)'

    # Let requests build the query string so every value is URL-encoded, and
    # set a timeout so a stalled connection cannot hang the download loop.
    response = requests.get(
        'https://www.alphavantage.co/query',
        params={
            'function': 'TIME_SERIES_INTRADAY',
            'symbol': symbol,
            'interval': '15min',
            'month': month,
            'outputsize': 'full',
            'apikey': api_key,
            'extended_hours': 'false',
        },
        timeout=30,
    )
    if response.status_code != 200:
        raise Exception("Failed to retrieve data from Alpha Vantage")

    payload = response.json()
    if series_key not in payload:
        # A 200 response can still lack the series; surface what came back
        # instead of failing with a context-free KeyError.
        raise KeyError(
            f"Alpha Vantage response for {symbol} ({month}) has no "
            f"'{series_key}' entry: {payload}"
        )

    return payload[series_key]

In [3]:
def get_historical_data_for_range(symbol, start_date, end_date):
    """
    Download 15-minute data for every calendar month from start_date to end_date.

    Each month touched by the range is fetched in full with
    ``get_15min_stock_data``; rows are not trimmed to the exact start and end
    days.

    Args:
        symbol: Ticker symbol to query.
        start_date: First date of the range, formatted ``"YYYY-MM-DD"``.
        end_date: Last date of the range, formatted ``"YYYY-MM-DD"``.

    Returns:
        A single DataFrame holding all fetched months, with added
        ``timestamp`` and ``month`` columns, sorted by ``timestamp``.

    Raises:
        ValueError: If a date string does not match ``"%Y-%m-%d"`` or if
            start_date is after end_date.
    """
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")
    if start > end:
        raise ValueError(
            f"start_date {start_date} must not be after end_date {end_date}"
        )

    monthly_frames = []  # One DataFrame per calendar month

    # Walk exact calendar months. Stepping by a fixed number of days drifts
    # through the month and can skip a month or miss the final one.
    year, month_number = start.year, start.month
    while (year, month_number) <= (end.year, end.month):
        month = f"{year:04d}-{month_number:02d}"
        price_data = get_15min_stock_data(symbol, month)

        # Transpose so that each key of price_data becomes one row.
        df = pd.DataFrame(price_data).transpose()
        df['timestamp'] = df.index  # Keep the row label as a regular column
        df['month'] = month  # Record which monthly request produced the row
        monthly_frames.append(df)

        if month_number == 12:
            year, month_number = year + 1, 1
        else:
            month_number += 1

    combined_df = pd.concat(monthly_frames)
    combined_df = combined_df.sort_values('timestamp')

    return combined_df

In [4]:
def get_and_save_symbol_data(symbol, start_date, end_date, directory):
    """
    Download the data for one symbol and write it to ``<directory>/<symbol>.csv``.

    Args:
        symbol: Ticker symbol to query; also used as the CSV file name.
        start_date: First date of the range, formatted ``"YYYY-MM-DD"``.
        end_date: Last date of the range, formatted ``"YYYY-MM-DD"``.
        directory: Output directory; created if it does not exist.
    """
    data = get_historical_data_for_range(symbol, start_date, end_date)

    # exist_ok=True avoids the race between checking for the directory and
    # creating it.
    os.makedirs(directory, exist_ok=True)

    file_path = os.path.join(directory, f'{symbol}.csv')
    # The index is dropped; the data already carries a 'timestamp' column.
    data.to_csv(file_path, index=False)
    print(f"Data for {symbol} saved to {file_path}")

# Download Data

1. Define target stocks in `targets`, and select `start_date` and `end_date`.
2. Then download the data into `feature_path` and `targets_path`, respectively.

In [5]:
# Ticker symbols downloaded as model features.
features = [
    'DIA', 'QQQ', 'IVV',
    'SHY', 'IEI', 'IEF',
    'GLD', 'USO',
]

# Ticker symbols downloaded as prediction targets.
targets = [
    "NVDA", "AAPL", "AMGN", "CSCO", "INTC", "MSFT", "HON", "WBA",
    "AXP", "BA", "CAT", "CRM", "CVX", "DIS", "DOW", "GS",
    "HD", "IBM", "JNJ", "JPM", "KO", "MCD", "MMM", "MRK",
    "NKE", "PG", "TRV", "UNH", "V", "VZ", "WMT",
]

# Date range to download, formatted YYYY-MM-DD.
start_date = '2020-06-01'
end_date = '2023-06-01'

# Output directories for the feature and target CSV files.
feature_path = './data/features/'
targets_path = './data/targets/'

In [None]:
# Fetch every feature symbol and store its CSV under feature_path.
for symbol in features:
    get_and_save_symbol_data(
        symbol=symbol,
        start_date=start_date,
        end_date=end_date,
        directory=feature_path,
    )

In [6]:
# Fetch every target symbol and store its CSV under targets_path.
for symbol in targets:
    get_and_save_symbol_data(
        symbol=symbol,
        start_date=start_date,
        end_date=end_date,
        directory=targets_path,
    )

Data for NVDA saved to ./data/targets/NVDA.csv
Data for AAPL saved to ./data/targets/AAPL.csv
Data for AMGN saved to ./data/targets/AMGN.csv
Data for CSCO saved to ./data/targets/CSCO.csv
Data for INTC saved to ./data/targets/INTC.csv
Data for MSFT saved to ./data/targets/MSFT.csv
Data for HON saved to ./data/targets/HON.csv
Data for WBA saved to ./data/targets/WBA.csv
Data for AXP saved to ./data/targets/AXP.csv
Data for BA saved to ./data/targets/BA.csv
Data for CAT saved to ./data/targets/CAT.csv
Data for CRM saved to ./data/targets/CRM.csv
Data for CVX saved to ./data/targets/CVX.csv
Data for DIS saved to ./data/targets/DIS.csv
Data for DOW saved to ./data/targets/DOW.csv
Data for GS saved to ./data/targets/GS.csv
Data for HD saved to ./data/targets/HD.csv
Data for IBM saved to ./data/targets/IBM.csv
Data for JNJ saved to ./data/targets/JNJ.csv
Data for JPM saved to ./data/targets/JPM.csv
Data for KO saved to ./data/targets/KO.csv
Data for MCD saved to ./data/targets/MCD.csv
Data f