# Automatic Data Collection

In [None]:
# required libraries
import json
import os
import time
from datetime import datetime, timedelta

import pytz
import requests

In [None]:
# function to fetch one ticker's daily price data from the Tiingo API and save it as JSON (called repeatedly during market hours)
def fetch_ticker_data(ticker, start_date, token):
    """Download daily prices for one ticker from Tiingo and save them as JSON.

    Sends a GET request to the Tiingo daily-prices endpoint and, on an
    HTTP 200 response, writes the decoded JSON to ``data/<ticker>.json``
    (creating the ``data`` directory if needed).

    Args:
        ticker: Ticker symbol; used both in the request URL and as the
            output file name.
        start_date: Value sent as the ``startDate`` query parameter.
        token: Value sent as the ``token`` query parameter.

    Returns:
        None in every case. Success and failure are reported with
        ``print`` so that one failing ticker does not stop the caller.
    """
    base_url = f"https://api.tiingo.com/tiingo/daily/{ticker}/prices"
    params = {
        "startDate": start_date,
        "token": token
    }
    headers = {
        "Content-Type": "application/json"
    }

    # Without a timeout a stalled connection would block forever; a network
    # error is reported instead of raised so a long-running caller survives.
    try:
        response = requests.get(base_url, headers=headers, params=params, timeout=30)
    except requests.RequestException as e:
        print(f"Failed to fetch data for {ticker}: {e}")
        return None

    # check for errors
    if response.status_code != 200:
        print(f"Failed to fetch data for {ticker}: {response.status_code}")
        return None

    # save data to a json file
    try:
        data = response.json()
        # open(..., "w") does not create missing directories
        os.makedirs("data", exist_ok=True)
        with open(f"data/{ticker}.json", "w") as file:
            json.dump(data, file, indent=4)
        print(f"Data for {ticker} saved to data/{ticker}.json")
    except (ValueError, OSError) as e:
        # ValueError: response body is not valid JSON; OSError: file system problem
        print(f"Error saving data for {ticker}: {e}")
    return None

# per-ticker start date (YYYY-MM-DD); each value is passed to
# fetch_ticker_data as the start_date for that ticker
ticker_start_dates = dict(
    aapl="1980-12-12",
    msft="1986-03-13",
    nvda="1999-01-22",
    tsla="2010-06-29",
    amzn="1997-05-15",
    googl="2004-08-19",
    meta="2012-05-18",
    nflx="2002-05-23",
    avgo="2009-08-06",
    pypl="2015-07-20",
)

# API token sent with every request
# NOTE(review): the value below looks like a placeholder - confirm before running
token = "xxxxxxxxxxxxxxx"

# time period configuration: 09:30-16:00 session in US/Eastern time
market_tz = pytz.timezone('US/Eastern')


def market_session(day):
    """Return the (open, close) datetimes of the 09:30-16:00 session on `day`.

    `day` is a date object; both results are timezone-aware in US/Eastern.
    localize() is used instead of replace()/timedelta arithmetic so the UTC
    offset is the one in effect on that calendar day, including across
    daylight-saving changes.
    """
    session_open = market_tz.localize(datetime(day.year, day.month, day.day, 9, 30))
    session_close = market_tz.localize(datetime(day.year, day.month, day.day, 16, 0))
    return session_open, session_close


# loop to fetch data during market hours
while True:
    current_time = datetime.now(market_tz)
    today = current_time.date()

    # Recompute the window on every pass: a window fixed at startup is stale
    # from the second day on and would never match again.
    start_time, end_time = market_session(today)

    # check if within the specified time period (Monday-Friday only)
    if today.weekday() < 5 and start_time <= current_time <= end_time:
        # fetch data for each ticker
        for ticker, start_date in ticker_start_dates.items():
            fetch_ticker_data(ticker, start_date, token)

        # wait for 12 seconds before the next iteration (avoids hitting the api rate limit)
        time.sleep(12)
    else:
        # outside market hours: sleep until the next weekday open
        print("Outside market hours. Waiting for the next market open...")

        # Before today's open the target is today; otherwise start from tomorrow.
        next_day = today
        if current_time >= start_time:
            next_day += timedelta(days=1)
        # Skip Saturday/Sunday. Exchange holidays are not handled here.
        while next_day.weekday() >= 5:
            next_day += timedelta(days=1)

        next_open, _ = market_session(next_day)
        time_to_wait = (next_open - current_time).total_seconds()
        time.sleep(max(time_to_wait, 0))
