In [15]:
import requests
import pandas as pd
# from datetime import datetime, timedelta, date
import datetime
import time
from polygon import RESTClient
import logging
import signal
import sys
import pickle
import lz4.frame  # type: ignore
import concurrent.futures
import os

In [None]:
# Define the API details.
# The key is read from the POLYGON_API_KEY environment variable so that the
# credential is never stored in the notebook or its saved outputs.
# NOTE(review): a key was previously committed in this cell; it should be rotated.
api_key = os.environ.get("POLYGON_API_KEY")
if not api_key:
    raise RuntimeError("POLYGON_API_KEY environment variable is not set")

# Set up logging: INFO level, every record prefixed with its timestamp
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")

# Define the path where the files will be saved
save_path = "C:\\Users\\SamuliMustonen\\Documents\\Ready Solutions\\Docs\\StockTrading\\Data\\rawAggs"

# Create the directory if it doesn't exist
os.makedirs(save_path, exist_ok=True)

# Signal handler for graceful shutdown
def signal_handler(sig, frame):
    """Report the Ctrl+C interrupt on stdout and end the process with exit status 0.

    Both arguments are supplied by the ``signal`` module when the handler is
    invoked; neither is used here.
    """
    print("You pressed Ctrl+C!")
    raise SystemExit(0)

# Install the handler for SIGINT (Ctrl+C)
signal.signal(signal.SIGINT, signal_handler)

# Function to retrieve and save aggregate data for a given ticker over a date range
def get_aggs_for_ticker(ticker, start_date, end_date):
    """Download daily aggregates for one ticker and save them as a compressed pickle.

    One ``list_aggs`` request (multiplier 1, timespan "day") is issued for each
    day yielded by ``weekdays_between(start_date, end_date)``. Everything the
    requests return is collected into a single list, pickled, LZ4-compressed and
    written to ``<save_path>/<ticker>-aggs-<start_date>_to_<end_date>.pickle.lz4``.

    Args:
        ticker: Symbol passed to ``RESTClient.list_aggs``.
        start_date: First day of the range; also embedded in the file name.
        end_date: Last day of the range; also embedded in the file name.

    Returns:
        None. If the collected data cannot be serialized, the error is logged
        and no file is written.
    """
    client = RESTClient(api_key=api_key)  # One client per call, i.e. per worker task

    # NOTE(review): this issues one request per weekday; a single request spanning
    # start_date..end_date would presumably return the same bars - confirm before changing.
    aggs = []
    for day in weekdays_between(start_date, end_date):
        aggs.extend(client.list_aggs(ticker, 1, "day", day, day, limit=50000))

    # Serialize BEFORE opening the output file: opening with "wb" truncates it, so
    # a failure afterwards would leave an empty file (or destroy an earlier good one).
    try:
        compressed_data = lz4.frame.compress(pickle.dumps(aggs))
    except (TypeError, pickle.PicklingError) as e:
        logging.error(f"Serialization Error for {ticker}: {e}")
        return

    # Save the data to a compressed .pickle.lz4 file
    filename = os.path.join(save_path, f"{ticker}-aggs-{start_date}_to_{end_date}.pickle.lz4")
    with open(filename, "wb") as file:
        file.write(compressed_data)

    # Only reached when the file was actually written
    logging.info(f"Downloaded aggs for {ticker} from {start_date} to {end_date} and saved to {filename}")

# Function to generate all weekdays between two dates
def weekdays_between(start_date, end_date):
    """Yield every Monday-to-Friday day from start_date through end_date, inclusive.

    Args:
        start_date: First candidate day (e.g. a ``datetime.date``).
        end_date: Last candidate day; nothing is yielded when it precedes start_date.

    Yields:
        Each day in the range whose ``weekday()`` is 0-4, in ascending order.
    """
    # Only the ``datetime`` module is imported in this notebook, so the step must be
    # spelled ``datetime.timedelta``; a bare ``timedelta`` raises NameError.
    one_day = datetime.timedelta(days=1)
    day = start_date
    while day <= end_date:
        if day.weekday() < 5:  # Only Monday to Friday
            yield day
        day += one_day

def main():
    """Download 2022-01-01..2024-09-30 aggregates for every ticker listed in the CSV.

    Tickers are read from the 'Ticker' column of the CSV file and each one is
    handed to ``get_aggs_for_ticker`` on a pool of 10 worker threads. A failure
    for one ticker is logged together with its symbol and does not stop the others.
    """
    start_date = datetime.date(2022, 1, 1)  # Start date: Beginning of 2022
    end_date = datetime.date(2024, 9, 30)   # End date: 2024-09-30

    # Load tickers from CSV file (assumes the CSV file has columns 'Ticker' and 'Market-Cap')
    csv_file_path = "C:\\Users\\SamuliMustonen\\Documents\\Ready Solutions\\Docs\\StockTrading\\Data\\Tickers\\tickers_500m_to_10b.csv"  # Update this path with your actual file location
    df_tickers = pd.read_csv(csv_file_path, delimiter=',', header=0)

    # Extract tickers from the 'Ticker' column
    symbols = df_tickers['Ticker'].tolist()

    # Use ThreadPoolExecutor to download data for each ticker in parallel
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        # Remember which symbol each future belongs to so failures can be attributed
        future_to_symbol = {
            executor.submit(get_aggs_for_ticker, symbol, start_date, end_date): symbol
            for symbol in symbols
        }

        for future in concurrent.futures.as_completed(future_to_symbol):
            symbol = future_to_symbol[future]
            try:
                future.result()  # Re-raises any exception that occurred in the worker
            except Exception as e:
                logging.error(f"Error fetching data for {symbol}: {e}")

if __name__ == "__main__":
    main()

2024-10-18 15:17:45,320 - Downloaded aggs for AACT from 2022-01-01 to 2024-09-30 and saved to C:\Users\SamuliMustonen\Documents\Ready Solutions\Docs\StockTrading\Data\rawAggs\AACT-aggs-2022-01-01_to_2024-09-30.pickle.lz4
2024-10-18 15:17:46,969 - Downloaded aggs for ABCL from 2022-01-01 to 2024-09-30 and saved to C:\Users\SamuliMustonen\Documents\Ready Solutions\Docs\StockTrading\Data\rawAggs\ABCL-aggs-2022-01-01_to_2024-09-30.pickle.lz4
2024-10-18 15:17:47,053 - Downloaded aggs for ABUS from 2022-01-01 to 2024-09-30 and saved to C:\Users\SamuliMustonen\Documents\Ready Solutions\Docs\StockTrading\Data\rawAggs\ABUS-aggs-2022-01-01_to_2024-09-30.pickle.lz4
2024-10-18 15:17:47,319 - Downloaded aggs for ABL from 2022-01-01 to 2024-09-30 and saved to C:\Users\SamuliMustonen\Documents\Ready Solutions\Docs\StockTrading\Data\rawAggs\ABL-aggs-2022-01-01_to_2024-09-30.pickle.lz4
2024-10-18 15:17:47,469 - Downloaded aggs for AAOI from 2022-01-01 to 2024-09-30 and saved to C:\Users\SamuliMustonen\

In [12]:
# THIS IS TO FETCH DATA FROM THE API
# This cell downloads one day of aggregates per (symbol, date) pair and stores
# each result in its own "<symbol>-aggs-<date>.pickle.lz4" file.

# Define the API details
# client = RESTClient(api_key="<API_KEY>")

# Set up logging: INFO level, every record prefixed with its timestamp
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")


def signal_handler(sig, frame):
    """Report the Ctrl+C interrupt on stdout and end the process with exit status 0.

    Both arguments are supplied by the ``signal`` module when the handler is
    invoked; neither is used here.
    """
    print("You pressed Ctrl+C!")
    raise SystemExit(0)


# Install the handler for SIGINT (Ctrl+C)
signal.signal(signal.SIGINT, signal_handler)


def get_aggs_for_symbol_and_date(symbol_date_pair):
    """Download the daily aggregates for one (symbol, date) pair and save them compressed.

    The items returned by ``list_aggs`` (multiplier 1, timespan "day", from and to
    both equal to ``date``) are pickled, LZ4-compressed and written to
    ``<symbol>-aggs-<date>.pickle.lz4`` (a relative path).

    Args:
        symbol_date_pair: Two-item sequence ``(symbol, date)``.

    Returns:
        None. If the data cannot be serialized, the error is logged and no file
        is written.

    Raises:
        RuntimeError: If the POLYGON_API_KEY environment variable is not set.
    """
    symbol, date = symbol_date_pair

    # Take the credential from the environment instead of embedding it in the notebook.
    # NOTE(review): a key was previously committed on this line; it should be rotated.
    polygon_api_key = os.environ.get("POLYGON_API_KEY")
    if not polygon_api_key:
        raise RuntimeError("POLYGON_API_KEY environment variable is not set")
    client = RESTClient(api_key=polygon_api_key)

    aggs = list(
        client.list_aggs(
            symbol,
            1,
            "day",
            date,
            date,
            limit=50000,
        )
    )

    # Serialize BEFORE opening the output file: opening with "wb" truncates it, so
    # a failure afterwards would leave an empty file behind.
    try:
        compressed_data = lz4.frame.compress(pickle.dumps(aggs))
    except (TypeError, pickle.PicklingError) as e:
        logging.error(f"Serialization Error for {symbol} on {date}: {e}")
        return

    filename = f"{symbol}-aggs-{date}.pickle.lz4"
    with open(filename, "wb") as file:
        file.write(compressed_data)

    # Only reached when the file was actually written
    logging.info(f"Downloaded aggs for {date} and saved to {filename}")


def weekdays_between(start_date, end_date):
    """Yield each Monday-to-Friday day from start_date up to and including end_date."""
    # Whole days separating the two endpoints; negative when the range is empty
    span_in_days = (end_date - start_date).days
    for offset in range(span_in_days + 1):
        candidate = start_date + datetime.timedelta(days=offset)
        if candidate.weekday() >= 5:  # 5 and 6 denote Saturday and Sunday
            continue
        yield candidate


def main():
    """Download the August 2023 weekday aggregates for each configured symbol.

    Every (symbol, date) pair is handed to ``get_aggs_for_symbol_and_date`` on a
    pool of 50 worker threads. A failure for one pair is logged with its symbol
    and date and does not stop the remaining downloads.
    """
    start_date = datetime.date(2023, 8, 1)
    end_date = datetime.date(2023, 8, 31)

    symbols = ["AAPL"]  # The array of symbols you want

    dates = list(weekdays_between(start_date, end_date))

    # Generate a list of (symbol, date) pairs
    symbol_date_pairs = [(symbol, date) for symbol in symbols for date in dates]

    # Use ThreadPoolExecutor to download data in parallel
    with concurrent.futures.ThreadPoolExecutor(max_workers=50) as executor:
        # Results must be retrieved for worker exceptions to surface; an
        # unconsumed executor.map() silently discards them.
        future_to_pair = {
            executor.submit(get_aggs_for_symbol_and_date, pair): pair
            for pair in symbol_date_pairs
        }
        for future in concurrent.futures.as_completed(future_to_pair):
            symbol, date = future_to_pair[future]
            try:
                future.result()
            except Exception as e:
                logging.error(f"Error fetching data for {symbol} on {date}: {e}")


if __name__ == "__main__":
    main()

2024-10-18 14:43:14,837 - Downloaded aggs for 2023-08-17 and saved to AAPL-aggs-2023-08-17.pickle.lz4
2024-10-18 14:43:14,840 - Downloaded aggs for 2023-08-18 and saved to AAPL-aggs-2023-08-18.pickle.lz4
2024-10-18 14:43:14,841 - Downloaded aggs for 2023-08-24 and saved to AAPL-aggs-2023-08-24.pickle.lz4
2024-10-18 14:43:14,842 - Downloaded aggs for 2023-08-07 and saved to AAPL-aggs-2023-08-07.pickle.lz4
2024-10-18 14:43:14,850 - Downloaded aggs for 2023-08-16 and saved to AAPL-aggs-2023-08-16.pickle.lz4
2024-10-18 14:43:14,857 - Downloaded aggs for 2023-08-25 and saved to AAPL-aggs-2023-08-25.pickle.lz4
2024-10-18 14:43:14,884 - Downloaded aggs for 2023-08-22 and saved to AAPL-aggs-2023-08-22.pickle.lz4
2024-10-18 14:43:14,890 - Downloaded aggs for 2023-08-15 and saved to AAPL-aggs-2023-08-15.pickle.lz4
2024-10-18 14:43:14,890 - Downloaded aggs for 2023-08-11 and saved to AAPL-aggs-2023-08-11.pickle.lz4
2024-10-18 14:43:14,891 - Downloaded aggs for 2023-08-14 and saved to AAPL-aggs-20

In [14]:
# THIS IS TO READ THE FETCHED LZ4 FILES FROM FOLDER PATH

def read_trades_for_date(symbol, date):
    """Load, print and return the data stored for one symbol and date.

    The file ``<symbol>-aggs-<date>.pickle.lz4`` (a relative path) is read,
    LZ4-decompressed and unpickled.

    Returns:
        The unpickled object, or None when the file does not exist or any other
        error occurs (a message is printed in both cases).
    """
    # Same naming scheme the writer cell uses for its output files
    path = f"{symbol}-aggs-{date}.pickle.lz4"

    try:
        with open(path, "rb") as handle:
            raw_bytes = handle.read()
        # NOTE(review): unpickling can execute code embedded in the file; only
        # read files that this notebook produced itself.
        records = pickle.loads(lz4.frame.decompress(raw_bytes))
        print(records)
    except FileNotFoundError:
        print(f"No file found for {date}")
        return None
    except Exception as err:
        print(f"An error occurred: {err}")
        return None

    return records


def main():
    """Read and print the stored AAPL data for every weekday of August 2023."""
    symbol = "AAPL"
    first_day = datetime.date(2023, 8, 1)
    last_day = datetime.date(2023, 8, 31)

    # Walk the range one calendar day at a time, skipping Saturday (5) and Sunday (6)
    for offset in range((last_day - first_day).days + 1):
        current = first_day + datetime.timedelta(days=offset)
        if current.weekday() >= 5:
            continue
        read_trades_for_date(symbol, current)


if __name__ == "__main__":
    main()

[Agg(open=196.235, high=196.73, low=195.28, close=195.605, volume=35281426.0, vwap=195.8486, timestamp=1690862400000, transactions=477616, otc=None)]
[Agg(open=195.04, high=195.18, low=191.8507, close=192.58, volume=50388811.0, vwap=192.9395, timestamp=1690948800000, transactions=620582, otc=None)]
[Agg(open=191.57, high=192.37, low=190.69, close=191.17, volume=62243282.0, vwap=191.1312, timestamp=1691035200000, transactions=754703, otc=None)]
[Agg(open=185.52, high=187.38, low=181.92, close=181.99, volume=115956841.0, vwap=184.279, timestamp=1691121600000, transactions=1336765, otc=None)]
[Agg(open=182.13, high=183.13, low=177.35, close=178.85, volume=97105069.0, vwap=179.0119, timestamp=1691380800000, transactions=1159281, otc=None)]
[Agg(open=179.69, high=180.27, low=177.58, close=179.8, volume=67769003.0, vwap=178.9854, timestamp=1691467200000, transactions=709019, otc=None)]
[Agg(open=180.87, high=180.93, low=177.01, close=178.19, volume=60303492.0, vwap=178.4887, timestamp=169155

In [None]:
# Define API key and base URLs.
# The key is read from the POLYGON_API_KEY environment variable so that the
# credential is never stored in the notebook or its saved outputs.
# NOTE(review): a key was previously committed in this cell; it should be rotated.
api_key = os.environ.get("POLYGON_API_KEY")
if not api_key:
    raise RuntimeError("POLYGON_API_KEY environment variable is not set")
base_url_open_close = "https://api.polygon.io/v1/open-close"
base_url_financials = "https://api.polygon.io/vX/reference/financials"

# Read the existing CSV file (semicolon-separated, first row is the header)
existing_file_path = r"C:\Users\SamuliMustonen\Documents\Ready Solutions\Docs\StockTrading\Data\ntnx_data_raw.csv"
existing_data = pd.read_csv(existing_file_path, delimiter=';', header=0)

# Ensure the 'Date' column is in datetime format
# NOTE(review): the last stored date is not derived from this frame; the date
# range below is set by hand.
existing_data['Date'] = pd.to_datetime(existing_data['Date'])

# Define ticker and date range
ticker = "NTNX"
start_date = "2022-10-18"
end_date = "2022-12-31"

# Convert date strings to datetime objects.
# Only the ``datetime`` module is imported, so the class must be addressed as
# ``datetime.datetime``; ``datetime.strptime`` raises AttributeError.
start = datetime.datetime.strptime(start_date, "%Y-%m-%d")
end = datetime.datetime.strptime(end_date, "%Y-%m-%d")

# Initialize an empty list to hold all stock data
all_stock_data = []

In [39]:
# Initialize API call counter
# api_call_count = 0
# # Function to handle API call rate limit
# def check_rate_limit():
#     global api_call_count
#     api_call_count += 1
#     if api_call_count == 5:
#         print("Pausing for 1 minute to respect API rate limit...")
#         time.sleep(60)  # Pause for 1 minute
#         api_call_count = 0

In [40]:
# Function to fetch open/close data for a specific ticker and date
def fetch_stock_data(ticker, date, timeout=30):
    """Fetch the open/close record for one ticker and date.

    Sends a GET request to ``<base_url_open_close>/<ticker>/<date>`` with the API
    key in an ``Authorization: Bearer`` header.

    Args:
        ticker: Symbol placed in the request URL.
        date: Date placed in the request URL (the caller passes "YYYY-MM-DD" strings).
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the notebook indefinitely.

    Returns:
        A dict with the keys Symbol, Date, Close, Open, High, Low and Volume, or
        None when the request fails, the HTTP status is not 200, or the payload's
        "status" field is not "OK" (a message is printed in each case).
    """
    url = f"{base_url_open_close}/{ticker}/{date}"
    headers = {"Authorization": f"Bearer {api_key}"}

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported like any other failed fetch
        print(f"Failed to fetch data for {ticker} on {date}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for {ticker} on {date}: {response.status_code}")
        return None

    data = response.json()
    if data.get("status") != "OK":
        print(f"No trading data for {ticker} on {date}.")
        return None

    return {
        "Symbol": data.get("symbol"),
        "Date": data.get("from"),
        "Close": data.get("close"),
        "Open": data.get("open"),
        "High": data.get("high"),
        "Low": data.get("low"),
        "Volume": data.get("volume"),
    }

In [41]:
# Function to fetch financial report filing dates
def fetch_filing_dates(ticker, timeout=30):
    """Collect the filing dates listed by the financials endpoint for a ticker.

    Requests ``base_url_financials`` for ``ticker`` and keeps following the
    ``next_url`` field of each response until it is absent, gathering every
    non-empty ``filing_date`` found under ``results``.

    Args:
        ticker: Symbol sent as the ``ticker`` query parameter.
        timeout: Seconds to wait for each response before giving up.

    Returns:
        A set of the ``filing_date`` values collected. If a request fails or
        returns a non-200 status, a message is printed and the dates gathered so
        far are returned.
    """
    filing_dates = set()

    # Authenticate with a header on EVERY request. Previously the key was only in
    # the first URL's query string, so follow-up ``next_url`` requests were sent
    # without credentials.
    headers = {"Authorization": f"Bearer {api_key}"}
    url = base_url_financials
    params = {"ticker": ticker}

    while url:
        try:
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Failed to fetch financial data for {ticker}: {exc}")
            break
        # check_rate_limit()  # Check and handle API rate limit

        if response.status_code != 200:
            print(f"Failed to fetch financial data for {ticker}: {response.status_code}")
            break  # Stop the loop if there's an error

        data = response.json()
        for item in data.get('results', []):
            filing_date = item.get('filing_date')
            if filing_date:
                filing_dates.add(filing_date)  # Collect filing dates

        url = data.get('next_url')  # Check if there's a next page to fetch
        params = None  # presumably next_url already carries the query string - confirm

    return filing_dates


# Fetch financial report filing dates for `ticker`.
# The resulting set is used by the per-day loop to set the "financialRelease" flag.
filing_dates = fetch_filing_dates(ticker)

In [None]:
# Loop over each date in the range, one open/close request per calendar day
current_date = start
api_call_count = 0

while current_date <= end:
    date_str = current_date.strftime("%Y-%m-%d")

    # Fetch stock data for the current date
    stock_data = fetch_stock_data(ticker, date_str)
    # NOTE: check_rate_limit() is not called here because its definition is
    # commented out in this notebook; calling it raised NameError.

    if stock_data:
        # Check if a financial report was released on this date
        stock_data["financialRelease"] = 1 if date_str in filing_dates else 0

        all_stock_data.append(stock_data)

    # Increment API call count
    api_call_count += 1

    # After every 5 API calls, pause for 1 minute to respect the API limit
    # if api_call_count == 5:
    #     print("Pausing for 1 minute to respect API rate limit...")
    #     time.sleep(60)
    #     api_call_count = 0

    # Move to the next date. Only the ``datetime`` module is imported, so the
    # step must be spelled ``datetime.timedelta``.
    current_date += datetime.timedelta(days=1)

In [43]:
# Create a DataFrame from the collected data: one row per dict in all_stock_data
# (the frame is empty when nothing was collected)
df = pd.DataFrame(all_stock_data)

In [None]:
# Define the output path for the CSV file
# file_name = '_data'
# output_path = r"C:\Users\SamuliMustonen\Documents\Ready Solutions\Docs\StockTrading\Data\{file_name}.csv"

# Append the DataFrame to the existing CSV file (no header row, ';' separator).
# NOTE(review): no duplicate check is performed here - re-running the fetch for
# dates already in the file will append them again.
if not df.empty:
    df.to_csv(existing_file_path, mode='a', index=False, header=False, sep=';')
    print(f"Data has been appended to {existing_file_path}")
else:
    # Report the no-op honestly instead of claiming that data was appended
    print(f"No new data to append to {existing_file_path}")