## Fetching Week's Worth of Common Stock Price Data

In [49]:
# Import dependencies
import requests
import json
import pandas as pd
from dotenv import load_dotenv, find_dotenv
import os
import time

# Load API Key
load_dotenv(find_dotenv('api.env')) # api.env not included in github. Please make separate env file.
api_key = os.getenv("POLYGON_KEY")

base_url = "https://api.polygon.io"

# Names of stock we want to grab
stockNames = ['SPY', 'QQQ', 'VXX', 'DIA']

multiplier = 5
timespan = 'minute' # multiplier + timespan = 5 minutes
t_from = '2024-03-18'
t_to = '2024-03-22'

df = pd.DataFrame() # Empty DF to merge below into

for stocksTicker in stockNames:

    query_url = f"{base_url}/v2/aggs/ticker/{stocksTicker}/range/{multiplier}/{timespan}/{t_from}/{t_to}?adjusted=true&sort=asc&limit=50000&apiKey={api_key}"
    # Get the response
    response = requests.get(query_url)

    # Retrieve reviews
    reviews = response.json()

    # Only append DF if the result isn't empty
    if reviews['resultsCount'] > 0:
        # Convert JSON to Pandas Dataframe
        dft = pd.json_normalize(reviews["results"])
        dft = dft.rename(columns={'v':'Volume', 'vw':'Volume Weighted', 'o':'Open Price', 'c':'Close Price', 'h':'Highest Price', 'l':'Lowest Price', 't':'Timestamp', 'n':'Number of Transaction'})
        dft['Timestamp'] = pd.to_datetime(dft['Timestamp'], unit='ms')
        dft['Stock Name'] = stocksTicker

        df = pd.concat([df, dft], axis = 0, ignore_index= True)


In [50]:
# Grab data from 13:00 - 19:55 UTC only
testdf = df.copy()
testdf.set_index('Timestamp', inplace=True)
testdf = testdf.between_time('13:00:00', '19:55:00')
testdf = testdf.reset_index()
testdf

Unnamed: 0,Timestamp,Volume,Volume Weighted,Open Price,Close Price,Highest Price,Lowest Price,Number of Transaction,Stock Name
0,2024-03-18 13:00:00,39717.0,513.5795,513.52,513.57,513.66,513.52,308,SPY
1,2024-03-18 13:05:00,57825.0,513.5411,513.55,513.59,513.61,513.49,228,SPY
2,2024-03-18 13:10:00,7746.0,513.6786,513.59,513.69,513.74,513.59,158,SPY
3,2024-03-18 13:15:00,23093.0,513.7971,513.69,513.65,513.84,513.65,307,SPY
4,2024-03-18 13:20:00,11279.0,513.6390,513.68,513.65,513.70,513.57,214,SPY
...,...,...,...,...,...,...,...,...,...
1668,2024-03-22 19:35:00,33612.0,395.1014,395.01,395.10,395.16,395.00,505,DIA
1669,2024-03-22 19:40:00,33958.0,395.0963,395.10,395.18,395.23,394.99,657,DIA
1670,2024-03-22 19:45:00,48819.0,395.1423,395.16,395.22,395.22,395.08,756,DIA
1671,2024-03-22 19:50:00,110974.0,395.2337,395.24,395.14,395.31,395.13,1344,DIA


In [51]:
#Save data to CSV
testdf.to_csv('../Data/stocks-agg-03-18-03-22.csv', date_format='%Y-%m-%d %H:%M:%S', index=False)

In [73]:
# Test CSV Import

testdf = pd.read_csv('../Data/stocks-agg-03-18-03-22.csv', parse_dates=['Timestamp'])
testdf

Unnamed: 0,Timestamp,Volume,Volume Weighted,Open Price,Close Price,Highest Price,Lowest Price,Number of Transaction,Stock Name
0,2024-03-18 13:00:00,39717.0,513.5795,513.52,513.57,513.66,513.52,308,SPY
1,2024-03-18 13:05:00,57825.0,513.5411,513.55,513.59,513.61,513.49,228,SPY
2,2024-03-18 13:10:00,7746.0,513.6786,513.59,513.69,513.74,513.59,158,SPY
3,2024-03-18 13:15:00,23093.0,513.7971,513.69,513.65,513.84,513.65,307,SPY
4,2024-03-18 13:20:00,11279.0,513.6390,513.68,513.65,513.70,513.57,214,SPY
...,...,...,...,...,...,...,...,...,...
1668,2024-03-22 19:35:00,33612.0,395.1014,395.01,395.10,395.16,395.00,505,DIA
1669,2024-03-22 19:40:00,33958.0,395.0963,395.10,395.18,395.23,394.99,657,DIA
1670,2024-03-22 19:45:00,48819.0,395.1423,395.16,395.22,395.22,395.08,756,DIA
1671,2024-03-22 19:50:00,110974.0,395.2337,395.24,395.14,395.31,395.13,1344,DIA


In [53]:
# Grab Average Close Price for last week's stock, rounded up to nearest integer
from math import ceil
testdf.groupby(by='Stock Name')['Close Price'].mean().apply(lambda x: ceil(x))

Stock Name
DIA    393
QQQ    442
SPY    518
VXX     14
Name: Close Price, dtype: int64

## Generate Options Call DataFrame

In [86]:
# Dic of stock name and avg price from above
stock_dic = {'SPY':518, 'QQQ':442, 'VXX':14, 'DIA':393}

# Empty DF to concat on
df = pd.DataFrame()

# For loop from dictionary
for i, (stocksTicker, price) in enumerate(stock_dic.items()):
    for od in range(240318, 240323): # Dates range, Change as necessary
        for strk in range (price - 5, price + 6): # Strike range
            odate = str(od)
            strike = str(strk)
            optionsTicker = f"O:{stocksTicker}{odate}C00{strike}000" # This assumes strike is 3-digit. remove a 0 in front if it's 4-digit
            # Because if it's below 100 it's a digit change
            if price < 100:
                optionsTicker = f"O:{stocksTicker}{odate}C000{strike}000"
            multiplier = 5
            timespan = 'minute' # multiplier + timespan = 5 minutes
            test = f"20{odate[:2]}-{odate[2:4]}-{odate[4:]}" # Strike timestamp

            query_url = f"{base_url}/v2/aggs/ticker/{optionsTicker}/range/{multiplier}/{timespan}/{test}/{test}?adjusted=true&sort=asc&limit=50000&apiKey={api_key}"
            # Get the response
            response = requests.get(query_url)

            # Retrieve reviews
            reviews = response.json()
            print(reviews)

            # Err catching
            if reviews['resultsCount'] > 0:
                # Convert JSON to Pandas Dataframe
                reviews_df = pd.json_normalize(reviews["results"])
                reviews_df = reviews_df.rename(columns={'v':'Volume', 'vw':'Volume Weighted', 'o':'Open Price', 'c':'Close Price', 'h':'Highest Price', 'l':'Lowest Price', 't':'Timestamp', 'n':'Number of Transaction'})
                reviews_df['Timestamp'] = pd.to_datetime(reviews_df['Timestamp'], unit='ms')
                reviews_df['Stock Name'] = stocksTicker
                reviews_df['Strike'] = strk
                reviews_df['Strike Date'] = test

                # Concat
                df = pd.concat([df, reviews_df], ignore_index= True, axis = 0)

{'ticker': 'O:SPY240318C000513000', 'queryCount': 0, 'resultsCount': 0, 'adjusted': True, 'status': 'OK', 'request_id': 'ed75f5f462495a1a553939e2ed2a2ccc'}
{'ticker': 'O:SPY240318C000514000', 'queryCount': 0, 'resultsCount': 0, 'adjusted': True, 'status': 'OK', 'request_id': '22cfaee1984bdae82686c6d0708c86bf'}
{'ticker': 'O:SPY240318C000515000', 'queryCount': 0, 'resultsCount': 0, 'adjusted': True, 'status': 'OK', 'request_id': 'cfec17a5fb1069b8c5751ed13569920e'}
{'ticker': 'O:SPY240318C000516000', 'queryCount': 0, 'resultsCount': 0, 'adjusted': True, 'status': 'OK', 'request_id': 'bbda02782c29b6807e2f3102b701af55'}
{'ticker': 'O:SPY240318C000517000', 'queryCount': 0, 'resultsCount': 0, 'adjusted': True, 'status': 'OK', 'request_id': 'c4c5b9cf5d98c867330f9c4398c2a075'}
{'ticker': 'O:SPY240318C000518000', 'queryCount': 0, 'resultsCount': 0, 'adjusted': True, 'status': 'OK', 'request_id': 'd33aa3878a66ac731252199e193018c2'}
{'ticker': 'O:SPY240318C000519000', 'queryCount': 0, 'resultsCou

KeyboardInterrupt: 

In [85]:
df

In [84]:
def add_stock_price(row):
    time = row['Timestamp']
    sname = row['Stock Name']
    return testdf.loc[(testdf['Timestamp'] == time) & (testdf['Stock Name'] == sname)]['Close Price'].iloc[0]

tdf = df.copy()
tdf['Stock Close Price'] = tdf.apply(add_stock_price, axis = 1)

tdf

ValueError: Cannot set a DataFrame with multiple columns to the single column Stock Close Price

In [80]:
tdf['Stock Name'].value_counts()

Stock Name
QQQ    3892
SPY    3867
DIA     282
Name: count, dtype: int64

In [78]:
#Save data to CSV
tdf.to_csv('../Data/options-agg-03-18-03-22.csv', date_format='%Y-%m-%d %H:%M:%S', index=False)

In [79]:
# Test CSV Import

t2df = pd.read_csv('../Data/options-agg-03-18-03-22.csv', parse_dates=['Timestamp'])
t2df

Unnamed: 0,Volume,Volume Weighted,Open Price,Close Price,Highest Price,Lowest Price,Timestamp,Number of Transaction,Stock Name,Strike,Stock Close Price
0,6879,1.9704,1.76,2.27,2.28,1.69,2024-03-18 13:30:00,1170,SPY,513,514.7100
1,2164,2.1936,2.28,1.98,2.35,1.97,2024-03-18 13:35:00,341,SPY,513,514.2700
2,1644,2.1707,1.99,2.04,2.29,1.99,2024-03-18 13:40:00,190,SPY,513,514.3800
3,858,2.0008,2.10,1.87,2.23,1.81,2024-03-18 13:45:00,184,SPY,513,514.1900
4,655,1.8321,1.94,1.80,1.98,1.73,2024-03-18 13:50:00,117,SPY,513,514.1200
...,...,...,...,...,...,...,...,...,...,...,...
8036,8,0.0200,0.02,0.02,0.02,0.02,2024-03-22 18:20:00,2,DIA,398,395.8200
8037,5,0.0100,0.01,0.01,0.01,0.01,2024-03-22 18:25:00,1,DIA,398,396.0300
8038,1,0.0100,0.01,0.01,0.01,0.01,2024-03-22 18:45:00,1,DIA,398,395.5900
8039,1,0.0100,0.01,0.01,0.01,0.01,2024-03-22 18:50:00,1,DIA,398,395.6600


In [54]:
# Analysis - Checking Max values
print(f"Max values for Volume: {testdf['Volume'].max()}")
print(f"Max values for Volume Weighted: {testdf['Volume Weighted'].max()}")
print(f"Max values for Open Price: {testdf['Open Price'].max()}")
print(f"Max values for Close Price: {testdf['Close Price'].max()}")
print(f"Max values for Highest Price: {testdf['Highest Price'].max()}")
print(f"Max values for Lowest Price: {testdf['Lowest Price'].max()}")
print(f"Max values for Number of Transaction: {testdf['Number of Transaction'].max()}")

# Looking at the result, standardizing of data will be needed!

Max values for Volume: 10364121.0
Max values for Volume Weighted: 523.9702
Max values for Open Price: 524.0
Max values for Close Price: 524.01
Max values for Highest Price: 524.11
Max values for Lowest Price: 523.81
Max values for Number of Transaction: 42817
