<a href="https://colab.research.google.com/github/GuyInFreezer/project-2/blob/Yeong-branch/DataFrame_Construction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Import Dependencies
import requests
import json
import pandas as pd
#from google.colab import userdata
import time
from dotenv import load_dotenv, find_dotenv
import os
# Import API key from secret data
# The key is read from the POLYGON_KEY environment variable, which
# load_dotenv() populates from an 'api.env' file when find_dotenv() locates one.
#api_key = userdata.get('POLYGONKEY')
load_dotenv(find_dotenv('api.env'))

api_key = os.getenv("POLYGON_KEY")
if not api_key:
  # Fail here with an actionable message instead of letting every later
  # download cell fail with an opaque API error.
  raise RuntimeError(
    "POLYGON_KEY is not set. Add POLYGON_KEY=<your key> to api.env "
    "(or export it in the environment) before running this notebook."
  )

# Setup basics for the polygon API
base_url = 'https://api.polygon.io'
# Tickers that every download cell loops over
stock_names = ['SPY', 'QQQ', 'VXX', 'DIA']
# Date range (YYYY-MM-DD) used by the "regular" downloads
start_date = '2022-01-03'
end_date = '2023-03-22'
# Earlier start date used by the "5 days before" downloads
start_date_5d = '2021-12-27'

In [None]:
# ###DF structure###

# Timestamp

# vvvvvvvvvvvvvvvv repeat for each stock vvvvvvvvvvvvvvvvvvv
# 5D Change [Open 5 days ago - Close 1 day ago] / Open 5 days ago
# 3D Change [Open 3 days ago - Close 1 day ago] / Open 3 days ago
# 1D Change [Open 1 day ago - Close 1 day ago] / Open 1 day ago
# Stock Price at 9:35 (Lowest Price)
# Stock Price at 15:45 (Lowest Price)
# Strike - Ceil above
# Covered Call at 9:35 (Lowest Price)
# Covered Call at 15:45 (Highest Price)
# 5D Avg Total Volume
# 3D Avg Total Volume
# 1D Avg Total Volume
# ^^^^^^^^^^^^^^^ repeat for each stock ^^^^^^^^^^^^^^^^^^^^
#
# Net = SPY Closing Stock Price + (SPY CC sold[open] - SPY CC buyback[close])
#     If (SPY Closing Stock Price - SPY Opening Stock Price) is negative, drop the buyback term:
#     Net = SPY Closing Stock Price + SPY CC sold[open]
#     y = Net / SPY Stock Price at 9:35
#
#
# Need 5 DFs
#
# 5 Days before at 1D interval
# 5 Days before at 5M interval
# Regular at 1D interval
# Regular at 5M interval
# Regular Options at 5M interval

In [3]:
# Step 1 - Grab Regular Stock Data at 1D Interval
#
# Downloads daily aggregate bars for every ticker in stock_names between
# start_date and end_date and stacks them into df_stock_1d. Each row is one
# ticker on one date; the 'Stock Name' column tells the tickers apart.

# Retry policy for HTTP 429 (rate limit) responses
MAX_ATTEMPTS = 5
RETRY_WAIT_SECONDS = 15

# Readable names for the single-letter fields in the API payload
column_names = {'v':'Volume', 'vw':'Volume Weighted', 'o':'Open Price', 'c':'Close Price', 'h':'Highest Price', 'l':'Lowest Price', 't':'Timestamp', 'n':'Number of Transaction'}

# Per-ticker frames are collected here and concatenated once after the loop,
# rather than re-copying a growing DF on every iteration
stock_frames = []

for stocksTicker in stock_names:
  # Generate Query URL (the API key is sent separately through `params`)
  query_url = f"{base_url}/v2/aggs/ticker/{stocksTicker}/range/1/day/{start_date}/{end_date}?adjusted=true&sort=asc&limit=50000"

  # Grab JSON. If the API splits the answer into pages it links the next one
  # through `next_url`, so keep requesting until no further page is offered.
  results = []
  page_url = query_url
  while page_url:
    for attempt in range(MAX_ATTEMPTS):
      response = requests.get(page_url, params={'apiKey': api_key}, timeout=30)
      if response.status_code != 429:
        break
      # Rate limited - wait, then retry the same page
      time.sleep(RETRY_WAIT_SECONDS)
    # Surface HTTP errors (bad key, exhausted retries, ...) as a clear exception
    # instead of a KeyError on the error payload
    response.raise_for_status()
    json_data = response.json()
    results.extend(json_data.get('results') or [])
    page_url = json_data.get('next_url')

  # Only build a DF if the result isn't empty
  if results:
    # Convert JSON to Pandas Dataframe and rename columns
    temp_df = pd.json_normalize(results).rename(columns=column_names)
    # Convert millisecond Unix timestamp to a date; the time of day is not
    # needed for daily bars
    temp_df['Timestamp'] = pd.to_datetime(temp_df['Timestamp'], unit='ms').dt.date
    # Add stock name column for visibility
    temp_df['Stock Name'] = stocksTicker

    stock_frames.append(temp_df)

# Stack all tickers; fall back to an empty DF when nothing was returned
df_stock_1d = pd.concat(stock_frames, axis = 0, ignore_index = True) if stock_frames else pd.DataFrame()

# Review DF
df_stock_1d.head(10)

Unnamed: 0,Volume,Volume Weighted,Open Price,Close Price,Highest Price,Lowest Price,Timestamp,Number of Transaction,Stock Name
0,72668233.0,476.527,476.3,477.71,477.85,473.85,2022-01-03,535421,SPY
1,71070678.0,477.8703,479.22,477.55,479.98,475.58,2022-01-04,565655,SPY
2,104494940.0,473.2328,477.16,468.38,477.98,468.2801,2022-01-05,788712,SPY
3,86498500.0,468.4813,467.89,467.94,470.82,465.43,2022-01-06,806488,SPY
4,85111593.0,466.939,467.95,466.09,469.2,464.65,2022-01-07,625067,SPY
5,119361988.0,461.5904,462.7,465.51,465.74,456.5973,2022-01-10,963294,SPY
6,74189562.0,466.4853,465.23,469.75,469.85,462.05,2022-01-11,626792,SPY
7,67602444.0,470.9955,471.59,471.02,473.2,468.94,2022-01-12,632057,SPY
8,91137601.0,467.9368,472.19,464.53,472.88,463.44,2022-01-13,724347,SPY
9,95890948.0,462.9488,461.19,464.72,465.09,459.9,2022-01-14,855904,SPY


In [4]:
# Step 2 - Grab Regular Stock Data at 5M Interval
#
# Same download as Step 1, but with '/range/5/minute' in the query URL and the
# result stored in df_stock_5m. Each row is one ticker's 5-minute bar.

# Retry policy for HTTP 429 (rate limit) responses
MAX_ATTEMPTS = 5
RETRY_WAIT_SECONDS = 15

# Readable names for the single-letter fields in the API payload
column_names = {'v':'Volume', 'vw':'Volume Weighted', 'o':'Open Price', 'c':'Close Price', 'h':'Highest Price', 'l':'Lowest Price', 't':'Timestamp', 'n':'Number of Transaction'}

# Per-ticker frames are collected here and concatenated once after the loop,
# rather than re-copying a growing DF on every iteration
stock_frames = []

for stocksTicker in stock_names:
  # Generate Query URL (the API key is sent separately through `params`)
  query_url = f"{base_url}/v2/aggs/ticker/{stocksTicker}/range/5/minute/{start_date}/{end_date}?adjusted=true&sort=asc&limit=50000"

  # Grab JSON. `limit` caps how much data a single request covers, so a long
  # 5-minute range comes back in several pages linked through `next_url`.
  # Reading only the first page would silently truncate the history.
  results = []
  page_url = query_url
  while page_url:
    for attempt in range(MAX_ATTEMPTS):
      response = requests.get(page_url, params={'apiKey': api_key}, timeout=30)
      if response.status_code != 429:
        break
      # Rate limited - wait, then retry the same page
      time.sleep(RETRY_WAIT_SECONDS)
    # Surface HTTP errors (bad key, exhausted retries, ...) as a clear exception
    # instead of a KeyError on the error payload
    response.raise_for_status()
    json_data = response.json()
    results.extend(json_data.get('results') or [])
    page_url = json_data.get('next_url')

  # Only build a DF if the result isn't empty
  if results:
    # Convert JSON to Pandas Dataframe and rename columns
    temp_df = pd.json_normalize(results).rename(columns=column_names)
    # Convert millisecond Unix timestamp to Pandas Timestamp. The time of day
    # is kept because the intraday bars are selected by time later on.
    # NOTE(review): the values are timezone-naive and presumably UTC (Unix
    # epoch), so 09:00 here is not 09:00 New York time - confirm before
    # picking the 9:35 / 15:45 bars.
    temp_df['Timestamp'] = pd.to_datetime(temp_df['Timestamp'], unit='ms')
    # Add stock name column for visibility
    temp_df['Stock Name'] = stocksTicker

    stock_frames.append(temp_df)

# Stack all tickers; fall back to an empty DF when nothing was returned
df_stock_5m = pd.concat(stock_frames, axis = 0, ignore_index = True) if stock_frames else pd.DataFrame()

# Review DF
df_stock_5m.head(10)


Unnamed: 0,Volume,Volume Weighted,Open Price,Close Price,Highest Price,Lowest Price,Timestamp,Number of Transaction,Stock Name
0,9426.0,476.6581,476.38,476.56,476.92,476.38,2022-01-03 09:00:00,115,SPY
1,1778.0,476.4585,476.48,476.44,476.49,476.44,2022-01-03 09:05:00,37,SPY
2,9767.0,476.7624,476.62,476.83,476.87,476.58,2022-01-03 09:10:00,80,SPY
3,1743.0,476.7269,476.84,476.53,476.84,476.53,2022-01-03 09:15:00,44,SPY
4,1037.0,476.7011,476.65,476.82,476.82,476.62,2022-01-03 09:20:00,21,SPY
5,472.0,476.8251,476.84,476.76,476.85,476.76,2022-01-03 09:25:00,21,SPY
6,16189.0,476.8218,476.81,476.88,476.88,476.73,2022-01-03 09:30:00,67,SPY
7,863.0,476.8243,476.87,476.76,476.87,476.76,2022-01-03 09:35:00,27,SPY
8,6188.0,476.7981,476.78,476.8,476.8,476.78,2022-01-03 09:40:00,30,SPY
9,926.0,476.7946,476.8,476.79,476.8,476.77,2022-01-03 09:45:00,19,SPY


In [5]:
# Step 3 - Grab 5-Days before Stock Data at 1D Interval
#
# Same as the regular 1D download, but the range starts at start_date_5d so
# the frame also contains the sessions that precede start_date. The result is
# stored in df_stock_1d_5d.

# Retry policy for HTTP 429 (rate limit) responses
MAX_ATTEMPTS = 5
RETRY_WAIT_SECONDS = 15

# Readable names for the single-letter fields in the API payload
column_names = {'v':'Volume', 'vw':'Volume Weighted', 'o':'Open Price', 'c':'Close Price', 'h':'Highest Price', 'l':'Lowest Price', 't':'Timestamp', 'n':'Number of Transaction'}

# Per-ticker frames are collected here and concatenated once after the loop,
# rather than re-copying a growing DF on every iteration
stock_frames = []

for stocksTicker in stock_names:
  # Generate Query URL (the API key is sent separately through `params`)
  query_url = f"{base_url}/v2/aggs/ticker/{stocksTicker}/range/1/day/{start_date_5d}/{end_date}?adjusted=true&sort=asc&limit=50000"

  # Grab JSON. If the API splits the answer into pages it links the next one
  # through `next_url`, so keep requesting until no further page is offered.
  results = []
  page_url = query_url
  while page_url:
    for attempt in range(MAX_ATTEMPTS):
      response = requests.get(page_url, params={'apiKey': api_key}, timeout=30)
      if response.status_code != 429:
        break
      # Rate limited - wait, then retry the same page
      time.sleep(RETRY_WAIT_SECONDS)
    # Surface HTTP errors (bad key, exhausted retries, ...) as a clear exception
    # instead of a KeyError on the error payload
    response.raise_for_status()
    json_data = response.json()
    results.extend(json_data.get('results') or [])
    page_url = json_data.get('next_url')

  # Only build a DF if the result isn't empty
  if results:
    # Convert JSON to Pandas Dataframe and rename columns
    temp_df = pd.json_normalize(results).rename(columns=column_names)
    # Convert millisecond Unix timestamp to a date; the time of day is not
    # needed for daily bars
    temp_df['Timestamp'] = pd.to_datetime(temp_df['Timestamp'], unit='ms').dt.date
    # Add stock name column for visibility
    temp_df['Stock Name'] = stocksTicker

    stock_frames.append(temp_df)

# Stack all tickers; fall back to an empty DF when nothing was returned
df_stock_1d_5d = pd.concat(stock_frames, axis = 0, ignore_index = True) if stock_frames else pd.DataFrame()

# Review DF
df_stock_1d_5d.head(10)

Unnamed: 0,Volume,Volume Weighted,Open Price,Close Price,Highest Price,Lowest Price,Timestamp,Number of Transaction,Stock Name
0,56808619.0,475.279,472.06,477.26,477.31,472.01,2021-12-27,380197,SPY
1,46974585.0,477.2276,477.72,476.87,478.81,476.06,2021-12-28,372331,SPY
2,54091464.0,477.2659,476.98,477.48,478.56,475.92,2021-12-29,345712,SPY
3,55329041.0,477.4587,477.93,476.16,479.0,475.67,2021-12-30,353567,SPY
4,64917431.0,475.6196,475.64,474.96,476.86,474.67,2021-12-31,435448,SPY
5,72668233.0,476.527,476.3,477.71,477.85,473.85,2022-01-03,535421,SPY
6,71070678.0,477.8703,479.22,477.55,479.98,475.58,2022-01-04,565655,SPY
7,104494940.0,473.2328,477.16,468.38,477.98,468.2801,2022-01-05,788712,SPY
8,86498500.0,468.4813,467.89,467.94,470.82,465.43,2022-01-06,806488,SPY
9,85111593.0,466.939,467.95,466.09,469.2,464.65,2022-01-07,625067,SPY


In [6]:
# Step 4 - Grab 5-Days before Stock Data at 5M Interval
#
# Same as the 5-days-before 1D download, but with '/range/5/minute' in the
# query URL and the result stored in df_stock_5m_5d.

# Retry policy for HTTP 429 (rate limit) responses
MAX_ATTEMPTS = 5
RETRY_WAIT_SECONDS = 15

# Readable names for the single-letter fields in the API payload
column_names = {'v':'Volume', 'vw':'Volume Weighted', 'o':'Open Price', 'c':'Close Price', 'h':'Highest Price', 'l':'Lowest Price', 't':'Timestamp', 'n':'Number of Transaction'}

# Per-ticker frames are collected here and concatenated once after the loop,
# rather than re-copying a growing DF on every iteration
stock_frames = []

for stocksTicker in stock_names:
  # Generate Query URL (the API key is sent separately through `params`)
  query_url = f"{base_url}/v2/aggs/ticker/{stocksTicker}/range/5/minute/{start_date_5d}/{end_date}?adjusted=true&sort=asc&limit=50000"

  # Grab JSON. `limit` caps how much data a single request covers, so a long
  # 5-minute range comes back in several pages linked through `next_url`.
  # Reading only the first page would silently truncate the history.
  results = []
  page_url = query_url
  while page_url:
    for attempt in range(MAX_ATTEMPTS):
      response = requests.get(page_url, params={'apiKey': api_key}, timeout=30)
      if response.status_code != 429:
        break
      # Rate limited - wait, then retry the same page
      time.sleep(RETRY_WAIT_SECONDS)
    # Surface HTTP errors (bad key, exhausted retries, ...) as a clear exception
    # instead of a KeyError on the error payload
    response.raise_for_status()
    json_data = response.json()
    results.extend(json_data.get('results') or [])
    page_url = json_data.get('next_url')

  # Only build a DF if the result isn't empty
  if results:
    # Convert JSON to Pandas Dataframe and rename columns
    temp_df = pd.json_normalize(results).rename(columns=column_names)
    # Convert millisecond Unix timestamp to Pandas Timestamp. The time of day
    # is kept because the intraday bars are selected by time later on.
    # NOTE(review): the values are timezone-naive and presumably UTC (Unix
    # epoch), so 09:00 here is not 09:00 New York time - confirm before
    # picking the 9:35 / 15:45 bars.
    temp_df['Timestamp'] = pd.to_datetime(temp_df['Timestamp'], unit='ms')
    # Add stock name column for visibility
    temp_df['Stock Name'] = stocksTicker

    stock_frames.append(temp_df)

# Stack all tickers; fall back to an empty DF when nothing was returned
df_stock_5m_5d = pd.concat(stock_frames, axis = 0, ignore_index = True) if stock_frames else pd.DataFrame()

# Review DF
df_stock_5m_5d.head(10)

Unnamed: 0,Volume,Volume Weighted,Open Price,Close Price,Highest Price,Lowest Price,Timestamp,Number of Transaction,Stock Name
0,1792.0,470.9614,471.03,470.82,471.03,470.82,2021-12-27 09:00:00,60,SPY
1,1591.0,471.2771,471.31,471.25,471.31,471.25,2021-12-27 09:10:00,36,SPY
2,524.0,471.092,471.15,471.06,471.15,471.06,2021-12-27 09:15:00,16,SPY
3,429.0,471.0071,470.97,471.02,471.03,470.97,2021-12-27 09:20:00,15,SPY
4,619.0,471.0015,471.0,471.0,471.01,471.0,2021-12-27 09:25:00,17,SPY
5,1644.0,471.0166,471.07,470.9,471.08,470.88,2021-12-27 09:30:00,33,SPY
6,711.0,471.0586,471.01,471.14,471.14,471.01,2021-12-27 09:35:00,19,SPY
7,1348.0,471.1498,471.13,471.17,471.17,471.13,2021-12-27 09:40:00,26,SPY
8,2857.0,471.2719,471.25,471.31,471.31,471.25,2021-12-27 09:45:00,56,SPY
9,118.0,471.449,471.45,471.45,471.45,471.45,2021-12-27 09:50:00,4,SPY


In [7]:
# Step 5 - Begin creating base DF
#
# df_stock_1d stacks every ticker vertically, so each trading date appears once
# per ticker. The base DF is meant to hold one row per Timestamp (the per-stock
# features become columns), so keep each date only once and renumber the rows.
df = df_stock_1d[['Timestamp']].drop_duplicates().reset_index(drop=True)

df.head()

Unnamed: 0,Timestamp
0,2022-01-03
1,2022-01-04
2,2022-01-05
3,2022-01-06
4,2022-01-07
