<a href="https://colab.research.google.com/github/GuyInFreezer/project-2/blob/Yeong-branch/DataFrame_Construction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Import Dependencies
import requests
import json
import pandas as pd
from google.colab import userdata
import time

# Import API key from secret data
# (fetched at runtime through google.colab's `userdata`, so the key itself is never written into the notebook)
api_key = userdata.get('POLYGONKEY')

# Setup basics for the polygon API
# Root of every query URL built in the download cells below
base_url = 'https://api.polygon.io'
# Tickers that the download cells loop over
stock_names = ['SPY', 'QQQ', 'VXX', 'DIA']
# Date range (YYYY-MM-DD) requested for the "regular" data sets
start_date = '2022-01-03'
end_date = '2023-03-22'
# Earlier start date used by the "5-days before" data sets; with this value the result
# begins with five extra daily bars (2021-12-27 through 2021-12-31) ahead of start_date
start_date_5d = '2021-12-27'

In [None]:
# ###DF structure###

# Timestamp

# vvvvvvvvvvvvvvvv repeat for each stock vvvvvvvvvvvvvvvvvvv
# 5D Change [Open 5 days ago - Close 1 day ago] / Open 5 days ago
# 3D Change [Open 3 days ago - Close 1 day ago] / Open 3 days ago
# 1D Change [Open 1 day ago - Close 1 day ago] / Open 1 day ago
# Stock Price at 9:35 (Lowest Price)
# Stock Price at 15:45 (Lowest Price)
# Strike - Ceil above
# Covered Call at 9:35 (Lowest Price)
# Covered Call at 15:45 (Highest Price)
# 5D Avg Total Volume
# 3D Avg Total Volume
# 1D Avg Total Volume
# ^^^^^^^^^^^^^^^ repeat for each stock ^^^^^^^^^^^^^^^^^^^^
#
# Net = SPY Closing Stock Price + (SPY CC sold[open] - SPY CC buyback[close])
#	   If (SPY Closing Stock Price - SPY Opening Stock Price) is negative: Net = SPY Closing Stock Price + SPY CC sold[open]
#	   y = Net / SPY Stock Price at 9:35
#
#
# Need 5 DFs
#
# 5 Days before at 1D interval
# 5 Days before at 5M interval
# Regular at 1D interval
# Regular at 5M interval
# Regular Options at 5M interval

In [11]:
# Step 1 - Grab Regular Stock Data at 1D Interval
#
# For every ticker in stock_names, download the daily aggregate bars between start_date and
# end_date and stack them into one long DataFrame, df_stock_1d (one row per ticker per day).

# Readable names for the short field keys used in each bar of the response
column_names = {'v':'Volume', 'vw':'Volume Weighted', 'o':'Open Price', 'c':'Close Price', 'h':'Highest Price', 'l':'Lowest Price', 't':'Timestamp', 'n':'Number of Transaction'}

# Send the key as a Bearer token instead of an 'apiKey' query parameter, so the key never
# becomes part of a URL that could show up in a traceback or in saved cell output.
request_headers = {'Authorization': f'Bearer {api_key}'}
query_params = {'adjusted': 'true', 'sort': 'asc', 'limit': 50000}

# Collect one DF per ticker and concat once at the end (cheaper than growing a DF inside the
# loop, and avoids concatenating onto an empty DataFrame)
stock_frames_1d = []

for stocksTicker in stock_names:
  # Generate Query URL
  query_url = f"{base_url}/v2/aggs/ticker/{stocksTicker}/range/1/day/{start_date}/{end_date}"

  # Grab JSON. Stop with a clear HTTP error (bad key, rate limit, ...) rather than failing
  # later with a KeyError on a payload that has no result fields.
  response = requests.get(query_url, params=query_params, headers=request_headers, timeout=30)
  response.raise_for_status()
  json_data = response.json()

  # Only append DF if the result isn't empty ('results' may be missing when nothing matched)
  results = json_data.get('results') or []
  if not results:
    continue

  # Convert JSON to Pandas Dataframe
  temp_df = pd.json_normalize(results)
  # Rename columns
  temp_df = temp_df.rename(columns=column_names)
  # Convert millisecond timestamp to Pandas Timestamp. We're only interested in Date so only grab date
  temp_df['Timestamp'] = pd.to_datetime(temp_df['Timestamp'], unit='ms').dt.date # We won't need .dt.date for 5 minute interval one because we actually want the time.
  # Add stock name column for visibility
  temp_df['Stock Name'] = stocksTicker

  stock_frames_1d.append(temp_df)

# Build the main DF; fall back to an empty DF when no ticker returned any data
if stock_frames_1d:
  df_stock_1d = pd.concat(stock_frames_1d, axis = 0, ignore_index = True)
else:
  df_stock_1d = pd.DataFrame()

# Review DF
df_stock_1d.head(10)

Unnamed: 0,Volume,Volume Weighted,Open Price,Close Price,Highest Price,Lowest Price,Timestamp,Number of Transaction,Stock Name
0,72668233.0,476.527,476.3,477.71,477.85,473.85,2022-01-03,535421,SPY
1,71070678.0,477.8703,479.22,477.55,479.98,475.58,2022-01-04,565655,SPY
2,104494940.0,473.2328,477.16,468.38,477.98,468.2801,2022-01-05,788712,SPY
3,86498500.0,468.4813,467.89,467.94,470.82,465.43,2022-01-06,806488,SPY
4,85111593.0,466.939,467.95,466.09,469.2,464.65,2022-01-07,625067,SPY
5,119361988.0,461.5904,462.7,465.51,465.74,456.5973,2022-01-10,963294,SPY
6,74189562.0,466.4853,465.23,469.75,469.85,462.05,2022-01-11,626792,SPY
7,67602444.0,470.9955,471.59,471.02,473.2,468.94,2022-01-12,632057,SPY
8,91137601.0,467.9368,472.19,464.53,472.88,463.44,2022-01-13,724347,SPY
9,95890948.0,462.9488,461.19,464.72,465.09,459.9,2022-01-14,855904,SPY


In [14]:
# Step 2 - Grab Regular Stock Data at 5M Interval (To be done by Mat)

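# To Mat - Do the same thing as Step 1 above, but change '/range/1/day' in query_url to '/range/5/minute'.
# Also, change the DF name from df_stock_1d to df_stock_5m, and drop the '.dt.date' on Timestamp (at 5M we need the time of day, not just the date).
# NOTE: at 5M a single request may not cover the whole date range, because the number of results per request is capped by 'limit'.
#       Check the response for a 'next_url' field and keep requesting it until it is no longer returned (see the Polygon aggregates docs).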

### Start code from here ###


In [13]:
# Step 3 - Grab 5-Days before Stock Data at 1D Interval
#
# Same download as Step 1, but starting at start_date_5d so that the daily bars preceding
# start_date are included. The result is one long DataFrame, df_stock_1d_5d.

# Readable names for the short field keys used in each bar of the response
column_names = {'v':'Volume', 'vw':'Volume Weighted', 'o':'Open Price', 'c':'Close Price', 'h':'Highest Price', 'l':'Lowest Price', 't':'Timestamp', 'n':'Number of Transaction'}

# Send the key as a Bearer token instead of an 'apiKey' query parameter, so the key never
# becomes part of a URL that could show up in a traceback or in saved cell output.
request_headers = {'Authorization': f'Bearer {api_key}'}
query_params = {'adjusted': 'true', 'sort': 'asc', 'limit': 50000}

# Collect one DF per ticker and concat once at the end (cheaper than growing a DF inside the
# loop, and avoids concatenating onto an empty DataFrame)
stock_frames_1d_5d = []

for stocksTicker in stock_names:
  # Generate Query URL
  query_url = f"{base_url}/v2/aggs/ticker/{stocksTicker}/range/1/day/{start_date_5d}/{end_date}"

  # Grab JSON. Stop with a clear HTTP error (bad key, rate limit, ...) rather than failing
  # later with a KeyError on a payload that has no result fields.
  response = requests.get(query_url, params=query_params, headers=request_headers, timeout=30)
  response.raise_for_status()
  json_data = response.json()

  # Only append DF if the result isn't empty ('results' may be missing when nothing matched)
  results = json_data.get('results') or []
  if not results:
    continue

  # Convert JSON to Pandas Dataframe
  temp_df = pd.json_normalize(results)
  # Rename columns
  temp_df = temp_df.rename(columns=column_names)
  # Convert millisecond timestamp to Pandas Timestamp. We're only interested in Date so only grab date
  temp_df['Timestamp'] = pd.to_datetime(temp_df['Timestamp'], unit='ms').dt.date # We won't need .dt.date for 5 minute interval one because we actually want the time.
  # Add stock name column for visibility
  temp_df['Stock Name'] = stocksTicker

  stock_frames_1d_5d.append(temp_df)

# Build the main DF; fall back to an empty DF when no ticker returned any data
if stock_frames_1d_5d:
  df_stock_1d_5d = pd.concat(stock_frames_1d_5d, axis = 0, ignore_index = True)
else:
  df_stock_1d_5d = pd.DataFrame()

# Review DF
df_stock_1d_5d.head(10)

Unnamed: 0,Volume,Volume Weighted,Open Price,Close Price,Highest Price,Lowest Price,Timestamp,Number of Transaction,Stock Name
0,56808619.0,475.279,472.06,477.26,477.31,472.01,2021-12-27,380197,SPY
1,46974585.0,477.2276,477.72,476.87,478.81,476.06,2021-12-28,372331,SPY
2,54091464.0,477.2659,476.98,477.48,478.56,475.92,2021-12-29,345712,SPY
3,55329041.0,477.4587,477.93,476.16,479.0,475.67,2021-12-30,353567,SPY
4,64917431.0,475.6196,475.64,474.96,476.86,474.67,2021-12-31,435448,SPY
5,72668233.0,476.527,476.3,477.71,477.85,473.85,2022-01-03,535421,SPY
6,71070678.0,477.8703,479.22,477.55,479.98,475.58,2022-01-04,565655,SPY
7,104494940.0,473.2328,477.16,468.38,477.98,468.2801,2022-01-05,788712,SPY
8,86498500.0,468.4813,467.89,467.94,470.82,465.43,2022-01-06,806488,SPY
9,85111593.0,466.939,467.95,466.09,469.2,464.65,2022-01-07,625067,SPY


In [None]:
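# Step 4 - Grab 5-Days before Stock Data at 5M Interval (To be done by Mat)

# To Mat - Do the same thing as Step 3 above, but change '/range/1/day' in query_url to '/range/5/minute'.
# Also, change the DF name from df_stock_1d_5d to df_stock_5m_5d, and drop the '.dt.date' on Timestamp (at 5M we need the time of day, not just the date).
# NOTE: at 5M a single request may not cover the whole date range, because the number of results per request is capped by 'limit'.
#       Check the response for a 'next_url' field and keep requesting it until it is no longer returned (see the Polygon aggregates docs).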

### Start code from here ###


In [15]:
# Step 5 - Begin creating base DF

# df_stock_1d is in long format (the rows of every ticker are stacked on top of each other),
# so each trading date appears once per ticker. The base DF needs exactly one row per date,
# so de-duplicate the dates and keep them in chronological order.
df = (
    df_stock_1d[['Timestamp']]
    .drop_duplicates()
    .sort_values('Timestamp')
    .reset_index(drop=True)
)

df.head()

Unnamed: 0,Timestamp
0,2022-01-03
1,2022-01-04
2,2022-01-05
3,2022-01-06
4,2022-01-07
