Imports

In [1]:
import boto3
import json

This initializes our Cloudflare R2 client (accessed through boto3's S3 interface)

In [2]:
# Load the R2 key file; the values we need sit on fixed lines of it.
with open('ossig_r2_pull_keys') as f:
    data = f.read().splitlines()

# 0-based line positions inside the key file:
#   4 -> access key id, 7 -> secret access key, 10 -> S3 endpoint URL
access_key_id, secret_access_key, s3_endpoint = data[4], data[7], data[10]

# Build the S3 resource, pointed at the endpoint read from the key file.
s3 = boto3.resource(
    "s3",
    region_name="enam",  # Must be one of: wnam, enam, weur, eeur, apac, auto
    endpoint_url=s3_endpoint,
    aws_access_key_id=access_key_id,
    aws_secret_access_key=secret_access_key,
)

# Handle to the bucket that the following cells read from.
bucket = s3.Bucket('ossig-stock-data')

Here we pull our object from the bucket and convert it back to a dict
- **Warning: Use this code as a single block; putting other code in between these lines may cause the downloaded object to be erased from memory**

In [3]:
# Fetch the stored object; the payload is exposed under the response's 'Body' entry.
pulled_json_bytes = bucket.Object("polygon-30m/NVDA/2025-03").get()

# Read the body immediately and turn its UTF-8 bytes into text.
decoded_json = str(pulled_json_bytes['Body'].read(), 'utf-8')

# Parse the JSON text back into a Python dict.
decoded_dict = json.loads(decoded_json)

## Dict Structure
- The loaded dict is for a single month (as you can see in the object's name)
    - The keys of this dictionary are the day values, **except for the last one, which is the 'complete' entry. It indicates whether or not the dict has the complete month's worth of data, and it should be skipped when iterating over the keys.**
    - The values of this dict are sub-dictionaries
        - The sub-dictionaries have 3 different keys each: 'pre-market', 'regular-market', and 'after-hours'. These correspond to what you think they do.
        - Each value of these sub-dictionaries is a list
            - This is a list of all the aggregate bars for each 30-minute timestamp of that section of market hours
                - Each element of this list is a dict
                - This dict contains
                    - ('v') the volume,
                    - ('vw') the volume-weighted average price,
                    - ('o') the open price,
                    - ('c') the close price,
                    - ('h') the highest price within this period,
                    - ('l') the lowest price within this period,
                    - ('t') the Unix millisecond timestamp for the start of the aggregate window,
                    - ('n') the number of transactions in the aggregate window

In [9]:
# Show the parsed dict as this cell's output (bare last expression).
# NOTE(review): this renders the entire dict; previewing a single day's
# entry may be easier to read — confirm before changing.
decoded_dict

{'2024-09-03': {'pre-market': [{'v': 729581,
    'vw': 118.2175,
    'o': 119.23,
    'c': 117.69,
    'h': 119.5,
    'l': 117.48,
    't': 1725350400000,
    'n': 11709},
   {'v': 602858,
    'vw': 117.3667,
    'o': 117.69,
    'c': 116.96,
    'h': 117.86,
    'l': 116.86,
    't': 1725352200000,
    'n': 9100},
   {'v': 687587,
    'vw': 116.7938,
    'o': 116.92,
    'c': 116.79,
    'h': 117.2,
    'l': 116.48,
    't': 1725354000000,
    'n': 8744},
   {'v': 405542,
    'vw': 116.7755,
    'o': 116.78,
    'c': 116.66,
    'h': 117.05,
    'l': 116.52,
    't': 1725355800000,
    'n': 5224},
   {'v': 384066,
    'vw': 116.7495,
    'o': 116.61,
    'c': 117.04,
    'h': 117.12,
    'l': 116.4,
    't': 1725357600000,
    'n': 5643},
   {'v': 264672,
    'vw': 117.018,
    'o': 117.04,
    'c': 117.21,
    'h': 117.26,
    'l': 116.66,
    't': 1725359400000,
    'n': 3821},
   {'v': 911254,
    'vw': 116.8836,
    'o': 117.25,
    'c': 116.96,
    'h': 117.4,
    'l': 116.52,
 