In [39]:
import requests
import httpx
import pandas as pd
import asyncio
import time
from tqdm.auto import tqdm

In [21]:
import os

# Root of the Polygon REST API; request paths are appended to this.
baseurl = "https://api.polygon.io"

# Read the API key from the environment so a real key never has to be saved in
# the notebook. The "xxxxx" placeholder is only used when POLYGON_API_KEY is
# not set.
api_key = os.environ.get("POLYGON_API_KEY", "xxxxx")

# Months to download, as a half-open range: start_month is included and
# end_month is excluded (nothing at or after end_month is requested).
start_month = pd.Timestamp("2018-01-01")
end_month = pd.Timestamp("2022-01-01")  # add pd.DateOffset(months=1) to also include this month

# Output directory for the per-symbol CSV files; exist_ok makes re-running
# this cell safe.
os.makedirs('data', exist_ok=True)


In [17]:
# Collect the first-of-month timestamps from start_month up to, but not
# including, end_month. Every entry is derived from start_month plus a whole
# number of months rather than from the previous entry.
all_months = []
counter = 0
while True:
    next_month = start_month + pd.DateOffset(months=counter)
    if not next_month < end_month:
        break
    all_months.append(next_month)
    counter += 1

all_months

[Timestamp('2018-01-01 00:00:00'),
 Timestamp('2018-02-01 00:00:00'),
 Timestamp('2018-03-01 00:00:00'),
 Timestamp('2018-04-01 00:00:00'),
 Timestamp('2018-05-01 00:00:00'),
 Timestamp('2018-06-01 00:00:00'),
 Timestamp('2018-07-01 00:00:00'),
 Timestamp('2018-08-01 00:00:00'),
 Timestamp('2018-09-01 00:00:00'),
 Timestamp('2018-10-01 00:00:00'),
 Timestamp('2018-11-01 00:00:00'),
 Timestamp('2018-12-01 00:00:00'),
 Timestamp('2019-01-01 00:00:00'),
 Timestamp('2019-02-01 00:00:00'),
 Timestamp('2019-03-01 00:00:00'),
 Timestamp('2019-04-01 00:00:00'),
 Timestamp('2019-05-01 00:00:00'),
 Timestamp('2019-06-01 00:00:00'),
 Timestamp('2019-07-01 00:00:00'),
 Timestamp('2019-08-01 00:00:00'),
 Timestamp('2019-09-01 00:00:00'),
 Timestamp('2019-10-01 00:00:00'),
 Timestamp('2019-11-01 00:00:00'),
 Timestamp('2019-12-01 00:00:00'),
 Timestamp('2020-01-01 00:00:00'),
 Timestamp('2020-02-01 00:00:00'),
 Timestamp('2020-03-01 00:00:00'),
 Timestamp('2020-04-01 00:00:00'),
 Timestamp('2020-05-

In [23]:
async def download_month_async(sym, month):
    """
    Download one month of 1-minute bars for `sym` from the Polygon aggregates
    endpoint and append them to data/{sym}.csv.

    Parameters
    ----------
    sym : str
        Ticker symbol; used in the request URL and in the CSV file name.
    month : pandas.Timestamp
        Start of the range to download; the range ends one month later.

    Returns
    -------
    None
        Rows are written to disk. Nothing is written when the response
        reports no results for the range.

    Raises
    ------
    httpx.HTTPStatusError
        If the API answers with a 4xx/5xx status.
    """
    # Polygon's short result keys -> descriptive CSV column names. Mapping by
    # key (instead of assigning names by position) keeps every value under the
    # right header even when a bar omits a field or the key order differs.
    column_names = {
        "v": "volume",
        "vw": "volume_weighted_price",
        "o": "open",
        "c": "close",
        "h": "high",
        "l": "low",
        "t": "timestamp",
        "n": "trades",
        "otc": "otc",
    }

    # Millisecond timestamps. The end of the range is inclusive, so stop 1 ms
    # before the following month begins.
    start_timestamp = int(month.timestamp()) * 1000
    end_timestamp = int((month + pd.DateOffset(months=1)).timestamp()) * 1000 - 1

    multiplier = 1
    timespan = "minute"

    req_url = baseurl + f"/v2/aggs/ticker/{sym}/range/{multiplier}/{timespan}/{start_timestamp}/{end_timestamp}"

    # NOTE(review): only a single response is read; if the API paginates the
    # range (signalled by a `next_url` field), later pages are not fetched.
    params = {
        "limit":50000,
        "apiKey":api_key,
    }

    async with httpx.AsyncClient() as client:
        response = await client.get(req_url, params=params)

    # Surface HTTP errors (bad key, rate limit, ...) as such, rather than as a
    # KeyError on a missing "resultsCount" further down.
    response.raise_for_status()
    payload = response.json()

    # If no data in range, then just return
    results = payload.get("results")
    if not payload.get("resultsCount") or not results:
        return

    bars = pd.DataFrame(results)

    if "otc" not in bars:
        bars["otc"] = False

    # Fix the column set and order so every append to the CSV has the same
    # schema; a key missing from the response becomes an empty column.
    bars = bars.reindex(columns=list(column_names)).rename(columns=column_names)

    # Append to the symbol's file, writing the header only when creating it.
    csv_path = f"data/{sym}.csv"
    bars.to_csv(csv_path, index=False, header=not os.path.exists(csv_path), mode="a")


In [38]:
async def worker(queue):
    """
    Pull jobs off `queue` forever and run download_month_async on each one.

    Each job is the argument tuple for download_month_async. A failing job is
    reported with print() and does not stop the worker; the queue is told the
    job is done either way. The loop never returns on its own, so the task
    running it has to be cancelled by whoever created it.
    """
    while True:
        job = await queue.get()
        print("Assigned job:",job)

        try:
            await download_month_async(*job)
        except Exception as err:
            # Best-effort: log the failed job and carry on with the next one.
            print(job, err)
        finally:
            # Always acknowledge the job so queue.join() can complete.
            queue.task_done()

In [35]:
async def main(num_workers=10):
    """
    Download every (symbol, month) combination using a pool of worker tasks.

    One job per pair from the module-level `syms` and `all_months` is queued,
    `num_workers` worker() tasks drain the queue concurrently, and the workers
    are cancelled once every job has been marked done.

    Parameters
    ----------
    num_workers : int, default 10
        Number of concurrent worker tasks. Lower it to make fewer requests at
        the same time.

    Raises
    ------
    ValueError
        If num_workers is less than 1 (with no workers the queue would never
        drain and the call would wait forever).
    """
    if num_workers < 1:
        raise ValueError("num_workers must be at least 1")

    queue = asyncio.Queue()

    for sym in syms:
        for month in all_months:
            queue.put_nowait((sym, month))

    tasks = [asyncio.create_task(worker(queue)) for _ in range(num_workers)]

    # Returns once task_done() has been called for every queued job.
    await queue.join()

    # The workers loop forever, so stop them explicitly...
    for task in tasks:
        task.cancel()

    # ...and wait for the cancellations to finish without re-raising them.
    await asyncio.gather(*tasks, return_exceptions = True)
            
    

In [40]:
# Symbols to download; main() reads this module-level list when it fills the job queue.
syms = ["AAPL","TSLA","GOOG","META"]
# Top-level await: the notebook already has an event loop running, so the
# coroutine is awaited directly instead of being passed to asyncio.run().
await main()

Assigned job: ('AAPL', Timestamp('2018-01-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2018-02-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2018-03-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2018-04-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2018-05-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2018-06-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2018-07-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2018-08-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2018-09-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2018-10-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2018-11-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2018-12-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2019-01-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2019-02-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2019-03-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2019-04-01 00:00:00'))
Assigned job: ('AAPL', Timestamp('2019-05-01 00:00:00'))
Assigned job: ('AAPL', Timestam

Assigned job: ('META', Timestamp('2018-01-01 00:00:00'))
Assigned job: ('META', Timestamp('2018-02-01 00:00:00'))
Assigned job: ('META', Timestamp('2018-03-01 00:00:00'))
Assigned job: ('META', Timestamp('2018-04-01 00:00:00'))
Assigned job: ('META', Timestamp('2018-05-01 00:00:00'))
Assigned job: ('META', Timestamp('2018-06-01 00:00:00'))
Assigned job: ('META', Timestamp('2018-07-01 00:00:00'))
Assigned job: ('META', Timestamp('2018-08-01 00:00:00'))
Assigned job: ('META', Timestamp('2018-09-01 00:00:00'))
Assigned job: ('META', Timestamp('2018-10-01 00:00:00'))
Assigned job: ('META', Timestamp('2018-11-01 00:00:00'))
Assigned job: ('META', Timestamp('2018-12-01 00:00:00'))
Assigned job: ('META', Timestamp('2019-01-01 00:00:00'))
Assigned job: ('META', Timestamp('2019-02-01 00:00:00'))
Assigned job: ('META', Timestamp('2019-03-01 00:00:00'))
Assigned job: ('META', Timestamp('2019-04-01 00:00:00'))
Assigned job: ('META', Timestamp('2019-05-01 00:00:00'))
Assigned job: ('META', Timestam

In [27]:
# NOTE: inside this notebook the call below fails with
# "RuntimeError: asyncio.run() cannot be called from a running event loop".
# In the notebook, run the download with `await main()` instead.
asyncio.run(main())

RuntimeError: asyncio.run() cannot be called from a running event loop

In [22]:
# Time a sequential, one-request-at-a-time download of every month for one
# symbol (presumably as a baseline to compare the async version against).
# NOTE(review): download_month_sync is not defined in the visible part of this
# notebook; confirm the cell that defines it runs before this one.
syms = ["AAPL"]

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()
for sym in syms:
    for month in all_months:
        print(sym, month)
        download_month_sync(sym, month)
print("time elapsed:", time.perf_counter() - start_time)

AAPL 2018-01-01 00:00:00
AAPL 2018-02-01 00:00:00
AAPL 2018-03-01 00:00:00
AAPL 2018-04-01 00:00:00
AAPL 2018-05-01 00:00:00
AAPL 2018-06-01 00:00:00
AAPL 2018-07-01 00:00:00
AAPL 2018-08-01 00:00:00
AAPL 2018-09-01 00:00:00
AAPL 2018-10-01 00:00:00
AAPL 2018-11-01 00:00:00
AAPL 2018-12-01 00:00:00
AAPL 2019-01-01 00:00:00
AAPL 2019-02-01 00:00:00
AAPL 2019-03-01 00:00:00
AAPL 2019-04-01 00:00:00
AAPL 2019-05-01 00:00:00
AAPL 2019-06-01 00:00:00
AAPL 2019-07-01 00:00:00
AAPL 2019-08-01 00:00:00
AAPL 2019-09-01 00:00:00
AAPL 2019-10-01 00:00:00
AAPL 2019-11-01 00:00:00
AAPL 2019-12-01 00:00:00
AAPL 2020-01-01 00:00:00
AAPL 2020-02-01 00:00:00
AAPL 2020-03-01 00:00:00
AAPL 2020-04-01 00:00:00
AAPL 2020-05-01 00:00:00
AAPL 2020-06-01 00:00:00
AAPL 2020-07-01 00:00:00
AAPL 2020-08-01 00:00:00
AAPL 2020-09-01 00:00:00
AAPL 2020-10-01 00:00:00
AAPL 2020-11-01 00:00:00
AAPL 2020-12-01 00:00:00
AAPL 2021-01-01 00:00:00
AAPL 2021-02-01 00:00:00
AAPL 2021-03-01 00:00:00
AAPL 2021-04-01 00:00:00
