# libraries 

In [1]:
import requests #The requests library for HTTP requests in Python
import pandas as pd
from secrets import API_KEY_POLYGON
import aiohttp
import pytz
import time
from datetime import datetime, timedelta
import os 
import io
import logging




In [2]:

# Function to initialize the logging file
def init_logging():
    """Set up file logging under ``log/`` and publish the working timezone.

    Side effects:
        * binds the module-level global ``tz`` to the Europe/Berlin timezone;
        * creates the ``log`` directory when it does not exist yet;
        * configures the root logger (level INFO) to write to
          ``log/<YYYYMMDD>.log``, where the date is "today" in ``tz``.

    Returns:
        None.
    """
    global tz
    tz = pytz.timezone('Europe/Berlin')

    log_dir = 'log'
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # One log file per calendar day, named after the current date in ``tz``.
    stamp = datetime.now(tz).strftime('%Y%m%d')
    logging.basicConfig(filename=f'{log_dir}/{stamp}.log', level=logging.INFO)


# Get all the tickers

In [3]:
def return_commonstock_tickers(token):
    """Download all common-stock ticker records from Polygon's reference API.

    Pages are fetched one after another by following the ``next_url`` field
    of each response until a response carries no (or an empty) ``next_url``.

    Args:
        token: Polygon API key, sent as the ``apiKey`` query parameter.

    Returns:
        A list with the entries found under ``results`` on every page, or
        ``None`` when a request fails, a response body is not valid JSON, or
        a page has no ``results`` key.
    """
    url = 'https://api.polygon.io/v3/reference/tickers'

    parameters = {
        'apiKey': token,  # your API key
        'type': 'CS',  # query common stocks
        'market': 'stocks',
        'limit': 1000,  # extract max data possible
    }

    tickers_list = []
    try:
        while url:
            tickers_json = requests.get(url, parameters).json()
            tickers_list.extend(tickers_json['results'])
            # The last page has no ``next_url``; ``.get`` ends the loop
            # instead of raising KeyError and discarding everything fetched.
            url = tickers_json.get('next_url')
    except (requests.RequestException, ValueError, KeyError) as exc:
        # Log only the exception type: request errors can embed the full URL,
        # which contains the API key.
        logging.error('Ticker download failed: %s', type(exc).__name__)
        return None

    return tickers_list

In [4]:

# Fetch the ticker list once at cell level; ``tickers`` is read later by main().
# NOTE: return_commonstock_tickers() returns None on failure, so ``tickers``
# may be None here.
token = API_KEY_POLYGON
tickers = return_commonstock_tickers(token)


In [5]:

def write_csv(data):
    """Print a summary of one decoded API response.

    Despite its name, this function writes nothing to disk; it only prints.

    Args:
        data: Mapping with a ``status`` key. For status ``'NOT_FOUND'`` the
            ``message`` entry is printed; for status ``'OK'`` the whole
            mapping is printed. Any other status prints nothing.

    Returns:
        None.
    """
    outcome = data['status']

    # Statuses other than these two are ignored silently.
    if outcome not in ('NOT_FOUND', 'OK'):
        return

    print('status: ', outcome)
    # NOT_FOUND: an empty result came back; show the API's explanation.
    # OK: the requested record came back; show it as-is.
    print(data['message'] if outcome == 'NOT_FOUND' else data)

# Function to get a list of all dates that are to be downloaded


In [6]:

# Function to get a list of all dates that are to be downloaded
def get_dates():
    """Return the weekday dates that still have to be downloaded.

    The window starts 730 days before today. If ``output/`` already holds
    date folders (``output/<year>/<week folder>/<YYYY-MM-DD...>``), the window
    instead starts the day after the newest one found in the newest year
    folder, but never earlier than the 730-day limit. Today itself is excluded.

    Returns:
        A list of ``'YYYY-MM-DD'`` strings in ascending order, Saturdays and
        Sundays removed. Empty when everything up to yesterday is present.
    """
    # Earliest date to request: today minus the look-back window.
    date_today = datetime.today().date()
    api_date = date_today - timedelta(days=730)  # how far back we want to look

    # Find the last date for which a download folder exists.
    try:
        year = max(
            name for name in os.listdir('output/') if not name.startswith('.')
        )
        week_folders = [
            name
            for name in os.listdir('output/{}'.format(year))
            if not name.startswith('.')
        ]

        folder_date = []
        for week in week_folders:
            folder_date += [
                name
                for name in os.listdir('output/{}/{}'.format(year, week))
                if not name.startswith('.')
            ]

        # 'YYYY-MM-DD' is 10 characters long; slicing only 8 made strptime
        # fail every time, so the resume point was never used.
        last_date = datetime.strptime(max(folder_date)[0:10], '%Y-%m-%d').date()

        start_date = max(api_date, last_date + timedelta(days=1))
    except (OSError, ValueError):
        # No output tree yet, no (parsable) date folder, or an unreadable
        # entry: fall back to the full look-back window.
        start_date = api_date

    # Todo dates with weekends filtered out (isoweekday: Mon=1 ... Sun=7).
    span = (date_today - start_date).days
    dates = [start_date + timedelta(days=offset) for offset in range(span)]
    todo_dates = [day.strftime('%Y-%m-%d') for day in dates if day.isoweekday() <= 5]
    return todo_dates

# Notebook preview: display the list of dates that would be downloaded.
get_dates()


['2020-01-29',
 '2020-01-30',
 '2020-01-31',
 '2020-02-03',
 '2020-02-04',
 '2020-02-05',
 '2020-02-06',
 '2020-02-07',
 '2020-02-10',
 '2020-02-11',
 '2020-02-12',
 '2020-02-13',
 '2020-02-14',
 '2020-02-17',
 '2020-02-18',
 '2020-02-19',
 '2020-02-20',
 '2020-02-21',
 '2020-02-24',
 '2020-02-25',
 '2020-02-26',
 '2020-02-27',
 '2020-02-28',
 '2020-03-02',
 '2020-03-03',
 '2020-03-04',
 '2020-03-05',
 '2020-03-06',
 '2020-03-09',
 '2020-03-10',
 '2020-03-11',
 '2020-03-12',
 '2020-03-13',
 '2020-03-16',
 '2020-03-17',
 '2020-03-18',
 '2020-03-19',
 '2020-03-20',
 '2020-03-23',
 '2020-03-24',
 '2020-03-25',
 '2020-03-26',
 '2020-03-27',
 '2020-03-30',
 '2020-03-31',
 '2020-04-01',
 '2020-04-02',
 '2020-04-03',
 '2020-04-06',
 '2020-04-07',
 '2020-04-08',
 '2020-04-09',
 '2020-04-10',
 '2020-04-13',
 '2020-04-14',
 '2020-04-15',
 '2020-04-16',
 '2020-04-17',
 '2020-04-20',
 '2020-04-21',
 '2020-04-22',
 '2020-04-23',
 '2020-04-24',
 '2020-04-27',
 '2020-04-28',
 '2020-04-29',
 '2020-04-

# Function to get and create a path for the current date 

In [7]:

# Function to get and create a path for the current date
def get_path(date_str):
    """Return the output folder for a date, creating it when necessary.

    The folder is ``output/<year>/<year>-CW<week>/<date_str>`` and always
    contains the sub-folders ``NONE``, ``ERROR`` and ``DONE`` after the call.

    Args:
        date_str: Date formatted as ``'YYYY-MM-DD'``.

    Returns:
        The folder path as a string (forward-slash separated, relative to the
        current working directory).

    Raises:
        ValueError: If ``date_str`` does not match ``'%Y-%m-%d'``.
    """
    # Extract year and calendar week (CW) from the date.
    date = datetime.strptime(date_str, '%Y-%m-%d')
    year = str(date.year)
    # NOTE(review): the week number is the ISO week while ``year`` is the
    # calendar year, so e.g. 2019-12-30 lands in "2019-CW01". Left unchanged
    # to keep the existing on-disk layout; confirm this is intended.
    CW = str(date.isocalendar()[1]).rjust(2, '0')

    path = 'output/{}/{}-CW{}/{}'.format(year, year, CW, date_str)

    # exist_ok=True makes this idempotent: a folder left half-created by an
    # interrupted run gets its missing sub-folders, and there is no window
    # between an existence check and the creation.
    for status_dir in ('NONE', 'ERROR', 'DONE'):
        os.makedirs('{}/{}/'.format(path, status_dir), exist_ok=True)

    return path


# Percentage change between two numbers


In [8]:
def changePercent(first, second):
    """Return the percentage change from ``first`` to ``second``.

    Both inputs are rounded to two decimal places before the calculation.

    Args:
        first: Starting value (the base of the percentage).
        second: Ending value.

    Returns:
        ``(second - first) / first * 100`` computed on the rounded values.

    Raises:
        ZeroDivisionError: If ``first`` rounds to zero.
    """
    base = round(first, 2)
    target = round(second, 2)
    delta = target - base
    return (delta / base) * 100

# Loop through the tickers and bring up the daily OHLC

In [9]:

# Dates to download, computed once at cell level; main() reads this global.
todo_dates = get_dates()

async def main():
    """Fetch the daily open/close record for every ticker and todo date.

    Reads the module-level ``tickers`` (list of dicts with a ``ticker`` key)
    and ``todo_dates`` (list of ``'YYYY-MM-DD'`` strings). Requests are issued
    one at a time over a single ``aiohttp`` session, and each decoded JSON
    response is handed to ``write_csv``.

    Returns:
        None. Returns immediately when ``tickers`` is ``None`` or empty.
    """
    # return_commonstock_tickers() yields None on failure; iterating over it
    # would raise TypeError, so log and stop instead.
    if not tickers:
        logging.warning('No tickers available; nothing to download.')
        return

    # The output folder depends only on the date, so create each one once
    # instead of once per (ticker, date) pair.
    for date in todo_dates:
        get_path(date)

    async with aiohttp.ClientSession() as session:
        for entry in tickers:
            ticker = entry['ticker']

            for date in todo_dates:
                api_url = f'https://api.polygon.io/v1/open-close/{ticker}/{date}?adjusted=true&apiKey={API_KEY_POLYGON}'
                async with session.get(api_url) as resp:
                    data = await resp.json()
                    write_csv(data)
                    

# Top-level ``await`` is only valid in an environment that supports it, such
# as the IPython/Jupyter cell this appears to come from; a plain script would
# need asyncio.run(main()) instead.
await main()
# print("--- %s seconds ---" % (time.time() - start_time))   



status:  OK
{'status': 'OK', 'from': '2020-01-29', 'symbol': 'A', 'open': 87.55, 'high': 88.36, 'low': 87.15, 'close': 87.24, 'volume': 1144884, 'afterHours': 87.24, 'preMarket': 87.55}
status:  OK
{'status': 'OK', 'from': '2020-01-30', 'symbol': 'A', 'open': 84.9, 'high': 85.03, 'low': 83.6, 'close': 84.38, 'volume': 3102057, 'afterHours': 84.38, 'preMarket': 85.54}
status:  OK
{'status': 'OK', 'from': '2020-01-31', 'symbol': 'A', 'open': 84.03, 'high': 84.335, 'low': 82.34, 'close': 82.56, 'volume': 2864742, 'afterHours': 82.56, 'preMarket': 84.03}
status:  OK
{'status': 'OK', 'from': '2020-02-03', 'symbol': 'A', 'open': 83.29, 'high': 83.69, 'low': 82.11, 'close': 82.15, 'volume': 1937281, 'afterHours': 82.15, 'preMarket': 83.29}
status:  OK
{'status': 'OK', 'from': '2020-02-04', 'symbol': 'A', 'open': 82.85, 'high': 83.9, 'low': 82.78, 'close': 83.52, 'volume': 1693390, 'afterHours': 83.52, 'preMarket': 82.78}
status:  OK
{'status': 'OK', 'from': '2020-02-05', 'symbol': 'A', 'open'

CancelledError: 