In [22]:
from polygon import RESTClient
import local_settings as settings #api key file
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import requests
import json
import time
import os
import glob
import seaborn as sn
from matplotlib.pyplot import figure
from plotly.subplots import make_subplots
from datetime import date

# Crypto/USD symbols analysed in this notebook. The list is sorted in place so
# that its order is alphabetical regardless of how the literals are arranged.
pairs = [
    'AAVEUSD', 'ADAUSD', 'ALGOUSD', 'BCHUSD', 'BTCUSD', 'COMPUSD', 'DOGEUSD',
    'DOTUSD', 'ENJUSD', 'ETCUSD', 'ETHUSD', 'GRTUSD', 'LINKUSD', 'LTCUSD',
    'MANAUSD', 'MKRUSD', 'OMGUSD', 'UNIUSD', 'SANDUSD', 'SOLUSD', 'SNXUSD',
    'XLMUSD', 'XMRUSD', 'XRPUSD', 'YFIUSD', 'ZECUSD',
]
pairs.sort()

# Root folder of the project. The default is the original hard-coded location;
# set the POLYGON_PROJECT_DIR environment variable to run the notebook from
# another machine or checkout without editing this cell.
project_dir = os.environ.get('POLYGON_PROJECT_DIR', '/Users/csingh/Documents/Projects/Quant/polygonIO')
# Sub-folder of project_dir from which the per-pair CSV files are reloaded.
csv_dir = '1m_data'

# The API key comes from the local_settings module (imported as `settings`),
# so it is never written into the notebook itself.
api_key = settings.polygon['api_key']
client = RESTClient(api_key)
client

<polygon.rest.RESTClient at 0x7fefc81a52e0>

___

#### Get OHLCV data from the API (function) & create CSVs

In [18]:
def coin_data(pair, ds, de = '2025-01-15', multiplier = '1', timespan = 'minute', pause = 13, max_retries = 3):
    """Download aggregate (OHLCV) bars for one crypto pair from polygon.io.

    Each request asks for at most 50000 bars, so the range ``ds``..``de`` is
    walked page by page: the next request starts on the calendar date of the
    last bar received, and paging stops once that date has already been
    requested (no further progress is possible). Bars that overlap between
    consecutive pages are removed by de-duplicating on ``Time``.

    Parameters
    ----------
    pair : str
        Pair symbol without the ``X:`` prefix, e.g. ``'BTCUSD'``.
    ds : str
        Start date of the range; follow-up requests use 'YYYY-MM-DD'.
    de : str
        End date of the range.
    multiplier, timespan : str
        Bar size, inserted into the request path as ``{multiplier}/{timespan}``.
    pause : int or float
        Seconds to wait after every request (original note: only 5 calls/min).
    max_retries : int
        How many times in a row a non-200 response is retried before paging
        is abandoned.

    Returns
    -------
    pandas.DataFrame
        Columns ``Volume, Volume Weighted, Open, Close, High, Low, Time, n``
        with ``Time`` converted from epoch milliseconds to datetimes. The frame
        is empty (same columns) when nothing could be downloaded; whatever was
        fetched before a persistent failure is still returned.
    """
    columns = ['Volume','Volume Weighted','Open','Close','High','Low','Time','n']
    renames = {'v':'Volume','vw':'Volume Weighted','o':'Open','c':'Close','h':'High','l':'Low','t':'Time'}
    coin = pair
    date_start = ds #only goes back to 2 years, but this works for getting earliest
    date_end = de
    requested = set()   # start dates that were already answered successfully
    pages = []          # one DataFrame per successful request, concatenated once at the end
    failures = 0        # consecutive non-200 responses for the current start date

    while date_start not in requested:
        url = f'https://api.polygon.io/v2/aggs/ticker/X:{coin}/range/{multiplier}/{timespan}/{date_start}/{date_end}'
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(url, params={'limit': 50000, 'apiKey': api_key}, timeout=30)
        time.sleep(pause) # ONLY 5 calls/min

        if response.status_code != 200:
            print("Did not receieve OK response from polygon.io API")
            failures += 1
            if failures > max_retries:
                break
            # Retry the same start date instead of silently ending the download.
            continue

        failures = 0
        requested.add(date_start)

        # The 'results' key may be missing or empty when the range holds no bars.
        results = json.loads(response.text).get('results')
        if not results:
            print("Did not return any data from polygon.io for this symbol")
            break

        df = pd.DataFrame(results).rename(columns=renames)
        df['Time'] = pd.to_datetime(df['Time'], unit='ms')
        pages.append(df)

        # Continue from the calendar date of the newest bar received.
        date_start = str(df['Time'].iloc[-1])[0:10]

    if not pages:
        return pd.DataFrame(columns=columns)

    bdf = pd.concat(pages, ignore_index=True).drop_duplicates(subset='Time', keep="first")
    # reindex (rather than .loc) so a column missing from the payload becomes NaN instead of a KeyError
    bdf = bdf.reindex(columns=columns)
    t1 = bdf['Time'].iloc[0]
    t2 = bdf['Time'].iloc[-1]
    print(f'Pair:{coin}\tStart:\t{t1}\tEnd:\t{t2}\tShape:\t{bdf.shape}')
    #ONLY UNCOMMENT WHEN GETTING NEW DATASET
    # bdf.to_csv( f'{project_dir}/1m_data_new/{coin}_{str(bdf.iloc[0,6])[0:10]}_to_{str(bdf.iloc[-1,6])[0:10]}_{multiplier}{timespan[0]}_data.csv', index=False)
    return bdf

----

#### Obtaining all OHLCV data for the symbols in `pairs` (DON'T RUN unless you need a new dataset; it can take a long time, depending on the timeframe)

In [None]:
# Download the history of every symbol in `pairs`, starting at 2021-01-20.
# Frames are appended one at a time so that an interrupted run still leaves
# the already-downloaded pairs in `pairs_df`.
pairs_df = []
for pair in pairs:
    pairs_df.append(coin_data(pair, ds='2021-01-20'))

---

#### Reloading CSVs from folder

In [23]:
# Folder that holds one CSV file per pair.
dir_name = f"{project_dir}/{csv_dir}"
pairs_df = []
for file in sorted(os.listdir(dir_name)):
    # Match on the extension: a substring test would also accept names such as 'x.csv.bak'.
    if not file.endswith('.csv'):
        continue
    df = pd.read_csv(f'{dir_name}/{file}', header=0)
    df_name = file.split('_')[0]  # pair symbol is the file-name prefix
    pairs_df.append(df)
    if df.empty:
        # No first/last row to report; avoid failing on the Start/End lookup.
        print(f'{file} \tPair:{df_name} \t Shape:{df.shape} \tloaded (no rows)!')
        continue
    print(f'{file} \tPair:{df_name} \t Start:\t{df["Time"].iloc[0]} \t End:\t{df["Time"].iloc[-1]} \t Shape:{df.shape} \tloaded!')

AAVEUSD_2021-01-23_to_2023-01-30_1minute_data.csv 	Pair:AAVEUSD 	 Start:	2021-01-23 14:49:00 	 End:	2023-01-30 23:58:00 	 Shape:(1027915, 8) 	loaded!
ADAUSD_2021-01-23_to_2023-01-30_1minute_data.csv 	Pair:ADAUSD 	 Start:	2021-01-23 14:55:00 	 End:	2023-01-30 23:58:00 	 Shape:(1061314, 8) 	loaded!
ALGOUSD_2021-01-23_to_2023-01-30_1minute_data.csv 	Pair:ALGOUSD 	 Start:	2021-01-23 15:00:00 	 End:	2023-01-30 23:58:00 	 Shape:(1061273, 8) 	loaded!
BCHUSD_2021-01-23_to_2023-01-30_1minute_data.csv 	Pair:BCHUSD 	 Start:	2021-01-23 15:06:00 	 End:	2023-01-30 23:58:00 	 Shape:(1049067, 8) 	loaded!
BTCUSD_2021-01-23_to_2023-01-30_1minute_data.csv 	Pair:BTCUSD 	 Start:	2021-01-23 15:12:00 	 End:	2023-01-30 23:58:00 	 Shape:(1063244, 8) 	loaded!
COMPUSD_2021-01-23_to_2023-01-30_1minute_data.csv 	Pair:COMPUSD 	 Start:	2021-01-23 15:17:00 	 End:	2023-01-30 23:58:00 	 Shape:(989998, 8) 	loaded!
DOGEUSD_2021-01-23_to_2023-01-30_1minute_data.csv 	Pair:DOGEUSD 	 Start:	2021-01-23 15:23:00 	 End:	2023-01

---

#### Updating pre-existing datasets with newer data

In [None]:
# NOTE(review): despite the variable names, files are READ from '1m_data_new'
# and the merged result is WRITTEN to '1m_data' - confirm this is intended.
dir_name_old = "/Users/csingh/Documents/Projects/Quant/polygonIO/1m_data_new"
dir_name_new = "/Users/csingh/Documents/Projects/Quant/polygonIO/1m_data"
multiplier = '1'
timespan = 'minute'

for file in sorted(os.listdir(dir_name_old)):
    # Match on the extension: a substring test would also accept names such as 'x.csv.bak'.
    if not file.endswith('.csv'):
        continue
    df = pd.read_csv(f'{dir_name_old}/{file}', header=0)
    csv_pair = file.split('_')[0] #pair symbol from the file-name prefix, e.g. BTCUSD
    print(f'{file} \t loaded!')
    if df.empty:
        print(f'{file} \t has no rows, skipped')
        continue

    # The CSV stores Time as text while coin_data returns datetimes. Without
    # this conversion the overlapping rows never compare equal, so
    # drop_duplicates keeps both copies and they are written out twice.
    df['Time'] = pd.to_datetime(df['Time'])
    last_date = df['Time'].iloc[-1].strftime('%Y-%m-%d')

    # Re-fetch from the last stored day up to today, using the same bar size
    # that is written into the output file name.
    update_df = coin_data(csv_pair, last_date, str(date.today()), multiplier, timespan)

    ndf = pd.concat([df, update_df], ignore_index=True)
    ndf['Time'] = pd.to_datetime(ndf['Time'])
    ndf = ndf.drop_duplicates(subset='Time', keep='first').reset_index(drop=True)

    s = ndf['Time'].iloc[0].strftime('%Y-%m-%d')
    e = ndf['Time'].iloc[-1].strftime('%Y-%m-%d')
    ndf.to_csv( f'{dir_name_new}/{csv_pair}_{s}_to_{e}_{multiplier}{timespan}_data.csv', index=False)
    print(f'-----------------------------------------------------------------------------------------')


---