In [None]:
import os
from datetime import datetime, timedelta
import zipfile
import matplotlib.pyplot as plt
import requests
import pandas as pd
import requests
import zipfile
from tqdm.notebook import tqdm

# Environment Variables

In [None]:
# Machine profile that selects the directory layout in the path configuration
# cell. Supported values: 'windows', 'ubuntu', 'aws'.
COMPUTING_ENV = 'ubuntu'

In [None]:
# Per-machine locations of the project folder and the API-key folder,
# keyed by the COMPUTING_ENV profile name.
_ENV_DIRS = {
    'windows': ("C:\\Users\\regin\\Dropbox\\ibis", "C:\\Users\\regin\\Dropbox\\API_KEYS"),
    'ubuntu': ("/home/reggie/Dropbox/ibis", "/home/reggie/Dropbox/API_KEYS"),
    'aws': ("/home/ubuntu/ibis", "/home/ubuntu/API_KEYS"),
}
if COMPUTING_ENV not in _ENV_DIRS:
    # Fail here with a clear message rather than a NameError on WORKING_DIR.
    raise ValueError(
        f"Unknown COMPUTING_ENV {COMPUTING_ENV!r}; expected one of {sorted(_ENV_DIRS)}"
    )
WORKING_DIR, API_KEYS_DIR = _ENV_DIRS[COMPUTING_ENV]

DATA_DIR = os.path.join(WORKING_DIR, "data")
FRD_DATA_DIR = os.path.join(DATA_DIR, 'frd-historical')

# Lookup table whose 'type' / 'timeframe' / 'adjustment' columns select a
# storage sub-directory (column 'directory') for each dataset.
frd_download_directories = pd.read_csv(os.path.join(FRD_DATA_DIR, 'frd-download-directories.csv'))
frd_download_directories

In [None]:
timeframes = ["1min", "5min", "daily", "weekly", "monthly"]
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

print(f"Working directory:\n\t{WORKING_DIR}")
print(f"Data directory:\n\t{DATA_DIR}")
print(f"FRD data directory:\n\t{FRD_DATA_DIR}")

# make dirs
for d in [DATA_DIR, FRD_DATA_DIR]:
    if not os.path.exists(d):
        print(f"Creating {d}")
    # exist_ok avoids a failure if the folder appears between check and create.
    os.makedirs(d, exist_ok=True)

# Read the FirstRateData user id; the context manager closes the file handle.
with open(os.path.join(API_KEYS_DIR, 'FRD-USER-ID')) as key_file:
    FRD_USER_ID = key_file.read().strip()
# The id is sent as the 'userid' request parameter, so treat it as a credential
# and keep it out of the saved notebook output.
print(f"FRD User ID: loaded ({len(FRD_USER_ID)} characters)")

# Utilities

In [None]:
def download_frd_data(params, zips_dir, overwrite=False, timeout=(10, 300)):
    """Download one FirstRateData archive described by ``params`` into ``zips_dir``.

    Parameters
    ----------
    params : dict
        Query parameters sent to the API. Must contain 'period', 'adjustment'
        and 'timeframe'; 'ticker_range' is also read when period is 'full'.
    zips_dir : str
        Directory the ZIP file is written to (must already exist).
    overwrite : bool, default False
        When False and the target file already exists, nothing is downloaded.
    timeout : float or (float, float), default (10, 300)
        Passed to ``requests.get``: connect / read timeout in seconds, so a
        stalled connection cannot hang the notebook forever.

    Returns
    -------
    None
        The outcome is reported with ``print``.
    """
    period = params['period']
    adjustment = params['adjustment']
    timeframe = params['timeframe']

    # 'full' history is split into one archive per ticker range; the other
    # periods are delivered as a single archive.
    if period == 'full':
        zip_name = f"{params['ticker_range']}_{period}_{adjustment}_{timeframe}.zip"
    else:
        zip_name = f"{period}_{adjustment}_{timeframe}.zip"
    zip_fp = os.path.join(zips_dir, zip_name)

    if os.path.exists(zip_fp) and not overwrite:
        print(f"File already exists: {zip_fp}")
        return

    base_url = "https://firstratedata.com/api/data_file"
    # Download to a temporary name first so an interrupted transfer never
    # leaves a truncated file that a later run would treat as already present.
    part_fp = zip_fp + ".part"

    with requests.get(base_url, params=params, stream=True, timeout=timeout) as response:
        # Mask the user id before echoing the request URL into the output.
        userid = str(params.get('userid') or '')
        print(response.url.replace(userid, '***') if userid else response.url)

        if response.status_code != 200:
            print(f"Failed to download data: {response.status_code}")
            return

        try:
            # Stream in chunks instead of holding the whole archive in memory.
            with open(part_fp, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)
            os.replace(part_fp, zip_fp)
        finally:
            # Only still present if the transfer or the rename failed.
            if os.path.exists(part_fp):
                os.remove(part_fp)

    print(f"ZIP file saved: {zip_fp}")

def extract_frd_zip(
        src_zips_dir,
        dest_csv_dir,
        ticker_first_letter='',
        period='full',
        adjustment='adj_splitdiv',
        timeframe='1min',
        overwrite=False):
    """Extract one downloaded FirstRateData archive into the CSV directory.

    Parameters
    ----------
    src_zips_dir : str
        Directory holding the ZIP files.
    dest_csv_dir : str
        Root directory for extracted files. For ``period == 'full'`` the
        archive goes into the ``ticker_first_letter`` sub-directory; for any
        other period it goes directly into ``dest_csv_dir``.
    ticker_first_letter : str
        Ticker range of the archive; only used when ``period == 'full'``.
    period, adjustment, timeframe : str
        Components of the archive file name.
    overwrite : bool, default False
        Extract even when the destination already contains files.

    Returns
    -------
    None
        The outcome is reported with ``print``.
    """
    if period == 'full':
        src_zip_fp = os.path.join(src_zips_dir, f"{ticker_first_letter}_{period}_{adjustment}_{timeframe}.zip")
        dest_csv_fp = os.path.join(dest_csv_dir, ticker_first_letter)
    else:
        src_zip_fp = os.path.join(src_zips_dir, f"{period}_{adjustment}_{timeframe}.zip")
        dest_csv_fp = dest_csv_dir

    # A destination that does not exist yet counts as empty instead of
    # raising FileNotFoundError from os.listdir.
    dest_dir_size = len(os.listdir(dest_csv_fp)) if os.path.isdir(dest_csv_fp) else 0

    # extract only if overwrite == True or the directory is empty
    if dest_dir_size != 0 and not overwrite:
        print(f"{dest_csv_fp} not empty with {dest_dir_size} files")
        return

    if not os.path.isfile(src_zip_fp):
        print(f"File not found: {src_zip_fp}")
        return

    print(f"Extracting {src_zip_fp} to {dest_csv_fp}")
    os.makedirs(dest_csv_fp, exist_ok=True)
    with zipfile.ZipFile(src_zip_fp, 'r') as zip_ref:
        zip_ref.extractall(dest_csv_fp)
    
def make_zips_and_csv_dirs(params, directories=None, base_dir=None):
    """Return the ``(zips_dir, csv_dir)`` paths for a stock download request.

    Only the paths are computed; no directory is created here.

    Parameters
    ----------
    params : dict
        Must contain 'adjustment', 'timeframe' and 'period'.
    directories : pandas.DataFrame, optional
        Lookup table with 'type', 'timeframe', 'adjustment' and 'directory'
        columns. Defaults to the notebook-level ``frd_download_directories``.
    base_dir : str, optional
        Root the looked-up directory is joined onto. Defaults to the
        notebook-level ``FRD_DATA_DIR``.

    Returns
    -------
    tuple of str
        ``(<dir>/zips, <dir>/csv)``.

    Raises
    ------
    IndexError
        If the table has no stock row for the requested timeframe/adjustment.
    """
    if directories is None:
        directories = frd_download_directories
    if base_dir is None:
        base_dir = FRD_DATA_DIR

    adjustment = params['adjustment']
    timeframe = params['timeframe']
    period = params['period']

    is_match = (
        (directories['type'] == 'stock')
        & (directories['timeframe'] == timeframe)
        & (directories['adjustment'] == adjustment)
    )
    matches = directories.loc[is_match, 'directory']
    if matches.empty:
        raise IndexError(
            f"No stock directory configured for timeframe={timeframe!r}, "
            f"adjustment={adjustment!r}"
        )
    _dir = matches.iloc[0]

    # Periodic updates get their own folder stamped with today's date so that
    # successive downloads do not overwrite each other.
    if period in ('day', 'week', 'month'):
        data_date = datetime.now().strftime('%Y%m%d')
        _dir = _dir.replace('stock', f'stock_{period}_{data_date}')

    _dir = os.path.join(base_dir, _dir)
    return os.path.join(_dir, 'zips'), os.path.join(_dir, 'csv')

# Download Full Stock History

## 1 Minute Stock Bars Adjusted for Splits and Dividends

In [None]:
# Request for the full history of 1-minute stock bars, adjusted for splits
# and dividends. 'ticker_range' starts empty and is set per letter by the
# download loop.
params = dict(
    type="stock",
    ticker_range="",
    timeframe="1min",
    adjustment="adj_splitdiv",
    period="full",
    userid=FRD_USER_ID,
)

In [None]:
# Resolve where this dataset's archives and extracted files live.
ZIPS_DIR, CSV_DIR = make_zips_and_csv_dirs(params)
print(f"ZIPS_DIR: {ZIPS_DIR}")
print(f"CSV_DIR: {CSV_DIR}")

# Create the two top-level folders if needed, otherwise report their size.
for folder in (CSV_DIR, ZIPS_DIR):
    if os.path.exists(folder):
        print(f"{folder} exists with {len(os.listdir(folder))} files")
    else:
        print(f"Creating {folder}")
        os.makedirs(folder)

# One sub-folder per leading ticker letter under the CSV folder.
for ticker_first_letter in letters:
    ticker_csv_dir = os.path.join(CSV_DIR, ticker_first_letter)
    print(ticker_csv_dir)

    if os.path.exists(ticker_csv_dir):
        print(f"{ticker_csv_dir} exists with {len(os.listdir(ticker_csv_dir))} files")
    else:
        os.makedirs(ticker_csv_dir)
        print(f"Creating {ticker_csv_dir}")

In [None]:
# Fetch one archive per leading ticker letter; archives already on disk are
# kept because overwrite is off.
for ticker_first_letter in tqdm(letters):
    params['ticker_range'] = ticker_first_letter
    print(f"Downloading data for {ticker_first_letter}...")
    download_frd_data(params, ZIPS_DIR, overwrite=False)

In [None]:
# Unpack every per-letter archive into its CSV sub-folder. overwrite=True
# re-extracts even when the sub-folder already holds files.
for letter in tqdm(letters):
    extract_frd_zip(
        ZIPS_DIR,
        CSV_DIR,
        ticker_first_letter=letter,
        period=params['period'],
        adjustment=params['adjustment'],
        timeframe=params['timeframe'],
        overwrite=True,
    )

## 5 Minute Stock Bars Adjusted for Splits and Dividends

In [None]:
# Request for the full history of 5-minute stock bars, adjusted for splits
# and dividends. 'ticker_range' starts empty and is set per letter by the
# download loop.
params = dict(
    type="stock",
    ticker_range="",
    timeframe="5min",
    adjustment="adj_splitdiv",
    period="full",
    userid=FRD_USER_ID,
)

In [None]:
# Resolve where the 5-minute archives and extracted files live.
ZIPS_DIR, CSV_DIR = make_zips_and_csv_dirs(params)
print(f"ZIPS_DIR: {ZIPS_DIR}")
print(f"CSV_DIR: {CSV_DIR}")

# Create the two top-level folders if needed, otherwise report their size.
for folder in (CSV_DIR, ZIPS_DIR):
    if os.path.exists(folder):
        print(f"{folder} exists with {len(os.listdir(folder))} files")
    else:
        print(f"Creating {folder}")
        os.makedirs(folder)

# One sub-folder per leading ticker letter under the CSV folder.
for ticker_first_letter in letters:
    ticker_csv_dir = os.path.join(CSV_DIR, ticker_first_letter)
    print(ticker_csv_dir)

    if os.path.exists(ticker_csv_dir):
        print(f"{ticker_csv_dir} exists with {len(os.listdir(ticker_csv_dir))} files")
    else:
        os.makedirs(ticker_csv_dir)
        print(f"Creating {ticker_csv_dir}")

In [None]:
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# To fetch only some archives, iterate over a subset instead, e.g. ['X', 'Y'].
# Archives already on disk are kept because overwrite is off.
for ticker_first_letter in tqdm(letters):
    params['ticker_range'] = ticker_first_letter
    print(f"Downloading data for {ticker_first_letter}...")
    download_frd_data(params, ZIPS_DIR, overwrite=False)

In [None]:
# Unpack each per-letter archive into its CSV sub-folder. With
# overwrite=False, sub-folders that already hold files are skipped.
for letter in tqdm(letters):
    extract_frd_zip(
        ZIPS_DIR,
        CSV_DIR,
        ticker_first_letter=letter,
        period=params['period'],
        adjustment=params['adjustment'],
        timeframe=params['timeframe'],
        overwrite=False,
    )

# Download Weekly Updates

## 1 Minute Stock Bars Adjusted for Splits and Dividends (Weekly Update)

In [None]:
# Request for the latest weekly update of 1-minute stock bars. It is delivered
# as a single archive, so there is no 'ticker_range' entry.
params = dict(
    type="stock",
    timeframe="1min",
    adjustment="adj_splitdiv",
    period="week",
    userid=FRD_USER_ID,
)

# Resolve the folders for this update's archive and extracted files.
ZIPS_DIR, CSV_DIR = make_zips_and_csv_dirs(params)
print(f"ZIPS_DIR: {ZIPS_DIR}")
print(f"CSV_DIR: {CSV_DIR}")

# Create each folder if needed, otherwise report how many entries it holds.
for folder in (CSV_DIR, ZIPS_DIR):
    if os.path.exists(folder):
        print(f"{folder} exists with {len(os.listdir(folder))} files")
    else:
        print(f"Creating {folder}")
        os.makedirs(folder)

In [None]:
# Fetch the weekly archive unless it is already present in ZIPS_DIR.
download_frd_data(params, ZIPS_DIR, overwrite=False)

In [None]:
# Unpack the weekly archive straight into CSV_DIR (non-'full' periods have no
# per-letter sub-folders). overwrite=True re-extracts even if files exist.
extract_frd_zip(
    ZIPS_DIR,
    CSV_DIR,
    period=params['period'],
    adjustment=params['adjustment'],
    timeframe=params['timeframe'],
    overwrite=True,
)

In [None]:
# Full-history 1-minute RDFN bars. The path is built from FRD_DATA_DIR so the
# cell works under every COMPUTING_ENV, not only where /home/ubuntu exists.
rdfn_full_fp = os.path.join(
    FRD_DATA_DIR, 'stock', 'adj_splitdiv', '1min', 'csv', 'R',
    'RDFN_full_1min_adjsplitdiv.txt',
)
X = pd.read_csv(rdfn_full_fp,
                names=['date', 'open', 'high', 'low', 'close', 'volume'])
X['date'] = pd.to_datetime(X['date'], format='%Y-%m-%d %H:%M:%S')
X = X.set_index('date')
# Keep a copy with a header row and a parsed date index next to the other data.
X.to_csv(os.path.join(DATA_DIR, 'RDFN_full_1min_adjsplitdiv.csv'), index=True)
X

In [None]:
# Weekly-update RDFN bars from the 2024-09-15 download. The path is built from
# FRD_DATA_DIR so it follows COMPUTING_ENV. Column names are passed explicitly,
# as in the other reads in this notebook; without them the first bar would be
# consumed as the header row.
Y = pd.read_csv(
    os.path.join(
        FRD_DATA_DIR, 'stock_week_20240915', 'adj_splitdiv', '1min', 'csv',
        'RDFN_week_1min_adjsplitdiv.txt',
    ),
    names=['date', 'open', 'high', 'low', 'close', 'volume'],
)
Y

In [None]:
ticker = 'RDFN'
# Take the file-name components from the active request. No earlier cell
# defines these names, so relying on them fails on a fresh kernel.
period = params['period']
timeframe = params['timeframe']
adjustment = params['adjustment']

# Extracted files are named <ticker>_<period>_<timeframe>_<adjustment without '_'>.txt
fp = f"{ticker}_{period}_{timeframe}_{adjustment.replace('_','')}.txt"
fp = os.path.join(CSV_DIR, fp)
print(fp)
rdfn_df = pd.read_csv(fp, names=['date', 'open', 'high', 'low', 'close', 'volume'])
# Bar-to-bar simple return of the close price.
rdfn_df['ret'] = rdfn_df['close'].pct_change()
rdfn_df['date'] = pd.to_datetime(rdfn_df['date'])
rdfn_df = rdfn_df.set_index('date')
rdfn_df

In [None]:
# Bar-to-bar returns, drawn against bar position rather than timestamp.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(rdfn_df['ret'].values)
plt.show()

In [None]:
# resample to daily values
rdfn_daily_df = (
    rdfn_df
    .resample('D')
    .agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    })
    # Calendar days without any bars come back with NaN prices; drop them so
    # they neither appear as trading days nor distort the daily returns.
    .dropna(subset=['close'])
)
# info() prints its report itself and returns None, so it is not wrapped in print().
rdfn_daily_df.info()
rdfn_daily_df = rdfn_daily_df.assign(
    close_to_close_ret=rdfn_daily_df['close'].pct_change(),
    intraday_ret=rdfn_daily_df['close'] / rdfn_daily_df['open'] - 1,
)
rdfn_daily_df

In [None]:
# Open/high/low/close of every bar, drawn against bar position.
fig, ax = plt.subplots(figsize=(15, 7))
for column, label in (('open', 'Open'), ('high', 'High'), ('low', 'Low'), ('close', 'Close')):
    ax.plot(rdfn_df[column].values, label=label)
ax.legend()
# Read the period from the request dict; a bare `period` name is not defined
# by any earlier cell.
ax.set_title(f"{ticker} {params['period']}")
plt.show()

# Index Data

In [None]:
# Request for the full history of daily index bars. Index requests carry no
# 'adjustment' or 'ticker_range' entry.
params = dict(
    type="index",
    timeframe="1day",
    period="full",
    userid=FRD_USER_ID,
)

In [None]:
asset_type, timeframe, period = params['type'], params['timeframe'], params['period']

# Look up this dataset's storage sub-directory in the directory table.
index_dir_matches = frd_download_directories.query(
    f"type == '{asset_type}' & timeframe == '{timeframe}'"
)
INDEX_DATA_DIR = os.path.join(FRD_DATA_DIR, index_dir_matches['directory'].values[0])
print(INDEX_DATA_DIR)
ZIPS_DIR = os.path.join(INDEX_DATA_DIR, 'zips')
CSV_DIR = os.path.join(INDEX_DATA_DIR, 'csv')

# Create each folder if needed, otherwise report how many entries it holds.
for folder in (INDEX_DATA_DIR, CSV_DIR, ZIPS_DIR):
    if os.path.exists(folder):
        print(f"{folder} exists with {len(os.listdir(folder))} files")
    else:
        print(f"Creating {folder}")
        os.makedirs(folder)

# Target path of the archive the download cell writes.
zip_fp = os.path.join(ZIPS_DIR, f'{asset_type}_{period}_{timeframe}.zip')
print(zip_fp)

In [None]:
base_url = "https://firstratedata.com/api/data_file"
# timeout=(connect, read) in seconds keeps a stalled connection from hanging
# the notebook indefinitely.
response = requests.get(base_url, params=params, timeout=(10, 300))
# Mask the user id before echoing the request URL into the notebook output.
_userid = str(params.get('userid') or '')
print(response.url.replace(_userid, '***') if _userid else response.url)
if response.status_code == 200:
    with open(zip_fp, 'wb') as file:
        file.write(response.content)
    print(f"ZIP file saved: {zip_fp}")
else:
    print(f"Failed to download data: {response.status_code}")

In [None]:
# Unpack every member of the downloaded index archive into CSV_DIR.
print(f"Extracting {zip_fp} to {CSV_DIR}")
with zipfile.ZipFile(zip_fp) as zip_ref:
    zip_ref.extractall(path=CSV_DIR)

In [None]:
# Derive the ticker list from the extracted files: for every '.txt' file,
# keep the part of its name before the first underscore.
index_tickers = []
for file_name in os.listdir(CSV_DIR):
    if file_name.endswith('.txt'):
        index_tickers.append(file_name.split('_')[0])
index_tickers[:3], len(index_tickers)


In [None]:
# get first and last date of coverage for each index
def index_date_range(ticker):
    """Return ``(earliest, latest)`` date found in one index's extracted file.

    The file is located in the notebook-level ``CSV_DIR`` and named from
    ``ticker`` plus the notebook-level ``period`` and ``timeframe``.
    """
    csv_path = os.path.join(CSV_DIR, f"{ticker}_{period}_{timeframe}.txt")
    bars = pd.read_csv(csv_path, names=['date', 'open', 'high', 'low', 'close',])
    dates = pd.to_datetime(bars['date'])
    return dates.min(), dates.max()
# Map every index ticker to its (first date, last date) pair.
index_ranges = dict(zip(index_tickers, map(index_date_range, index_tickers)))

# One row per ticker: transpose so tickers become the index, then move them
# into a regular column.
index_ranges_df = pd.DataFrame(index_ranges).T.reset_index()
index_ranges_df.columns = ['ticker', 'min_date', 'max_date']

# Coverage length in whole days; keep only series spanning more than zero
# days, longest first.
coverage = index_ranges_df['max_date'] - index_ranges_df['min_date']
index_ranges_df['series_length'] = coverage.dt.days
index_ranges_df = (
    index_ranges_df[index_ranges_df['series_length'] > 0]
    .sort_values('series_length', ascending=False)
)
index_ranges_df.to_csv(os.path.join(FRD_DATA_DIR, 'index_date_ranges.csv'), index=False)
index_ranges_df