In [1]:
import numpy as np
import pandas as pd
from dLoader import DataLoader

In [2]:
# Binance API REST Call without api keys
def millisecond(date, unit='ns', is_end=False):
    """Convert a date to an integer Unix-epoch timestamp.

    Despite the name, the resolution of the result is selected by ``unit``.

    Args:
        date: A date string such as ``'2022-01-31'`` or
            ``'2022-01-31 09:30:00'``, or any other value ``pd.Timestamp``
            accepts (``datetime``, ``pd.Timestamp``, ...).
        unit: ``'s'`` for seconds, ``'ms'`` for milliseconds; any other value
            returns nanoseconds.
        is_end: Only applies to strings of at most 10 characters (a date
            without a time part). When True the time 23:59:59 is appended,
            otherwise 00:00:01.

    Returns:
        int: Epoch timestamp, floor-divided down to the requested unit.
    """
    # Only bare date strings get a time of day appended. Checking the type
    # first lets datetime / Timestamp objects through, which have no len().
    if isinstance(date, str) and len(date) <= 10:
        date += ' 23:59:59' if is_end else ' 00:00:01'
    # Timestamp.value is in nanoseconds; pick the divisor for the target unit.
    if unit == 's':
        place = 10 ** 9
    elif unit == 'ms':
        place = 10 ** 6
    else:
        place = 1
    return pd.Timestamp(date).value // place

def url(symbol, interval, start=None, end=None, limit=1000):
    """Build the Binance.US klines request URL.

    Args:
        symbol: Trading pair string, placed in the ``symbol`` query field.
        interval: Kline interval string (author's note: m, d, M suffixes).
        start: Optional start date; converted to epoch milliseconds.
        end: Optional end date; converted to epoch milliseconds, using the
            end-of-day time for bare dates.
        limit: Number of rows to request (author's note: max 1000).

    Returns:
        str: The full request URL.
    """
    # Author's note: look-back data of 3 years.
    endpoint = "https://api.binance.us/api/v3/klines?"
    pieces = ["symbol=" + symbol, "&interval=" + interval]
    # The time bounds are optional and simply left out when not given.
    if start is not None:
        pieces.append("&startTime=" + str(millisecond(start, unit='ms')))
    if end is not None:
        pieces.append("&endTime=" + str(millisecond(end, unit='ms', is_end=True)))
    pieces.append("&limit=" + str(limit))
    return endpoint + "".join(pieces)

def get_data(symbol, interval, start=None, end=None, limit=1000):
    """Fetch klines from the URL built by ``url`` and return an OHLCV frame.

    Args:
        symbol, interval, start, end, limit: Forwarded unchanged to ``url``.

    Returns:
        pd.DataFrame: Columns ``Open``, ``High``, ``Low``, ``Close`` and
        ``Volume`` (response columns 1-5), indexed by response column 0
        parsed as epoch milliseconds. The index is unnamed.
    """
    klines = pd.read_json(url(symbol, interval, start=start, end=end, limit=limit))
    # Column 0 holds the bar time in epoch milliseconds.
    timestamps = pd.to_datetime(klines.iloc[:, 0], unit='ms')
    timestamps.name = None  # keep the resulting index unnamed
    # Columns 1-5 are relabelled; the remaining columns are discarded.
    ohlcv = klines.iloc[:, 1:6]
    ohlcv.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    ohlcv.index = timestamps
    return ohlcv

In [3]:
# Used to get minute / 1-hour data from Yahoo Finance and for real-time updates
def query_yahoo_finance(symbol, start=None, end=None, interval=None, premarket=False):
    """Query the Yahoo Finance chart endpoint and return the quotes as OHLCV.

    Args:
        symbol: Ticker string, e.g. ``'AAPL'``.
        start: Optional start date, sent as ``period1`` in epoch seconds.
        end: Optional end date, sent as ``period2`` in epoch seconds (bare
            dates use the end-of-day time).
        interval: Optional bar size; must be one of ``valid_interval`` below.
        premarket: When truthy, ``includePrePost=true`` is added to the query.

    Returns:
        pd.DataFrame: Columns ``Open``, ``High``, ``Low``, ``Close`` and
        ``Volume``, indexed by the response timestamps converted from UTC
        to US/Eastern.

    Raises:
        AssertionError: If ``interval`` is given but is not a valid interval.
    """
    # Sample query url (host query1 or query2, API version v7 or v8):
    #   https://query1.finance.yahoo.com/v8/finance/chart/AAPL?symbol=AAPL
    #   &period1=1646256611&period2=1646775011&useYfid=true&interval=1m
    # Max 2 years look back for 1h interval
    valid_interval = ['1m', '2m', '5m', '15m', '30m', '60m', '90m',
                      '1h', '1d', '5d', '1wk', '1mo', '3mo']
    if interval is not None:
        assert interval in valid_interval, "Valid intervals: {}".format(valid_interval)
    # Base Yahoo Finance Query API URL
    base_api = "https://query1.finance.yahoo.com/v8/finance/chart/"
    # Query entries, built under their own names so the arguments stay intact
    symbol_part = symbol + "?symbol=" + symbol
    period1 = "&period1=" + str(millisecond(start, unit='s')) if start is not None else ""
    period2 = "&period2=" + str(millisecond(end, unit='s', is_end=True)) if end is not None else ""
    interval_part = "&interval=" + interval if interval is not None else ""
    prepost_part = "&includePrePost=true" if premarket else ""
    # Create query url (not named `url`, to avoid shadowing the url() helper)
    query_url = f"{base_api}{symbol_part}{period1}{period2}&useYfid=true{interval_part}{prepost_part}"
    # Get meta data: the first entry of chart.result
    meta = pd.read_json(query_url).loc['result', 'chart'][0]
    columns = ['open', 'high', 'low', 'close', 'volume']
    # Turn pricing data into a DataFrame with a fixed column order
    df = pd.DataFrame.from_dict(meta['indicators']['quote'][0]).reindex(columns=columns)
    df.columns = [col.capitalize() for col in columns]
    # Timestamps arrive as epoch seconds; localize to UTC, then show in US/Eastern
    df.index = pd.to_datetime(meta['timestamp'], unit='s').tz_localize('UTC').tz_convert('US/Eastern')
    return df

In [4]:
# Load the QYLD full-holdings CSV and display it (the file name suggests a
# 2022-02-28 snapshot).
# NOTE(review): the 'Unnamed: 0' column in the output looks like a saved
# index column; index_col=0 would absorb it - confirm.
pd.read_csv('qyld_full-holdings_20220228.csv')

Unnamed: 0,% of Net Assets,Ticker,Name,SEDOL,Market Price ($),Shares Held,Market Value ($)
0,12.79,AAPL,APPLE INC,2046251,165.12,5059138.00,835364866.56
1,10.59,MSFT,MICROSOFT CORP,2588173,298.79,2315126.00,691736497.54
2,7.37,AMZN,AMAZON.COM INC,2000019,3071.26,156642.00,481088308.92
3,4.14,TSLA,TESLA INC,B616C79,870.43,310536.00,270299850.48
4,4.05,NVDA,NVIDIA CORP,2379504,243.85,1085828.00,264779157.80
...,...,...,...,...,...,...,...
98,0.18,SWKS,SKYWORKS SOLUTIONS INC,2961053,138.17,85943.00,11874744.31
99,0.18,DOCU,DOCUSIGN INC,BFYT7B7,118.43,98271.00,11638234.53
100,0.17,PDD,PINDUODUO INC-ADR,BYVW0F7,51.86,207277.00,10749385.22
101,0.14,SPLK,SPLUNK INC,B424494,118.10,78121.00,9226090.10


In [5]:
# Holdings table used by the download loop; rows containing any missing value
# are dropped. The name `qlyd` (sic) is referenced by a later cell.
qlyd = pd.read_csv('qyld_full-holdings_20220228.csv').dropna()

In [6]:
# Download the stock data
import os
path = os.path.join(os.getcwd(), 'Database')

# Visit every ticker in the holdings table and call DataLoader only when
# ./Database/<ticker>.csv does not exist yet.
# NOTE(review): presumably constructing DataLoader(ticker) fetches and caches
# the data as a side effect - confirm in dLoader.
for ticker in qlyd['Ticker']:
    try:
        if not os.path.exists(os.path.join(path, ticker + '.csv')):
            DataLoader(ticker)
    except Exception as e:
        # Best-effort: one failing ticker must not stop the rest. Report the
        # ticker alongside the error so the failure can be traced.
        print(f"{ticker}: {e}")

# Analysis

In [7]:
def batching(data, batch: int):
    """Group ``data`` into consecutive rows of ``batch`` items.

    When ``len(data)`` is not a multiple of ``batch``, the leading items that
    do not fill a complete row are dropped, so the rows always end at the
    last element.

    Args:
        data: Sequence, Series or Index convertible with ``np.array``.
        batch: Number of items per row.

    Returns:
        np.ndarray: Array reshaped to ``(-1, batch)``. It has zero rows when
        ``data`` holds fewer than ``batch`` items.
    """
    values = np.array(data)
    complete = (len(data) // batch) * batch
    # Slice from an explicit start offset. `values[-complete:]` is wrong when
    # complete == 0, because `values[-0:]` is the whole array and the reshape
    # then fails instead of yielding an empty result.
    first = len(data) - complete
    return values[first:].reshape(-1, batch)

def regroup(data, period: int):
    """Aggregate every ``period`` consecutive rows into a single OHLC bar.

    Args:
        data: Frame with ``Open``, ``High``, ``Low`` and ``Close`` columns.
        period: Number of source rows merged into one output bar.

    Returns:
        pd.DataFrame: ``Open`` (first of each group), ``High`` (group max),
        ``Low`` (group min) and ``Close`` (last of each group), indexed by
        the first timestamp of each group. Other columns are not carried over.
    """
    first_stamp = pd.to_datetime(batching(data.index, period)[:, 0])
    bars = np.column_stack((
        batching(data['Open'], period)[:, 0],     # first open of the group
        batching(data['High'], period).max(1),    # highest high
        batching(data['Low'], period).min(1),     # lowest low
        batching(data['Close'], period)[:, -1],   # last close of the group
    ))
    return pd.DataFrame(bars, columns=['Open', 'High', 'Low', 'Close'], index=first_stamp)

def calculation(data):
    """Compute forward-looking return columns and return them without NaNs.

    With ``t`` as the current row:
      * ``Gap``  = Close[t+1] / Close[t]   - 1
      * ``Loss`` = Low[t+2]   / Close[t+1] - 1
      * ``Base`` = Close[t+2] / Close[t+1] - 1
      * ``Gain`` = High[t+2]  / Close[t+1] - 1

    Side effect: the four columns are also written into ``data`` itself.

    Returns:
        pd.DataFrame: The four columns, restricted to rows with no NaN.
    """
    next_close = data['Close'].shift(-1)
    data['Gap'] = next_close / data['Close'] - 1
    # The remaining three columns share the same denominator (next close).
    for label, source in (('Loss', 'Low'), ('Base', 'Close'), ('Gain', 'High')):
        data[label] = data[source].shift(-2) / next_close - 1
    return data[['Gap', 'Loss', 'Base', 'Gain']].dropna()

In [8]:
def cal_range(d, s, e, step=0.01):
    """Map each threshold in ``np.arange(s, e, step)`` to a hit frequency.

    The comparison direction depends on ``s``: when ``s == 0`` the value is
    the fraction of ``d`` strictly above the threshold, otherwise it is the
    fraction strictly below it.

    Returns:
        dict: ``{threshold: fraction}`` in ascending threshold order.
    """
    fractions = {}
    for level in np.arange(s, e, step=step):
        hits = d > level if s == 0 else d < level
        fractions[level] = hits.mean()
    return fractions

def merger(g, l, Min, Max, step=0.01):
    """Combine downside and upside threshold frequencies into one dict.

    Args:
        g: Values evaluated against thresholds from 0 up to ``Max``.
        l: Values evaluated against thresholds from ``Min`` up to 0.
        Min, Max: Bounds of the threshold grid.
        step: Spacing between thresholds.

    Returns:
        dict: Entries for ``l`` first, then entries for ``g``. If a key
        appears in both, the value from ``g`` is kept.
    """
    upside = cal_range(g, 0, Max, step=step)
    downside = cal_range(l, Min, 0, step=step)
    return {**downside, **upside}

def probabilities(data, start, end, Min, Max, freq='Q', step=0.01, span=5):
    """Exponentially weighted threshold-hit frequencies over calendar periods.

    For each period between ``start`` and ``end``, the next row's High and
    Low are expressed relative to the current Close, and ``merger`` turns
    them into hit frequencies per threshold. The per-period results are then
    smoothed with an EWM of the given ``span`` and the last row is returned.

    Args:
        data: Frame with ``High``, ``Low`` and ``Close`` columns and a
            datetime index.
        start, end: Bounds passed to ``pd.date_range``.
        Min, Max: Bounds of the threshold grid.
        freq: Period-end frequency alias; ``freq + 'S'`` is used for the
            matching period starts.
        step: Spacing between thresholds, forwarded to ``merger``.
        span: Span of the exponentially weighted mean across periods.

    Returns:
        pd.Series: Smoothed frequency per threshold for the last period.
    """
    # NOTE(review): starts and ends are paired by position, so `start` is
    # assumed to fall on a period start - confirm for other inputs.
    period_starts = pd.date_range(start, end, freq=freq + 'S')
    period_ends = pd.date_range(start, end, freq=freq)
    per_period = {}
    for i, (s, e) in enumerate(zip(period_starts, period_ends)):
        d = data.loc[s: e]
        g = d['High'].shift(-1) / d['Close'] - 1
        l = d['Low'].shift(-1) / d['Close'] - 1
        # Forward `step`; it was previously accepted but never used, so the
        # grid was always built with merger's default spacing.
        per_period[i] = merger(g, l, Min, Max, step=step)
    return pd.DataFrame.from_dict(per_period, orient='index').ewm(span=span).mean().iloc[-1]

In [9]:
# Single-ticker run: AAPL between 2016-01-01 and 2020-12-31
ticker = 'AAPL'
data = DataLoader(ticker).get_data('2016-01-01', '2020-12-31')
# Merge every 3 consecutive rows into one OHLC bar
data = regroup(data, 3)
# Next bar's High relative to this bar's Close
gain = data['High'].shift(-1) / data['Close'] - 1
# Upper bound of the threshold grid: largest gain rounded to 1 decimal
Max = np.round(gain.max(), 1)
# Next bar's Low relative to this bar's Close
loss = data['Low'].shift(-1) / data['Close'] - 1
# Lower bound of the threshold grid: smallest loss rounded to 1 decimal
Min = np.round(loss.min(), 1)
# Smoothed per-threshold hit frequencies over the same date range
probs = probabilities(data, '2016-01-01', '2020-12-31', Min, Max)

In [10]:
# Gap / Loss / Base / Gain table for the regrouped bars. Note that
# calculation() also writes these four columns into `data` itself.
calc = calculation(data)

In [11]:
# Threshold levels (the index labels of `probs`) as a NumPy array
array = np.array(probs.index)
