In [1]:
# Importing dependencies
import numpy as np
import pandas as pd
from yfQuery import datareader

In [2]:
class DataLoader:
    """Load past stock prices once and hand out positional slices of them.

    Parameters
    ----------
    symbol
        Either a single symbol or a list of symbols.
    start
        Start date passed to ``datareader``.
    end
        End date passed to ``datareader``.
    """

    def __init__(self, symbol, start, end):
        # Load everything up front; `get` only slices this cached result.
        self.data = datareader(symbol, start, end)

    def get(self, start, end):
        """Return the rows at integer positions ``start`` up to (not including) ``end``."""
        window = slice(start, end)
        return self.data.iloc[window]

In [20]:
def get_target_price(array, i, pct=.95):
    """Draw a random fill-in price for position ``i`` of a partially filled series.

    Entries equal to zero are treated as "not filled yet"; only strictly
    positive entries count as known prices. The reference ("current") price
    is the entry just before ``i`` (``array[i]`` itself when ``i`` is 0).
    With equal probability the result moves from the current price toward the
    series maximum or toward the lowest positive price, by a uniformly random
    share (at most ``pct``) of that distance.

    Parameters
    ----------
    array : numpy.ndarray
        Price series (indexed as 1-D); needs at least one positive value.
    i : int
        Position being filled.
    pct : float, default 0.95
        Largest fraction of the distance to the high/low that a move may cover.

    Returns
    -------
    float
        The sampled price. ``array`` is not modified.

    Raises
    ------
    ValueError
        If ``array`` contains no positive price.
    """
    known = array[array > 0]
    if known.size == 0:
        raise ValueError("array must contain at least one positive price")
    # High and low over the prices known so far
    high = array.max()
    low = known.min()
    # Current price: the previous slot, or the slot itself at the very start
    current = array[i - 1] if i != 0 else array[i]
    # Signed room to move up / down, scaled by pct
    upper = (high - current) * pct
    lower = (low - current) * pct
    # Pick the direction first so only the sample that is returned gets drawn
    if np.random.choice([True, False]):
        return current + np.random.uniform(0, upper)
    return current + np.random.uniform(lower, 0)

In [21]:
def recur(arr, i=0, pct=.95):
    """Fill the zero ("missing") entries of ``arr`` in place, left to right.

    Starting at index ``i``, every entry equal to 0 is replaced by
    ``get_target_price(arr, idx, pct)``. The last element is never filled;
    it is left exactly as the caller provided it.

    Parameters
    ----------
    arr : numpy.ndarray
        Price series with zeros marking the slots to fill; modified in place.
    i : int, default 0
        Index to start from.
    pct : float, default 0.95
        Forwarded unchanged to ``get_target_price`` for every filled slot.

    Returns
    -------
    numpy.ndarray
        The same ``arr`` object.
    """
    # Implemented as a loop despite the name: one stack frame per element
    # exceeds Python's default recursion limit on long series, and the old
    # recursive call also dropped `pct` after the first level.
    for idx in range(i, len(arr) - 1):
        if arr[idx] == 0:
            arr[idx] = get_target_price(arr, idx, pct)
    return arr

In [22]:
def restack_array(array):
    """Group a flat price series into rows of Open, High, Low, Close.

    The series is cut into consecutive groups of four values. Within each
    group the first value is the open and the last the close; from the second
    group on, the open is overwritten with the previous group's close. High
    and low are then taken over the (adjusted) four values of each group.

    Returns a new array with one row per group and columns in the order
    Open, High, Low, Close; the input is left unchanged.
    """
    # Work on a copy so the caller's series is not modified
    bars = array.reshape(-1, 4).copy()
    # Chain the bars: each open equals the preceding close
    bars[1:, 0] = bars[:-1, -1]
    opens = bars[:, 0]
    closes = bars[:, -1]
    # Extremes are computed after the open adjustment so they include it
    highs = bars.max(axis=1)
    lows = bars.min(axis=1)
    return np.stack([opens, highs, lows, closes], axis=0).T

In [23]:
def get_different_time_frame(df, m='min'):
    """Aggregate OHLC bars into bars of frequency ``m``.

    With ``m == 'min'`` the frame is returned as-is (same object). Otherwise
    the rows are bucketed on the index with ``pd.Grouper(freq=m)`` and each
    bucket is reduced to: first 'Open', max 'High', min 'Low', last 'Close'.
    The result has the columns in that order.

    ``df`` needs the columns 'Open', 'High', 'Low', 'Close'; its index is
    assumed to be datetime-like, since the grouping is by frequency.
    """
    if m == 'min':
        return df

    def bucketed(column):
        # Fresh Grouper per column, bucketing that column by frequency `m`
        return df[column].groupby(by=pd.Grouper(freq=m))

    opens = bucketed('Open').first()
    closes = bucketed('Close').last()
    highs = bucketed('High').max()
    lows = bucketed('Low').min()
    return pd.concat([opens, highs, lows, closes], axis=1)

In [24]:
def price_df(price, m='min', pct=.95):
    """Generate one trading day of pseudo minute bars from a single OHLC bar.

    A zero-filled series of 390 * 4 ticks is seeded with the bar's Open
    (first tick) and Close (last tick), and with its High and Low at two
    random interior positions (in random order). ``recur`` then fills the
    remaining ticks with pseudo-random prices and ``restack_array`` groups
    every four ticks into one Open/High/Low/Close row.

    Parameters
    ----------
    price
        Row-like object indexable by 'Open', 'High', 'Low' and 'Close'
        (e.g. one row of the loaded price frame).
    m : str, default 'min'
        Currently ignored; kept so existing calls keep working. The function
        always returns the raw per-minute array.
    pct : float, default 0.95
        Forwarded to ``recur``.

    Returns
    -------
    numpy.ndarray
        Array of shape (390, 4) with columns Open, High, Low, Close.
        The values depend on NumPy's global random state.
    """
    # 390 minutes per trading day, four ticks per minute bar
    period = 390
    length = period * 4
    # Empty series: zero marks a tick that still has to be generated
    array = np.zeros(length)
    # Random positions for the two extremes; the second lies at least 10% of
    # the series after the first and stops short of the final ticks
    point_a = np.random.randint(1, int(length * .6))
    point_b = np.random.randint(point_a + int(length * .1), length - int(period * .1))
    # Randomly decide whether the low or the high is reached first
    swap = np.random.choice([True, False])
    if swap:
        extremes = price['Low'], price['High']
    else:
        extremes = price['High'], price['Low']
    array[[0, point_a, point_b, -1]] = (price['Open'], *extremes, price['Close'])
    # Generate the pseudo in-between prices (fills `array` in place)
    recur(array, pct=pct)
    # TODO: the DataFrame path (minute DatetimeIndex + resampling with
    # get_different_time_frame(df, m=m)) is not wired in yet, so `m` has no
    # effect and the stacked array is returned directly.
    return restack_array(array)

In [8]:
# Load AAPL prices for 2018-01-01 .. 2019-12-31 and keep the first row as the
# sample input bar for price_df.
d = DataLoader('AAPL', '2018-01-01', '2019-12-31')
price = d.data.iloc[0]

In [25]:
# NOTE: price_df currently ignores `m`, so this displays the raw array of
# generated Open/High/Low/Close rows rather than 15-minute bars.
price_df(price, m='15min')

array([[42.540001  , 43.04154638, 42.540001  , 43.04154638],
       [43.04154638, 43.04154638, 42.61757112, 42.61757112],
       [42.61757112, 42.81206556, 42.36975274, 42.36975274],
       ...,
       [42.76788351, 42.78580312, 42.54255183, 42.54255183],
       [42.54255183, 42.73333509, 42.36796773, 42.73333509],
       [42.73333509, 43.064999  , 42.73333509, 43.064999  ]])