In [1]:
# Importing dependencies
import numpy as np
import pandas as pd
from yfQuery import datareader

# Loading Data

In [2]:
class DataLoader:
    """Holds historical stock prices that are fetched once, at construction.

    Parameters
    ----------
    symbol : a single ticker symbol, or a list of symbols
    start  : start date of the range to load
    end    : end date of the range to load

    All three arguments are forwarded unchanged to ``datareader`` and the
    result is stored on ``self.data``.
    """

    def __init__(self, symbol, start, end):
        # Preload the whole range once; get() only slices what is stored here.
        self.data = datareader(symbol, start, end)

    def get(self, start, end):
        """Return a window of the preloaded data.

        ``start`` and ``end`` are integer *positions* (the slice is taken with
        ``iloc``), not dates; ``end`` is exclusive.
        """
        window = slice(start, end)
        return self.data.iloc[window]

# Stack and DataFrame

In [3]:
def restack_array(array):
    """Turn a flat price path into an (n, 4) Open/High/Low/Close array.

    The input is cut into consecutive groups of four values, one group per
    output row:

    * Close is the last value of the group.
    * Open is the first value of the very first group; every later row opens
      at the previous row's Close, so the bars chain without gaps.
    * High / Low are the extremes of the group, widened if necessary so that
      they also contain the (possibly replaced) Open.

    The input array is not modified.
    """
    bars = array.reshape(-1, 4)
    closes = bars[:, -1].copy()
    opens = bars[:, 0].copy()
    # Chain the bars: each row opens where the previous one closed.
    opens[1:] = closes[:-1]
    highs = np.maximum(bars.max(axis=1), opens)
    lows = np.minimum(bars.min(axis=1), opens)
    return np.vstack((opens, highs, lows, closes)).T

def create_datetime(t, period=390):
    """Build the one-minute DatetimeIndex for the day of ``t``.

    The index starts at 09:31:00 on the calendar day of ``t`` and holds
    ``period`` consecutive one-minute stamps (with the default of 390 the
    last stamp is 16:00:00).

    Parameters
    ----------
    t : timestamp-like
        Any value ``pd.Timestamp`` accepts; only its date (and timezone, if
        any) is kept.
    period : int
        Number of one-minute stamps to generate.

    Returns
    -------
    pd.DatetimeIndex
    """
    # Zero the sub-minute fields too: replacing only hour/minute would carry
    # stray seconds/microseconds from ``t`` into every stamp of the index.
    first_bar = pd.Timestamp(t).replace(hour=9, minute=31, second=0,
                                        microsecond=0, nanosecond=0)
    return pd.date_range(first_bar, periods=period, freq="1min")

def to_dataframe(array, index):
    """Wrap an (n, 4) array in a DataFrame with OHLC column labels.

    ``array`` supplies the values, ``index`` the row labels; the four columns
    are named Open, High, Low and Close, in that order.
    """
    ohlc_columns = ['Open', 'High', 'Low', 'Close']
    frame = pd.DataFrame(array, index=index, columns=ohlc_columns)
    return frame

def get_different_time_frame(df, m='min'):
    """Aggregate OHLC bars into buckets of frequency ``m``.

    With ``m == 'min'`` the frame is returned as-is (the same object).
    Otherwise rows are grouped by ``pd.Grouper(freq=m)`` on the index and each
    bucket keeps the first Open, the highest High, the lowest Low and the
    last Close. Columns come back in Open, High, Low, Close order.
    """
    if m == 'min':
        return df

    def bucketed(column):
        # A fresh Grouper per column, exactly one groupby per OHLC field.
        return df[column].groupby(by=pd.Grouper(freq=m))

    opens = bucketed('Open').first()
    closes = bucketed('Close').last()
    highs = bucketed('High').max()
    lows = bucketed('Low').min()
    return pd.concat([opens, highs, lows, closes], axis=1)

# Filling price gaps

In [16]:
def get_target_price(array, i, pct=.95):
    """Draw a random price for position ``i`` of a partially filled path.

    Zeros in ``array`` mark unknown prices. The draw starts from the
    "current" price (the value just before ``i``, or ``array[i]`` itself when
    ``i`` is 0) and moves either up or down:

    * an upward candidate is drawn uniformly between the current price and
      ``pct`` of the way to the highest value in ``array``;
    * a downward candidate is drawn uniformly between ``pct`` of the way to
      the lowest positive value in ``array`` and the current price.

    Half of the time one of the two candidates is picked at random; the other
    half the pick follows momentum, i.e. up if the next known (positive)
    price at or after ``i`` is above the current price, down otherwise.

    Raises IndexError if there is no positive value at or after ``i``.
    """
    ahead = array[i:]
    next_known = ahead[ahead > 0][0]
    ceiling = array.max()
    floor = array[array > 0].min()
    anchor = array[i] if i == 0 else array[i - 1]

    # Both candidates are always drawn, in this order, before the coin flip.
    step_up = anchor + np.random.uniform(0, (ceiling - anchor) * pct)
    step_down = anchor + np.random.uniform((floor - anchor) * pct, 0)

    if np.random.choice([True, False]):
        return np.random.choice([step_up, step_down])
    if next_known - anchor > 0:
        return step_up
    return step_down

def fill_gap(arr, i=0, pct=.95):
    """Fill the zero-valued gaps of ``arr`` in place, left to right.

    Every element equal to 0 from position ``i`` up to, but not including,
    the last element is replaced by ``get_target_price(arr, pos, pct)``.
    Because the array is filled in order, each draw sees the values already
    written to its left.

    Parameters
    ----------
    arr : np.ndarray
        Price path; 0 marks a missing price. Modified in place.
    i : int
        First position to consider.
    pct : float
        Forwarded to ``get_target_price`` for every filled position.

    Returns
    -------
    np.ndarray
        The same ``arr`` object, for convenience.

    Notes
    -----
    This is a loop rather than recursion: one stack frame per element
    overflows Python's recursion limit for long arrays, and the recursive
    form also dropped ``pct`` after the first step.
    """
    # The final slot is deliberately skipped: it is never filled, only used
    # as the last known price the earlier positions move toward.
    for pos in range(i, len(arr) - 1):
        if arr[pos] == 0:
            arr[pos] = get_target_price(arr, pos, pct)
    return arr

In [17]:
def generate_df(price, m='min', period=390):
    """Generate pseudo intraday OHLC bars from one Open/High/Low/Close row.

    A zero-filled path of ``period * 4`` ticks is created and four anchors
    are pinned on it: Open at the first tick, Close at the last, and High and
    Low at two random interior ticks (which of the two comes first is random
    as well). ``fill_gap`` then fills the remaining ticks, ``restack_array``
    folds the path into ``period`` one-minute bars, and the result is indexed
    with ``create_datetime(price.name, ...)`` and resampled to ``m``.

    Parameters
    ----------
    price : row with 'Open', 'High', 'Low', 'Close' entries and a ``name``
        that ``create_datetime`` can turn into the day's minute index.
    m : target bar frequency; 'min' keeps the one-minute bars.
    period : number of one-minute bars to generate.
    """
    length = period * 4
    margin = int(length * .1)
    ticks = np.zeros(length)

    # Draw order matters for seeded runs: first extreme, second extreme, coin.
    first_extreme = np.random.randint(margin, int(length * .6))
    second_extreme = np.random.randint(first_extreme + margin, length - margin)
    low_first = np.random.choice([True, False])

    middle = ('Low', 'High') if low_first else ('High', 'Low')
    anchors = [price[key] for key in ('Open', *middle, 'Close')]
    ticks[[0, first_extreme, second_extreme, -1]] = anchors

    # Fill the zeros between the anchors (in place).
    fill_gap(ticks)

    minute_bars = to_dataframe(restack_array(ticks),
                               create_datetime(price.name, period=period))
    return get_different_time_frame(minute_bars, m=m)
    

In [18]:
# Load AAPL prices for 2018-01-01 .. 2019-12-31 (DataLoader forwards these
# arguments to datareader and keeps the result on .data).
d = DataLoader('AAPL', '2018-01-01', '2019-12-31')
# First row of the loaded data; generate_df reads its Open/High/Low/Close
# entries and uses its .name as the date of the generated minute index.
price = d.data.iloc[0]

In [20]:
# Build 15-minute OHLC bars from the single row held in `price`.
# NOTE(review): generate_df draws from NumPy's global RNG and no seed is set
# in the visible cells, so the displayed table will differ on every run.
generate_df(price, m='15min')

Unnamed: 0,Open,High,Low,Close
2018-01-02 09:30:00,42.540001,43.075001,42.379328,42.892568
2018-01-02 09:45:00,42.892568,43.074581,42.368139,42.929809
2018-01-02 10:00:00,42.929809,43.075001,42.487665,43.071912
2018-01-02 10:15:00,43.071912,43.075001,42.389357,43.036514
2018-01-02 10:30:00,43.036514,43.074736,42.346273,43.073399
2018-01-02 10:45:00,43.073399,43.075001,42.31538,42.315554
2018-01-02 11:00:00,42.315554,43.059817,42.315031,42.381985
2018-01-02 11:15:00,42.381985,43.042398,42.31507,42.332985
2018-01-02 11:30:00,42.332985,43.043373,42.315201,42.537062
2018-01-02 11:45:00,42.537062,43.031421,42.315007,42.791229
