In [1]:
# Importing dependencies
import numpy as np
import pandas as pd
from dLoader import DataLoader

# Stack and DataFrame

In [2]:
def restack_array(array, p):
    """Collapse a flat tick array into ``p`` Open/High/Low/Close rows.

    The input is reshaped into ``p`` equal-length rows, one row per bar.
    Each bar closes at the last value of its row. The first bar opens at
    the first value of its row; every later bar opens at the previous
    bar's close. High and Low are the row extremes, widened where needed
    so that they always contain the bar's Open.

    Parameters
    ----------
    array : numpy.ndarray
        Flat array whose size is divisible by ``p``. It is not modified.
    p : int
        Number of bars (rows) to produce.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(p, 4)`` with columns Open, High, Low, Close.
    """
    bars = array.reshape(p, -1)
    closes = bars[:, -1].copy()
    # Chain the bars: every bar after the first opens at the prior close.
    opens = np.concatenate((bars[:1, 0], closes[:-1]))
    # The chained open may fall outside the row's own range, so fold it in.
    highs = np.maximum(bars.max(axis=1), opens)
    lows = np.minimum(bars.min(axis=1), opens)
    return np.column_stack((opens, highs, lows, closes))

def create_datetime(t, period=390):
    """Build the one-minute timestamp index for a single session.

    Parameters
    ----------
    t : datetime-like
        Object exposing ``replace(hour=..., minute=...)``, e.g. a
        ``pandas.Timestamp``. Only its hour and minute are overridden;
        the date and any seconds it carries are kept.
    period : int, default 390
        Number of one-minute timestamps to generate.

    Returns
    -------
    pandas.DatetimeIndex
        ``period`` consecutive minutes, the first one at 09:31 on the
        date of ``t``.
    """
    session_start = t.replace(hour=9, minute=31)
    return pd.date_range(start=session_start, periods=period, freq="1min")

def to_dataframe(array, index):
    """Wrap a four-column price array in a labelled DataFrame.

    Parameters
    ----------
    array : array-like
        Two-dimensional data with four columns, in the order
        Open, High, Low, Close.
    index : array-like
        Row labels, one per row of ``array``.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``Open``, ``High``, ``Low``, ``Close``.
    """
    ohlc_columns = ['Open', 'High', 'Low', 'Close']
    return pd.DataFrame(data=array, columns=ohlc_columns, index=index)

def get_different_time_frame(df, m='min'):
    """Aggregate one-minute OHLC bars into a coarser time frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime index and ``Open``, ``High``, ``Low`` and
        ``Close`` columns.
    m : str, default 'min'
        Frequency string passed to ``pandas.Grouper`` (e.g. ``'15min'``).
        The value ``'min'`` means "no aggregation".

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when ``m == 'min'``; otherwise a new frame with one
        row per ``m`` bucket holding the first Open, highest High, lowest
        Low and last Close of that bucket.
    """
    # Already at the base resolution: hand the frame back untouched.
    if m == 'min':
        return df
    # Dict order fixes the output column order: Open, High, Low, Close.
    bar_rules = {'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last'}
    return df.groupby(by=pd.Grouper(freq=m)).agg(bar_rules)

# Recursion

In [3]:
def get_bound(cur, high, low, p1, p2):
    """Draw one random price above and one below the current price.

    Parameters
    ----------
    cur : float
        Current price.
    high, low : float
        Upper and lower price limits.
    p1 : float
        Fraction of the distance from ``cur`` to ``high`` that the upward
        draw may cover.
    p2 : float
        Fraction of the distance from ``cur`` to ``low`` that the downward
        draw may cover.

    Returns
    -------
    tuple
        ``(gain, lost)``: ``cur`` plus a uniform draw from
        ``[0, (high - cur) * p1)`` and ``cur`` plus a uniform draw from
        ``[(low - cur) * p2, 0)``.
    """
    up_room = p1 * (high - cur)
    down_room = p2 * (low - cur)
    # The upward draw is taken first; keep this order so seeded runs repeat.
    gain = np.random.uniform(0, up_room) + cur
    lost = np.random.uniform(down_room, 0) + cur
    return gain, lost

def get_target_price(array, i):
    """Pick a pseudo price for position ``i``, biased toward the next known price.

    Parameters
    ----------
    array : numpy.ndarray
        Price array in which 0 marks a slot that has not been filled yet.
    i : int
        Position for which a price is wanted.

    Returns
    -------
    float
        One of the two candidates produced by ``get_bound`` (an upward and
        a downward move from the current price), chosen at random.

    Raises
    ------
    IndexError
        If there is no positive value at or after position ``i``.
    """
    # Next already-known (positive) price at or after position i.
    ahead = array[i:]
    target = ahead[ahead > 0][0]
    # Price envelope; zeros are unfilled slots and must not count as the low.
    high = array.max()
    low = array[array > 0].min()
    # Start from the previous price; position 0 has no predecessor.
    current = array[i] if i == 0 else array[i - 1]
    momentum = target - current
    # Two random fractions add noise; the small one is drawn first.
    low_ran = np.random.uniform(.1, .5)
    high_ran = np.random.uniform(.5, .9)
    # Give the larger fraction to the side that moves toward the target.
    if momentum > 0:
        up_frac, down_frac = high_ran, low_ran
    else:
        up_frac, down_frac = low_ran, high_ran
    candidates = get_bound(current, high, low, up_frac, down_frac)
    return np.random.choice(candidates)

def fill_gap(arr, i=0):
    """Fill the zero slots of ``arr`` in place with pseudo prices.

    Walks the array from position ``i`` up to, but not including, the last
    element and replaces every value equal to 0 with
    ``get_target_price(arr, position)``. The last element is never
    modified.

    This is a plain loop rather than a recursive walk: one stack frame per
    element would exceed CPython's default recursion limit (1000) for the
    1560-element arrays built by ``generate_df``.

    Parameters
    ----------
    arr : numpy.ndarray
        Price array, modified in place; 0 marks a slot to fill.
    i : int, default 0
        Position at which to start filling.

    Returns
    -------
    numpy.ndarray
        The same ``arr`` object, always (including when nothing was filled).
    """
    for pos in range(i, len(arr) - 1):
        # Only exact zeros are gaps; every other value is left as it is.
        if arr[pos] == 0:
            arr[pos] = get_target_price(arr, pos)
    return arr

# Pseudo Data Generator

In [4]:
def generate_df(price, m='min', randomize_swap=False):
    """Generate pseudo intraday OHLC bars for one daily bar.

    Four anchor prices from ``price`` are pinned into an otherwise empty
    tick array: the Open at the start, the Close at the end, and the High
    and Low at two random interior positions. The gaps are then filled
    with random prices, the ticks are stacked into one-minute bars, and
    the bars are optionally aggregated.

    Parameters
    ----------
    price : pandas.Series
        Row with ``Open``, ``High``, ``Low`` and ``Close`` entries; its
        ``name`` supplies the date of the generated index.
    m : str, default 'min'
        Output time frame, forwarded to ``get_different_time_frame``.
    randomize_swap : bool, default False
        If True, the order of the High and Low anchors is chosen at
        random. If False, the Low comes first when the Close lies further
        above the Low than the Open does.

    Returns
    -------
    pandas.DataFrame
        Pseudo OHLC bars at the requested time frame.
    """
    period = 390  # one-minute bars in a single trading day
    length = period * 4  # four ticks per one-minute bar
    pad = int(length * .1)
    # Random positions of the two interior anchors, kept at least `pad`
    # ticks away from each other and from the end of the array.
    point_a = np.random.randint(pad, int(length * .6))
    point_b = np.random.randint(point_a + pad, length - pad)

    # Decide whether the Low anchor comes before the High anchor.
    if randomize_swap:
        swap = np.random.choice([True, False])
    else:
        open_to_low = price['Open'] / price['Low'] - 1
        close_to_low = price['Close'] / price['Low'] - 1
        swap = close_to_low > open_to_low

    if swap:
        anchor_keys = ['Open', 'Low', 'High', 'Close']
    else:
        anchor_keys = ['Open', 'High', 'Low', 'Close']

    # Zeros mark the ticks that still have to be generated.
    array = np.zeros(length)
    array[[0, point_a, point_b, -1]] = [price[key] for key in anchor_keys]
    fill_gap(array)

    # Ticks -> one-minute bars -> labelled frame -> requested time frame.
    minute_bars = restack_array(array, p=period)
    minute_index = create_datetime(price.name, period=period)
    minute_df = to_dataframe(minute_bars, minute_index)
    return get_different_time_frame(minute_df, m=m)
    

# Test

In [5]:
# Load AAPL data for 2018-01-01 .. 2019-12-31 through the project-local DataLoader.
# NOTE(review): presumably one OHLC row per trading day — confirm against dLoader.
d = DataLoader('AAPL', '2018-01-01', '2019-12-31')

In [6]:
# Take the first loaded row as the bar to expand; generate_df reads its
# 'Open', 'High', 'Low' and 'Close' entries and uses its `.name` as the date.
price = d.data.iloc[0]

In [7]:
# Generate pseudo intraday bars for that row, aggregated into 15-minute bars.
# The result is random: it differs on every run unless the NumPy seed is fixed.
generate_df(price, m='15min')

Unnamed: 0,Open,High,Low,Close
2018-01-02 09:30:00,42.540001,42.7384,42.321847,42.384495
2018-01-02 09:45:00,42.384495,42.831625,42.335276,42.415573
2018-01-02 10:00:00,42.415573,42.752531,42.332719,42.533513
2018-01-02 10:15:00,42.533513,42.96147,42.395304,42.733006
2018-01-02 10:30:00,42.733006,43.04746,42.314999,42.591714
2018-01-02 10:45:00,42.591714,43.055486,42.591714,42.856388
2018-01-02 11:00:00,42.856388,42.972555,42.479395,42.857668
2018-01-02 11:15:00,42.857668,43.071249,42.46234,42.763798
2018-01-02 11:30:00,42.763798,43.066374,42.581327,43.022111
2018-01-02 11:45:00,43.022111,43.075001,42.596025,42.799225
