In [1]:
# Importing dependencies
import numpy as np
import pandas as pd
from yfQuery import datareader

# Loading Data

In [2]:
class DataLoader:
    '''
        Loader that fetches past stock prices once and serves slices of them
        symbol: a single symbol or a list of symbols
        start:  start date handed to datareader
        end:    end date handed to datareader
    '''
    def __init__(self, symbol, start, end):
        # Fetch everything up front; later reads come from memory
        history = datareader(symbol, start, end)
        self.data = history

    def get(self, start, end):
        # Positional (row-number) window over the preloaded data;
        # here start/end are integer offsets, not dates
        window = slice(start, end)
        return self.data.iloc[window]

# Stack and DataFrame

In [3]:
def restack_array(array, p):
    # Fold the flat series into p rows and derive one
    # Open, High, Low, Close bar from each row
    grid = array.reshape(p, -1).copy()
    closes = grid[:, -1].copy()
    # Every bar opens where the previous bar closed; the first bar
    # keeps the first value of its own row
    opens = np.concatenate([grid[:1, 0], closes[:-1]])
    # The carried-over open may lie outside the row's own range,
    # so widen high/low to include it
    highs = np.maximum(grid.max(axis=1), opens)
    lows = np.minimum(grid.min(axis=1), opens)
    return np.stack([opens, highs, lows, closes]).T

def create_datetime(t, period=390):
    '''
        Create the minute index for one date
        t:      the date to index; anything pd.Timestamp accepts
                (Timestamp, datetime, date or date string)
        period: number of one-minute timestamps to generate
        Returns a DatetimeIndex of `period` entries, one minute apart,
        starting at 09:31:00 on t's date
    '''
    # Pin the start to exactly 09:31:00. Sub-minute fields are cleared
    # explicitly: replace() only touches the fields it is given, so a
    # t carrying seconds would otherwise shift every entry off the minute
    ts = pd.Timestamp(t).replace(hour=9, minute=31, second=0,
                                 microsecond=0, nanosecond=0)
    return pd.date_range(ts, periods=period, freq="1min")

def to_dataframe(array, index):
    # Wrap the four value columns in a DataFrame labelled
    # Open/High/Low/Close and attach the supplied index
    ohlc_labels = ['Open', 'High', 'Low', 'Close']
    frame = pd.DataFrame(data=array, index=index, columns=ohlc_labels)
    return frame

def get_different_time_frame(df, m='min'):
    # Re-bucket minute bars into bars of frequency m;
    # m == 'min' means the frame is handed back untouched
    if m == 'min':
        return df

    def bucketed(column):
        # One price column grouped into time bins of width m
        return df[column].groupby(by=pd.Grouper(freq=m))

    bars = [
        bucketed('Open').first(),   # first open inside the bin
        bucketed('High').max(),     # highest high inside the bin
        bucketed('Low').min(),      # lowest low inside the bin
        bucketed('Close').last(),   # last close inside the bin
    ]
    return pd.concat(bars, axis=1)

# Recursion

In [19]:
def get_bound(cur, high, low, p1, p2):
    # Draw one random price on the high side of cur and one on the low side.
    # p1 / p2 scale how much of the distance to high / low a draw may cover.
    room_above = p1 * (high - cur)
    room_below = p2 * (low - cur)
    # Keep this draw order (up first, then down) so results under a
    # fixed seed stay the same
    step_up = np.random.uniform(0, room_above)
    step_down = np.random.uniform(room_below, 0)
    return cur + step_up, cur + step_down

def get_target_price(array, i):
    # Pick a random price for slot i that leans toward the next known price
    # Next positive price at or after slot i
    ahead = array[i:]
    upcoming = ahead[ahead > 0][0]
    # Extremes of the array; non-positive entries are ignored for the low
    ceiling = array.max()
    floor = array[array > 0].min()
    # Price to move from: the previous slot, or slot i itself at the start
    anchor = array[i] if i == 0 else array[i - 1]
    # Fractions of the available room a move may use: a small one against
    # the direction of the next price, a large one along it
    minor = np.random.uniform(.2, .5)
    major = np.random.uniform(.7, .9)
    rising = (upcoming - anchor) > 0
    up_frac, down_frac = (major, minor) if rising else (minor, major)
    candidates = get_bound(anchor, ceiling, floor, up_frac, down_frac)
    # Either the upward or the downward candidate, chosen at random
    return np.random.choice(candidates)

def fill_gap(arr, i=0):
    '''
        Fill every slot of arr that equals 0, in place, from position i on
        arr: 1-D price array; zeros mark the slots to fill
        i:   position to start from
        Returns arr itself (the same object, modified in place)

        The final slot is never filled: get_target_price needs a positive
        value at or after the slot it fills, so the last slot is expected
        to hold a known price already.
    '''
    # Walk the slots with a loop. The earlier recursive form used one
    # stack frame per element, so arrays longer than the interpreter's
    # recursion limit raised RecursionError, and its recursive branch
    # dropped the return value.
    for slot in range(i, len(arr) - 1):
        if arr[slot] == 0:
            arr[slot] = get_target_price(arr, slot)
    return arr

In [20]:
def generate_df(price, m='min', period=390):
    # Generate pseudo intraday prices for one Open/High/Low/Close row
    # Four zero-initialised ticks per minute; zeros are the slots to fill
    n_ticks = period * 4
    ticks = np.zeros(n_ticks)
    margin = int(n_ticks * .1)
    # Random positions for the two extremes, kept `margin` ticks away
    # from each other and from the end of the series
    first_at = np.random.randint(margin, int(n_ticks * .6))
    second_at = np.random.randint(first_at + margin, n_ticks - margin)
    # Coin flip: does the low or the high come first?
    low_first = np.random.choice([True, False])
    middle = ('Low', 'High') if low_first else ('High', 'Low')
    anchors = tuple(price[key] for key in ('Open',) + middle + ('Close',))
    ticks[[0, first_at, second_at, -1]] = anchors
    # Fill the remaining zero slots (modifies ticks in place)
    fill_gap(ticks)
    # Collapse ticks to one bar per minute and index the bars by time
    minute_bars = to_dataframe(restack_array(ticks, p=period),
                               create_datetime(price.name, period=period))
    return get_different_time_frame(minute_bars, m=m)
    

In [6]:
# Preload AAPL price data for 2018-01-01 through 2019-12-31
# (DataLoader fetches it via datareader when constructed)
d = DataLoader('AAPL', '2018-01-01', '2019-12-31')

In [21]:
# Use the first row of the loaded data as the sample input
# (presumably one daily bar with Open/High/Low/Close fields; confirm against datareader's output)
price = d.data.iloc[0]

In [22]:
# Generate pseudo minute prices for `price` and display them re-bucketed at 15-minute frequency
generate_df(price, m='15min')

Unnamed: 0,Open,High,Low,Close
2018-01-02 09:30:00,42.540001,42.971442,42.334867,42.335992
2018-01-02 09:45:00,42.335992,42.979705,42.321602,42.529665
2018-01-02 10:00:00,42.529665,42.857935,42.317069,42.848357
2018-01-02 10:15:00,42.848357,42.926827,42.320192,42.67364
2018-01-02 10:30:00,42.67364,42.747541,42.320412,42.734825
2018-01-02 10:45:00,42.734825,43.020107,42.32107,42.963872
2018-01-02 11:00:00,42.963872,42.963872,42.338206,42.360472
2018-01-02 11:15:00,42.360472,43.010906,42.318069,42.58125
2018-01-02 11:30:00,42.58125,43.008942,42.314999,42.626206
2018-01-02 11:45:00,42.626206,43.072195,42.447517,42.94618
