In [2]:
# Importing dependencies
import numpy as np
import pandas as pd
from yfQuery import datareader

# Loading Data

In [3]:
class DataLoader:
    """Fetch historical stock prices once and serve slices of them.

    The full history is downloaded eagerly in the constructor (through
    ``datareader``) and kept on ``self.data``; ``get`` then only slices
    that cached object.

    Constructor arguments
    ---------------------
    symbol : a single ticker symbol, or a list of symbols
    start  : start date of the history to download
    end    : end date of the history to download
    """

    def __init__(self, symbol, start, end):
        # Download up front so later ``get`` calls never hit the data source.
        self.data = datareader(symbol, start, end)

    def get(self, start, end):
        """Return rows ``start:end`` of the preloaded data.

        Unlike the constructor, ``start`` and ``end`` here are integer
        positions (the slice is applied through ``.iloc``), not dates.
        """
        window = slice(start, end)
        return self.data.iloc[window]

# Stack and DataFrame

In [4]:
def restack_array(array, p):
    """Fold a flat price path into ``p`` rows of Open, High, Low, Close.

    ``array`` is reshaped to ``(p, -1)`` so each row holds the ticks of
    one bar. For every bar:

    * Close is the bar's last tick.
    * Open is the previous bar's Close (the first bar keeps its own
      first tick), so consecutive bars join without a gap.
    * High / Low are the bar's extremes, widened where necessary so
      they always contain the Open.

    Returns an array of shape ``(p, 4)`` with columns in the order
    Open, High, Low, Close.
    """
    bars = array.reshape(p, -1).copy()
    closes = bars[:, -1].copy()
    # First bar opens at its own first tick; every later bar opens at
    # the close of the bar before it.
    opens = np.concatenate([bars[:1, 0], closes[:-1]])
    highs = np.maximum(bars.max(axis=1), opens)
    lows = np.minimum(bars.min(axis=1), opens)
    return np.column_stack([opens, highs, lows, closes])

def create_datetime(t, period=390):
    """Build the one-minute timestamp index for the session on day ``t``.

    Parameters
    ----------
    t : datetime-like
        Anything ``pd.Timestamp`` accepts. Only its date (and time zone,
        if it has one) is kept; the time of day is replaced.
    period : int, default 390
        Number of one-minute stamps to generate.

    Returns
    -------
    pandas.DatetimeIndex
        ``period`` stamps at one-minute frequency, the first one at
        09:31:00 on the day of ``t``.
    """
    # Reset the sub-minute fields too: replacing only hour/minute would
    # carry stray seconds from ``t`` into every label of the index.
    ts = pd.Timestamp(t).replace(hour=9, minute=31, second=0,
                                 microsecond=0, nanosecond=0)
    return pd.date_range(ts, periods=period, freq="1min")

def to_dataframe(array, index):
    """Wrap a four-column price array in a labelled DataFrame.

    ``array`` supplies the values (one row per entry of ``index``) and
    its columns are labelled Open, High, Low, Close in that order.
    """
    ohlc_columns = ['Open', 'High', 'Low', 'Close']
    frame = pd.DataFrame(data=array, index=index, columns=ohlc_columns)
    return frame

def get_different_time_frame(df, m='min'):
    """Aggregate minute OHLC bars into bars of frequency ``m``.

    ``df`` must be indexed by timestamps and carry the columns Open,
    High, Low and Close. When ``m`` is ``'min'`` the frame is returned
    untouched; otherwise rows are bucketed with ``pd.Grouper(freq=m)``
    and each bucket is reduced to first Open, max High, min Low and
    last Close.
    """
    if m == 'min':
        return df
    buckets = df.groupby(pd.Grouper(freq=m))
    parts = [
        buckets['Open'].first(),
        buckets['High'].max(),
        buckets['Low'].min(),
        buckets['Close'].last(),
    ]
    return pd.concat(parts, axis=1)

# Recursion

In [37]:
def get_target_price(array, i, pct=.95):
    """Draw a random price for slot ``i`` that heads toward the next known price.

    The "target" is the first strictly positive value at or after ``i``;
    the "current" price is the value just before ``i`` (or ``array[i]``
    itself when ``i`` is 0). If the target lies above the current price
    the result is drawn uniformly between the current price and a
    fraction ``pct`` of the way up to the array maximum; otherwise it is
    drawn between a fraction ``pct`` of the way down to the smallest
    positive value and the current price.

    Raises ``IndexError`` when there is no positive value at or after ``i``.
    """
    # Next known (strictly positive) price at or after slot i.
    ahead = array[i:]
    target = ahead[ahead > 0][0]
    # Extremes of the path; zeros are placeholders, so they are ignored
    # when looking for the floor.
    ceiling = array.max()
    floor = array[array > 0].min()
    # Reference price: the previous slot, or slot i itself at the start.
    current = array[i] if i == 0 else array[i - 1]
    up_room = (ceiling - current) * pct
    down_room = (floor - current) * pct
    # Both candidates are always drawn (upward first), whichever one is
    # returned, so the random stream advances by two draws per call.
    gain = current + np.random.uniform(0, up_room)
    lost = current + np.random.uniform(down_room, 0)
    return gain if target - current > 0 else lost

def fill_gap(arr, i=0, pct=.95):
    """Fill the zero placeholders of ``arr`` in place, left to right.

    Starting at position ``i``, every slot that equals 0 is replaced by
    ``get_target_price(arr, slot, pct)``. The last slot is never
    modified: it is expected to already hold a known price that the
    filled values head toward.

    Parameters
    ----------
    arr : numpy.ndarray
        One-dimensional price path; modified in place.
    i : int, default 0
        Position at which to start filling.
    pct : float, default .95
        Initial range fraction handed to ``get_target_price``. It is
        re-drawn from U(.75, .95) each time a zero slot whose position
        is a multiple of 3 is filled, and the new value stays in effect
        for the slots that follow.

    Returns
    -------
    numpy.ndarray
        The same ``arr`` object, always (never ``None``).
    """
    # A plain loop instead of one recursive call per slot: the recursion
    # depth grew with len(arr) and could exceed the interpreter's
    # recursion limit, and each level rescanned the whole array to look
    # for remaining gaps. When no zero is left, the loop body is a no-op.
    for pos in range(i, len(arr) - 1):
        if arr[pos] == 0:
            if pos % 3 == 0:
                pct = np.random.uniform(.75, .95)
            arr[pos] = get_target_price(arr, pos, pct)
    return arr

In [55]:
def generate_df(price, m='min', period=390, ap=100):
    """Generate pseudo intraday OHLC bars for one day from its daily bar.

    A zero-filled path of ``period * 4`` ticks is seeded with the day's
    Open (first tick), High and Low (two random interior ticks, in
    random order), Close (last tick) and ``ap`` extra anchor ticks drawn
    uniformly between Low and High. ``fill_gap`` then fills the
    remaining zeros in place, the path is folded into ``period`` bars of
    four ticks each, indexed with ``create_datetime`` and finally
    aggregated to frequency ``m``.

    Parameters
    ----------
    price : mapping-like
        Must provide 'Open', 'High', 'Low', 'Close' and a ``name``
        attribute identifying the day (e.g. one row of a daily price
        DataFrame).
    m : str, default 'min'
        Output bar frequency, passed to ``get_different_time_frame``.
    period : int, default 390
        Number of one-minute bars to generate.
    ap : int, default 100
        Number of random anchor ticks (positions may repeat).

    Returns
    -------
    pandas.DataFrame
        The result of ``get_different_time_frame`` for the generated bars.
    """
    # Four ticks per bar.
    length = period * 4
    array = np.zeros(length)
    # Keep the randomly placed ticks away from both ends of the path.
    pad = int(length * .1)
    point_a = np.random.randint(pad, int(length * .6))
    point_b = np.random.randint(point_a + pad, length - pad)
    # Draw exactly ``ap`` positions so they match the ``ap`` values below
    # (a fixed count here broke every call with ap != 100).
    anchors = np.random.randint(pad, length - pad, size=ap)
    # Randomly decide whether the day's Low or its High comes first.
    swap = np.random.choice([True, False])
    if swap:
        pr = price['Open'], price['Low'], price['High'], price['Close']
    else:
        pr = price['Open'], price['High'], price['Low'], price['Close']
    array[anchors] = np.random.uniform(price['Low'], price['High'], size=ap)
    # Write the real Open/High/Low/Close last: an anchor that lands on
    # point_a or point_b must not overwrite the day's true extremes.
    array[[0, point_a, point_b, -1]] = pr
    # Filling gap
    fill_gap(array)
    # Restack
    restack = restack_array(array, p=period)
    # DataFrame
    date_index = create_datetime(price.name, period=period)
    df = to_dataframe(restack, date_index)
    return get_different_time_frame(df, m=m)
    

In [57]:
# Build a loader for AAPL with start '2018-01-01' and end '2019-12-31';
# the data is fetched inside the constructor and kept on d.data.
d = DataLoader('AAPL', '2018-01-01', '2019-12-31')

In [59]:
# Take the last row of the loaded data as the bar to expand.
# NOTE(review): presumably a daily OHLC row whose name is its date — confirm
# against what datareader returns.
price = d.data.iloc[-1]

In [60]:
# Generate pseudo intraday bars for that row, aggregated to 15-minute bars.
# The result is random: it changes on every run unless NumPy's RNG is seeded.
generate_df(price, m='15min')

Unnamed: 0,Open,High,Low,Close
2019-12-31 09:30:00,72.482498,73.354731,72.482498,72.609831
2019-12-31 09:45:00,72.609831,73.327138,72.605577,73.113485
2019-12-31 10:00:00,73.113485,73.282214,72.537433,72.877631
2019-12-31 10:15:00,72.877631,73.379567,72.431752,72.918596
2019-12-31 10:30:00,72.918596,73.38042,72.582378,73.08146
2019-12-31 10:45:00,73.08146,73.367464,72.38129,72.394848
2019-12-31 11:00:00,72.394848,73.419998,72.38158,72.496206
2019-12-31 11:15:00,72.496206,73.310901,72.389986,72.92793
2019-12-31 11:30:00,72.92793,73.371134,72.450973,72.803965
2019-12-31 11:45:00,72.803965,73.356818,72.467529,72.99145
