### cuIndicator Performance test
cuIndicator is implemented by [cuDF](https://github.com/rapidsai/cudf) and [Numba](http://numba.pydata.org/) libraries. All the indicators are accelerated in the GPU via simple python code. 

In this notebook, we are going to demo 1) the performance gains and 2) how simple to implement customized indicators.

We are going to use the common [Relative strength index](https://en.wikipedia.org/wiki/Relative_strength_index) indicator as an example in this notebook. We first load all the necessary libraries 

In [1]:
import sys
sys.path.append('../..')
import pandas as pd
import cudf
import os
from gquant.dataframe_flow.task import load_modules
load_modules(os.getenv('MODULEPATH')+'/rapids_modules/')
from rapids_modules.cuindicator import Ewm, Rolling, shift, division, multiply, summation
import numpy as np
from bqplot.traits import convert_to_date
import bqplot.pyplot as plt
import ipywidgets as widgets
from IPython.display import display
import datetime
import math
from bqplot import OHLC, LinearScale, DateScale, Axis, Figure, Bars, CATEGORY10, OrdinalScale, Lines, Tooltip
from bqplot.colorschemes import CATEGORY20
import time
from numba import cuda
from numba import njit
from numba import prange
import math
from dateutil import relativedelta
import cupy as cp

We generated fake stock 5min bar data for 20-30 years randomly and load them into the cuDF dataframe. We also define some constants for the plotting

In [2]:

@njit
def compute_prices(open_p, close_p, low_p, high_p, norms, s0,
                  sigma, mu):
    length = len(open_p)
    barsize = len(norms) // length
    price = s0
    for i in range(length):
        first = False
        low_p[i] = np.inf
        high_p[i] = -np.inf
        for bar_id in range(barsize):
            price = price + price * mu + price * sigma * norms[i * barsize + bar_id]
            if not first:
                open_p[i] = price
                first = True
            if price < low_p[i]:
                low_p[i] = price
            if price > high_p[i]:
                high_p[i] = price
        close_p[i] = price



def df_onestock(stock_id=0,
                S0=20,
                yearly_return=0.1,
                yearly_sigma=0.1,
                year_beg=1991,
                year_end=1999,
                volume_lam=50,
                snapshots=10,
                barfreq=5):
    num_years = year_end - year_beg
    one_day = relativedelta.relativedelta(days=1)
    beg_time = " 09:00"
    end_time = " 16:00"
    start = pd.Timestamp('%s-1-1' % (year_beg))
    end = pd.Timestamp('%s-1-1' % (year_end))

    all_times = []
    day = start
    while day < end:
        if day.weekday() <= 5:
            day_str = day.strftime('%Y-%m-%d')
            times = pd.date_range(day_str + beg_time,
                                  day_str + end_time,
                                  freq=("%dmin"%(barfreq)))
            all_times.extend(times)
        day += one_day

    mu =  yearly_return * num_years
    sigma = np.sqrt(yearly_sigma**2.0 * num_years)


    steps = len(all_times)

    step_mu = mu / (snapshots * steps)
    step_sigma = np.sqrt(sigma**2 / (snapshots * steps))

    open_p = np.zeros(steps)
    close_p = np.zeros(steps)
    high_p = np.zeros(steps)
    low_p = np.zeros(steps)
    norms = np.random.normal(0, 1, steps * snapshots)
    compute_prices(open_p, close_p, low_p, high_p, norms, S0, step_sigma, step_mu)
    volume = np.random.poisson(volume_lam, steps).astype(np.float64)

    df = pd.DataFrame({'Open':open_p, 'Close':close_p, 'High':high_p, 'Low':low_p,
                       'Volume':volume, 'Dte':all_times})
    df['Sm_id'] = stock_id
    return df

def gendata(num_stocks = 10, barfreq=5):
    stocks = []
    for sid in range(num_stocks):
        print('gen stock %d/%d' %(sid, num_stocks))
        s0 = np.random.normal(30.0, 5.0)
        ret = np.random.normal(0.1, 0.2)
        sig = np.random.normal(0.1, 0.2)
        year_beg = np.random.randint(1991, 2001)
        year_end = np.random.randint(2005, 2015)
        volume_lam = np.random.randint(20, 80)
        stock = df_onestock(stock_id=sid,
                             S0=s0,
                             yearly_return=ret,
                             yearly_sigma=sig,
                             year_beg=year_beg,
                             year_end=year_end,
                             volume_lam=volume_lam,
                             barfreq=barfreq)
        stocks.append(stock)
    return pd.concat(stocks)


In [3]:
all_stock = gendata(2, 1) 
df = cudf.DataFrame.from_pandas(all_stock)
df['Dte'] = df['Dte'].astype('datetime64[ms]')
ids = all_stock.Sm_id.unique()
list_stocks = { 'stock%d' % (i) : i for i in ids}

indicator_lists = ['RSI(GPU)', 'RSI(CPU)']
main_figure_height='300px'
indicator_figure_height='150px'
figure_width = '1500px'
plot_dp = 300

gen stock 0/2
gen stock 1/2


Once we have all the data loaded in the GPU framebuffer, we can use cuDF dataframe to slice dice it. Here we define two utility functions to select different stock and year of the data. The query happnes in the GPU so it is very fast. 

In [4]:
def one_stock(df, stock_id):
    return df.query('Sm_id==%s' % stock_id)

def slice_stock(df, year):
    beg_date = datetime.datetime.strptime(str(year)+'-01-01', '%Y-%m-%d')
    end_date = datetime.datetime.strptime(str(int(year)+1)+'-01-01', '%Y-%m-%d')
    return df.query('Dte<@end_date and Dte>=@beg_date')

Here we copy paste the RSI computation code from this open source project [Pandas Technical Indicators](git@github.com:Crypto-toolbox/pandas-technical-indicators.git). And we defined a wrapper function `cpu_rsi` to take the same input as the GPU version. To make a fair comparison with GPU, the RSI computation is accelerated by `numba.njit`.

In [5]:
@njit(fastmath=True, parallel=True)
def updown_movement(high, low, UpI, DoI):
    UpI[0] = 0
    DoI[0] = 0
    for i in prange(len(high)- 1):
        UpMove = high[i + 1] - high[i]
        DoMove = low[i] - low[i + 1]
        if UpMove > DoMove and UpMove > 0:
            UpI[i + 1] = UpMove
        else:
            UpI[i + 1] = 0
        if DoMove > UpMove and DoMove > 0:
            DoI[i + 1] = DoMove
        else:
            DoI[i + 1] = 0




def relative_strength_index(df, n):
    """Calculate Relative Strength Index(RSI) for given data.

    :param df: pandas.DataFrame
    :param n:
    :return: pandas.DataFrame
    """
    UpI = np.zeros(len(df))
    DoI = np.zeros(len(df))
    updown_movement(df['High'].values, df['Low'].values, UpI, DoI)
    UpI = pd.Series(UpI)
    DoI = pd.Series(DoI)
    PosDI = pd.Series(UpI.ewm(span=n, min_periods=n).mean())
    NegDI = pd.Series(DoI.ewm(span=n, min_periods=n).mean())
    RSI = pd.Series(PosDI / (PosDI + NegDI), name='RSI')
    df = df.join(RSI)
    return df

def cpu_rsi(high, low, n):
    df = pd.DataFrame()
    df['High'] = high.to_array()
    df['Low'] = low.to_array()
    out = relative_strength_index(df, n)
    return out['RSI'].as_matrix()


Equivalently this is cuIndicator version of the RSI computation, we copy the code from the cuIndicator library. 

In [6]:
number_of_threads = 128

@cuda.jit
def up_down_kernel(high_arr, low_arr, upD_arr, doD_arr, arr_len):
    i = cuda.grid(1)
    if i < arr_len - 1:
        if (math.isnan(high_arr[i]) or math.isnan(high_arr[i + 1])
            or math.isnan(low_arr[i]) or math.isnan(low_arr[i + 1])):
            upD_arr[i] = math.nan
            doD_arr[i] = math.nan
        else:
            upMove = high_arr[i + 1] - high_arr[i]
            doMove = low_arr[i] - low_arr[i + 1]
            if upMove > doMove and upMove > 0:
                upD_arr[i] = upMove
            else:
                upD_arr[i] = 0
            if doMove > upMove and doMove > 0:
                doD_arr[i] = doMove
            else:
                doD_arr[i] = 0
    elif i == arr_len - 1:
        upD_arr[i] = math.nan
        doD_arr[i] = math.nan
        
def upDownMove(high_arr, low_arr):
    upD_arr = cuda.device_array_like(high_arr)
    doD_arr = cuda.device_array_like(high_arr)
    array_len = len(high_arr)
    number_of_blocks = (array_len + (
        number_of_threads - 1)) // number_of_threads
    up_down_kernel[(number_of_blocks,), (number_of_threads,)](high_arr,
                                                              low_arr,
                                                              upD_arr,
                                                              doD_arr,
                                                              array_len)
    return upD_arr, doD_arr

# RSI GPU Version
def gpu_relative_strength_index(high_arr, low_arr, n):
    """Calculate Relative Strength Index(RSI) for given data.
    :param high_arr: high price of the bar, expect series from cudf
    :param low_arr: low price of the bar, expect series from cudf
    :param n: time steps to do EWM average
    :return: Relative Strength Index in cudf.Series
    """
    UpI, DoI = upDownMove(high_arr.to_gpu_array(), low_arr.to_gpu_array())
    UpI_s = shift(UpI, 1)
    UpI_s[0] = 0
    DoI_s = shift(DoI, 1)
    DoI_s[0] = 0
    PosDI = Ewm(n, UpI_s).mean()
    NegDI = Ewm(n, DoI_s).mean()
    RSI = division(PosDI, summation(PosDI, NegDI))
    return cudf.Series(RSI, nan_as_null=False)

As you can compare the CPU and GPU implementation, it is very similar to each other. The up and down movement part of the computation is translated into a python CUDA kernel.

Here we write some visulization code backed by bqplot library to show the stock price and indicators and compare the performance of CPU RSI vs GPU RSI

In [7]:
add_stock_selector = widgets.Dropdown(options=list_stocks.keys(), value=None, description="Add stock")
year_selector = widgets.IntSlider(description="All Year", continuous_update=False)
year_selectors = []

def get_figure(selected, df):
    with out:
        this_stock = one_stock(df, list_stocks[selected])
        this_stock_store = [this_stock]
        stock_selector = widgets.Dropdown(options=list_stocks.keys(), value=add_stock_selector.value, description="stock")
        indicator_selector = widgets.Dropdown(options=indicator_lists, value=None, description="Indicator")
        min_year = this_stock.Dte.to_array().min().astype(datetime.datetime).year
        max_year = this_stock.Dte.to_array().max().astype(datetime.datetime).year
        year_selector = widgets.IntSlider(min=min_year, max=max_year, description="Year", continuous_update=False)
        year = year_selector.value
        stock = slice_stock(this_stock, year)
        sc = LinearScale()
        sc2 = LinearScale()
        dt_scale = DateScale()
        ax_x = Axis(label='Date', scale=dt_scale)
        ax_y = Axis(label='Price', scale=sc, orientation='vertical', tick_format='0.0f')
        # Construct the marks
        tdp = stock.Dte.shape[0]
        skips = tdp // plot_dp
        ohlc = OHLC(x=stock.Dte[::skips].to_array(), y=cp.asnumpy(stock[['Open','High','Low', 'Close']].values[::skips, :]), marker='candle', scales={'x': dt_scale, 'y': sc}, format='ohlc',
                    stroke='blue', display_legend=True, labels=[selected])
        bar = Bars(x=stock.Dte[::skips].to_array(), y=stock.Volume[::skips].to_array(), 
               scales={'x': dt_scale, 'y': sc2}, padding=0.2)
        def_tt = Tooltip(fields=['x', 'y'], formats=['%Y-%m-%d', '.2f'])
        bar.tooltip = def_tt
        bar.interactions = {
            'legend_hover': 'highlight_axes',
            'hover': 'tooltip', 
            'click': 'select',
        }
        sc.min = stock.Close.min() - 0.3 * (stock.Close.max() - stock.Close.min()) 
        sc.max = stock.Close.max()
        sc2.max = stock.Volume.max()*4.0
        dt_scale.min = pd.Timestamp('%d-1-1' % year)
        dt_scale.max = pd.Timestamp('%d-1-1' % (year + 1))
        f = Figure(axes=[ax_x, ax_y], marks=[ohlc, bar], fig_margin={"top":0, "bottom":60, "left":60, "right":60})
        f.layout.height = main_figure_height
        f.layout.width = figure_width
        para_selectors = widgets.VBox([])
        color_id = [0]
        
        def update_graph(stock):
            with bar.hold_trait_notifications() as bc, ohlc.hold_trait_notifications() as oc:
                tdp = stock.Dte.shape[0]
                skips = tdp // plot_dp
                ohlc.y = cp.asnumpy(stock[['Open','High','Low', 'Close']].values[::skips, :])
                ohlc.x = stock.Dte[::skips].to_array()
            
                bar.y = stock.Volume[::skips].to_array()
                bar.x = stock.Dte[::skips].to_array()
        
                sc.min = stock.Close.min() - 0.3 * (stock.Close.max() - stock.Close.min()) 
                sc.max = stock.Close.max()
                sc2.max = stock.Volume.max()*4.0
                dt_scale.min = pd.Timestamp('%d-1-1' % year_selector.value)
                dt_scale.max = pd.Timestamp('%d-1-1' % (year_selector.value + 1))
                update_range()
        
        def year_selection(*stock):
            stock = slice_stock(this_stock_store[0], year_selector.value)
            update_graph(stock)
            
        
        def stock_selection(*stock):
            this_stock_store[0] = one_stock(df, list_stocks[stock_selector.value])
            year_selector.min = this_stock_store[0].Dte.to_array().min().astype(datetime.datetime).year
            year_selector.max = this_stock_store[0].Dte.to_array().max().astype(datetime.datetime).year
            stock = slice_stock(this_stock_store[0], year_selector.value)
            ohlc.labels = [stock_selector.value]
            update_graph(stock)
            
        def update_figure(stock, objects):
            line = objects[0]
            with line.hold_trait_notifications():
                tdp = stock.Dte.shape[0]
                skips = tdp // plot_dp
                line.y = stock['out'][::skips].to_array()
                line.x = stock.Dte[::skips].to_array()
                
        def add_new_indicator(new_fig):
            # add new figure
            # take the axis from the fig
            empty = {"top":0, "bottom":0, "left":60, "right":60}
            axis_margin = {"top":0, "bottom":60, "left":60, "right":60}
            axes_copy = multiple_figs.children[-1].axes.copy() 
            multiple_figs.children[-1].fig_margin = empty
            to_be_removed = axes_copy[0]
            axes_copy.remove(to_be_removed)
            multiple_figs.children[-1].axes = axes_copy
            new_axes = new_fig.axes.copy()
            new_fig.axes = [to_be_removed] + new_axes
            new_fig.fig_margin = axis_margin
            # add new figure
            multiple_figs.children += (new_fig,)
            
        def indicator_selection(*stock):
            if indicator_selector.value is None:
                return
            color_id[0] = (color_id[0] + 1) % len(CATEGORY20)
            
            def setup_indicator(get_para_widgets, get_parameters, process_outputs, create_figure, update_figure, indicator_fun):

                with out:                    
                    def update_df(para_selector_widgets):
                        # with out:
                        #    print('called')
                        # if para_selector_widgets[-1].value=="Busy calculating ...":
                        #    return
                        my_stock = this_stock_store[0]
                        para_selector_widgets[-1].value="Busy calculating ..."
                        parameters = get_parameters(my_stock, para_selector_widgets)
                        beg = time.time()
                        output = indicator_fun(*parameters)
                        end = time.time()
                        para_selector_widgets[-1].value="Computation time:%.3f on %d data points" % (end - beg, len(parameters[0]))
                        stock_df = process_outputs(output, my_stock)
                        stock = slice_stock(stock_df, year_selector.value)
                        return stock
                    para_selector_widgets = get_para_widgets()   
                    para_selectors.children += tuple(para_selector_widgets)                    
                    stock = update_df(para_selector_widgets)
                    figs = create_figure(stock)

                    def update_para(*para):
                        # with out:
                        #    print(para)
                        #    print('update')
                        stock = update_df(para_selector_widgets)
                        update_figure(stock, figs)
                        
                    for selector in para_selector_widgets[:-1]:
                        selector.observe(update_para, 'value')
                    year_selector.observe(update_para, 'value')
                    stock_selector.observe(update_para, 'value')

            if indicator_selector.value=='RSI(GPU)':
                with out:
                    def get_para_widgets():
                        para_selector = widgets.IntSlider(min=2, max=60, description="RSI(GPU)", continuous_update=False)
                        label = widgets.Label(value="")
                        para_selector_widgets = [para_selector, label]                    
                        return para_selector_widgets

                    def get_parameters(stock_df, para_selector_widgets):
                        return  (stock_df["High"], stock_df["Low"], para_selector_widgets[0].value)
                    
                    def process_outputs(output, stock_df):
                        stock_df['out'] = output
                        stock_df['out'] = stock_df['out'].fillna(0)
                        return stock_df
                                   
                    def create_figure(stock):
                        tdp = stock.Dte.shape[0]
                        skips = tdp // plot_dp
                        sc_co = LinearScale()
                        ax_y = Axis(label='RSI(GPU)', scale=sc_co, orientation='vertical')
                        new_line = Lines(x=stock.Dte[::skips].to_array(), y=stock['out'][::skips].to_array(), scales={'x': dt_scale, 'y': sc_co}, colors=[CATEGORY20[color_id[0]]])
                        new_fig = Figure(marks=[new_line], axes=[ax_y])
                        new_fig.layout.height = indicator_figure_height
                        new_fig.layout.width = figure_width                    
                        figs = [new_line]
                        # add new figure
                        add_new_indicator(new_fig)
                        return figs

                    
                    indicator_fun = gpu_relative_strength_index                
                    setup_indicator(get_para_widgets, get_parameters, process_outputs, create_figure, update_figure, indicator_fun)
            elif indicator_selector.value=='RSI(CPU)':
                with out:
                    def get_para_widgets():                     
                        para_selector = widgets.IntSlider(min=2, max=60, description="RSI(CPU)", continuous_update=False)
                        label = widgets.Label(value="")
                        para_selector_widgets = [para_selector, label]                    
                        return para_selector_widgets

                    def get_parameters(stock_df, para_selector_widgets):
                        return  (stock_df["High"], stock_df["Low"], para_selector_widgets[0].value)
                    
                    def process_outputs(output, stock_df):
                        stock_df['out'] = output
                        stock_df['out'] = stock_df['out'].fillna(0)
                        return stock_df
                                   
                    def create_figure(stock):
                        tdp = stock.Dte.shape[0]
                        skips = tdp // plot_dp
                        sc_co = LinearScale()
                        ax_y = Axis(label='RSI(CPU)', scale=sc_co, orientation='vertical')
                        new_line = Lines(x=stock.Dte[::skips].to_array(), y=stock['out'][::skips].to_array(), scales={'x': dt_scale, 'y': sc_co}, colors=[CATEGORY20[color_id[0]]])
                        new_fig = Figure(marks=[new_line], axes=[ax_y])
                        new_fig.layout.height = indicator_figure_height
                        new_fig.layout.width = figure_width                    
                        figs = [new_line]
                        # add new figure
                        add_new_indicator(new_fig)
                        return figs

                    
                    indicator_fun = cpu_rsi                
                    setup_indicator(get_para_widgets, get_parameters, process_outputs, create_figure, update_figure, indicator_fun)
                    
                            
            
            indicator_selector.value=None
                    
                
                    
        year_selector.observe(year_selection, 'value')
        stock_selector.observe(stock_selection, 'value')
        indicator_selector.observe(indicator_selection, 'value')
        multiple_figs = widgets.VBox([f])
        return multiple_figs, year_selector, stock_selector, indicator_selector, para_selectors

#f, year_selector, stock_selector, indicator_selector, para_selectors = get_figure(add_stock_selector.value, df)

def stock_selection(*stock):
    if add_stock_selector.value is None:
        with out:
            print('here')
        return
    f, year_selector, stock_selector, indicator_selector, para_selectors = get_figure(add_stock_selector.value, df)
    vbox = w.children[1]
    vbox.children += (widgets.HBox([f, widgets.VBox([year_selector, stock_selector, indicator_selector, para_selectors])]),) 
    with out:
        print(widgets)
    year_selectors.append(year_selector)
    update_range()
    add_stock_selector.value = None

def update_range():
    min_vals = []
    max_vals = []
    for i in year_selectors:
        min_vals.append(i.min)
        max_vals.append(i.max)
    minV = max(min_vals)
    maxV = min(max_vals)
    if minV<=maxV:
        year_selector.disabled = False
        year_selector.max = maxV   
        year_selector.min = minV
    else:
        year_selector.disabled = True
    

out = widgets.Output(layout={'border': '1px solid black'})

def update_all_ranges(*arg):
    for i in year_selectors:
        i.value = year_selector.value
    
add_stock_selector.observe(stock_selection, 'value')
year_selector.observe(update_all_ranges, 'value')
selectors = widgets.HBox([add_stock_selector, year_selector])
w = widgets.VBox([selectors, widgets.VBox([])])
w

VBox(children=(HBox(children=(Dropdown(description='Add stock', options=('stock0', 'stock1'), value=None), Int…

While playing with it, you may notice GPU RSI is very fast that it completes the computation around 10ms. The CPU RSI takes around 10s. This is about 1000x speedup. The simple example shows the data scientist just need to write a little code to translate a customized indicator into GPU computation and gains 1000x speed up. This allows massive amount of data analysis to be done in real time and data scientist can interactively play with it. 

In [8]:
out


Output(layout=Layout(border='1px solid black'), outputs=({'name': 'stdout', 'text': "<module 'ipywidgets' from…