#  Vaex - 2D demo
The purpose of this notebook is to demonstrate
some of the 2D capabilities of Vaex. The data is plotted with bqplot, and
Jupyter widgets are enabled so that the user can interactively play with the settings.

In [83]:
import numpy as np
import pandas as pd
from IPython.display import clear_output
import ipywidgets as widgets
import bqplot as bq
import vaex as vx



### Create random data to plot

In [84]:

def _noisy_random_walk(seed, n_points, step_scale, noise_scale):
    """Return a seeded random walk with independent noise added to each sample."""
    np.random.seed(seed)
    # Draw order matters for reproducibility: walk increments first, noise second.
    walk = np.cumsum(np.random.randn(n_points) * step_scale)
    return walk + np.random.randn(n_points) * noise_scale


size = 10000      # number of samples in every series
scale = 1.        # scale of the random-walk increments
scaleLocal = 20   # noise amplitude added to the first series
scaleLocal2 = 3   # noise amplitude added to the second series

x_data = np.arange(size)
y_data = _noisy_random_walk(0, size, scale, scaleLocal)
y_data2 = _noisy_random_walk(1, size, scale, scaleLocal2)


### Create initial figure and lines

In [109]:
# Scales: one x scale, a primary y scale, and a second y scale used by the
# right-hand axis.
x_sc = bq.LinearScale()
x_sc.max = size * 1.3  # x axis extends 30% beyond the number of samples
y_sc = bq.LinearScale()
y_sc2 = bq.LinearScale()

# Axes; the second y axis is created hidden.
ax_x = bq.Axis(label='X', scale=x_sc, grid_lines='solid')
ax_y = bq.Axis(
    label='Y',
    scale=y_sc,
    orientation='vertical',
    grid_lines='solid',
)
ax_y2 = bq.Axis(
    label='2nd Y',
    scale=y_sc2,
    orientation='vertical',
    side='right',
    visible=False,
    grid_lines='none',
)

# The two raw data series, both drawn against the primary y scale.
line1 = bq.Lines(
    x=x_data,
    y=y_data,
    scales={'x': x_sc, 'y': y_sc},
    colors=['blue'],
    display_legend=True,
    labels=['y1'],
    stroke_width=1.0,
)

line2 = bq.Lines(
    x=x_data,
    y=y_data2,
    scales={'x': x_sc, 'y': y_sc},
    colors=['darkgreen'],
    display_legend=True,
    labels=['y2'],
    stroke_width=1.0,
)

margins = {'top': 50, 'bottom': 40, 'left': 50, 'right': 50}
marginsLegend = {'top': 0, 'bottom': 0, 'left': 100, 'right': 20}

fig = bq.Figure(
    marks=[line1, line2],
    axes=[ax_x, ax_y],
    fig_margin=margins,
    animation_duration=1000,
)
fig.layout.width = '98%'
fig.layout.height = '400px'
fig.title = 'Vaex demo'

# Integer input for the number of bins.
figBinNum = widgets.IntText(value=500, description='Nº of bins', disabled=False)
figBinNum.layout.width = '180px'


### Create Vaex object

In [110]:
# Wrap the three numpy arrays in a vaex object with columns x, y1 and y2.
dataVX = vx.from_arrays(x=x_data, y1=y_data, y2=y_data2)

In [111]:
# Column to bin by (x) and column to aggregate (y) for the initial binned line.
binbyChannelx, binbyChannely = 'x', 'y1'

### Calculate initial binned line to plot

In [112]:
start, end = 500, 8000
samplePoints = (50,)

# Two selection strings for the same window start < x < end:
# one as a distance from the midpoint, one as a pair of inequalities.
tExpression = "abs({} - {}) < {}".format(
    (start + end)/2, binbyChannelx, (end-start)/2
)
tExpressionAnd = "({0} > {1}) & ({0} < {2})".format(binbyChannelx, start, end)
dataVX.select(tExpression, name='xpos')
dataVX.select(tExpressionAnd, name='xposAnd')

# Per-bin mean of x (bin positions) and of y (binned values), in that order.
x_Binned, y_Binned = [
    dataVX.mean(
        channel,
        binby=binbyChannelx,
        selection='xpos',
        shape=samplePoints,
        limits=[start, end],
    )
    for channel in (binbyChannelx, binbyChannely)
]

# The same binned data as two marks: one on the primary y scale and one on
# the secondary y scale. Only the primary one is added to the figure here.
lineBinned = bq.Lines(
    x=x_Binned,
    y=y_Binned,
    scales={'x': x_sc, 'y': y_sc},
    display_legend=True,
    labels=['y1 -- mean'],
)
lineBinned2ndY = bq.Lines(
    x=x_Binned,
    y=y_Binned,
    scales={'x': x_sc, 'y': y_sc2},
    display_legend=True,
    labels=['y1 -- mean'],
)
fig.marks = [line1, line2, lineBinned]


### Create Jupyter widgets

In [113]:
_EQUATION_HINT = 'Type equation: e.g.  fuel * engspd,  or  where((fuel>80)&(engspd>2000) , tempegr_val, 0)'


def _text_with_validity(value, placeholder, description):
    """Create a text input, a narrow validity marker and an HBox holding both."""
    text_box = widgets.Text(
        value=value,
        placeholder=placeholder,
        description=description,
        disabled=False,
    )
    marker = widgets.Valid(value=True, description='')
    marker.layout.width = '20px'
    return text_box, marker, widgets.HBox([text_box, marker])


def _fixed_width_number(widget_cls, value, description):
    """Create a numeric input of the given widget class, 180px wide."""
    field = widget_cls(value=value, description=description, disabled=False)
    field.layout.width = '180px'
    return field


# Expressions to aggregate; each one is shown next to a validity marker.
UserEquation, valid, equationHbox = _text_with_validity(
    'y1', _EQUATION_HINT, 'Equation1:'
)
UserEquation2, valid2, equationHbox2 = _text_with_validity(
    'y2', _EQUATION_HINT, 'Equation2:'
)

# Optional boolean selection expression.
Userselect, validselect, selectHbox = _text_with_validity(
    '', 'Type selection boolean equation', 'Selection:'
)

# Number of bins and the x window.
figBinNum = _fixed_width_number(widgets.IntText, 500, 'Nº of bins:')
startx = _fixed_width_number(widgets.FloatText, 0, 'Start x:')
endx = _fixed_width_number(widgets.FloatText, 8000, 'End x:')

# Multi-select list of the statistics that can be computed.
calcTypeWidget = widgets.SelectMultiple(
    options=[
        'Mean',
        'Max',
        'Min',
        'sum',
        'Std. dev',
        '95%ile',
        'median',
        'variance',
        'correlation',
        'covariance cov[x,y]',
    ],
    value=['Mean'],
    description='Calc type:',
    disabled=False,
)

buttonPlot = widgets.Button(
    description='Plot',
    layout=widgets.Layout(width='180px', height='25px'),
)

# Progress bar with the same width as the plot button.
updatePlots_progressBar = widgets.IntProgress(
    value=0,
    min=0,
    max=10,
    step=1,
    description='',
    bar_style='',  # 'success', 'info', 'warning', 'danger' or ''
    orientation='horizontal',
)
updatePlots_progressBar.layout.width = buttonPlot.layout.width
updatePlots_progressBar.layout.height = '15px'

helpLabelWidget1 = widgets.HTML(value="<b>The lines y1 and y2 are fixed.</b>")
helpLabelWidget = widgets.HTML(value="<b>Edit the settings and hit plot....</b>")


def on_buttonPlot_clicked(buttonPlot):
    """Click handler for the Plot button: recompute and redraw the binned lines.

    Args:
        buttonPlot: argument supplied by the click callback; not used here.
    """
    updatePlot()


buttonPlot.on_click(on_buttonPlot_clicked)

# Stack the plot button above its progress bar.
buttonPlotBox = widgets.VBox([buttonPlot, updatePlots_progressBar])
    
    
    

### Update plot function

In [124]:
binbyChannelx = 'x'

# Statistics computed once per equation:
#   calc type -> (vaex method name,
#                 extra keyword arguments for the call,
#                 extra text inserted in the echoed command,
#                 True when the result is drawn against the secondary y axis)
_PER_CHANNEL_STATS = {
    'Mean': ('mean', {}, '', False),
    'Max': ('max', {}, '', False),
    'Min': ('min', {}, '', False),
    'sum': ('sum', {}, '', True),
    'Std. dev': ('std', {}, '', True),
    'variance': ('var', {}, '', True),
    # percentile_approx defaults to the 50th percentile, so 95 must be passed.
    '95%ile': ('percentile_approx', {'percentage': 95.0}, ' percentage=95.0 ,', False),
    'median': ('median_approx', {}, ' percentage=50.0 ,', False),
}

# Statistics computed on the pair of equations (calc type -> vaex method name).
# Their results are always drawn against the secondary y axis.
_PAIR_STATS = {
    'correlation': 'correlation',
    'covariance cov[x,y]': 'covar',
}


def updatePlot():
    """Recompute the binned statistics from the widget settings and redraw them.

    Reads the equations, the optional selection, the x window, the number of
    bins and the chosen calculation types from the widgets. It selects the
    x window on ``dataVX`` (and-ed with the user selection when one is given),
    computes every requested statistic binned by ``binbyChannelx`` and assigns
    the results to ``lineBinned`` (primary y axis) and ``lineBinned2ndY``
    (secondary y axis). The secondary axis and mark are only shown when at
    least one secondary-axis statistic was computed.

    Every vaex command is echoed with ``print``. The progress bar is advanced
    while computing and is always reset to 0 afterwards, even if a vaex call
    raises (for example on an invalid equation).
    """
    binbyChannelys = [
        equation.value
        for equation in (UserEquation, UserEquation2)
        if equation.value != ''
    ]

    updatePlots_progressBar.max = len(calcTypeWidget.value) * len(binbyChannelys)

    start = startx.value
    end = endx.value
    samplePoints = figBinNum.value

    def binned_kwargs(binby):
        # Fresh keyword arguments for every call so nothing is shared between calls.
        return dict(binby=binby, selection=True, shape=(samplePoints,), limits=[start, end])

    # Common tail of every echoed command.
    echo_tail = (', selection = True, shape = (' + str(samplePoints)
                 + ',) , limits=[' + str(start) + ', ' + str(end) + '])')

    tExpressionAnd = "(" + binbyChannelx + " > " + str(start) + ") & (" + binbyChannelx + " < " + str(end) + ")"
    dataVX.select(tExpressionAnd)

    # Bin positions are computed on the x window only, before the user
    # selection is and-ed in.
    x_Binned = dataVX.mean(binbyChannelx, **binned_kwargs(binbyChannelx))
    print('x_Binned = dataVX.mean(' + binbyChannelx + ' , binby=' + binbyChannelx + echo_tail)

    print('dataVX.select(' + tExpressionAnd + ')')

    if Userselect.value != '':
        print('dataVX.select(' + Userselect.value + ', mode = \'and\')')
        dataVX.select(Userselect.value, mode='and')

    y_Binned = []   # results for the primary y axis
    y_Binned2 = []  # results for the secondary y axis
    labels = []
    labels2 = []

    try:
        for calcType in calcTypeWidget.value:
            if calcType in _PER_CHANNEL_STATS:
                vaexCommand, extra_kwargs, extra_echo, on_second_axis = _PER_CHANNEL_STATS[calcType]
                if on_second_axis:
                    results, names = y_Binned2, labels2
                else:
                    results, names = y_Binned, labels
                for binbyChannely in binbyChannelys:
                    updatePlots_progressBar.value += 1
                    call_kwargs = binned_kwargs(binbyChannelx)
                    call_kwargs.update(extra_kwargs)
                    results.append(getattr(dataVX, vaexCommand)(binbyChannely, **call_kwargs))
                    names.append(binbyChannely + '--' + calcType)
                    print('dataVX.' + vaexCommand + '("' + binbyChannely + '",' + extra_echo
                          + ' binby="' + binbyChannelx + '"' + echo_tail)
            elif calcType in _PAIR_STATS:
                if len(binbyChannelys) == 2:
                    # One computation covers both equations.
                    updatePlots_progressBar.value += len(binbyChannelys)
                    vaexCommand = _PAIR_STATS[calcType]
                    first, second = binbyChannelys
                    y_Binned2.append(
                        getattr(dataVX, vaexCommand)(first, second, **binned_kwargs([binbyChannelx]))
                    )
                    # Label with both full equations.
                    labels2.append(first + ',' + second + '--' + calcType)
                    print('dataVX.' + vaexCommand + '("' + first + '","' + second
                          + '", binby=["' + binbyChannelx + '"]' + echo_tail)
                else:
                    print('Please create 2 equations to compute ' + calcType)

        lineBinned.x = x_Binned
        lineBinned.y = y_Binned
        if y_Binned2:
            lineBinned2ndY.x = x_Binned
            lineBinned2ndY.y = y_Binned2
            ax_y2.visible = True
            fig.marks = [line1, line2, lineBinned, lineBinned2ndY]
            fig.axes = [ax_x, ax_y, ax_y2]
            lineBinned2ndY.labels = labels2
        else:
            ax_y2.visible = False
            fig.marks = [line1, line2, lineBinned]
            fig.axes = [ax_x, ax_y]

        lineBinned.labels = labels
    finally:
        # Never leave the progress bar part-filled, even when a vaex call fails.
        updatePlots_progressBar.value = 0

    print('------------------------------------------------------------------------')

In [125]:
# Left column: help texts and the plot button. Right column: all the inputs.
lhs = widgets.VBox([helpLabelWidget1, helpLabelWidget, buttonPlotBox])
rhs = widgets.VBox([
    figBinNum,
    widgets.HBox([startx, endx]),
    equationHbox,
    equationHbox2,
    selectHbox,
    calcTypeWidget,
])
bottom = widgets.HBox([lhs, rhs])

# Last expression of the cell: the figure stacked above the controls.
widgets.VBox([fig, bottom])

x_Binned = dataVX.mean(x , binby=x, selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.select((x > 0.0) & (x < 8000.0))
dataVX.mean("y1", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.mean("y2", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
------------------------------------------------------------------------
x_Binned = dataVX.mean(x , binby=x, selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.select((x > 0.0) & (x < 8000.0))
dataVX.std("y1", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.std("y2", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
------------------------------------------------------------------------
x_Binned = dataVX.mean(x , binby=x, selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.select((x > 0.0) & (x < 8000.0))
dataVX.max("y1", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.max("y2", binby="x", 

In [116]:
# Only include rows in the calculation where y2 is less than 50
Userselect.value = 'y2 < 50'
updatePlot()

x_Binned = dataVX.mean(x , binby=x, selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.select((x > 0.0) & (x < 8000.0))
dataVX.select(y2 < 50, mode = 'and')
dataVX.mean("y1", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.mean("y2", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
------------------------------------------------------------------------


In [117]:
# Use an arithmetic expression of two columns as the first equation, then replot
UserEquation.value = 'y1*y2 / 10'
updatePlot()

x_Binned = dataVX.mean(x , binby=x, selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.select((x > 0.0) & (x < 8000.0))
dataVX.select(y2 < 50, mode = 'and')
dataVX.mean("y1*y2 / 10", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.mean("y2", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
------------------------------------------------------------------------


In [118]:
# Using the numexpr where function, cap the minimum to -1000:
# values of y1*y2 / 10 below -1000 are replaced by -1000
UserEquation.value = 'where(y1*y2 / 10 < -1000 , -1000 ,y1*y2 / 10) '
updatePlot()

x_Binned = dataVX.mean(x , binby=x, selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.select((x > 0.0) & (x < 8000.0))
dataVX.select(y2 < 50, mode = 'and')
dataVX.mean("where(y1*y2 / 10 < -1000 , -1000 ,y1*y2 / 10) ", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.mean("y2", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
------------------------------------------------------------------------


In [119]:
# Reset: clear the extra selection, restore the first equation to y1 and replot
Userselect.value = ''
UserEquation.value = 'y1'
updatePlot()

x_Binned = dataVX.mean(x , binby=x, selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.select((x > 0.0) & (x < 8000.0))
dataVX.mean("y1", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
dataVX.mean("y2", binby="x", selection = True, shape = (500,) , limits=[0.0, 8000.0])
------------------------------------------------------------------------
