# Vaex: Reading and Processing Huge Datasets in Seconds

## What is Vaex?

Vaex is a high-performance Python library for lazy out-of-core DataFrames (similar to pandas), used to visualize and explore big tabular datasets. It calculates statistics such as mean, sum, count, standard deviation, etc., on an N-dimensional grid for more than a billion (10^9) samples/rows per second. Visualization is done using histograms, density plots, and 3D volume rendering, allowing interactive exploration of big data. Vaex uses memory mapping, a zero-memory-copy policy, and lazy computations for best performance (no memory wasted).

In [1]:
import vaex
import pandas as pd
import numpy as np

In [2]:
# Build the synthetic benchmark table: n_rows x n_cols random integers in [0, 100).
SEED = 42  # fixed seed so that Restart & Run All regenerates the same table
n_rows = 1_000_000
n_cols = 500

rng = np.random.default_rng(SEED)
# Pin the dtype explicitly: the default integer width of the legacy
# np.random.randint depends on the platform / NumPy version, so the in-memory
# size reported by df.info() would otherwise vary between machines.
values = rng.integers(0, 100, size=(n_rows, n_cols), dtype=np.int32)
df = pd.DataFrame(values, columns=[f'col{i}' for i in range(n_cols)])

In [3]:
# Preview the first five rows of the pandas frame (five is head()'s default).
df.head(n=5)

Unnamed: 0,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,...,col490,col491,col492,col493,col494,col495,col496,col497,col498,col499
0,44,26,65,59,98,69,35,61,81,24,...,56,71,44,64,14,71,2,0,92,15
1,25,71,76,49,9,63,91,71,20,33,...,18,36,10,95,98,67,47,89,89,10
2,70,40,87,22,2,9,37,68,87,37,...,84,18,41,64,73,17,6,74,7,26
3,50,13,59,6,72,63,63,68,17,93,...,16,83,45,17,73,5,96,17,23,64
4,74,13,27,32,29,48,76,32,79,34,...,69,87,15,4,78,61,46,68,25,27


In [4]:
# Summarise the frame (index range, column count, dtypes) together with its
# memory footprint; 'deep' requests an exact rather than estimated figure.
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Columns: 500 entries, col0 to col499
dtypes: int32(500)
memory usage: 1.9 GB


## Creating CSV File

In [5]:
# Write the pandas frame to a CSV file in the working directory. The row index
# is left out so the file holds only the col* columns.
file_path = 'final_data.csv'
df.to_csv(path_or_buf=file_path, index=False)

## Creating HDF5 file

In [6]:
# Read the CSV in chunks, convert it to HDF5 (convert=True) and open the result
# as a vaex DataFrame.
# chunk_size is a number of rows. The previous value (5,000,000) was larger than
# the 1,000,000 rows in the file, so the whole CSV was parsed in a single piece.
# 100,000 rows x 500 columns per chunk keeps peak memory during conversion well
# below the size of the full table.
vaex_df = vaex.from_csv(file_path, convert=True, chunk_size=100_000)

In [7]:
# Show the concrete class returned by vaex.from_csv, to confirm we now hold a
# vaex DataFrame rather than a pandas one.
type(vaex_df)

vaex.dataframe.DataFrameLocal

In [8]:
# Preview the first ten rows of the vaex frame (ten is vaex's head() default).
vaex_df.head(n=10)

#,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13,col14,col15,col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41,col42,col43,col44,col45,col46,col47,col48,col49,col50,col51,col52,col53,col54,col55,col56,col57,col58,col59,col60,col61,col62,col63,col64,col65,col66,col67,col68,col69,col70,col71,col72,col73,col74,col75,col76,col77,col78,col79,col80,col81,col82,col83,col84,col85,col86,col87,col88,col89,col90,col91,col92,col93,col94,col95,col96,col97,col98,col99,...,col400,col401,col402,col403,col404,col405,col406,col407,col408,col409,col410,col411,col412,col413,col414,col415,col416,col417,col418,col419,col420,col421,col422,col423,col424,col425,col426,col427,col428,col429,col430,col431,col432,col433,col434,col435,col436,col437,col438,col439,col440,col441,col442,col443,col444,col445,col446,col447,col448,col449,col450,col451,col452,col453,col454,col455,col456,col457,col458,col459,col460,col461,col462,col463,col464,col465,col466,col467,col468,col469,col470,col471,col472,col473,col474,col475,col476,col477,col478,col479,col480,col481,col482,col483,col484,col485,col486,col487,col488,col489,col490,col491,col492,col493,col494,col495,col496,col497,col498,col499
0,44,26,65,59,98,69,35,61,81,24,48,29,29,35,3,4,21,30,76,92,72,98,46,44,2,11,20,89,82,86,71,91,74,48,83,39,28,45,23,16,4,30,57,36,30,60,34,81,4,82,4,34,2,93,48,1,34,31,10,42,54,36,81,36,59,82,53,61,38,44,43,2,58,52,33,85,97,10,30,13,80,39,76,90,90,65,27,8,87,96,18,58,63,98,56,20,15,64,63,0,...,60,37,65,1,2,38,23,30,94,29,24,33,90,94,62,50,65,86,13,91,80,70,17,99,13,42,41,95,37,65,0,9,60,32,33,81,83,5,89,92,6,96,8,29,77,26,14,2,39,28,64,27,60,65,94,8,91,7,27,30,67,16,76,23,61,19,55,68,64,28,29,54,67,96,55,94,67,64,96,43,84,45,82,89,89,84,16,3,73,50,56,71,44,64,14,71,2,0,92,15
1,25,71,76,49,9,63,91,71,20,33,74,0,17,23,40,34,19,11,8,18,79,96,79,22,10,9,70,67,19,30,84,29,22,40,77,14,84,59,31,41,15,79,27,87,44,21,70,73,78,32,46,80,32,90,39,89,87,33,72,59,67,18,57,53,90,79,9,35,6,79,31,99,30,53,55,97,13,13,52,1,57,6,96,12,89,24,71,63,98,81,71,2,45,69,98,15,27,62,84,74,...,95,31,0,84,5,69,51,46,85,15,22,6,5,63,25,89,83,20,50,85,94,11,53,36,0,71,91,9,72,36,77,58,65,64,91,52,76,58,81,18,58,74,38,33,53,61,42,56,28,56,52,13,58,48,73,56,35,56,76,72,8,76,6,37,3,32,11,72,64,46,68,93,58,4,96,89,74,85,11,40,79,56,30,43,12,51,83,99,11,13,18,36,10,95,98,67,47,89,89,10
2,70,40,87,22,2,9,37,68,87,37,90,87,38,3,20,13,82,21,47,65,72,18,23,35,9,25,47,86,40,18,3,60,65,24,30,39,82,21,0,72,6,33,22,91,92,79,5,25,97,82,93,19,56,68,53,86,76,43,61,91,21,98,4,92,62,95,30,25,8,29,47,96,74,5,4,73,67,66,51,25,27,75,35,20,78,20,62,84,61,42,54,26,76,35,10,26,20,38,70,32,...,76,68,28,83,90,75,76,60,93,10,42,40,65,6,29,63,52,32,68,2,46,79,20,56,8,85,43,54,94,32,44,67,54,50,4,58,24,41,97,1,1,44,39,13,40,50,57,46,8,91,61,84,66,45,78,41,56,2,3,7,68,64,10,90,25,15,84,28,3,66,73,79,38,34,88,2,31,41,35,56,84,49,97,6,68,67,32,40,21,22,84,18,41,64,73,17,6,74,7,26
3,50,13,59,6,72,63,63,68,17,93,54,13,56,88,32,72,35,61,32,80,55,24,71,99,88,82,44,54,59,73,10,80,94,11,61,78,23,55,62,47,26,59,17,1,86,1,6,61,66,36,6,0,56,96,88,29,99,23,93,54,83,94,54,72,96,65,98,7,63,76,20,76,14,54,72,41,22,96,38,67,54,17,66,47,63,33,83,82,75,4,66,99,32,65,37,96,48,67,18,10,...,58,79,18,54,8,14,6,86,97,19,82,47,50,22,8,37,45,46,29,37,71,99,89,90,90,79,58,64,7,97,44,40,85,2,9,53,2,95,29,19,76,28,38,28,45,51,87,92,73,61,57,98,60,22,14,26,27,37,25,58,59,79,0,97,28,56,26,85,75,86,81,61,42,41,59,98,61,1,51,15,80,3,54,67,50,89,20,20,47,26,16,83,45,17,73,5,96,17,23,64
4,74,13,27,32,29,48,76,32,79,34,29,49,53,15,66,76,67,31,1,36,87,54,85,33,19,56,9,21,53,72,60,44,43,27,70,12,46,99,10,42,52,56,55,12,67,32,73,22,80,73,0,6,3,5,93,22,57,33,97,24,47,52,52,18,56,82,63,13,32,66,70,51,69,92,18,69,0,11,58,33,65,30,80,73,64,52,69,40,93,97,84,50,94,36,91,92,25,41,37,22,...,92,17,89,90,36,97,57,91,32,31,3,25,85,15,36,37,65,36,41,3,70,20,3,9,24,63,82,67,80,41,40,12,59,57,10,61,96,1,86,3,38,54,77,83,12,73,74,12,62,28,8,1,22,90,92,12,89,68,25,1,42,73,67,76,45,23,11,37,18,51,94,75,32,91,67,1,64,3,75,40,34,59,60,67,39,85,17,2,21,32,69,87,15,4,78,61,46,68,25,27
5,91,11,72,37,61,31,53,68,70,65,66,55,75,11,55,59,88,86,84,14,42,32,43,96,53,91,68,61,75,83,16,79,61,1,25,4,32,53,19,77,26,70,47,36,58,0,38,9,96,39,51,62,8,56,1,70,18,4,47,5,97,95,65,58,27,13,12,29,22,26,96,83,34,69,73,51,85,43,8,8,59,1,19,13,8,17,59,63,8,99,34,24,24,41,74,42,64,60,59,78,...,92,32,16,33,67,94,32,23,58,38,6,91,34,0,37,27,64,45,93,38,25,91,95,70,68,73,59,19,89,63,36,2,63,51,93,82,78,94,62,68,73,84,51,18,17,5,94,79,82,22,8,55,36,1,17,17,44,54,89,39,17,88,51,34,70,45,80,93,87,43,1,78,83,75,98,54,99,16,17,75,9,76,98,37,96,59,78,13,25,42,56,15,33,49,8,93,49,82,67,18
6,17,72,46,72,26,20,86,61,19,96,23,22,35,1,64,58,87,78,1,25,37,0,68,37,22,3,51,33,21,4,13,75,11,57,67,27,48,90,21,91,50,99,38,93,3,50,51,68,67,40,14,71,55,0,22,98,63,86,18,46,50,97,14,59,43,56,59,67,37,46,26,34,60,15,9,6,22,68,67,69,13,87,58,96,18,60,30,75,74,50,25,84,20,97,28,96,33,21,39,84,...,14,32,49,51,84,2,49,93,20,59,89,36,92,60,5,73,46,21,25,45,76,41,22,77,32,92,66,96,15,78,38,0,94,64,42,21,12,33,21,85,55,54,3,64,23,79,74,79,18,79,53,74,75,33,19,43,89,63,33,82,42,22,21,19,30,36,93,54,34,31,28,58,18,66,47,60,15,11,33,49,38,39,35,6,22,53,28,85,82,12,8,22,88,66,22,56,86,55,29,46
7,31,83,94,50,44,69,50,91,81,26,77,32,5,85,53,45,3,52,86,10,14,2,58,19,18,51,31,10,19,3,81,76,29,20,73,80,19,33,46,11,32,89,54,9,29,6,87,82,5,16,86,95,73,54,73,82,43,80,44,20,56,34,45,73,83,49,43,60,62,40,62,61,65,59,7,53,96,26,96,5,20,8,16,13,48,50,78,27,62,79,12,3,24,32,29,75,66,47,34,27,...,91,22,32,53,29,0,2,22,85,44,96,46,87,89,1,73,75,57,56,38,14,35,96,21,46,35,19,93,12,11,83,38,57,77,83,43,7,6,93,26,13,96,81,63,65,17,68,84,21,20,71,85,37,4,19,76,94,6,2,7,94,84,79,58,7,98,76,91,85,46,89,0,48,0,70,13,52,25,81,91,27,32,8,47,24,0,23,82,14,47,36,11,34,20,16,95,97,48,54,27
8,87,77,61,2,95,25,84,38,68,54,13,32,19,84,85,38,38,95,4,94,98,94,84,35,47,68,48,55,18,19,75,49,89,64,3,16,84,2,65,54,86,2,91,9,41,95,55,21,86,58,0,22,43,29,2,74,72,19,7,66,9,16,26,58,27,92,86,86,44,26,85,88,29,1,57,80,39,96,13,68,48,52,5,91,24,27,43,51,21,80,11,57,23,43,53,37,86,27,5,84,...,50,77,10,24,30,21,32,5,97,62,50,1,2,13,20,19,35,43,83,48,28,93,75,14,45,19,58,75,64,96,56,19,5,43,84,45,44,57,37,74,82,74,78,31,28,82,76,20,0,43,55,19,29,39,69,45,41,77,77,5,14,70,10,81,4,16,9,12,64,47,38,88,82,31,18,7,89,89,60,39,94,28,86,12,55,52,52,95,30,74,78,59,59,13,81,14,58,51,74,4
9,0,43,16,20,61,73,5,43,24,20,6,45,95,80,49,46,85,28,39,88,7,77,20,72,94,20,58,88,74,27,61,74,17,45,41,13,14,69,99,38,32,79,7,59,69,3,57,26,99,4,31,86,64,27,33,7,67,61,76,13,67,40,76,62,32,61,71,66,62,55,87,39,62,23,26,17,83,97,22,31,82,20,49,1,26,5,25,14,62,1,3,19,96,12,9,58,6,51,40,82,...,1,4,67,84,92,90,69,16,36,85,75,49,34,85,9,8,77,66,49,47,90,40,47,2,69,75,71,85,69,16,3,8,81,40,34,58,73,70,6,4,76,31,47,43,10,30,79,19,59,51,74,77,18,88,31,56,83,55,14,64,42,0,43,67,52,67,77,91,89,35,81,28,98,48,63,45,86,92,15,95,8,6,74,78,85,18,80,18,35,20,89,36,28,23,47,65,3,43,91,86


### Expression system

Don't waste memory or time on feature engineering: we (lazily) transform your data when needed.

In [9]:
%%time
vaex_df['multiplication_col13'] = vaex_df.col1 * vaex_df.col3

CPU times: total: 0 ns
Wall time: 6.98 ms


In [10]:
# Display the new column's expression: length, dtype and a preview of values.
vaex_df.multiplication_col13

Expression = multiplication_col13
Length: 1,000,000 dtype: int64 (column)
---------------------------------------
     0  1534
     1  3479
     2   880
     3    78
     4   416
    ...     
999995  1780
999996  1196
999997  6600
999998   684
999999  1950

## Out-of-core DataFrame

Filtering and evaluating expressions will not waste memory by making copies; the data is kept untouched on disk, and will be streamed only when needed. Delay the time before you need a cluster.

In [11]:
# Show only the rows whose col2 value is strictly greater than 70.
vaex_df[vaex_df['col2'] > 70]

#,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13,col14,col15,col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41,col42,col43,col44,col45,col46,col47,col48,col49,col50,col51,col52,col53,col54,col55,col56,col57,col58,col59,col60,col61,col62,col63,col64,col65,col66,col67,col68,col69,col70,col71,col72,col73,col74,col75,col76,col77,col78,col79,col80,col81,col82,col83,col84,col85,col86,col87,col88,col89,col90,col91,col92,col93,col94,col95,col96,col97,col98,col99,...,col401,col402,col403,col404,col405,col406,col407,col408,col409,col410,col411,col412,col413,col414,col415,col416,col417,col418,col419,col420,col421,col422,col423,col424,col425,col426,col427,col428,col429,col430,col431,col432,col433,col434,col435,col436,col437,col438,col439,col440,col441,col442,col443,col444,col445,col446,col447,col448,col449,col450,col451,col452,col453,col454,col455,col456,col457,col458,col459,col460,col461,col462,col463,col464,col465,col466,col467,col468,col469,col470,col471,col472,col473,col474,col475,col476,col477,col478,col479,col480,col481,col482,col483,col484,col485,col486,col487,col488,col489,col490,col491,col492,col493,col494,col495,col496,col497,col498,col499,multiplication_col13
0,25,71,76,49,9,63,91,71,20,33,74,0,17,23,40,34,19,11,8,18,79,96,79,22,10,9,70,67,19,30,84,29,22,40,77,14,84,59,31,41,15,79,27,87,44,21,70,73,78,32,46,80,32,90,39,89,87,33,72,59,67,18,57,53,90,79,9,35,6,79,31,99,30,53,55,97,13,13,52,1,57,6,96,12,89,24,71,63,98,81,71,2,45,69,98,15,27,62,84,74,...,31,0,84,5,69,51,46,85,15,22,6,5,63,25,89,83,20,50,85,94,11,53,36,0,71,91,9,72,36,77,58,65,64,91,52,76,58,81,18,58,74,38,33,53,61,42,56,28,56,52,13,58,48,73,56,35,56,76,72,8,76,6,37,3,32,11,72,64,46,68,93,58,4,96,89,74,85,11,40,79,56,30,43,12,51,83,99,11,13,18,36,10,95,98,67,47,89,89,10,3479.0
1,70,40,87,22,2,9,37,68,87,37,90,87,38,3,20,13,82,21,47,65,72,18,23,35,9,25,47,86,40,18,3,60,65,24,30,39,82,21,0,72,6,33,22,91,92,79,5,25,97,82,93,19,56,68,53,86,76,43,61,91,21,98,4,92,62,95,30,25,8,29,47,96,74,5,4,73,67,66,51,25,27,75,35,20,78,20,62,84,61,42,54,26,76,35,10,26,20,38,70,32,...,68,28,83,90,75,76,60,93,10,42,40,65,6,29,63,52,32,68,2,46,79,20,56,8,85,43,54,94,32,44,67,54,50,4,58,24,41,97,1,1,44,39,13,40,50,57,46,8,91,61,84,66,45,78,41,56,2,3,7,68,64,10,90,25,15,84,28,3,66,73,79,38,34,88,2,31,41,35,56,84,49,97,6,68,67,32,40,21,22,84,18,41,64,73,17,6,74,7,26,880.0
2,91,11,72,37,61,31,53,68,70,65,66,55,75,11,55,59,88,86,84,14,42,32,43,96,53,91,68,61,75,83,16,79,61,1,25,4,32,53,19,77,26,70,47,36,58,0,38,9,96,39,51,62,8,56,1,70,18,4,47,5,97,95,65,58,27,13,12,29,22,26,96,83,34,69,73,51,85,43,8,8,59,1,19,13,8,17,59,63,8,99,34,24,24,41,74,42,64,60,59,78,...,32,16,33,67,94,32,23,58,38,6,91,34,0,37,27,64,45,93,38,25,91,95,70,68,73,59,19,89,63,36,2,63,51,93,82,78,94,62,68,73,84,51,18,17,5,94,79,82,22,8,55,36,1,17,17,44,54,89,39,17,88,51,34,70,45,80,93,87,43,1,78,83,75,98,54,99,16,17,75,9,76,98,37,96,59,78,13,25,42,56,15,33,49,8,93,49,82,67,18,407.0
3,31,83,94,50,44,69,50,91,81,26,77,32,5,85,53,45,3,52,86,10,14,2,58,19,18,51,31,10,19,3,81,76,29,20,73,80,19,33,46,11,32,89,54,9,29,6,87,82,5,16,86,95,73,54,73,82,43,80,44,20,56,34,45,73,83,49,43,60,62,40,62,61,65,59,7,53,96,26,96,5,20,8,16,13,48,50,78,27,62,79,12,3,24,32,29,75,66,47,34,27,...,22,32,53,29,0,2,22,85,44,96,46,87,89,1,73,75,57,56,38,14,35,96,21,46,35,19,93,12,11,83,38,57,77,83,43,7,6,93,26,13,96,81,63,65,17,68,84,21,20,71,85,37,4,19,76,94,6,2,7,94,84,79,58,7,98,76,91,85,46,89,0,48,0,70,13,52,25,81,91,27,32,8,47,24,0,23,82,14,47,36,11,34,20,16,95,97,48,54,27,4150.0
4,6,92,87,3,82,72,1,22,28,6,73,76,39,86,4,96,65,2,71,63,30,27,80,88,39,20,4,19,90,35,16,90,7,81,29,97,53,92,69,52,90,98,9,71,7,87,14,30,40,20,56,52,42,48,56,78,74,25,51,41,35,9,80,65,80,59,86,95,40,46,70,28,49,44,70,85,77,93,10,14,18,13,41,79,57,48,64,49,12,98,81,72,76,95,69,11,83,26,70,77,...,85,55,88,93,32,75,78,26,16,75,87,80,22,38,97,48,44,20,62,12,89,26,68,47,83,23,8,42,70,56,21,46,22,7,66,22,66,59,83,88,64,64,65,83,44,4,35,93,83,71,79,95,3,14,69,56,64,16,5,1,36,7,71,86,89,27,47,32,20,25,60,65,16,48,88,75,21,38,80,67,79,49,38,18,27,5,86,27,0,6,68,31,22,92,57,30,53,19,44,276.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,564.0
290428,44,47,76,12,47,24,45,81,56,82,26,93,67,7,66,96,56,82,41,81,37,22,95,55,47,47,63,28,30,64,27,96,78,7,61,51,28,44,90,33,96,1,62,35,66,47,82,84,71,32,88,79,62,86,93,28,82,34,62,61,94,1,14,58,43,58,97,70,84,49,23,57,23,68,70,12,30,21,18,38,28,57,78,4,38,33,65,38,26,17,69,90,23,0,32,95,54,51,31,71,...,76,21,59,85,42,76,90,40,86,52,59,10,60,30,78,5,44,84,98,21,92,69,82,31,41,60,62,58,24,19,8,60,63,45,82,91,20,26,99,19,96,92,38,36,56,27,21,51,24,4,39,45,50,42,50,57,23,85,18,7,61,4,97,95,27,12,71,19,75,38,7,68,24,2,71,14,99,30,61,1,7,57,57,27,82,71,58,13,57,40,40,13,17,57,72,8,84,65,40,6786.0
290429,41,78,93,87,45,7,14,9,97,76,83,29,46,47,66,2,88,20,81,27,84,80,74,92,89,23,12,8,20,99,94,18,61,47,16,97,82,72,31,1,65,56,26,57,95,0,15,88,31,69,7,8,60,28,72,30,83,38,29,37,27,88,15,43,90,23,25,22,96,53,41,99,88,15,12,84,15,53,23,31,65,60,10,38,30,93,8,32,76,30,61,19,37,24,61,60,20,9,57,57,...,10,97,62,31,19,41,8,98,43,74,6,9,23,92,36,4,80,44,57,9,46,56,9,84,91,36,16,79,83,98,98,26,78,33,61,46,57,23,32,28,60,76,96,31,54,87,10,60,95,16,51,83,41,79,67,72,0,15,15,6,54,92,40,94,14,83,1,24,67,98,73,39,87,18,14,96,51,45,28,37,39,4,1,38,20,12,84,60,3,86,82,55,19,92,7,15,81,38,89,1780.0
290430,16,89,93,20,10,98,59,46,22,56,92,2,8,81,75,31,2,6,54,53,7,59,42,77,1,37,0,60,49,81,22,46,56,2,17,53,37,91,12,78,20,81,97,76,97,63,29,89,81,23,19,24,9,31,63,66,91,47,94,3,8,17,49,32,14,33,37,43,78,64,17,45,16,17,46,89,47,38,43,64,95,40,21,65,40,58,87,40,25,30,87,31,44,57,25,70,46,19,31,26,...,19,27,85,98,13,71,35,73,15,24,36,49,32,9,76,89,56,70,9,15,37,41,47,34,62,31,92,97,38,68,18,99,13,25,87,75,76,22,95,6,57,26,73,69,57,70,73,65,12,81,40,27,19,68,7,46,56,20,3,56,59,47,98,15,79,93,38,77,18,22,83,79,77,36,83,77,85,1,27,40,97,31,50,37,88,28,77,10,37,47,29,60,41,86,33,24,47,2,90,6600.0
290431,16,88,78,75,35,76,79,81,69,2,15,25,37,96,38,66,45,4,31,53,49,64,62,64,89,71,15,50,82,13,52,87,9,15,54,21,37,25,28,87,17,61,51,95,92,87,4,43,3,85,3,83,29,12,12,95,97,18,15,16,55,72,98,92,80,50,20,23,56,56,11,88,62,26,21,28,47,4,25,53,69,84,38,75,29,26,73,4,20,57,79,87,42,14,54,11,45,17,11,66,...,27,40,86,68,4,30,83,67,51,10,72,61,79,6,75,5,5,46,26,35,15,0,46,18,0,67,71,86,57,80,52,93,76,67,6,94,88,19,9,48,76,84,21,38,14,35,62,55,33,69,2,88,37,31,49,86,30,71,15,31,34,70,16,88,64,6,30,74,73,74,62,55,10,56,54,69,90,81,49,95,6,65,25,82,50,74,36,18,15,93,45,77,89,70,96,28,79,94,39,1950.0


In [12]:
# Keep the col2 > 70 selection under a name for the cells below.
# Filtering here does not make a memory copy of the data.
dff = vaex_df[vaex_df['col2'] > 70]

In [13]:
# All the algorithms work out of core; the limit is the size of your hard drive.
# Compute the minimum and maximum of col2 over the filtered rows, with a
# progress widget shown while the pass runs.
dff['col2'].minmax(progress='widget')

HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))

array([71, 99], dtype=int64)

## Fast groupby / aggregations

Vaex implements parallelized, highly performant groupby operations, especially when using categories (>1 billion/second).

In [14]:
%%time
vaex_df_group = vaex_df.groupby(vaex_df.col1, agg = vaex.agg.mean(vaex_df.col4))
vaex_df_group

CPU times: total: 31.2 ms
Wall time: 165 ms


#,col1,col4_mean
0,0,50.21832030863587
1,1,49.48175035289373
2,2,49.03078002807299
3,3,48.738436679458694
4,4,49.74531499202552
...,...,...
95,95,49.08297958358601
96,96,49.15543314972459
97,97,49.28106899705315
98,98,49.40007926285544


In [15]:
%%time
vaex_df.groupby(vaex_df.col1, agg = 'count')

CPU times: total: 15.6 ms
Wall time: 35.9 ms


#,col1,count
0,0,10109
1,1,9918
2,2,9974
3,3,9902
4,4,10032
...,...,...
95,95,9894
96,96,9985
97,97,9841
98,98,10093
