# Vaex: Reading and Processing Huge Datasets in Seconds

### What is Vaex?
Vaex is a high-performance Python library for lazy out-of-core DataFrames (similar to Pandas), used to visualize and explore big tabular datasets. It calculates statistics such as mean, sum, count, standard deviation, etc. on an N-dimensional grid for more than a billion (10^9) samples/rows per second. Visualization is done using histograms, density plots and 3D volume rendering, allowing interactive exploration of big data. Vaex uses memory mapping, a zero-memory-copy policy and lazy computations for best performance (no memory wasted).

In [2]:
import vaex
import pandas as pd
import numpy as np
# Fixed seed: without it every "Restart Kernel -> Run All" generates different
# data, so none of the values displayed in the notebook could be reproduced.
seed = 42
n_rows = 1_000_000
n_cols = 500

# n_rows x n_cols integers drawn uniformly from [0, 100) (upper bound exclusive).
# dtype is pinned to int32 so the in-memory size (n_rows * n_cols * 4 bytes,
# about 1.9 GB) does not depend on the platform's default integer width.
rng = np.random.default_rng(seed)
df = pd.DataFrame(
    rng.integers(0, 100, size=(n_rows, n_cols), dtype=np.int32),
    columns=[f'col{i}' for i in range(n_cols)],
)
df.head()

Unnamed: 0,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,...,col490,col491,col492,col493,col494,col495,col496,col497,col498,col499
0,73,24,29,33,45,97,17,49,23,81,...,4,25,9,79,82,7,10,6,95,84
1,88,50,69,77,5,30,81,16,59,56,...,78,86,44,21,65,25,69,27,58,98
2,48,26,44,92,57,10,3,39,48,15,...,41,20,92,82,17,49,55,77,27,72
3,57,0,91,36,29,42,5,51,72,80,...,46,43,32,60,7,13,95,4,83,37
4,67,3,24,55,77,43,17,67,20,9,...,77,74,39,7,93,31,44,39,59,36


In [3]:
# Summarise the frame (index range, column count, dtypes). memory_usage='deep'
# asks pandas to inspect the actual data when reporting the memory footprint.
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Columns: 500 entries, col0 to col499
dtypes: int32(500)
memory usage: 1.9 GB


# Creating CSV files

In [5]:
# Write the pandas frame out as CSV. index=False keeps the row index out of the
# file, so only the col* columns are written.
file_path = 'final_data_vaex_ex.csv'
df.to_csv(path_or_buf=file_path, index=False)

# Create HDF5 files
# Hierarchical Data Format (HDF5)

In [6]:
# Convert the CSV to HDF5 (convert=True writes '<file_path>.hdf5' next to the CSV)
# and bind the resulting vaex DataFrame to vaex_df.
# chunk_size is the number of rows parsed per chunk. It must be smaller than the
# 1,000,000-row file for the conversion to be chunked at all; a larger value means
# the whole CSV is parsed in one piece.
vaex_df = vaex.from_csv(file_path, convert=True, chunk_size=100_000)

In [7]:
# Show which class vaex returned for the converted CSV.
type(vaex_df)

vaex.dataframe.DataFrameLocal

# Read HDF5 files using the Vaex library

In [9]:
# Re-open the HDF5 file written by the CSV conversion. The name is derived from
# file_path (the conversion appends '.hdf5') rather than spelled out a second
# time, so the two cannot drift apart if the CSV path is changed.
vaex_df = vaex.open(f'{file_path}.hdf5')

In [10]:
# Show which class vaex.open returned for the HDF5 file.
type(vaex_df)

vaex.dataframe.DataFrameLocal

In [11]:
# Preview the first ten rows of the HDF5-backed frame.
vaex_df.head(n=10)

#,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13,col14,col15,col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41,col42,col43,col44,col45,col46,col47,col48,col49,col50,col51,col52,col53,col54,col55,col56,col57,col58,col59,col60,col61,col62,col63,col64,col65,col66,col67,col68,col69,col70,col71,col72,col73,col74,col75,col76,col77,col78,col79,col80,col81,col82,col83,col84,col85,col86,col87,col88,col89,col90,col91,col92,col93,col94,col95,col96,col97,col98,col99,...,col400,col401,col402,col403,col404,col405,col406,col407,col408,col409,col410,col411,col412,col413,col414,col415,col416,col417,col418,col419,col420,col421,col422,col423,col424,col425,col426,col427,col428,col429,col430,col431,col432,col433,col434,col435,col436,col437,col438,col439,col440,col441,col442,col443,col444,col445,col446,col447,col448,col449,col450,col451,col452,col453,col454,col455,col456,col457,col458,col459,col460,col461,col462,col463,col464,col465,col466,col467,col468,col469,col470,col471,col472,col473,col474,col475,col476,col477,col478,col479,col480,col481,col482,col483,col484,col485,col486,col487,col488,col489,col490,col491,col492,col493,col494,col495,col496,col497,col498,col499
0,73,24,29,33,45,97,17,49,23,81,27,16,68,91,53,65,89,43,98,38,81,54,70,33,73,21,81,62,97,37,74,81,47,86,41,12,31,99,37,58,4,92,49,27,58,2,92,32,44,43,9,74,6,42,60,39,6,64,37,30,54,20,11,67,25,57,85,40,11,56,14,29,24,55,34,97,77,77,87,40,43,24,53,51,87,80,29,29,72,71,13,80,71,20,68,70,75,22,52,47,...,22,2,28,87,47,9,75,74,35,74,61,2,45,8,29,75,68,91,36,51,67,61,8,40,19,80,52,77,16,75,22,49,5,17,88,35,20,21,38,23,13,89,47,39,85,88,8,37,5,41,89,80,70,88,97,42,20,3,68,21,47,94,28,25,94,22,76,80,73,68,8,76,37,67,23,58,70,81,93,87,65,92,78,58,23,61,43,33,68,5,4,25,9,79,82,7,10,6,95,84
1,88,50,69,77,5,30,81,16,59,56,32,50,88,38,90,39,81,10,25,78,56,66,0,90,44,30,56,17,5,21,41,68,80,33,95,34,30,23,84,56,91,1,31,40,47,5,77,57,26,62,47,62,87,19,84,20,37,12,50,58,69,6,97,36,3,50,30,25,25,46,86,72,8,99,17,65,77,7,96,96,69,54,98,66,5,55,65,17,46,97,49,26,53,42,76,98,39,81,62,91,...,32,59,22,38,18,3,14,39,27,70,45,20,98,83,19,99,60,4,21,8,91,63,17,85,61,67,13,11,15,7,96,81,12,17,10,19,7,15,59,79,44,58,67,82,71,68,35,43,29,92,39,86,38,69,97,24,96,94,1,26,55,72,99,94,44,40,23,76,56,1,33,42,18,75,49,17,1,44,93,75,36,19,50,96,90,25,14,27,70,99,78,86,44,21,65,25,69,27,58,98
2,48,26,44,92,57,10,3,39,48,15,99,61,21,2,98,65,86,68,64,82,89,2,85,0,67,41,10,5,75,28,53,67,81,0,39,45,80,27,61,0,86,35,20,42,51,28,81,40,59,73,78,42,8,90,27,46,83,80,98,90,45,26,17,39,7,19,40,44,16,33,87,14,58,44,1,48,70,85,51,14,89,19,98,52,82,54,4,52,44,40,41,71,76,11,16,41,84,90,6,77,...,31,69,60,75,77,93,12,3,43,99,18,74,70,4,20,82,89,7,75,24,89,22,43,51,62,6,79,39,28,20,94,56,34,19,14,99,63,76,6,4,23,15,93,39,90,80,95,48,2,93,26,9,3,12,46,70,10,5,20,53,26,85,82,29,72,81,57,30,88,4,9,51,97,20,41,69,73,87,15,50,96,93,7,45,44,65,28,55,30,22,41,20,92,82,17,49,55,77,27,72
3,57,0,91,36,29,42,5,51,72,80,63,13,5,2,16,53,52,46,83,6,14,48,95,50,52,15,19,60,75,49,82,24,62,31,44,59,80,36,35,32,41,9,25,19,79,63,19,86,49,47,90,93,52,38,11,13,8,20,27,6,50,1,60,41,75,27,6,48,66,35,81,49,77,15,27,49,19,25,69,26,56,2,14,16,19,60,23,75,77,49,29,21,72,30,4,35,69,16,73,89,...,35,73,37,96,7,60,57,80,30,86,39,5,55,10,7,49,83,27,5,66,7,76,89,51,7,2,98,77,25,43,74,0,32,76,92,7,26,66,72,37,76,31,40,15,25,25,11,54,1,61,35,9,37,11,31,7,26,79,94,13,80,47,26,21,91,15,11,36,29,99,19,13,60,72,96,1,4,80,26,73,46,64,70,52,75,81,29,51,53,76,46,43,32,60,7,13,95,4,83,37
4,67,3,24,55,77,43,17,67,20,9,45,81,16,44,51,46,20,39,26,31,15,52,96,82,87,32,57,2,20,14,83,36,6,19,71,88,22,45,27,51,97,64,3,38,24,4,92,18,55,31,2,81,81,82,22,9,80,69,72,42,12,87,29,71,14,12,96,80,90,62,19,6,89,12,97,15,69,66,57,49,23,93,31,1,87,68,65,18,10,39,12,97,91,62,54,94,34,91,45,14,...,85,38,45,12,99,3,75,7,59,39,10,51,2,81,76,25,66,57,30,29,6,47,23,51,46,28,60,5,20,14,2,23,59,90,49,4,88,50,9,30,24,32,37,46,69,78,47,99,0,63,25,26,34,21,99,95,30,93,70,27,56,19,18,67,97,71,76,90,39,49,47,24,20,70,58,93,50,56,37,17,35,20,90,66,36,65,98,60,77,18,77,74,39,7,93,31,44,39,59,36
5,95,81,39,81,58,42,6,86,62,9,47,67,31,51,86,31,41,68,13,1,36,8,19,7,93,81,89,21,35,86,61,22,58,66,46,80,44,68,29,34,44,61,54,94,61,49,88,50,82,14,90,99,43,37,87,35,78,1,1,81,55,38,11,50,59,76,81,19,36,27,71,76,54,78,91,35,77,34,70,7,72,66,4,57,34,9,50,92,86,40,67,37,57,95,7,3,62,6,97,62,...,44,75,33,82,24,71,38,35,49,51,46,54,35,5,31,92,5,47,5,59,89,38,74,77,22,86,21,20,61,6,14,98,80,18,43,11,51,16,79,73,99,65,64,45,9,17,40,85,40,98,38,41,62,47,96,83,78,56,39,18,11,64,35,97,83,76,44,99,64,44,61,33,0,43,7,28,80,15,59,22,5,66,45,15,73,52,39,1,86,50,1,66,0,55,42,54,0,30,39,97
6,13,20,33,13,52,5,2,29,56,81,80,40,6,91,33,4,19,42,97,93,76,71,20,41,91,74,42,46,52,70,51,84,62,6,86,61,49,25,29,95,59,96,10,56,31,34,47,26,7,9,0,44,70,71,26,89,37,58,73,92,50,39,85,19,57,4,16,99,78,99,89,83,99,80,57,64,18,10,67,47,50,96,43,43,48,5,44,54,87,26,57,39,52,1,32,23,44,59,47,89,...,66,59,90,25,64,81,26,32,74,75,17,79,86,90,96,26,91,63,82,8,5,98,72,31,45,84,39,88,35,80,68,96,2,54,57,53,18,46,54,6,4,85,13,30,14,18,99,40,74,59,13,33,63,16,58,44,34,12,46,51,71,25,12,29,55,54,7,92,10,58,17,2,91,6,67,62,0,40,15,26,61,88,78,6,11,59,49,45,47,70,30,58,55,86,46,89,69,24,89,3
7,63,10,31,44,25,84,70,29,77,62,67,86,29,61,53,58,77,32,25,6,83,82,5,35,56,87,82,20,51,94,49,34,30,59,94,4,96,43,50,34,2,93,42,53,9,25,90,98,32,3,76,13,47,34,5,0,31,95,97,99,27,36,76,17,43,56,13,38,63,21,36,68,39,76,34,81,75,79,12,33,35,92,62,39,95,35,27,92,71,18,61,56,67,56,66,80,38,3,91,38,...,62,58,90,20,32,72,28,76,12,75,39,87,56,32,98,58,71,4,20,78,71,76,7,25,91,13,44,46,44,38,74,54,37,98,44,50,57,82,21,77,41,8,51,59,24,54,25,28,43,49,53,34,5,45,52,96,33,36,94,75,78,70,31,21,29,52,76,4,15,44,36,88,80,92,34,74,20,84,90,43,11,85,82,55,30,17,3,51,84,23,3,84,93,43,58,50,65,37,98,20
8,44,15,70,85,83,18,52,77,32,23,20,18,69,24,41,66,98,75,47,20,99,14,17,13,89,14,10,93,2,98,14,55,36,63,7,8,21,75,7,70,75,37,94,75,44,11,58,16,56,24,24,54,31,70,20,41,57,71,51,15,6,14,12,78,72,39,37,25,35,65,98,20,23,55,52,51,34,15,59,57,88,7,82,43,50,40,35,37,49,88,13,87,24,31,46,3,79,9,3,52,...,26,76,89,77,69,59,60,71,44,95,22,6,94,47,50,78,68,98,77,97,58,10,30,1,36,55,55,40,16,77,46,35,45,89,26,5,5,43,9,40,42,77,95,31,10,30,40,1,29,15,63,36,37,18,86,48,91,87,43,39,32,6,75,59,45,95,37,30,81,25,73,82,87,55,70,40,47,0,95,31,35,54,10,72,26,58,62,2,89,42,14,49,91,18,81,10,92,87,54,38
9,30,63,12,39,6,76,55,39,56,99,91,68,80,76,19,55,50,62,46,20,23,99,2,4,75,32,58,53,54,59,59,26,23,63,86,4,61,31,79,72,97,5,65,99,11,6,29,32,49,5,8,4,26,50,72,67,58,36,65,36,91,50,19,53,31,78,36,75,25,42,42,31,11,53,58,78,43,30,90,38,74,11,84,30,20,45,41,21,48,82,17,68,48,23,95,71,90,38,79,81,...,9,49,64,62,56,10,56,57,23,66,61,54,78,23,68,5,36,50,73,41,59,10,29,1,29,1,32,41,87,78,10,69,44,94,86,41,55,93,25,7,87,22,13,86,34,64,90,16,55,72,91,17,89,48,15,40,75,80,81,43,16,67,73,49,42,58,13,74,9,61,2,40,47,63,33,97,26,29,56,57,47,71,3,81,87,15,12,40,20,85,59,66,2,50,25,90,81,83,0,38


# Expression system
Don't waste memory or time with feature engineering, we (lazily) transform your data when needed.

In [15]:
%%time
# Add the element-wise product of col1 and col3 to vaex_df as a new column
# named 'multiplication_col13'.
vaex_df['multiplication_col13'] = vaex_df['col1'] * vaex_df['col3']

CPU times: total: 0 ns
Wall time: 6.44 ms


In [16]:
# Display the new column to see that it works fast.
vaex_df.multiplication_col13

Expression = multiplication_col13
Length: 1,000,000 dtype: int64 (column)
---------------------------------------
     0   792
     1  3850
     2  2392
     3     0
     4   165
    ...     
999995  4489
999996  2550
999997   294
999998  2838
999999  2788

# Out-of-core DataFrame
Filtering and evaluating expressions will not waste memory by making copies; the data is kept untouched on disk, and will be streamed only when needed. Delay the time before you need a cluster.

In [17]:
# Keep only the rows whose col2 value is greater than 70 and display them.
vaex_df[vaex_df['col2'] > 70]

#,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13,col14,col15,col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41,col42,col43,col44,col45,col46,col47,col48,col49,col50,col51,col52,col53,col54,col55,col56,col57,col58,col59,col60,col61,col62,col63,col64,col65,col66,col67,col68,col69,col70,col71,col72,col73,col74,col75,col76,col77,col78,col79,col80,col81,col82,col83,col84,col85,col86,col87,col88,col89,col90,col91,col92,col93,col94,col95,col96,col97,col98,col99,...,col401,col402,col403,col404,col405,col406,col407,col408,col409,col410,col411,col412,col413,col414,col415,col416,col417,col418,col419,col420,col421,col422,col423,col424,col425,col426,col427,col428,col429,col430,col431,col432,col433,col434,col435,col436,col437,col438,col439,col440,col441,col442,col443,col444,col445,col446,col447,col448,col449,col450,col451,col452,col453,col454,col455,col456,col457,col458,col459,col460,col461,col462,col463,col464,col465,col466,col467,col468,col469,col470,col471,col472,col473,col474,col475,col476,col477,col478,col479,col480,col481,col482,col483,col484,col485,col486,col487,col488,col489,col490,col491,col492,col493,col494,col495,col496,col497,col498,col499,multiplication_col13
0,57,0,91,36,29,42,5,51,72,80,63,13,5,2,16,53,52,46,83,6,14,48,95,50,52,15,19,60,75,49,82,24,62,31,44,59,80,36,35,32,41,9,25,19,79,63,19,86,49,47,90,93,52,38,11,13,8,20,27,6,50,1,60,41,75,27,6,48,66,35,81,49,77,15,27,49,19,25,69,26,56,2,14,16,19,60,23,75,77,49,29,21,72,30,4,35,69,16,73,89,...,73,37,96,7,60,57,80,30,86,39,5,55,10,7,49,83,27,5,66,7,76,89,51,7,2,98,77,25,43,74,0,32,76,92,7,26,66,72,37,76,31,40,15,25,25,11,54,1,61,35,9,37,11,31,7,26,79,94,13,80,47,26,21,91,15,11,36,29,99,19,13,60,72,96,1,4,80,26,73,46,64,70,52,75,81,29,51,53,76,46,43,32,60,7,13,95,4,83,37,0.0
1,81,59,75,19,33,65,36,88,67,32,72,76,61,80,12,38,62,78,25,3,22,91,44,96,50,47,58,56,41,35,13,34,98,44,74,77,61,34,26,47,0,31,89,44,56,72,3,17,79,82,33,77,7,20,49,66,21,20,87,84,92,78,64,61,4,99,87,61,51,1,28,97,36,99,76,25,28,58,46,42,49,14,22,55,21,22,3,93,74,8,34,42,28,98,34,74,64,55,72,98,...,58,70,56,63,35,88,33,99,88,40,46,65,5,11,78,51,87,91,20,9,94,16,83,66,56,67,85,77,42,97,78,70,33,56,41,63,70,92,62,82,72,15,72,54,22,99,48,12,68,93,46,35,76,60,26,76,93,74,62,93,7,15,91,5,41,14,35,34,4,18,90,48,27,26,28,34,87,99,79,87,90,73,24,94,86,49,60,8,75,87,50,6,53,86,57,93,23,76,66,1121.0
2,60,12,72,63,30,75,71,55,30,81,98,94,70,84,42,82,21,98,68,88,86,77,29,55,37,2,59,65,15,17,41,73,67,91,95,18,82,36,78,29,21,11,99,39,12,52,9,99,91,47,25,44,53,6,60,5,8,94,10,55,8,42,78,96,60,26,28,95,85,34,59,12,62,47,92,56,83,80,86,2,89,12,6,36,83,82,71,72,67,59,48,33,46,17,43,46,24,58,60,72,...,89,53,52,7,18,68,26,80,85,99,79,96,27,82,85,57,91,16,42,2,16,47,79,22,1,76,87,34,91,74,71,76,71,86,30,23,59,40,84,17,17,83,28,63,36,68,13,42,64,96,96,15,22,92,92,81,76,21,65,76,71,50,30,89,23,37,68,65,32,0,67,68,53,72,92,28,34,60,19,54,20,20,90,92,82,44,18,29,1,98,25,63,32,75,97,68,9,92,19,756.0
3,35,99,80,25,58,2,0,89,48,65,67,36,52,81,70,59,0,84,98,59,22,53,35,1,6,70,17,17,32,2,78,72,14,96,89,42,81,16,32,95,72,1,85,84,20,71,18,67,76,28,6,95,2,72,15,59,77,61,25,33,75,57,90,79,0,17,76,91,67,79,11,41,22,99,89,58,23,83,30,3,69,19,56,14,32,11,75,49,5,79,75,54,18,56,65,20,70,72,8,72,...,18,19,37,53,27,72,81,9,87,46,67,21,73,12,59,4,81,27,62,0,98,56,28,9,26,54,74,97,48,31,71,63,85,53,51,72,37,49,29,76,59,45,97,42,39,60,57,59,47,3,7,43,24,88,53,70,3,49,81,14,83,26,10,45,69,98,37,97,16,38,49,29,94,90,23,61,56,88,18,60,81,11,38,63,38,26,58,5,49,17,87,52,66,45,0,55,22,28,22,2475.0
4,50,5,78,46,25,48,45,76,47,86,10,66,63,55,59,53,12,76,17,64,18,46,56,51,41,86,65,67,16,33,55,38,38,63,42,37,49,29,81,95,96,59,70,99,96,43,22,84,84,18,18,17,11,69,24,36,4,85,93,38,9,47,92,60,98,97,40,35,58,21,45,21,14,46,50,33,74,59,50,67,21,46,49,87,0,56,41,85,6,79,4,40,56,43,97,1,75,15,28,38,...,34,73,91,65,94,42,58,85,53,27,5,77,0,82,67,4,61,1,63,22,70,55,10,97,74,21,56,21,80,55,2,44,38,13,10,1,74,17,2,71,60,7,65,83,33,57,64,87,90,42,22,90,24,90,19,52,59,66,63,10,8,92,5,21,5,80,65,12,95,24,61,42,76,48,61,28,30,28,87,52,45,58,73,38,62,87,83,53,38,31,66,77,7,93,43,51,78,91,61,230.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,5605.0
289858,89,59,74,95,78,38,4,91,1,42,60,66,99,2,12,50,39,66,49,78,29,61,21,53,99,9,14,54,8,40,37,54,88,42,9,69,56,39,20,33,9,92,49,2,77,84,55,95,17,87,5,16,99,18,20,6,15,41,40,87,43,68,41,53,89,47,70,96,75,95,19,18,12,59,66,90,66,99,59,82,43,95,36,23,75,8,64,83,19,20,77,50,1,15,19,41,15,60,61,81,...,89,56,64,30,3,91,15,54,86,87,33,34,56,27,9,94,49,56,9,93,23,32,29,87,79,10,81,42,15,59,66,40,84,3,17,4,43,21,17,35,97,21,72,68,8,2,82,23,34,7,87,51,37,45,4,19,69,22,50,83,77,37,8,39,48,37,91,17,50,19,40,34,43,54,2,41,87,71,30,90,46,33,98,58,35,78,40,20,43,40,1,29,56,60,28,27,81,62,62,8448.0
289859,73,88,92,96,54,16,40,76,66,52,19,79,47,64,0,51,86,71,94,2,22,53,93,26,15,4,27,51,37,1,72,54,55,77,25,81,4,87,14,61,94,53,44,82,69,90,63,9,51,39,4,2,82,79,79,81,31,3,12,42,68,31,32,90,55,29,12,55,57,45,24,44,21,90,58,58,73,70,24,80,25,12,54,12,76,53,65,10,62,5,10,5,15,90,1,49,18,36,66,73,...,70,6,81,19,25,85,60,23,14,10,50,40,2,58,91,94,72,72,52,0,40,8,62,23,78,89,99,81,38,97,63,22,96,64,16,1,39,34,91,70,93,52,44,43,82,20,25,58,87,79,40,94,58,75,22,71,80,43,91,76,0,49,96,33,57,35,92,75,43,47,86,32,50,67,95,42,46,95,92,7,86,74,62,65,26,74,32,66,78,18,37,7,2,97,89,63,3,72,56,3060.0
289860,58,36,73,85,39,55,7,30,2,22,7,52,42,64,88,99,52,18,72,19,32,48,1,75,12,70,12,75,72,65,69,87,66,94,49,85,92,13,6,66,95,61,19,86,94,66,20,34,26,66,25,56,42,47,46,51,16,36,37,84,71,69,15,32,57,53,1,7,71,42,14,4,79,24,32,93,67,16,67,89,55,87,5,95,81,83,31,72,70,60,55,64,88,79,72,61,59,57,89,64,...,55,75,8,94,11,12,19,69,27,75,22,84,12,53,75,30,80,5,2,32,3,55,83,31,56,27,98,92,55,11,58,24,91,10,5,27,76,20,53,65,91,74,61,59,96,8,4,23,73,80,90,55,89,60,91,36,67,88,54,46,56,77,61,35,91,13,73,47,17,27,35,97,23,18,58,20,10,24,75,64,98,26,71,71,16,14,91,80,45,88,19,3,88,74,32,34,11,48,49,435.0
289861,82,15,79,29,13,61,77,57,80,9,57,89,77,24,11,29,2,19,87,13,72,42,28,68,82,44,52,36,96,16,94,0,77,91,64,32,89,41,33,97,12,57,76,32,91,89,60,19,17,32,53,2,85,32,69,20,0,75,55,4,50,92,82,74,3,96,29,42,68,48,89,8,27,30,3,42,6,28,6,90,92,45,92,73,13,74,27,43,90,98,76,51,50,49,97,96,10,44,70,25,...,78,89,83,71,25,54,67,27,68,98,36,1,39,23,9,94,12,84,88,27,60,53,8,5,47,54,87,22,90,16,95,70,94,19,97,11,26,62,47,68,39,88,7,88,54,54,51,53,0,89,76,88,21,81,39,10,58,95,9,94,44,91,97,22,55,86,9,9,35,21,44,57,38,30,30,55,53,55,78,43,16,44,11,39,12,23,79,52,17,47,76,65,49,82,71,27,20,95,89,2550.0


In [18]:
# Filtering here will not make a memory copy.
# All the algorithms work out of core; the limit is the size of your hard drive.
dff = vaex_df[vaex_df['col2'] > 70]

# Smallest and largest col2 among the filtered rows, with a progress widget.
dff['col2'].minmax(progress='widget')

HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))

array([71, 99], dtype=int64)

# Fast groupby / aggregations
Vaex implements parallelized, highly performant groupby operations, especially when using categories (>1 billion/second).

In [19]:
%%time
# Mean of col4 within each distinct value of col1.
vaex_df_group = vaex_df.groupby(
    by=vaex_df.col1,
    agg=vaex.agg.mean(vaex_df.col4),
)
vaex_df_group

CPU times: total: 78.1 ms
Wall time: 139 ms


#,col1,col4_mean
0,0,49.969374302809044
1,1,49.9768821603928
2,2,49.42696629213483
3,3,49.005525972068725
4,4,49.556404405198926
...,...,...
95,95,49.48676979442296
96,96,49.26987134736212
97,97,50.211090169626026
98,98,49.4478527607362


In [20]:
%%time
# Number of rows for each distinct value of col1.
vaex_df.groupby(by=vaex_df.col1, agg='count')

CPU times: total: 0 ns
Wall time: 78.7 ms


#,col1,count
0,0,9861
1,1,9776
2,2,9879
3,3,9953
4,4,10079
...,...,...
95,95,9826
96,96,10027
97,97,10081
98,98,10106
