## Testing virtual column and datetime dtype
**2021/08/12**

In [1]:
%run vaex_implementation_v4.py

In [2]:
# Two small float arrays that become the regular columns x and y of the test dataframe.
# np and vaex are not imported in this notebook before this cell; presumably they are
# brought into the namespace by the %run in the first cell — TODO confirm.
x = np.array([1.5, 2.5, 3.5])
y = np.array([9.2, 10.5, 11.8])
df = vaex.from_arrays(x=x, y=y)
# Bare last expression so the notebook renders the dataframe.
df

#,x,y
0,1.5,9.2
1,2.5,10.5
2,3.5,11.8


In [3]:
# Construct a virtual column v by assigning an expression built from the
# existing columns x and y.
df['v'] = df.x + df.y*10

In [4]:
# Display the dataframe again; the recorded output now lists v next to x and y.
df

#,x,y,v
0,1.5,9.2,93.5
1,2.5,10.5,107.5
2,3.5,11.8,121.5


In [5]:
# Inspect the new column v on its own (expression, length, dtype and values).
df.v

Expression = v
Length: 3 dtype: float64 (column)
---------------------------------
0   93.5
1  107.5
2  121.5

In [6]:
# Same inspection for the regular column x, for comparison with v.
df.x

Expression = x
Length: 3 dtype: float64 (column)
---------------------------------
0  1.5
1  2.5
2  3.5

In [8]:
# Add a second virtual column, r, this time through add_virtual_column with the
# expression given as a string.
df.add_virtual_column("r", "sqrt(x**2 + y**2)")

In [13]:
# Summary statistics with default arguments; the recorded output covers
# x and y as well as the virtual columns v and r.
df.describe()

Unnamed: 0,x,y,v,r
data_type,float64,float64,float64,float64
count,3,3,3,3
,0,0,0,0
mean,2.5,10.5,107.5,10.807708179237125
std,0.816497,1.061446,11.430952,1.219335
min,1.5,9.2,93.5,9.321481
max,3.5,11.8,121.5,12.308127


In [14]:
# Same summary with virtual=False: the recorded output is limited to x and y,
# i.e. the virtual columns v and r are left out.
df.describe(virtual=False)

Unnamed: 0,x,y
data_type,float64,float64
count,3,3
,0,0
mean,2.5,10.5
std,0.816497,1.061446
min,1.5,9.2
max,3.5,11.8


In [15]:
# Inspect r, the column that was added via add_virtual_column.
df.r

Expression = r
Length: 3 dtype: float64 (column)
---------------------------------
0  9.32148
1  10.7935
2  12.3081

In [16]:
# Try calling from_dataframe_to_vaex on the dataframe that contains virtual columns
from_dataframe_to_vaex(df)

#,x,y,v,r
0,1.5,9.2,93.5,9.32148
1,2.5,10.5,107.5,10.7935
2,3.5,11.8,121.5,12.3081


In [17]:
# Describe the converted dataframe with virtual=False. All four columns are
# listed in the recorded output, unlike the same call on df, which listed only
# x and y.
# NOTE(review): in the recorded output the statistics reported for r are
# identical to those of v (mean 107.5, min 93.5, max 121.5), whereas
# df.describe() reported a mean of about 10.81 for r, and the r values shown by
# the next cell are 9.32 / 10.79 / 12.31. This inconsistency should be
# investigated before treating the round trip as fully correct.
from_dataframe_to_vaex(df).describe(virtual=False)

Unnamed: 0,x,y,v,r
data_type,float64,float64,float64,float64
count,3,3,3,3
,0,0,0,0
mean,2.5,10.5,107.5,107.5
std,0.816497,1.061446,11.430952,11.430952
min,1.5,9.2,93.5,93.5
max,3.5,11.8,121.5,121.5


In [18]:
# Check the values of r after conversion; the recorded output matches the
# values shown for df.r earlier in the notebook.
from_dataframe_to_vaex(df).r

Expression = r
Length: 3 dtype: float64 (column)
---------------------------------
0  9.32148
1  10.7935
2  12.3081

In [19]:
# Looks like it works!

In [20]:
# Let's play around with datetime columns (not implemented yet!)

In [21]:
# np and vaex are already in use in earlier cells; re-importing them here is harmless.
import numpy as np
import vaex

# Daily timestamps starting at 2015-01-01; np.arange excludes the stop value
# 2015-02-01.
t = np.arange('2015-01-01', '2015-02-01', dtype=np.datetime64)
# Integer counter with one entry per timestamp.
y = np.arange(t.size)
df2 = vaex.from_arrays(t=t, y=y)
df2

#,t,y
0,2015-01-01,0
1,2015-01-02,1
2,2015-01-03,2
3,2015-01-04,3
4,2015-01-05,4
...,...,...
26,2015-01-27,26
27,2015-01-28,27
28,2015-01-29,28
29,2015-01-30,29


In [22]:
# Datetime columns are not handled by the conversion yet, so this call is
# expected to raise NotImplementedError (see the recorded output). The error is
# caught and printed in the same "Name: message" form so that a
# "Restart & Run All" run can continue past this cell. Once datetime support
# exists, the converted dataframe is displayed instead.
try:
    df2_converted = from_dataframe_to_vaex(df2)
except NotImplementedError as err:
    df2_converted = None  # a trailing None renders no output
    print(f"{type(err).__name__}: {err}")
df2_converted

NotImplementedError: Data type datetime64[D] not handled yet

In [23]:
# Three explicit dates parsed directly into a day-resolution datetime64 array.
# NOTE(review): the variable name `datetime` would shadow the standard-library
# module of the same name if that module were imported in this notebook.
datetime = np.array(
    ['2019-10-01', '2020-10-01', '2022-10-01'],
    dtype='datetime64[D]',
)
df3 = vaex.from_arrays(datetime=datetime)
df3

#,datetime
0,2019-10-01
1,2020-10-01
2,2022-10-01


In [24]:
# Same check for the single-column datetime dataframe. The conversion is
# expected to raise NotImplementedError for datetime64 data (see the recorded
# output). The error is caught and printed in the same "Name: message" form so
# the notebook can run to completion. Once datetime support exists, the
# converted dataframe is displayed instead.
try:
    df3_converted = from_dataframe_to_vaex(df3)
except NotImplementedError as err:
    df3_converted = None  # a trailing None renders no output
    print(f"{type(err).__name__}: {err}")
df3_converted

NotImplementedError: Data type datetime64[D] not handled yet