-
Notifications
You must be signed in to change notification settings - Fork 6
/
test_func.py
43 lines (37 loc) · 1007 Bytes
/
test_func.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pyarrow as pa
from pyarrow_ops import join, filters, groupby, head, drop_duplicates
# Demo / smoke script for the pyarrow_ops helpers imported above. It builds a
# small table and passes it through drop_duplicates, groupby, filters and
# join, handing every result to head() so each step can be inspected.

# Create data
t = pa.Table.from_pydict({
    'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'],
    'Max Speed': [380., 370., 24., 26., 24.],
})
print("Source:")
head(t)

# Drop duplicates (keyed on the 'Animal' column, keep='first')
print("Drop duplicates:")
d = drop_duplicates(t, on=['Animal'], keep='first')
head(d)

# Groupby aggregations
# Iterating the groupby object yields (key, value) pairs; each value is
# passed to head() just like a full table.
print("Groupby loop:")
for key, value in groupby(t, ['Animal']):
    print(key)
    head(value)

# Every aggregation result is displayed right after it is computed, so no
# result is overwritten before it has been shown.
print("Aggregations:")
g = groupby(t, ['Animal']).median()
head(g)
g = groupby(t, ['Animal']).sum()
head(g)
g = groupby(t, ['Animal']).min()
head(g)
g = groupby(t, ['Animal']).agg({'Max Speed': 'max'})
head(g)

# Filters
# First a single (column, operator, value) predicate, then a list of them.
print("Filters:")
f = filters(t, ('Animal', '=', 'Falcon'))
head(f)
f = filters(t, [('Animal', 'not in', ['Falcon', 'Duck']), ('Max Speed', '<', 25)])
head(f)

# Join operations
# Second table shares the 'Animal' column, which is used as the join key.
print("Join:")
t2 = pa.Table.from_pydict({
    'Animal': ['Falcon', 'Parrot'],
    'Age': [10, 20],
})
j = join(t, t2, on=['Animal'])
head(j)