-
Notifications
You must be signed in to change notification settings - Fork 34
/
multi_attribute.py
111 lines (94 loc) · 4.26 KB
/
multi_attribute.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# multi_attribute.py
#
# LICENSE
#
# The MIT License
#
# Copyright (c) 2018 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# DESCRIPTION
#
# This is a part of the TileDB tutorial:
# https://docs.tiledb.io/en/latest/tutorials/multi-attribute-arrays.html
#
# When run, this program will create a simple 2D dense array with two
# attributes, write some data to it, and read a slice of the data back on
# (i) both attributes, and (ii) subselecting on only one of the attributes.
#
import numpy as np
import sys
import tiledb
# Name of the array to create.
# Shared by every function in this script: create_array() creates the array
# under this name, and the write/read functions open it by the same name.
array_name = "multi_attribute"
def create_array():
    """Create the empty 4x4 dense array with attributes "a1" and "a2".

    Returns immediately, without touching anything, when
    ``tiledb.object_type`` already reports an "array" at ``array_name``.
    """
    ctx = tiledb.Ctx()

    # Nothing to do if the array already exists.
    if tiledb.object_type(ctx, array_name) == "array":
        return

    # Two int32 dimensions, "rows" and "cols", each with domain [1,4],
    # giving a 4x4 array.
    row_dim = tiledb.Dim(ctx, name="rows", domain=(1, 4), tile=4,
                         dtype=np.int32)
    col_dim = tiledb.Dim(ctx, name="cols", domain=(1, 4), tile=4,
                         dtype=np.int32)
    dom = tiledb.Domain(ctx, row_dim, col_dim)

    # Each (i,j) cell stores a character (as a uint8 code) on "a1" and a
    # vector of two float32 values on "a2".
    char_attr = tiledb.Attr(ctx, name="a1", dtype=np.uint8)
    float_pair = np.dtype([("", np.float32), ("", np.float32)])
    pair_attr = tiledb.Attr(ctx, name="a2", dtype=float_pair)

    schema = tiledb.ArraySchema(ctx, domain=dom, sparse=False,
                                attrs=[char_attr, pair_attr])

    # Create the (empty) array on disk.
    tiledb.DenseArray.create(array_name, schema)
def write_array():
    """Open the array in write mode and store values on both attributes.

    "a1" receives the character codes of 'a' through 'p'; "a2" receives 16
    pairs of floats, (1.1, 1.2) through (16.1, 16.2).
    """
    ctx = tiledb.Ctx()

    with tiledb.DenseArray(ctx, array_name, mode='w') as A:
        # Character codes for the 16 cells of attribute "a1".
        letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
                   'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p']
        data_a1 = np.array([ord(letter) for letter in letters])

        # Two-float records for attribute "a2", one per cell.
        pair_dtype = [("", np.float32), ("", np.float32)]
        pairs = [
            (1.1, 1.2), (2.1, 2.2), (3.1, 3.2), (4.1, 4.2),
            (5.1, 5.2), (6.1, 6.2), (7.1, 7.2), (8.1, 8.2),
            (9.1, 9.2), (10.1, 10.2), (11.1, 11.2), (12.1, 12.2),
            (13.1, 13.2), (14.1, 14.2), (15.1, 15.2), (16.1, 16.2),
        ]
        data_a2 = np.array(pairs, dtype=pair_dtype)

        # Write both attributes over the whole array in one assignment.
        cell_values = {"a1": data_a1, "a2": data_a2}
        A[:, :] = cell_values
def read_array():
    """Read a slice of the array and print both attributes cell by cell."""
    ctx = tiledb.Ctx()

    with tiledb.DenseArray(ctx, array_name, mode='r') as A:
        # Slice only rows 1, 2 and cols 2, 3, 4.
        data = A[1:3, 2:5]

        print("Reading both attributes a1 and a2:")

        # Walk the flattened "a1" and "a2" results in lockstep so each
        # printed line pairs the character with its two floats.
        for code, pair in zip(data["a1"].flat, data["a2"].flat):
            print("a1: '%s', a2: (%.1f, %.1f)" % (chr(code), pair[0], pair[1]))
def read_array_subselect():
    """Read the same slice as read_array(), but only for attribute "a1"."""
    ctx = tiledb.Ctx()

    with tiledb.DenseArray(ctx, array_name, mode='r') as A:
        # The '.query()' syntax allows attribute subselection; here only
        # 'a1' is requested.
        a1_only = A.query(attrs=["a1"])

        # Slice only rows 1, 2 and cols 2, 3, 4.
        data = a1_only[1:3, 2:5]

        print("Subselecting on attribute a1:")
        codes = data["a1"].flat
        for code in codes:
            print("a1: '%s'" % chr(code))
# Run the tutorial steps in order: create the array, write the data, then
# read it back with both attributes and with attribute subselection.
for step in (create_array, write_array, read_array, read_array_subselect):
    step()