## TODO

- Incorporate entity information into `GammaTensor` using the `DataSubjectList` class
- Speed up `DataSubjectList.from_objs()`
- Make `value` private
- Add Lipschitz bound property (?)

In [6]:
!pip install flax jax

Collecting flax
  Using cached flax-0.4.0-py3-none-any.whl (176 kB)
Collecting jax
  Downloading jax-0.3.1.tar.gz (912 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m912.1/912.1 KB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting optax
  Using cached optax-0.1.1-py3-none-any.whl (136 kB)
Collecting msgpack
  Downloading msgpack-1.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (322 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.4/322.4 KB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting absl-py
  Using cached absl_py-1.0.0-py3-none-any.whl (126 kB)
Collecting opt_einsum
  Using cached opt_einsum-3.3.0-py3-none-any.whl (65 kB)
Collecting jaxlib>=0.1.37
  Downloading jaxlib-0.3.0-cp38-none-manylinux2010_x86_64.whl (65.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.4/65.4 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m:00:0

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to library code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

import syft as sy
import numpy as np

In [2]:
from syft.core.adp.entity import DataSubject

In [3]:
# The two named data subjects that the example tensors below are attributed to.
ishan, rasswanth = (DataSubject(name=label) for label in ("Ishan", "Rasswanth"))

In [4]:
# Sanity check: repeating the two subjects 50 times is a plain Python list (100 entries).
type([ishan, rasswanth] * 50)

list

In [5]:
# 100 random int32 values drawn from [0, 5), marked private with declared
# bounds [0, 10]; the entities list alternates between the two data subjects.
tensor = sy.Tensor(
    np.random.randint(0, 5, size=100, dtype=np.int32)
).private(
    min_val=0,
    max_val=10,
    entities=[ishan, rasswanth] * 50,
)

In [6]:
# Inspect the object wrapped by the syft Tensor after .private() (a PhiTensor per the output).
tensor.child

PhiTensor(child=(100,), min_vals=<lazyrepeatarray data: 0 -> shape: (100,)>, max_vals=<lazyrepeatarray data: 10 -> shape: (100,)>)

In [7]:
# Sum the private child tensor; the result is inspected in the cells below.
result = tensor.child.sum()

In [8]:
from syft.core.tensor.autodp.gamma_tensor import GammaTensor

In [9]:
# Build a GammaTensor by hand to look at its repr.
GammaTensor(
    value=np.ones(10),
    data_subjects=["Ishan"],
    min_val=0,
    max_val=2,
)

GammaTensor(value=array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]), data_subjects=['Ishan'], min_val=0, max_val=2, func=<function no_op at 0x7f36ecc35f70>, id='1875487103', state={'1875487103': ...})

In [10]:
# Confirm the type returned by summing the PhiTensor.
type(result)

syft.core.tensor.autodp.gamma_tensor.GammaTensor

In [11]:
# The summed value carried by the result.
result.value

205

In [12]:
# Cross-check: summing the underlying raw data directly should match result.value.
tensor.child.child.sum()

205

In [13]:
# Sum of the per-element upper bounds (100 elements, each declared with max_val=10).
tensor.child.max_vals.sum()

1000

In [14]:
# Full repr of the summed result, including its bounds and data subjects.
result

GammaTensor(value=205, data_subjects=<syft.core.adp.entity_list.DataSubjectList object at 0x7f37b044e100>, min_val=0, max_val=1000, func=<function no_op at 0x7f36ecc35f70>, id='2125233442', state={'2125233442': ...})

In [15]:
size = 10
# Array of ones with one randomly chosen data subject per element.
large_array = sy.Tensor(np.ones(size, dtype=np.int32)).private(
    min_val=0,
    max_val=2,
    entities=list(np.random.choice([ishan, rasswanth], size=size)),
)

In [16]:
from syft.core.tensor.autodp.ndim_entity_phi import PhiTensor as NDEPT

In [19]:
# `time` stays imported because a later cell still calls time() directly.
from time import perf_counter, time


def _timed(fn):
    """Call ``fn()`` and return ``(result, elapsed_seconds)``."""
    # perf_counter is the monotonic, high-resolution clock intended for benchmarks.
    start = perf_counter()
    out = fn()
    return out, perf_counter() - start


# Benchmark data creation, NDEPT construction and sum() for 10**6 .. 10**9 elements.
sizes = [10 ** e for e in range(6, 10)]

for size in sizes:
    data, tf = _timed(lambda: np.ones(size, dtype=np.int32))
    print(f'Time to create data: {tf} seconds')

    # Timed for comparison only: the NDEPT below is built with the single
    # subject `ishan`, not with this per-element array.
    entities, tf = _timed(lambda: np.random.choice([ishan, rasswanth], size=size))
    print(f'Time to create entity array: {tf} seconds')

    large_array, tf = _timed(
        lambda: NDEPT(child=data, entities=ishan, min_vals=0, max_vals=2)
    )
    print(f'Time taken to initialize NDEPT: {tf} seconds')

    result, tf = _timed(lambda: large_array.sum())
    print(f'Time taken to sum {size/1e6} million: {tf} seconds!')

    # Every element is 1, so the sum divided by the element count must be 1.
    assert result.value/size == 1.0
    # assert result.max_vals/size == 2.0
    

Time to create data: 0.0004870891571044922 seconds
Time to create entity array: 0.014275312423706055 seconds
Time taken to initialize NDEPT: 0.0001316070556640625 seconds
Time taken to sum 1.0 million: 0.0022726058959960938 seconds!
Time to create data: 0.004967451095581055 seconds
Time to create entity array: 0.12358593940734863 seconds
Time taken to initialize NDEPT: 0.00019288063049316406 seconds
Time taken to sum 10.0 million: 0.019536972045898438 seconds!
Time to create data: 0.04445028305053711 seconds
Time to create entity array: 1.0893456935882568 seconds
Time taken to initialize NDEPT: 0.00030040740966796875 seconds
Time taken to sum 100.0 million: 0.17731213569641113 seconds!
Time to create data: 0.4367334842681885 seconds
Time to create entity array: 10.805228233337402 seconds
Time taken to initialize NDEPT: 0.0007562637329101562 seconds
Time taken to sum 1000.0 million: 1.8253142833709717 seconds!


In [22]:
# NDEPT is the import alias for this PhiTensor class.
type(large_array)

syft.core.tensor.autodp.ndim_entity_phi.PhiTensor

In [27]:
%%timeit
# t0 = time()
s = sy.serialize(large_array)
# tf = time() - t0
# print(f'Time for serialization: {tf} seconds')

782 ms ± 35.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [31]:
# Element count in billions; large_array here is the one left by the last benchmark iteration.
large_array.shape[0]/1e9

1.0

In [29]:
from syft.util import size_mb

In [30]:
# Size of the serialized payload as reported by syft's size_mb helper.
size_mb(s)

0.34996795654296875

In [32]:
# Time a single deserialization of the payload produced above.
t0 = time()
d = sy.deserialize(s)
tf = time() - t0
# The timed call is sy.deserialize, so label the measurement accordingly.
print(f'Time for deserialization: {tf} seconds')

Time for serialization: 2.556856632232666 seconds


In [34]:
# Round-trip check: every element of the deserialized tensor compares equal to the original.
(d == large_array).child.all()

True

In [78]:
# Per-element data-subject array; `entities` is assigned inside the benchmark loop above.
entities

array([<DataSubject:Rasswanth>, <DataSubject:Rasswanth>, <DataSubject:Ishan>, ...,
       <DataSubject:Rasswanth>, <DataSubject:Rasswanth>, <DataSubject:Rasswanth>],
      dtype=object)

In [32]:
# Lookup array of data subjects held by the tensor's entity list (two subjects in the output).
large_array.entities.one_hot_lookup

array([<DataSubject:Ishan>, <DataSubject:Rasswanth>], dtype=object)

In [33]:
# Keep a handle on the indexed entity array for the inspection cells below.
# NOTE(review): presumably integer indices into one_hot_lookup — confirm.
original_entities = large_array.entities.entities_indexed

In [34]:
# One index per element, stored as a flat 1-D array.
original_entities.shape

(10000000,)

In [37]:
# View the flat index array as a single row; -1 lets NumPy infer the row length.
original_entities.reshape(1, -1)

array([[1, 0, 0, ..., 1, 1, 0]])