In [1]:
from syft import nn
from syft import PhiTensor
from syft import GammaTensor
from syft import DataSubjectList
import numpy as np
from jax import numpy as jnp

  from .autonotebook import tqdm as notebook_tqdm


## Testing the `nn.utils` helper functions

In [2]:
np.log(jnp.ones(5))



array([0., 0., 0., 0., 0.])

In [3]:
np.log(np.e)

1.0

In [4]:
# Two entries in the lookup (0 and 1); the first ten indices are 0 and the last ten are 1.
dsl = DataSubjectList(
    one_hot_lookup=np.array([0, 1]),
    data_subjects_indexed=np.concatenate((np.zeros(10), np.ones(10))),
)

In [5]:
np.log(np.ones(10))

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [6]:
# Ten copies of e, so the natural log of every element is exactly 1; declared bounds are [1, 5].
gt = GammaTensor(
    child=np.ones(10) * np.e,
    data_subjects=dsl,
    min_val=1,
    max_val=5,
)

In [7]:
gt.log()

GammaTensor(child=array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]), data_subjects=<syft.core.adp.data_subject_list.DataSubjectList object at 0x7fcb5320cfa0>, min_val=<lazyrepeatarray data: 0.0 -> shape: (10,)>, max_val=<lazyrepeatarray data: 1.6094379124341003 -> shape: (10,)>, is_linear=True, func=<function GammaTensor.log.<locals>._log at 0x7fcb53242b80>, id='1942082071', state={'1744187340': GammaTensor(child=array([2.71828183, 2.71828183, 2.71828183, 2.71828183, 2.71828183,
       2.71828183, 2.71828183, 2.71828183, 2.71828183, 2.71828183]), data_subjects=<syft.core.adp.data_subject_list.DataSubjectList object at 0x7fcb5320cfa0>, min_val=1, max_val=5, is_linear=True, func=<function no_op at 0x7fcb517799d0>, id='1744187340', state={})})

In [8]:
nn.utils.dp_log(gt)

GammaTensor(child=array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]), data_subjects=<syft.core.adp.data_subject_list.DataSubjectList object at 0x7fcb5320cfa0>, min_val=<lazyrepeatarray data: 0.0 -> shape: (10,)>, max_val=<lazyrepeatarray data: 1.6094379124341003 -> shape: (10,)>, is_linear=True, func=<function GammaTensor.log.<locals>._log at 0x7fcb532ec040>, id='168211101', state={'1744187340': GammaTensor(child=array([2.71828183, 2.71828183, 2.71828183, 2.71828183, 2.71828183,
       2.71828183, 2.71828183, 2.71828183, 2.71828183, 2.71828183]), data_subjects=<syft.core.adp.data_subject_list.DataSubjectList object at 0x7fcb5320cfa0>, min_val=1, max_val=5, is_linear=True, func=<function no_op at 0x7fcb517799d0>, id='1744187340', state={})})

In [9]:
nn.utils.dp_zeros(gt.shape, gt.data_subjects)

GammaTensor(child=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), data_subjects=<syft.core.adp.data_subject_list.DataSubjectList object at 0x7fcb53247850>, min_val=<lazyrepeatarray data: 0.0 -> shape: (10,)>, max_val=<lazyrepeatarray data: 0.0 -> shape: (10,)>, is_linear=True, func=<function no_op at 0x7fcb517799d0>, id='640543573', state={})

## DataSubjectList Combination tests


**Case 1: Non-overlapping DSL**

In [10]:
# Lookup holds the values 0..9; each cell of the 10x10 grid gets a random index into it.
# NOTE(review): the draw is unseeded, so the index grid differs on every run.
dsl1 = DataSubjectList(
    one_hot_lookup=np.arange(10),
    data_subjects_indexed=np.random.choice(np.arange(10), size=(10, 10))
)

In [11]:
# Lookup values 100..109 share nothing with dsl1's 0..9 (the non-overlapping case).
# NOTE(review): the draw is unseeded, so the index grid differs on every run.
w = np.arange(100, 110)
dsl2 = DataSubjectList(
    one_hot_lookup = w,
    data_subjects_indexed=np.random.choice(np.arange(len(w)), size=(10, 10))
)

In [12]:
dsl1.one_hot_lookup, dsl2.one_hot_lookup

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109]))

In [13]:
dsl1.data_subjects_indexed

array([[0, 8, 1, 8, 6, 4, 7, 1, 2, 9],
       [1, 2, 9, 9, 8, 6, 2, 0, 1, 4],
       [5, 5, 8, 9, 0, 9, 3, 3, 9, 6],
       [7, 9, 6, 7, 0, 1, 6, 8, 4, 9],
       [4, 6, 0, 6, 0, 8, 2, 8, 5, 4],
       [1, 6, 0, 4, 0, 0, 1, 0, 3, 0],
       [3, 8, 9, 2, 5, 5, 3, 9, 4, 3],
       [1, 7, 2, 7, 5, 2, 1, 3, 4, 7],
       [1, 3, 7, 0, 0, 8, 7, 8, 7, 9],
       [4, 6, 1, 8, 5, 2, 3, 7, 2, 7]])

In [14]:
dsl2.data_subjects_indexed

array([[4, 8, 8, 1, 9, 8, 3, 6, 2, 8],
       [6, 6, 0, 5, 3, 9, 8, 7, 6, 3],
       [3, 0, 2, 9, 4, 1, 1, 0, 0, 4],
       [9, 7, 6, 1, 6, 4, 5, 4, 2, 1],
       [9, 2, 7, 7, 8, 8, 5, 9, 1, 3],
       [7, 5, 3, 6, 7, 6, 6, 9, 1, 2],
       [2, 9, 2, 6, 2, 8, 3, 7, 7, 6],
       [7, 3, 4, 1, 5, 9, 0, 2, 6, 3],
       [1, 0, 5, 2, 3, 9, 0, 6, 7, 4],
       [5, 2, 5, 9, 2, 5, 3, 5, 2, 1]])

In [15]:
dsl3 = DataSubjectList.combine(dsl1, dsl2)

In [16]:
dsl3.one_hot_lookup

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9, 100, 101, 102,
       103, 104, 105, 106, 107, 108, 109])

**Case 2: Fully overlapping DSL**

In [17]:
dsl4 = DataSubjectList.combine(dsl1, dsl1)

In [18]:
dsl4 == dsl1

True

In [19]:
(dsl4.data_subjects_indexed == dsl1.data_subjects_indexed).all()

True

**Case 3: Partially overlapping DSL**

In [20]:
dsl1 = DataSubjectList(
    one_hot_lookup=np.arange(10),
    data_subjects_indexed=np.random.choice(np.arange(10), size=(10, 10))
)

In [21]:
# Lookup values 7..16 overlap dsl1's 0..9 on 7, 8 and 9 (the partially overlapping case).
# NOTE(review): the draw is unseeded, so the index grid differs on every run.
w = np.arange(7, 17)
dsl5 = DataSubjectList(
    one_hot_lookup = w,
    data_subjects_indexed=np.random.choice(np.arange(len(w)), size=(10, 10))
)

In [22]:
dsl1.one_hot_lookup, dsl5.one_hot_lookup

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([ 7,  8,  9, 10, 11, 12, 13, 14, 15, 16]))

In [23]:
dsl5.data_subjects_indexed

array([[1, 0, 9, 8, 6, 5, 0, 4, 7, 3],
       [2, 6, 7, 8, 2, 4, 9, 0, 1, 6],
       [8, 4, 3, 4, 3, 6, 8, 2, 6, 9],
       [9, 0, 0, 8, 1, 1, 1, 8, 7, 6],
       [5, 8, 9, 6, 7, 3, 6, 0, 4, 3],
       [2, 0, 7, 4, 7, 6, 7, 2, 6, 7],
       [6, 6, 4, 9, 3, 4, 2, 3, 5, 8],
       [8, 7, 7, 1, 5, 6, 0, 5, 8, 0],
       [2, 8, 2, 9, 0, 4, 3, 2, 0, 0],
       [7, 4, 0, 5, 3, 3, 9, 4, 9, 2]])

In [24]:
dsl6 = DataSubjectList.combine(dsl1, dsl5)

In [25]:
dsl6.one_hot_lookup

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16])

In [26]:
dsl6.data_subjects_indexed

array([[[ 9,  2,  0,  8,  1,  2,  6,  3,  8,  2],
        [ 8,  6,  3,  1,  9,  3,  8,  2,  9,  3],
        [ 8,  2,  5,  7,  9,  6,  7,  3,  2,  8],
        [ 1,  0,  9,  4,  3,  9,  2,  6,  2,  4],
        [ 0,  4,  9,  3,  9,  3,  1,  6,  3,  3],
        [ 1,  6,  6,  9,  4,  8,  7,  0,  4,  5],
        [ 8,  3,  3,  2,  9,  2,  4,  2,  0,  8],
        [ 7,  9,  2,  7,  3,  9,  9,  1,  9,  4],
        [ 0,  6,  9,  9,  2,  9,  1,  7,  1,  4],
        [ 1,  4,  0,  9,  0,  1,  7,  6,  4,  7]],

       [[ 8,  7, 16, 15, 13, 12,  7, 11, 14, 10],
        [ 9, 13, 14, 15,  9, 11, 16,  7,  8, 13],
        [15, 11, 10, 11, 10, 13, 15,  9, 13, 16],
        [16,  7,  7, 15,  8,  8,  8, 15, 14, 13],
        [12, 15, 16, 13, 14, 10, 13,  7, 11, 10],
        [ 9,  7, 14, 11, 14, 13, 14,  9, 13, 14],
        [13, 13, 11, 16, 10, 11,  9, 10, 12, 15],
        [15, 14, 14,  8, 12, 13,  7, 12, 15,  7],
        [ 9, 15,  9, 16,  7, 11, 10,  9,  7,  7],
        [14, 11,  7, 12, 10, 10, 16, 11, 16,  9]

In [27]:
(dsl6.data_subjects_indexed[0] == dsl1.data_subjects_indexed).all()

True

In [28]:
a = np.array([[1,6,3],[7,8,6]])
b = np.array([[3,2,1], [4,5,6]])
np.maximum(a, b)

array([[3, 6, 3],
       [7, 8, 6]])

In [29]:
np.dstack((a,b))

array([[[1, 3],
        [6, 2],
        [3, 1]],

       [[7, 4],
        [8, 5],
        [6, 6]]])

In [30]:
# For 2-D inputs dstack pairs a[i, j] with b[i, j] along a new last axis; argmax over that
# axis is 0 where `a` holds the maximum (ties resolve to `a`, the first entry) and 1 where `b` does.
array_with_max = np.argmax(np.dstack((a,b)), axis=-1)
array_with_max

array([[1, 0, 0],
       [0, 0, 0]])

In [31]:
array_with_max.shape == a.shape

True

In [32]:
np.transpose(array_with_max.nonzero())

array([[0, 0]])

In [33]:
# NOTE: the `*` unpacking only works because exactly one position is non-zero here;
# with several non-zero positions tuple() would receive more than one argument and raise TypeError.
tuple(*np.transpose(array_with_max.nonzero()))

(0, 0)

In [34]:
# Read `a` at the single position where array_with_max is non-zero (i.e. where `b` is larger).
# Same single-hit assumption as the previous cell.
a[tuple(*np.transpose(array_with_max.nonzero()))]

1

**DP maximum - fetching indices**

In [10]:
dsl = DataSubjectList.from_objs(np.random.choice([7, 10], size=(3,3)))

In [11]:
dsl.one_hot_lookup

array([ 7, 10])

In [12]:
dsl.data_subjects_indexed

array([[1, 0, 0],
       [0, 0, 0],
       [1, 0, 0]])

In [7]:
dsl2 = DataSubjectList.from_objs(np.random.choice([7, 11], size=(3,3)))

In [8]:
dsl2.data_subjects_indexed

array([[0, 1, 0],
       [1, 1, 1],
       [1, 1, 0]])

In [5]:
gt = GammaTensor(
    child=jnp.array(np.random.rand(3,3)),
    data_subjects=dsl,
    min_val=0,
    max_val=1
)



In [6]:
gt[0]

GammaTensor(child=DeviceArray([0.95491687, 0.50318373, 0.47252621], dtype=float64), data_subjects=<syft.core.adp.data_subject_list.DataSubjectList object at 0x7fbc0458d220>, min_val=0, max_val=1, is_linear=True, func=<function no_op at 0x7fbb40780310>, id='1755200300', state={})

In [13]:
gt2 = GammaTensor(
    child=jnp.array(np.random.rand(3,3)),
    data_subjects=dsl,
    min_val=0,
    max_val=1
)

In [41]:
gt.min_val

0

In [42]:
np.empty(np.prod((4,4))).reshape((4,4))

array([[0.00000000e+000, 2.86558075e-322, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000,
        5.21522906e-310]])

In [43]:
np.empty(0)

array([], dtype=float64)

In [44]:
# An empty DataSubjectList (no lookup entries, no indices); the next cell merges `dsl` into it.
data_subs = DataSubjectList(one_hot_lookup=np.empty(0), data_subjects_indexed=np.empty(0))

In [45]:
DataSubjectList.combine(data_subs, dsl)

<syft.core.adp.data_subject_list.DataSubjectList at 0x7fcb53312ca0>

In [14]:
DataSubjectList.combine(dsl, dsl2)

<syft.core.adp.data_subject_list.DataSubjectList at 0x7fbaf01d8ac0>

In [15]:
DataSubjectList.combine(dsl, dsl2).one_hot_lookup

array([ 7, 10, 11])

In [16]:
DataSubjectList.combine(dsl, dsl2).data_subjects_indexed

array([[[1, 0, 0],
        [0, 0, 0],
        [1, 0, 0]],

       [[0, 2, 0],
        [2, 2, 2],
        [2, 2, 0]]])

In [46]:
np.prod(gt.shape)

9

In [47]:
a = 5
b = 5.0

# True when at least one of the values is an int.
# The generator must yield the booleans themselves: wrapping each one in a list
# (`[isinstance(i, int)]`) yields non-empty lists, which are always truthy.
any(isinstance(i, int) for i in (a, b))
# To require that *every* value is an int, use: all(isinstance(i, int) for i in (a, b))

True

In [48]:
a = [1,2,3]
b = [5,6,7]
a += b

In [49]:
a

[1, 2, 3, 5, 6, 7]

In [50]:
from tqdm import tqdm


from typing import List, Tuple
from syft.core.tensor.lazy_repeat_array import lazyrepeatarray
def combine(gt_list: List[GammaTensor], target_shape: Tuple) -> GammaTensor:
    """Stitch several GammaTensors into a single GammaTensor of ``target_shape``.

    The flattened ``child`` of every tensor is written, in list order, into
    consecutive slots of a flat buffer which is reshaped to ``target_shape`` at
    the end. Data subjects are accumulated with ``DataSubjectList.absorb`` and
    their index array is reshaped the same way.

    Args:
        gt_list: Tensors to merge, in the order their values should appear in
            the flattened result.
        target_shape: Shape of the result. The total number of elements in
            ``gt_list`` is expected to match it.

    Returns:
        A GammaTensor whose ``min_val`` is the smallest ``min_val`` and whose
        ``max_val`` is the largest ``max_val`` found among the inputs.

    Raises:
        NotImplementedError: If a tensor's ``min_val`` is neither a
            ``lazyrepeatarray`` nor a real number.
    """
    # int(...) because np.prod(()) is the float 1.0, which np.zeros rejects as a shape.
    data = np.zeros(int(np.prod(target_shape)))
    last_index = 0

    # Start from the identity elements of min/max so the first tensor always wins.
    min_val = np.inf
    max_val = -np.inf

    data_subs = DataSubjectList(one_hot_lookup=np.empty(0), data_subjects_indexed=np.empty(0))

    for gamma_tensor in tqdm(gt_list):
        # Append this tensor's values to the flat buffer.
        input_size = int(np.prod(gamma_tensor.shape))
        data[last_index: last_index + input_size] = gamma_tensor.child.flatten()
        last_index += input_size

        # Extract this tensor's bounds as plain scalars.
        if isinstance(gamma_tensor.min_val, lazyrepeatarray):
            # np.min / np.max accept both scalar and array payloads.
            # Assumes max_val is a lazyrepeatarray whenever min_val is one.
            local_min = np.min(gamma_tensor.min_val.data)
            local_max = np.max(gamma_tensor.max_val.data)
        elif isinstance(gamma_tensor.min_val, (int, float, np.number)):
            local_min = gamma_tensor.min_val
            local_max = gamma_tensor.max_val
        else:
            raise NotImplementedError(f"Undefined behaviour for type: {type(gamma_tensor.min_val)}")

        # Widen the running bounds: lowest minimum, highest maximum.
        min_val = min(min_val, local_min)
        max_val = max(max_val, local_max)

        # Add data subjects
        data_subs = DataSubjectList.absorb(data_subs, gamma_tensor.data_subjects)

    data_subs.data_subjects_indexed = data_subs.data_subjects_indexed.reshape(target_shape)

    return GammaTensor(
        child=data.reshape(target_shape),
        data_subjects=data_subs,
        min_val=min_val,
        max_val=max_val
    )
    
def phi_data_subjects_differ(x, y):
    """Tell whether two PhiTensors carry different data-subject lookups.

    Args:
        x: First operand.
        y: Second operand.

    Returns:
        ``True`` only when both operands are PhiTensors and their
        ``data_subjects.one_hot_lookup`` arrays differ in shape or content;
        ``False`` otherwise (including when either operand is not a PhiTensor).
    """
    if not isinstance(x, PhiTensor) or not isinstance(y, PhiTensor):
        return False

    # `lookup_a != lookup_b` on arrays is element-wise and cannot be used in an
    # `if`; array_equal collapses the comparison to one bool and also copes
    # with lookups of different lengths.
    return not np.array_equal(
        x.data_subjects.one_hot_lookup, y.data_subjects.one_hot_lookup
    )
    

from typing import Union
def dp_maximum(x: Union[PhiTensor, GammaTensor], y: Union[np.ndarray, PhiTensor, GammaTensor]
               ) -> Union[PhiTensor, GammaTensor]:
    """Element-wise maximum of ``x`` and ``y`` that keeps track of data subjects.

    When either operand is a GammaTensor, or both are PhiTensors with different
    data-subject lookups, each output element is taken from the operand that
    holds the maximum at that position, and the picked elements are merged with
    ``combine``. Otherwise a PhiTensor is built from the raw maximum and ``x``'s
    data-subject lookup.

    Args:
        x: Left operand.
        y: Right operand; a plain array or another tensor.

    Returns:
        A GammaTensor (via ``combine``) on the mixed-subject path, a PhiTensor
        otherwise.

    Raises:
        NotImplementedError: On the mixed-subject path, if ``y`` is a plain
            array and holds the maximum anywhere (such elements have no data
            subjects to carry over).
        ValueError: On the mixed-subject path, if the operands' shapes differ
            (raised by ``np.stack``).
    """
    # TODO: Make this work for GammaTensors
    x_data = x.child
    y_data = y.child if hasattr(y, "child") else y

    output = np.maximum(x_data, y_data)

    # Input checks to see if output is GammaTensor:
    inputs_are_gamma = any(isinstance(i, GammaTensor) for i in (x, y))
    inputs_are_phi = all(isinstance(i, PhiTensor) for i in (x, y))

    if inputs_are_gamma or (inputs_are_phi and phi_data_subjects_differ(x, y)):
        # Pair the operands along a NEW last axis (np.dstack only does that for
        # 2-D inputs). argmax is then 0 where x holds the maximum -- ties go to
        # x, the first entry -- and 1 where y does.
        winner = np.argmax(
            np.stack((np.asarray(x_data), np.asarray(y_data)), axis=-1), axis=-1
        )

        if not isinstance(y, (PhiTensor, GammaTensor)) and winner.any():
            raise NotImplementedError(
                "dp_maximum cannot yet combine a tensor with a plain array that "
                "holds the maximum at some positions"
            )

        # combine() lays its inputs out in list order, so walk the positions in
        # row-major order and pick the winning operand's element at each one.
        # NOTE(review): combine() reads `.min_val` / `.max_val` on every element;
        # confirm PhiTensor elements expose those attributes.
        tensor_list = [
            (y if winner[idx] else x)[idx] for idx in np.ndindex(*winner.shape)
        ]

        return combine(tensor_list, output.shape)

    min_v, max_v = output.min(), output.max()
    # NOTE(review): every element is assigned index 0 of x's lookup here --
    # confirm this is intended when x has more than one data subject.
    dsl = DataSubjectList(
        one_hot_lookup=x.data_subjects.one_hot_lookup,
        data_subjects_indexed=np.zeros_like(output)
    )
    return PhiTensor(
        child=output,
        data_subjects=dsl,
        min_vals=min_v,
        max_vals=max_v,
    )

In [17]:
res = nn.utils.dp_maximum(gt, gt2)

IndexError: too many indices for array: array is 0-dimensional, but 1 were indexed

In [52]:
res.data_subjects.data_subjects_indexed

array([[0, 0, 0],
       [0, 0, 1],
       [0, 1, 1]])

In [53]:
res.data_subjects.one_hot_lookup

array([ 7, 10])

In [54]:
res.data_subjects.data_subjects_indexed

array([[0, 0, 0],
       [0, 0, 1],
       [0, 1, 1]])

In [55]:
from syft.core.tensor.lazy_repeat_array import lazyrepeatarray as lra

In [56]:
m = lra(data=4, shape=(10,10))

In [57]:
m[5]

5


<lazyrepeatarray data: 4 -> shape: (10,)>

In [58]:
m[5,6]

(5, 6)


<lazyrepeatarray data: 4 -> shape: ()>

In [59]:
m[1:5, 6:8]

(slice(1, 5, None), slice(6, 8, None))


<lazyrepeatarray data: 4 -> shape: (4, 2)>

In [60]:
isinstance(slice(1,5, None), slice)

True

In [61]:
s = slice(1, 5, None)

In [62]:
s.start

1

In [63]:
s.stop

5

In [64]:
s.indices(80)

(1, 5, 1)

In [65]:
s

slice(1, 5, None)

In [22]:
nn.utils.dp_zeros(gt.shape, gt2.data_subjects)

GammaTensor(child=array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]]), data_subjects=<syft.core.adp.data_subject_list.DataSubjectList object at 0x7fbb2031c160>, min_val=<lazyrepeatarray data: 0.0 -> shape: (3, 3)>, max_val=<lazyrepeatarray data: 0.0 -> shape: (3, 3)>, is_linear=True, func=<function no_op at 0x7fbb40780310>, id='755921866', state={})

In [21]:
gt2.data_subjects.one_hot_lookup

array([ 7, 10])