# Code notes for BCCNet

source: https://github.com/OlgaIsupova/BCCNet

**utils**: https://github.com/OlgaIsupova/BCCNet/blob/master/utils/utils_dataset_processing.py <br><br>
Two functions: 
1. shuffle_arrays(arrays)
2. shrink_arrays(arrays, shrink_size, is_shuffle=True)

In [48]:
import numpy as np

In [49]:
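# Assume our data is 3-D, i.e. each sample is a 2-D input,
# for instance (60000, 28, 28) for the MNIST training images
# (the demo below uses a smaller (6000, 4, 4) array).
# shuffle_arrays draws ONE random permutation of length arrays[0].shape[0]
# and applies it to every element of `arrays`. Given a list such as [x, y] it
# shuffles the samples of x and y in unison; given a single 3-D array (as in
# the demo below) it instead reorders the rows inside each 2-D sample.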

def shuffle_arrays(arrays):
    """Shuffle every element of ``arrays`` with one shared random permutation.

    A single permutation of length ``arrays[0].shape[0]`` is drawn with
    ``np.random.shuffle`` and used to index each element, so arrays that share
    their first axis (e.g. inputs and labels) stay aligned with each other.

    Note that if a single ndarray is passed instead of a list of arrays, the
    loop runs over its first axis, so each sub-array is reordered internally
    rather than the sub-arrays being shuffled among themselves.

    Args:
        arrays: Mutable sequence (typically a list) of NumPy arrays. Every
            element is indexed with the same permutation, so each one needs at
            least ``arrays[0].shape[0]`` entries along its first axis.

    Returns:
        The same ``arrays`` object that was passed in. It is modified in
        place: ``arrays[i]`` is replaced by its shuffled version.
    """
    # Nothing to shuffle. ``len`` is used rather than ``not arrays`` because
    # the truth value of a multi-element ndarray is ambiguous and raises.
    if len(arrays) == 0:
        return arrays

    size = arrays[0].shape[0]
    permutation = np.arange(size)
    np.random.shuffle(permutation)

    # Fancy indexing produces a reordered copy, which is written back into
    # the caller's container.
    for i, array in enumerate(arrays):
        arrays[i] = array[permutation]

    return arrays

In [59]:
# Demo: build a reproducible random binary array of 6000 samples, each 4x4.
np.random.seed(100)
x = np.random.choice([0,1], size=(6000, 4, 4))
# First sample before shuffling.
print(x[0])
# NOTE: x is passed as a single ndarray, not as a list of arrays, so
# shuffle_arrays iterates over the 6000 samples. arrays[0].shape[0] is 4,
# hence one permutation of length 4 reorders the rows of every sample, and
# x itself is modified in place.
print(shuffle_arrays(x)[0])

[[0 0 1 1]
 [1 1 0 0]
 [0 0 0 1]
 [0 0 0 0]]
[[0 0 0 1]
 [0 0 0 0]
 [0 0 1 1]
 [1 1 0 0]]


In [60]:
# shrink_arrays first shuffles the input with shuffle_arrays (unless
# is_shuffle=False), then returns a tuple (shrunk_arrays, shuffled_arrays):
# the leading slice of each array, and the full (shuffled) arrays.


def shrink_arrays(arrays, shrink_size, is_shuffle=True):
    """Optionally shuffle ``arrays`` and return a leading slice of each one.

    Args:
        arrays: Sequence of NumPy arrays.
        shrink_size: If a float (Python ``float`` or any NumPy floating type),
            a fraction in [0.0, 1.0] of ``arrays[0].shape[0]`` to keep,
            rounded to an integer. Any other value is used directly as the
            number of leading entries to keep from each array.
        is_shuffle: If True, ``arrays`` is first passed through
            ``shuffle_arrays``; otherwise it is used as given.

    Returns:
        A tuple ``(shrunk_arrays, shuffled_arrays)``: a new list holding the
        first ``shrunk_array_size`` entries of each array, and the full
        (possibly shuffled) arrays. For empty input this is ``([], arrays)``.

    Raises:
        ValueError: If ``shrink_size`` is a float outside [0.0, 1.0].
    """
    # Empty input: there is no arrays[0] to size the fraction against.
    # ``len`` is used because ``not arrays`` raises for multi-element ndarrays.
    if len(arrays) == 0:
        return [], arrays

    # isinstance also accepts np.float32/np.float16, which an exact type
    # comparison against float/np.float64 would send to the integer branch.
    if isinstance(shrink_size, (float, np.floating)):
        if not 0.0 <= shrink_size <= 1.0:
            raise ValueError(
                "fractional shrink_size must be in [0.0, 1.0], got %r" % (shrink_size,)
            )
        size = arrays[0].shape[0]
        shrunk_array_size = int(round(shrink_size * size))
    else:
        shrunk_array_size = shrink_size

    shuffled_arrays = shuffle_arrays(arrays) if is_shuffle else arrays

    shrunk_arrays = [array[:shrunk_array_size] for array in shuffled_arrays]

    return shrunk_arrays, shuffled_arrays

In [64]:
# Demo of shrink_arrays on the same ndarray x.
np.random.seed(100)
# x was already modified in place by the earlier shuffle_arrays call(s), so
# this is not necessarily the x[0] that was printed when x was created.
print(x[0])
# x is a single ndarray here, so 0.25 is a fraction of arrays[0].shape[0] == 4:
# one row is kept from each 4x4 sample (not 25% of the 6000 samples).
labelled_train, whole_train = shrink_arrays(x, 0.25) # (shrunk_arrays, shuffled_arrays)
# First row of the first shuffled sample.
print(labelled_train[0])
# The full first sample after shuffling.
print(whole_train[0])

[[0 0 0 0]
 [1 1 0 0]
 [0 0 1 1]
 [0 0 0 1]]
[[0 0 1 1]]
[[0 0 1 1]
 [1 1 0 0]
 [0 0 0 1]
 [0 0 0 0]]


NumPy function: **numpy.expand_dims()** (inserts a new axis of length 1 at the given position)

In [79]:
# Demo of np.expand_dims on x.
# NOTE(review): nothing below draws random numbers, so the output of this
# cell does not depend on the seed call.
np.random.seed(100)
print(x[0])
# axis=3 appends a new trailing axis of length 1: (6000, 4, 4) -> (6000, 4, 4, 1).
y = np.expand_dims(x, axis=3)
print(y[0])
print(y.shape)

[[0 0 1 1]
 [1 1 0 0]
 [0 0 0 1]
 [0 0 0 0]]
[[[0]
  [0]
  [1]
  [1]]

 [[1]
  [1]
  [0]
  [0]]

 [[0]
  [0]
  [0]
  [1]]

 [[0]
  [0]
  [0]
  [0]]]
(6000, 4, 4, 1)


In [78]:
# len() of an ndarray is the size of its first axis (the number of samples here).
len(y)

6000