In [3]:
import numpy as np
from thirdai import bolt

In [4]:
from utils import (
    gen_single_sparse_layer_network,
    gen_numpy_training_data,
    get_categorical_acc,train_network,train_single_node_distributed_network
)

In [5]:
def build_sparse_hidden_layer_classifier(input_dim, sparse_dim, output_dim, sparsity):
    """Build a two-layer distributed Bolt classifier.

    The network consists of one ReLU fully connected hidden layer followed by
    a Softmax fully connected output layer, wrapped in a
    ``bolt.DistributedNetwork``.

    Args:
        input_dim: Forwarded as ``input_dim`` of the network.
        sparse_dim: ``dim`` of the hidden (ReLU) layer.
        output_dim: ``dim`` of the output (Softmax) layer.
        sparsity: Forwarded as ``sparsity`` of the hidden layer only; the
            output layer is created without a sparsity argument.

    Returns:
        The constructed ``bolt.DistributedNetwork``.
    """
    hidden_layer = bolt.FullyConnected(
        dim=sparse_dim, sparsity=sparsity, activation_function="ReLU"
    )
    output_layer = bolt.FullyConnected(dim=output_dim, activation_function="Softmax")
    return bolt.DistributedNetwork(
        layers=[hidden_layer, output_layer], input_dim=input_dim
    )

In [6]:
# All three layer widths are 10 in this experiment.
input_dim = hidden_dim = output_dim = 10

# sparsity=1.0 is forwarded to the hidden layer; the initialization log
# printed by this cell reports sparsity=1 for both layers.
network = build_sparse_hidden_layer_classifier(
    input_dim=input_dim,
    sparse_dim=hidden_dim,
    output_dim=output_dim,
    sparsity=1.0,
)

Initializing Bolt network...
InputLayer (Layer 0): dim=10
FullyConnectedLayer (Layer 1): dim=10, sparsity=1, act_func=ReLU
FullyConnectedLayer (Layer 2): dim=10, sparsity=1, act_func=Softmax
Initialized Network in 0 seconds


In [7]:
# Generate the training set with the helper from utils, then run a single
# epoch on the distributed network.
# NOTE(review): update_parameters=False presumably leaves the gradients in
# place without applying them, so later cells can inspect them -- confirm
# against utils.train_single_node_distributed_network.
examples, labels = gen_numpy_training_data(n_samples=1000, n_classes=10)
train_single_node_distributed_network(
    network,
    examples,
    labels,
    epochs=1,
    update_parameters=False,
)

Distributed Network initialization done on this Node


In [8]:
# Fetch the weight gradients of layer 0 and layer 1 (in that order) and
# print both arrays.
w0, w1 = (network.get_weights_gradients(layer_index) for layer_index in (0, 1))
print(w0, w1)

[[-8.4713381e-04 -1.8421747e-04  6.7424583e-03  3.1810635e-04
   2.8960863e-03 -1.5892272e-03  2.9931613e-04  7.1266582e-03
   1.8590171e-02  3.6582772e-03]
 [ 1.9389585e-02 -1.4998580e-04  2.2481226e-03 -8.6707243e-04
   1.5623739e-04  8.3658861e-06  1.4509683e-03 -3.6106772e-05
   2.5757687e-02  7.2232741e-03]
 [-7.0137940e-03  1.4052187e-02 -3.8450710e-02 -1.7043537e-02
   1.7161719e-03  3.9768843e-03  9.5108431e-03  3.0710783e-03
   3.6101213e-03  2.1944005e-02]
 [-1.4677843e-05 -9.5282998e-03  6.3375216e-03 -1.7406161e-04
  -2.0873737e-02 -1.7215210e-04 -5.8515719e-04  2.3926496e-04
  -5.1984668e-04  2.9479146e-02]
 [ 6.7152374e-04 -5.7613691e-03  1.6839707e-02  1.9579269e-02
  -1.6531796e-03  7.7709807e-03 -5.2292569e-04  4.9576559e-04
  -4.9879286e-04 -2.0366460e-03]
 [-1.2624292e-03 -8.7281829e-04  1.9394138e-03  1.7895335e-02
  -1.8153608e-02  6.5694801e-03  3.1006621e-02 -1.4588882e-02
   1.7032007e-03  1.5941570e-02]
 [-3.1925840e-04  4.0428256e-04 -3.3216991e-02 -8.6731149e

In [9]:
# Fetch the bias gradients of layer 0 and layer 1 (in that order).
b0, b1 = (network.get_biases_gradients(layer_index) for layer_index in (0, 1))

In [10]:
# Fraction handed to the threshold estimator; the sketching cell further down
# reads this same module-level name.
compression_density = 0.1

# One threshold per layer: wthresh for the weight gradients, bthresh for the
# bias gradients.
wthresh, bthresh = [], []

for layer_index in range(2):
    # Weights first, then biases -- same call order for every layer.
    weight_threshold = network.get_unbiased_threshold_for_gradients(
        layer_index,
        compression_density=compression_density,
        sketch_biases=False,
    )
    bias_threshold = network.get_unbiased_threshold_for_gradients(
        layer_index,
        compression_density=compression_density,
        sketch_biases=True,
    )

    wthresh.append(weight_threshold)
    bthresh.append(bias_threshold)


In [11]:
# Show the per-layer thresholds collected in the loop above: the weight
# thresholds list first, then the bias thresholds list.
print(wthresh,bthresh)

[0.01789533533155918, 0.0238969624042511] [0.052507184445858, 0.20508068799972534]


In [12]:
# Print the raw bias gradients of layer 0 and layer 1 for comparison with the
# thresholds. In the recorded run, each bias threshold equals the largest
# absolute bias gradient of its layer (0.0525... and 0.2050...).
print(b0,b1)

[ 0.03671202  0.05250718 -0.00626167  0.00479284  0.03595268  0.03899039
 -0.02333014  0.01098991 -0.00206941 -0.00152589] [-0.10625004 -0.20508069  0.14035098  0.00604025  0.08509448 -0.06772582
  0.11420869  0.00428471  0.04744843 -0.01204392]


In [13]:
# Accumulators for the per-layer sketches (weights and biases). They are reset
# here so that re-running the sketching cell on its own would keep appending
# to the existing lists.
wgrads, bgrads = [], []

In [14]:
# Build an indexed sketch of the weight and bias gradients for each layer,
# using the thresholds stored in wthresh / bthresh.
# Relies on `compression_density` still being defined from the threshold cell.
pregenerate = True

for layer_index in range(2):
    # One hashing seed per layer, shared by the weight and bias sketches.
    # NOTE(review): the NumPy global RNG is not seeded in this notebook, so
    # the sketches differ from run to run.
    hash_seed = np.random.randint(100)

    weight_sketch = network.get_unbiased_indexed_sketch_for_gradients(
        layer_index=layer_index,
        compression_density=compression_density,
        sketch_biases=False,
        seed_for_hashing=hash_seed,
        pregenerate_distribution=pregenerate,
        threshold=wthresh[layer_index],
    )

    # NOTE(review): the bias sketch uses 5x the density, whereas the bias
    # threshold was computed with 1x compression_density -- confirm this
    # mismatch is intentional.
    bias_sketch = network.get_unbiased_indexed_sketch_for_gradients(
        layer_index=layer_index,
        compression_density=5 * compression_density,
        sketch_biases=True,
        seed_for_hashing=hash_seed,
        pregenerate_distribution=pregenerate,
        threshold=bthresh[layer_index],
    )

    wgrads.append(weight_sketch)
    bgrads.append(bias_sketch)

In [16]:
# Write the sketched gradients back into the network, layer by layer.
for layer_index in range(2):
    # The one-element list passed to hstack stands in for a list of per-node
    # sketches; only a single node (wgrads / bgrads) contributes here.
    weight_indices = np.ravel(np.hstack([wgrads[layer_index]]))
    bias_indices = np.ravel(np.hstack([bgrads[layer_index]]))

    # Weights first, then biases, each with the threshold of its own layer.
    network.set_unbiased_gradients_from_indices_values(
        layer_index=layer_index,
        indices=weight_indices,
        set_biases=False,
        threshold=wthresh[layer_index],
    )
    network.set_unbiased_gradients_from_indices_values(
        layer_index=layer_index,
        indices=bias_indices,
        set_biases=True,
        threshold=bthresh[layer_index],
    )

In [113]:
# Print the weight and bias thresholds again.
# NOTE(review): this cell carries an out-of-order execution count (113) and
# its recorded values differ from the earlier print of the same lists, so the
# stored output appears to come from a different run -- re-run or remove.
print(wthresh,bthresh)

[0.009753064252436161, 0.005380359012633562] [0.012643870897591114, 0.24587056040763855]


In [15]:
# Print the collected sketches. In the recorded output each list holds one
# int32 array per layer (10 entries for weights, 5 for biases).
print(wgrads,bgrads)

[array([  4, -41, -34, -82,  29,  85,  87, -78,   0, -79], dtype=int32), array([ -5, -14,  13, -64,  12,  23, -25, -83,   0,  22], dtype=int32)] [array([ 0, -6,  0,  1,  0], dtype=int32), array([ 0,  6, -1,  0,  0], dtype=int32)]


In [17]:
# Inspect layer 0's bias gradients after the sketch was written back. In the
# recorded output every non-zero entry has magnitude 0.05250718, which matches
# the layer-0 bias threshold printed earlier.
network.get_biases_gradients(0)

array([ 0.        ,  0.05250718,  0.        ,  0.        ,  0.        ,
        0.        , -0.05250718,  0.        ,  0.        ,  0.        ],
      dtype=float32)