In [1]:
import numpy as np
from thirdai import bolt

In [2]:
from utils import (
    gen_single_sparse_layer_network,
    gen_numpy_training_data,
    get_categorical_acc,train_network,train_single_node_distributed_network
)

In [3]:
def build_sparse_hidden_layer_classifier(input_dim, sparse_dim, output_dim, sparsity):
    """Build a two-layer distributed bolt classifier.

    The network is one ReLU hidden layer followed by a Softmax output layer,
    wrapped in a ``bolt.DistributedNetwork``.

    Args:
        input_dim: Passed to ``bolt.DistributedNetwork`` as ``input_dim``.
        sparse_dim: ``dim`` of the hidden (ReLU) layer.
        output_dim: ``dim`` of the output (Softmax) layer.
        sparsity: ``sparsity`` forwarded to the hidden layer only.

    Returns:
        The constructed ``bolt.DistributedNetwork``.
    """
    hidden_layer = bolt.FullyConnected(
        dim=sparse_dim,
        sparsity=sparsity,
        activation_function="ReLU",
    )
    # The output layer is created without a sparsity argument.
    output_layer = bolt.FullyConnected(dim=output_dim, activation_function="Softmax")
    return bolt.DistributedNetwork(
        layers=[hidden_layer, output_layer], input_dim=input_dim
    )

In [4]:
# All three layer widths are the same in this experiment.
input_dim = hidden_dim = output_dim = 10

# sparsity=1.0 is forwarded to the hidden layer only.
network = build_sparse_hidden_layer_classifier(
    input_dim=input_dim,
    sparse_dim=hidden_dim,
    output_dim=output_dim,
    sparsity=1.0,
)

Initializing Bolt network...
InputLayer (Layer 0): dim=10
FullyConnectedLayer (Layer 1): dim=10, sparsity=1, act_func=ReLU
FullyConnectedLayer (Layer 2): dim=10, sparsity=1, act_func=Softmax
Initialized Network in 0 seconds


In [5]:
# Generate a synthetic dataset of 1000 samples over 10 classes.
# NOTE(review): n_classes=10 is hard-coded; presumably it must match output_dim — confirm.
examples, labels = gen_numpy_training_data(n_classes=10, n_samples=1000)
# Run one epoch on this single node with update_parameters=False.
# NOTE(review): presumably this leaves the computed gradients in place for inspection — confirm against utils.
train_single_node_distributed_network(network, examples, labels, epochs=1, update_parameters=False)

printing the batch size: 64
Distributed Network initialization done on this Node


In [6]:
# Fetch the weight gradients of layer 0 and then layer 1, and print both.
w0, w1 = (network.get_weights_gradients(layer_index) for layer_index in (0, 1))
print(w0, w1)

[[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [-2.02648152e-04 -1.36692366e-02 -5.55803344e-05 -9.83047939e-05
   2.07345074e-04 -1.78430884e-04  2.59588240e-04  2.20701011e-04
  -1.05524780e-02  5.94036246e-04]
 [ 5.83809044e-04  2.45426577e-02 -5.42184338e-04  1.87494850e-04
  -1.32723665e-02  2.64657108e-04  2.49056815e-04 -1.11938082e-02
  -3.44963046e-03  3.13086843e-04]
 [-2.41444708e-04 -5.65086630e-05 -4.85633092e-04  1.35110496e-02
   8.44895694e-05 -1.55905989e-04  2.61106109e-03  2.97936820e-03
  -2.91122415e-05 -4.57083552e-05]
 [ 3.15607351e-04 -2.80225620e-04 -5.23110852e-04  8.11146630e-04
   1.48681240e-04  2.48914614e-04  7.57032540e-05 -2.31524988e-04
   2.18110319e-04  2.21010689e-02]
 [ 6.54205913e-04  1.04942756e-05 -2.26623706e-05 -4.61348718e-05
  -4.55401969e-05  1.02125905e-05 -6.26395849e-05 -6.60735532e-05
  -1.21568482e-05 -1.52085613e-05

In [7]:
# Fetch the bias gradients of layer 0 and then layer 1 (printed in a later cell).
b0, b1 = (network.get_biases_gradients(layer_index) for layer_index in (0, 1))

In [8]:
# Density passed to the threshold queries; the later sketching cell reads it too.
compression_density = 0.1

wthresh = []  # one weight-gradient threshold per layer
bthresh = []  # one bias-gradient threshold per layer

for layer_index in (0, 1):
    # Query the weight threshold first, then the bias threshold, for each layer.
    wthresh.append(
        network.get_unbiased_threshold_for_gradients(
            layer_index, compression_density=compression_density, sketch_biases=False
        )
    )
    bthresh.append(
        network.get_unbiased_threshold_for_gradients(
            layer_index, compression_density=compression_density, sketch_biases=True
        )
    )



In [9]:
# Show the per-layer thresholds: the weight list first, then the bias list.
print(wthresh,bthresh)

[0.01994033344089985, 0.006407885812222958] [0.02378719113767147, 0.11060959100723267]


In [10]:
# Show the bias gradients of layers 0 and 1 as captured before any sketching.
print(b0,b1)

[ 0.         -0.02378719 -0.00021373  0.01796575  0.02233225  0.00045985
  0.00064798 -0.00527625 -0.00961438 -0.00858767] [-0.00413506  0.08318032 -0.09175882 -0.0787776  -0.08299562  0.11060959
 -0.09790234  0.03790947 -0.11727516  0.10090064]


In [11]:
# Accumulators for the per-layer sketches; index i holds the sketch for layer i.
wgrads=[]  # weight-gradient sketches
bgrads=[]  # bias-gradient sketches

In [12]:
# Start from empty accumulators so that re-running this cell replaces the
# sketches instead of appending duplicates. Later code indexes these lists by
# layer (0, 1), so stale leading entries would silently be used otherwise.
wgrads = []
bgrads = []

pregenerate = True

for layer_index in (0, 1):
    # One hashing seed per layer, shared by that layer's weight and bias sketches.
    # NOTE: drawn from NumPy's global RNG, which this cell does not seed, so
    # the resulting sketches differ from run to run.
    seed = np.random.randint(100)

    weight_sketch = network.get_unbiased_indexed_sketch_for_gradients(
        layer_index=layer_index,
        compression_density=compression_density,
        sketch_biases=False,
        seed_for_hashing=seed,
        pregenerate_distribution=pregenerate,
        threshold=wthresh[layer_index],
    )

    # NOTE(review): the bias sketch uses 5x the density that was used when
    # computing bthresh — confirm this mismatch is intentional.
    bias_sketch = network.get_unbiased_indexed_sketch_for_gradients(
        layer_index=layer_index,
        compression_density=5 * compression_density,
        sketch_biases=True,
        seed_for_hashing=seed,
        pregenerate_distribution=pregenerate,
        threshold=bthresh[layer_index],
    )

    wgrads.append(weight_sketch)
    bgrads.append(bias_sketch)

In [13]:
# Single-node run: the list of "per-node" sketches contains only this node's.
per_node_weight_sketches = [wgrads]
per_node_bias_sketches = [bgrads]

for layer_index in (0, 1):
    # Concatenate every node's sketch for this layer into one flat array.
    stacked_weights = np.hstack(
        [node_sketches[layer_index] for node_sketches in per_node_weight_sketches]
    )
    w_values = np.ravel(stacked_weights)

    stacked_biases = np.hstack(
        [node_sketches[layer_index] for node_sketches in per_node_bias_sketches]
    )
    b_values = np.ravel(stacked_biases)

    # Write the sketches back into the network: weights first, then biases.
    network.set_unbiased_gradients_from_indices_values(
        layer_index=layer_index,
        indices=w_values,
        set_biases=False,
        threshold=wthresh[layer_index],
    )
    network.set_unbiased_gradients_from_indices_values(
        layer_index=layer_index,
        indices=b_values,
        set_biases=True,
        threshold=bthresh[layer_index],
    )

In [14]:
# Print the thresholds again; the sketching cells only read wthresh/bthresh,
# so the values match the earlier printout.
print(wthresh,bthresh)

[0.01994033344089985, 0.006407885812222958] [0.02378719113767147, 0.11060959100723267]


In [15]:
# Show the collected sketches: per-layer weight sketches, then per-layer bias sketches.
print(wgrads,bgrads)

[array([-64,  95, -96, -30, -62,  14, -72,  78,  66,  44], dtype=int32), array([-92, -48,  58, -78, -68,   8, -26, -52, -57,  87], dtype=int32)] [array([ 0,  0, -1,  3,  0], dtype=int32), array([9, 0, 0, 5, 0], dtype=int32)]


In [17]:
# Display layer 0's bias gradients as currently held by the network, for
# comparison with the b0 values printed earlier.
# NOTE(review): the execution count jumps from 15 to 17 here; re-run the
# notebook top to bottom to confirm this output is reproducible.
network.get_biases_gradients(0)

array([ 0.        ,  0.05250718,  0.        ,  0.        ,  0.        ,
        0.        , -0.05250718,  0.        ,  0.        ,  0.        ],
      dtype=float32)