# Divisions for a deep network
---

#### Batch Normalization Overhead

In [3]:
# Testing BatchNorm overhead
from models.recursives import Conv_Net

def count_parameters(net):
    pars = 0
    for _, parm in net.state_dict().items():
        pars += parm.shape.numel()
    return pars

# For small Network
cnn1 = Conv_Net('cnn1', M=12, L=4, normalize=False)
cnn2 = Conv_Net('cnn2', M=12, L=4, normalize=True)
p1 = count_parameters(cnn1); p2 = count_parameters(cnn2)
print('No BN: {} || BN: {}  ||  Ratio: {}'.format(p1, p2, p1/p2))

# For deep Network
cnn1 = Conv_Net('cnn1', M=32, L=16, normalize=False)
cnn2 = Conv_Net('cnn2', M=32, L=16, normalize=True)
p1 = count_parameters(cnn1); p2 = count_parameters(cnn2)
print('No BN: {} || BN: {}  ||  Ratio: {}'.format(p1, p2, p1/p2))

## We won't take into account these parameters from now on

No BN: 15238 || BN: 15336  ||  Ratio: 0.9936098069900887
No BN: 174634 || BN: 174892  ||  Ratio: 0.9985248038789653


#### The Single Deep NN is defined below
![](images/network.png)

In [42]:
class Net:
    '''
    Calculate Network Parameters defined by given filters (M) and layers (L)
    Assumptions: Input=32x32 Output=10
    '''
    def __init__(self,M,L):
        self.M = M
        self.L = L
    
    def V(self):
        return 8*8*3*self.M
    
    def W(self):
        return (3*3*self.M*self.M*self.L) + (self.M*(self.L+1))

    def C(self):
        return (64*self.M*10) + 10
    
    def total(self):
        return self.V() + self.W() + self.C()
    
K = 4
L = 16
M = 32
S = Net(M = M, L = L)
print('M = {}, L = {}'.format(M, L))
print(S.total())

M = 32, L = 16
174634


In [43]:
'''
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
net = Rectangle(xy=(0,0),width=L, height=M)
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)
ax1.add_patch(Rectangle((0,0), L, M,alpha=1))
plt.show()
'''

'\nimport matplotlib.pyplot as plt\nfrom matplotlib.patches import Rectangle\nnet = Rectangle(xy=(0,0),width=L, height=M)\nfig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)\nax1.add_patch(Rectangle((0,0), L, M,alpha=1))\nplt.show()\n'

### A - Horizontal Division

In horizontal division, the number of filters is reduced to accomodate more networks to the ensemble.  

We distinguish two possible ways of performing HD:

##### A1 - Plain HD
The depth is maintained `Le = L` => Calculate `Me | K` or calculate `K | Me` (read `K given Me`).

![](images/divisions/A1_horizontal.png)


In [44]:
# A1: Get the value of M keeping L the same as the deep network:
def getM(S,K):
    ensemble_network = Net(M = 1, L = S.L)
    budget = S.total()/K
    if K == 1:
        return S.M
        
    for M in range(S.M):
        ensemble_network.M = M
        if ensemble_network.total() > budget:
            return M-1

##### A2 - Conditioned HD
The depth is given `Le` => Calculate `Me` given `(K, Le)` or calculate `K` given `(Me, Le)`.  
There are 2 possibilities: 
- A21: `Le < L` => Makes sense if calculate `K|Me,Le`, we create shallower nets to allow bigger ensemble size
- A22: `Le > L` => 

![](images/divisions/A2_conditioned_horizontal.png)

In [29]:
# A2: Get the value of M given an L different from the deep network:
def getM_L(S,K,L):
    ensemble_network = Net(M = 1, L = L)
    budget = S.total()/K

    # sweep M
    for M in range(S.M):
        ensemble_network.M = M
        if ensemble_network.total() == budget:
            return M
        if ensemble_network.total() > budget:
            return M-1
    return -1

### Horizontal Division Implementation

In [45]:
## A: HORIZONTAL DIVISION: Fix L, K --> Divide M 
Le = S.L
Me = getM(S, K=4)
Ek = Net(M = Me, L=Le)
print('Me = {}, Le = {}, K = {}'.format(Me, Le, K))          
print(S.total() / Ek.total())

Me = 14, Le = 16, K = 4


In [46]:
## A11: CONDITIONED HORIZONTAL DIVISION: Fix K, Choose L --> Divide M
Le = 20
Me = getM_L(S, L=Le, K=4)
Ek = Net(M = Me, L = Le)
print('Me = {}, Le = {}, K = {}'.format(Me, Le, K))
print(S.total() / Ek.total())

Me = 13, Le = 20, K = 4
4.206122498133385


In [47]:
## A11: CONDITIONED HORIZONTAL DIVISION: Fix K, Choose L --> Divide M
Le = 4
Me = getM_L(S, L=Le, K=4)
Ek = Net(M = Me, L = Le)
print('Me = {}, Le = {}, K = {}'.format(Me, Le, K))
print(S.total() / Ek.total())

Me = 25, Le = 4, K = 4
4.020582479567169


### B - Vertical Division

In vertical division, the number of layers is modify to accomadate more networks.  
We distinguish 2 ways of doing vertical division

##### B1 - Plain VD
The filters are maintained `Me = M` => Calculate `Le | K` or calculate `K | Le`

![](images/divisions/B1_vertical.png)


In [48]:
# Get the value of L keeping M the same as the deep network:
def getL(S,K):
    ensemble_network = Net(M = 1, L = S.L)
    budget = S.total()/K
    print("Budget: " + str(budget))
    for L in range(S.L):
        ensemble_network.L = L
        if ensemble_network.total() > budget:
            return L-1
    return L  ## TODO: M=1 is allowing to have Le > L for k=4 and returns None

##### B2 - Conditioned CD
The filters are given `Me` => Calculate `Le` given `(K, Me)` or calculate `K` given `(Me, Le)`.  
There are 2 possibilities: 
- B21: `Me < M` => *When is this one preferable?*
- B22: `Me > M` => *When is this one preferable?*

![](images/divisions/B2_conditioned_vertical.png)

In [49]:
# Get the value of L keeping given an M different from the deep network:
def getL_M(S,K,M):
    ensemble_network = Net(M = M , L = S.L)
    budget = S.total()/K
    
    for L in range(S.L):
        ensemble_network.L = L
        if ensemble_network.total() == budget:
            return L
        if ensemble_network.total() > budget:
            return L-1
    return -1

In [51]:
## B1: VERTICAL DIVISION: Fix M, K --> Divide L
Me = S.M
Le = getL(S, K = 4)
Ek = Net(M = Me, L = Le)
print('Me = {}, Le = {}, K = {}'.format(Me, Le, K))
print(S.total() / Ek.total())

Budget: 43658.5
Me = 32, Le = 15, K = 4
1.0559176713869372


In [52]:
## B21: CONDITIONED VERICAL DIVISION: Fix K, Choose M --> Divide L
Me = 16
Le = getL_M(S = S, M = Me, K = 4)
Ek = Net(M = Me, L = Le)
print('Me = {}, Le = {}, K = {}'.format(Me, Le, K))
print(S.total() / Ek.total())

Me = 16, Le = 13, K = 4
4.014759299278127


In [54]:
## B22: CONDITIONED VERICAL DIVISION: Fix K, Choose M --> Divide L
Me = 48
Le = getL_M(S = S, M = Me, K = 4)
Ek = Net(M = Me, L = Le)
print('Me = {}, Le = {}, K = {}'.format(Me, Le, K))         ## Le is going to zero and less!!
print(S.total() / Ek.total())

Me = 48, Le = 0, K = 4
4.366504975746362


### C - Recursive Division
![](images/divisions/C_recursive.png)

In [55]:
## CA: RECURSIVE: Fix Le = 1, Choose M --> Calculate Ensemble Size allowed
Le = 1
Me = S.M
Ek = Net(M = Me, L = Le)
print('Me = {}, Le = {}, K = {}'.format(Me, Le, K))

Me = 32, Le = 1, K = 4


In [56]:
## CA: RECURSIVE: Fix Le = 1, Choose K --> Calculate Me allowed to use
K = 6
Le = 1
Me = getM_L(S, L=Le, K=K)
Ek = Net(M = Me, L = Le)
print('Me = {}, Le = {}, K = {}'.format(Me, Le, K))           ## Le is going to 0 or less!

Me = 27, Le = 1, K = 6
