In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.models as KM
import tensorflow.keras.layers as KL
import tensorflow.keras.backend as K
import numpy as np

In [2]:
def estimate_generalization_error(model, train, validation):
    """Stub: not implemented yet.

    The ``model``, ``train`` and ``validation`` arguments are accepted but
    currently unused; the function always returns ``None``.
    """
    return None

In [3]:
def estimate_G(model, train, validation):
    """Stub: not implemented yet.

    The ``model``, ``train`` and ``validation`` arguments are accepted but
    currently unused; the function always returns ``None``.

    NOTE(review): presumably meant to produce the matrix ``G`` explored in the
    later cells of this notebook -- confirm before implementing.
    """
    return None

In [4]:
def estimate_v(model, train, validation):
    """Stub: not implemented yet.

    The ``model``, ``train`` and ``validation`` arguments are accepted but
    currently unused; the function always returns ``None``.
    """
    return None

In [5]:
def compute_norm_w(model):
    """Stub: not implemented yet.

    The ``model`` argument is accepted but currently unused; the function
    always returns ``None``.

    NOTE(review): presumably meant to compute a norm of the path-weight
    vector described in the notebook's notes -- confirm before implementing.
    """
    return None

In [39]:
def cnn_model(input_shape = (512, 512, 3, ), layers = [8, 16], n_classes = 5):
    """Build a small convolutional classifier and a matching feature extractor.

    Args:
        input_shape: Shape of one input sample (without the batch dimension).
        layers: Number of filters for each 3x3 ReLU convolution, in order.
            A 2x2 max-pool follows every convolution except the last one.
            The sequence is only read, never modified.
        n_classes: Number of units in the final softmax layer.

    Returns:
        A tuple ``(model, z_model)`` of Keras models sharing the same input
        and layers: ``model`` outputs the ``n_classes``-way softmax, and
        ``z_model`` outputs the 10-unit ReLU dense layer feeding it.
    """
    # Named `inputs` rather than `input` to avoid shadowing the builtin.
    inputs = KL.Input(shape=input_shape)
    x = inputs

    last_index = len(layers) - 1
    for i, n_filters in enumerate(layers):
        x = KL.Conv2D(n_filters, 3, activation='relu', kernel_initializer="he_normal")(x)
        # No pooling after the final convolution.
        if i < last_index:
            x = KL.MaxPool2D(2)(x)

    x = KL.Flatten()(x)
    z = KL.Dense(10, activation = 'relu')(x)
    # Size the output layer from n_classes instead of a hard-coded 5, so the
    # parameter actually takes effect (the default keeps the old behaviour).
    output = KL.Dense(n_classes, activation = 'softmax')(z)

    return KM.Model(inputs, output), KM.Model(inputs, z)

In [40]:
# Build the CNN with its default arguments: `cnn` is the full softmax
# classifier, `cnn_z` shares its layers but outputs the 10-unit dense layer.
cnn, cnn_z = cnn_model()

In [41]:
# Layer-by-layer summary (output shapes and parameter counts) of the full CNN.
cnn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 512, 512, 3)       0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 510, 510, 8)       224       
_________________________________________________________________
max_pooling2d_31 (MaxPooling (None, 255, 255, 8)       0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 253, 253, 16)      1168      
_________________________________________________________________
flatten_8 (Flatten)          (None, 1024144)           0         
_________________________________________________________________
dense_16 (Dense)             (None, 10)                10241450  
_________________________________________________________________
dense_17 (Dense)             (None, 5)                 55        
Total para

In [38]:
# Print the shapes of every weight array, one line per layer that has weights.
# NOTE(review): this cell's execution count (38) precedes the cells that build
# `cnn` (39-40), and the recorded output lists more layers than the summary of
# the current model -- re-run after Restart & Run All.
shapes_per_layer = ([w.shape for w in layer.get_weights()] for layer in cnn.layers)
for shapes in shapes_per_layer:
    if shapes:
        print(shapes)

[(3, 3, 3, 8), (8,)]
[(3, 3, 8, 16), (16,)]
[(3, 3, 16, 32), (32,)]
[(3, 3, 32, 64), (64,)]
[(3, 3, 64, 32), (32,)]
[(3, 3, 32, 16), (16,)]
[(2304, 10), (10,)]
[(10, 5), (5,)]


In [8]:
# Shape of the first weight array of the layer at index 1 of `cnn`.
# NOTE(review): the recorded output (3, 3, 3, 1) does not match the 8-filter
# first convolution shown in the model summary -- it looks stale; re-run.
cnn.layers[1].get_weights()[0].shape

(3, 3, 3, 1)

In [7]:
def demo_model():
    """Build a small bias-free dense demo network on down-sampled images.

    The (512, 512, 1) input is resized to 128x128, flattened, and passed
    through two 10-unit ReLU dense layers followed by a 5-way softmax layer.
    None of the dense layers uses a bias term.

    Returns:
        A tuple ``(model, z_model)`` of Keras models sharing the same input
        and layers: ``model`` outputs the softmax, and ``z_model`` outputs
        the second 10-unit hidden layer feeding it.
    """
    # `tf.image.resize_images` is gone in TensorFlow 2.x. Keep using it where
    # it exists (unchanged behaviour) and fall back to `tf.image.resize`.
    resize = getattr(tf.image, "resize_images", None) or tf.image.resize

    # Named `inputs` rather than `input` to avoid shadowing the builtin.
    inputs = KL.Input(shape=(512, 512, 1, ))

    x = KL.Lambda(lambda image: resize(image, (128, 128)))(inputs)
    x = KL.Flatten()(x)
    x = KL.Dense(10, activation='relu', use_bias=False)(x)
    z = KL.Dense(10, activation='relu', use_bias=False)(x)

    output = KL.Dense(5, activation='softmax', use_bias=False)(z)

    return KM.Model(inputs, output), KM.Model(inputs, z)

In [8]:
# Build the demo network: `model` is the full softmax classifier, `z` shares
# its layers but outputs the last 10-unit hidden layer.
model, z = demo_model()

In [9]:
# Layer-by-layer summary (output shapes and parameter counts) of the demo model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 512, 512, 1)       0         
_________________________________________________________________
lambda (Lambda)              (None, 128, 128, 1)       0         
_________________________________________________________________
flatten (Flatten)            (None, 16384)             0         
_________________________________________________________________
dense (Dense)                (None, 10)                163840    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                100       
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 50        
Total params: 163,990
Trainable params: 163,990
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Chain-multiply the first weight array of every layer that has weights,
# in layer order; W stays None when no layer has any weights.
kernels = [
    params[0]
    for params in (layer.get_weights() for layer in model.layers)
    if params
]
W = kernels[0] if kernels else None
for kernel in kernels[1:]:
    W = np.matmul(W, kernel)

In [13]:
# Sanity check: display the shape of the accumulated product W.
W.shape

(16384, 5)

In [14]:
# Euclidean (L2) norm of the first column of W; for a 1-D vector this is
# what np.linalg.norm computes by default.
first_column = W[:, 0]
np.linalg.norm(first_column)

1.9350959

In [15]:
import matplotlib.pyplot as plt
%matplotlib inline

`image.reshape` turns a `(width, height)` image into a `(batch, width, height, channel)` array, which is the layout the model takes as input.

In [18]:
# One random single-channel 512x512 image, as a batch of size 1.
# A dedicated seeded generator makes the input identical on every re-run
# without touching NumPy's global random state.
img = np.random.RandomState(0).rand(1, 512, 512, 1)

In [19]:
# Run the full model on the one-image batch and display its output.
model.predict(img)

array([[0.14574671, 0.14376056, 0.2748396 , 0.19105217, 0.24460094]],
      dtype=float32)

In [20]:
# Output of the `z` model (the second model returned by demo_model) for the same batch.
z_i = z.predict(img)

In [21]:
# Product of the transposed activations with themselves (z_i^T z_i).
G = z_i.T @ z_i

In [22]:
# Display G (the cell's last expression is rendered as its output).
G

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.075531  , 0.03191868, 0.14682122, 0.        ,
        0.        , 0.1140682 , 0.22865663, 0.01541052, 0.        ],
       [0.        , 0.03191868, 0.01348853, 0.06204526, 0.        ,
        0.        , 0.04820414, 0.09662812, 0.00651234, 0.        ],
       [0.        , 0.14682122, 0.06204526, 0.28539902, 0.        ,
        0.        , 0.22173192, 0.44447508, 0.0299558 , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.1140682 , 0.04820414, 0.22173192, 0.        ,
        0.        , 0.17226774, 0.34532115, 0.02327323, 0.        ],
       [0.        , 0.22865663, 0.0966281

In [23]:
# Largest eigenvalue of G. G is built as z_i^T z_i and is therefore symmetric,
# so use the symmetric solver: it always returns real eigenvalues, whereas the
# general `eig` routine ignores the symmetry and may return complex values.
np.max(np.linalg.eigvalsh(G))

1.2420477

### Notes
$\overline{w}$ is the vector of weights of all paths through the network, computed as $\prod_i |W_i|$.