In [1]:
import random
import numpy as np
import tensorflow as tf

SEED=1995

np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

gru_tf = tf.keras.layers.GRU(
    units=5,
    return_sequences=True,
    kernel_initializer=tf.keras.initializers.GlorotUniform(seed=SEED),
    recurrent_initializer=tf.keras.initializers.Orthogonal(seed=SEED),
    bias_initializer=tf.keras.initializers.GlorotUniform(seed=SEED)
)

y_tf = gru_tf(tf.ones((1, 3, 5)), training=False)  # forward pass with ones

np.savez(
    'tf_model_weights.npz', 
    gru_kernel=gru_tf.weights[0].numpy(), 
    gru_recurrent_kernel=gru_tf.weights[1].numpy(),
    gru_bias=gru_tf.weights[2].numpy()
)

In [2]:
import random as r
import numpy as np
import torch

SEED=1995
torch.set_printoptions(precision=8)

r.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# from speechbrain.nnet.RNN import GRU, LSTM

npz_weights = np.load('tf_model_weights.npz')


def convert_input_kernel_inv(kernel):
    kernel_r, kernel_z, kernel_h = np.hsplit(kernel, 3)
    return np.concatenate((kernel_z.T, kernel_r.T, kernel_h.T))
    

def convert_recurrent_kernel_inv(kernel):
    kernel_r, kernel_z, kernel_h = np.hsplit(kernel, 3)
    return np.concatenate((kernel_z.T, kernel_r.T, kernel_h.T))


def convert_bias_inv(bias):
    bias = bias.reshape(2, 3, -1) 
    return bias[:, [1, 0, 2], :].reshape((2, -1))


gru_pt = torch.nn.GRU(
    hidden_size=5,
    input_size=5,
    num_layers=1,
    bidirectional=False,
    batch_first=True
)
for pn, p in gru_pt.named_parameters():
    if 'weight_ih' in pn:
        p.data = torch.from_numpy(convert_input_kernel_inv(npz_weights['gru_kernel']))
    elif 'weight_hh' in pn:
        p.data = torch.from_numpy(convert_recurrent_kernel_inv(npz_weights['gru_recurrent_kernel']))
    elif 'bias_ih' in pn:
        p.data = torch.from_numpy(convert_bias_inv(npz_weights['gru_bias'])[0])
    else:
        p.data = torch.from_numpy(convert_bias_inv(npz_weights['gru_bias'])[1])

In [3]:
y_tf

<tf.Tensor: shape=(1, 3, 5), dtype=float32, numpy=
array([[[-0.7925101 , -0.40041846,  0.20510904,  0.13561569,
          0.28905517],
        [-0.9018102 , -0.5436267 ,  0.2940711 ,  0.23402843,
          0.48948967],
        [-0.9190761 , -0.58599323,  0.34698206,  0.30531275,
          0.58507884]]], dtype=float32)>

In [4]:
gru_pt.eval()

GRU(5, 5, batch_first=True)

In [5]:
y_pt, _ = gru_pt(torch.ones(1, 3, 5))

In [6]:
y_pt

tensor([[[-0.79250997, -0.40041843,  0.20510904,  0.13561571,  0.28905517],
         [-0.90181017, -0.54362673,  0.29407114,  0.23402844,  0.48948961],
         [-0.91907609, -0.58599323,  0.34698212,  0.30531275,  0.58507884]]],
       grad_fn=<TransposeBackward1>)

In [7]:
def convert_kernel(kernel):
    kernel_z, kernel_r, kernel_h = np.vsplit(kernel, 3)
    return np.concatenate((kernel_r.T, kernel_z.T, kernel_h.T), axis=1)

def convert_bias(bias):
    bias = bias.reshape(2, 3, -1) 
    return bias[:, [1, 0, 2], :].reshape((2, -1))

In [8]:
for pn, p in gru_pt.named_parameters():
    if 'weight_ih' in pn:
        kernel = p.data
    elif 'weight_hh' in pn:
        recurrent_kernel = p.data
    elif 'bias_ih' in pn:
        bias_ih = p.data
    else:
        bias_hh = p.data
bias = np.stack((bias_ih, bias_hh), axis=0)

In [9]:
bias = np.stack((bias_ih, bias_hh), axis=0)

In [10]:
gru_tf.set_weights([convert_kernel(kernel), 
                    convert_kernel(recurrent_kernel), 
                    convert_bias(bias)])

In [11]:
y_tf = gru_tf(tf.ones((1, 3, 5)), training=False)  # forward pass with ones

In [12]:
y_tf

<tf.Tensor: shape=(1, 3, 5), dtype=float32, numpy=
array([[[-0.7925101 , -0.40041846,  0.20510904,  0.13561569,
          0.28905517],
        [-0.9018102 , -0.5436267 ,  0.2940711 ,  0.23402843,
          0.48948967],
        [-0.9190761 , -0.58599323,  0.34698206,  0.30531275,
          0.58507884]]], dtype=float32)>

In [13]:
# library imports:

import pandas as pd
from pathlib import Path
import portiloop_software
import torch
from portiloop_software import run_offline_unlabelled, get_final_model_config_dict, get_trained_model
from matplotlib import pyplot as plt
from torchsummary import summary
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Conv1D, MaxPool1D, GRU
from tensorflow.keras import Model
import numpy as np

In [14]:
# path to the portiloop software package:

path_software = Path(portiloop_software.__file__).parent.absolute()

In [15]:
# path to the folder containing pre-trained models:

path_experiments = path_software / 'experiments'

In [16]:
# configuration dictionary of the model:

config_dict = get_final_model_config_dict()

In [17]:
# run offline inference (on all data points):

model_torch = get_trained_model(config_dict, path_experiments)

In [18]:
model_torch

PortiloopNetwork(
  (first_layer_input1): ConvPoolModule(
    (conv): Conv1d(1, 31, kernel_size=(7,), stride=(1,))
    (pool): MaxPool1d(kernel_size=7, stride=1, padding=0, dilation=1, ceil_mode=False)
    (dropout): Dropout(p=0, inplace=False)
  )
  (seq_input1): Sequential(
    (0): ConvPoolModule(
      (conv): Conv1d(31, 31, kernel_size=(7,), stride=(1,))
      (pool): MaxPool1d(kernel_size=7, stride=1, padding=0, dilation=1, ceil_mode=False)
      (dropout): Dropout(p=0.5, inplace=False)
    )
    (1): ConvPoolModule(
      (conv): Conv1d(31, 31, kernel_size=(7,), stride=(1,))
      (pool): MaxPool1d(kernel_size=7, stride=1, padding=0, dilation=1, ceil_mode=False)
      (dropout): Dropout(p=0.5, inplace=False)
    )
  )
  (gru_input1): GRU(558, 7, batch_first=True)
  (fc): Linear(in_features=7, out_features=1, bias=True)
)

In [19]:
summary(model_torch)

Layer (type:depth-idx)                   Param #
├─ConvPoolModule: 1-1                    --
|    └─Conv1d: 2-1                       248
|    └─MaxPool1d: 2-2                    --
|    └─Dropout: 2-3                      --
├─Sequential: 1-2                        --
|    └─ConvPoolModule: 2-4               --
|    |    └─Conv1d: 3-1                  6,758
|    |    └─MaxPool1d: 3-2               --
|    |    └─Dropout: 3-3                 --
|    └─ConvPoolModule: 2-5               --
|    |    └─Conv1d: 3-4                  6,758
|    |    └─MaxPool1d: 3-5               --
|    |    └─Dropout: 3-6                 --
├─GRU: 1-3                               11,907
├─Linear: 1-4                            8
Total params: 25,679
Trainable params: 25,679
Non-trainable params: 0


Layer (type:depth-idx)                   Param #
├─ConvPoolModule: 1-1                    --
|    └─Conv1d: 2-1                       248
|    └─MaxPool1d: 2-2                    --
|    └─Dropout: 2-3                      --
├─Sequential: 1-2                        --
|    └─ConvPoolModule: 2-4               --
|    |    └─Conv1d: 3-1                  6,758
|    |    └─MaxPool1d: 3-2               --
|    |    └─Dropout: 3-3                 --
|    └─ConvPoolModule: 2-5               --
|    |    └─Conv1d: 3-4                  6,758
|    |    └─MaxPool1d: 3-5               --
|    |    └─Dropout: 3-6                 --
├─GRU: 1-3                               11,907
├─Linear: 1-4                            8
Total params: 25,679
Trainable params: 25,679
Non-trainable params: 0

In [20]:
import tensorflow.keras as keras

class ReshapeLayer(keras.layers.Layer):
    def init(self, target_shape, kwargs):
        super(ReshapeLayer, self).init(kwargs)
        self.target_shape = tuple(target_shape)
    
    def call(self, input):
        # desired_shape = (int(tf.shape(input)[0]), ) + self.target_shape
        desired_shape = (-1, 50, 558)
        print(desired_shape)
        return tf.reshape(input, desired_shape)

In [21]:
model_keras = tf.keras.Sequential()
model_keras.add(tf.keras.layers.Reshape((-1, 54, 1)))
model_keras.add(tf.keras.layers.Conv1D(31, strides=[1], kernel_size=7, activation='relu'))
model_keras.add(tf.keras.layers.MaxPooling2D(pool_size=(1, 7), strides=1, padding='valid'))
model_keras.add(tf.keras.layers.Conv1D(31, strides=[1], kernel_size=7, activation='relu'))
model_keras.add(tf.keras.layers.MaxPooling2D(pool_size=(1, 7), strides=1, padding='valid'))
model_keras.add(tf.keras.layers.Conv1D(31, strides=[1], kernel_size=7, activation='relu'))
model_keras.add(tf.keras.layers.MaxPooling2D(pool_size=(1, 7), strides=1, padding='valid'))
# model_keras.add(tf.keras.layers.Reshape((-1, 558)))
model_keras.add(ReshapeLayer())
model_keras.add(GRU(units=7, time_major=False))
model_keras.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model_keras.build((None, 50, 54))
model_keras.summary()

(-1, 50, 558)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape (Reshape)           (None, 50, 54, 1)         0         
                                                                 
 conv1d (Conv1D)             (None, 50, 48, 31)        248       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 50, 42, 31)       0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 50, 36, 31)        6758      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 50, 30, 31)       0         
 2D)                                                             
                                                                 
 conv1d_2 (Conv1D)           (None, 50, 24

In [22]:
model_keras.layers

[<keras.layers.core.reshape.Reshape at 0x136b2bced90>,
 <keras.layers.convolutional.Conv1D at 0x136ab93eca0>,
 <keras.layers.pooling.MaxPooling2D at 0x1368613a190>,
 <keras.layers.convolutional.Conv1D at 0x136ab86b460>,
 <keras.layers.pooling.MaxPooling2D at 0x1368613a850>,
 <keras.layers.convolutional.Conv1D at 0x136ab93ea90>,
 <keras.layers.pooling.MaxPooling2D at 0x136ab83d250>,
 <__main__.ReshapeLayer at 0x136ab945580>,
 <keras.layers.recurrent_v2.GRU at 0x136ab86bbe0>,
 <keras.layers.core.dense.Dense at 0x136ab931370>]

In [23]:
torch_params = [param.detach().numpy() for param in model_torch.parameters()]

In [24]:
for i, param in enumerate(torch_params):
    print(f"param {i}: {param.shape}")

param 0: (31, 1, 7)
param 1: (31,)
param 2: (31, 31, 7)
param 3: (31,)
param 4: (31, 31, 7)
param 5: (31,)
param 6: (21, 558)
param 7: (21, 7)
param 8: (21,)
param 9: (21,)
param 10: (1, 7)
param 11: (1,)


In [25]:
def convert_kernel_inv(kernel):
    kernel_r, kernel_z, kernel_h = np.hsplit(kernel, 3)
    return np.concatenate((kernel_z.T, kernel_r.T, kernel_h.T))

def convert_kernel(kernel):
    kernel_z, kernel_r, kernel_h = np.vsplit(kernel, 3)
    return np.concatenate((kernel_r.T, kernel_z.T, kernel_h.T), axis=1)

def convert_bias(bias):
    bias = bias.reshape(2, 3, -1) 
    return bias[:, [1, 0, 2], :].reshape((2, -1))

In [26]:
model_keras.layers[8].weights

[<tf.Variable 'gru_1/gru_cell_1/kernel:0' shape=(558, 21) dtype=float32, numpy=
 array([[ 0.06987111, -0.08233741, -0.01363966, ...,  0.08950477,
          0.03794876, -0.05743991],
        [ 0.03526143, -0.02010861,  0.04055554, ..., -0.03146853,
          0.0947753 ,  0.06274372],
        [ 0.05155519, -0.04070115, -0.03309855, ..., -0.05078401,
         -0.10165539, -0.0556715 ],
        ...,
        [-0.04685628,  0.05984283, -0.03304814, ...,  0.06785407,
          0.01720869,  0.08539138],
        [-0.03967514, -0.09396231, -0.09895834, ...,  0.1006132 ,
          0.10082895,  0.02969674],
        [ 0.07360422,  0.03359257,  0.09591938, ...,  0.03117121,
          0.01499479,  0.09121189]], dtype=float32)>,
 <tf.Variable 'gru_1/gru_cell_1/recurrent_kernel:0' shape=(7, 21) dtype=float32, numpy=
 array([[-0.06439924, -0.39913705,  0.13888477, -0.16469152,  0.12037365,
         -0.04164285, -0.02109119,  0.48498216,  0.06057859,  0.02622014,
         -0.19166383, -0.2502437 , -0.089

In [27]:
np.stack((torch_params[8], torch_params[9]), axis=0)

array([[-0.02185846, -0.14705312, -0.06134333, -0.21139653, -0.18442982,
         0.00979619, -0.14749926, -0.24829413,  1.0149685 ,  0.30653724,
        -0.07180233, -0.6962207 , -0.48068357, -0.37287533, -0.01478305,
         0.0281016 , -0.29703212, -0.05942979,  0.1469805 ,  0.30490357,
         0.17594711],
       [-0.05141691, -0.14167127, -0.00835179, -0.31697038,  0.17389293,
         0.12304442,  0.03572496, -0.31770235,  0.99044317,  0.0597805 ,
        -0.47962168, -0.7721744 , -0.4003129 , -0.18330953,  0.1975466 ,
         0.06060247,  0.20888945,  0.0802715 , -0.32268295,  0.0396922 ,
        -0.1638425 ]], dtype=float32)

In [28]:
torch_params[6].shape

(21, 558)

In [29]:
torch_params[7].shape

(21, 7)

In [30]:
l = [m for m in model_torch.modules()]
gru_pt = l[14]
gru_pt

GRU(558, 7, batch_first=True)

In [31]:
for pn, p in gru_pt.named_parameters():
    if 'weight_ih' in pn:
        kernel = p.data
    elif 'weight_hh' in pn:
        recurrent_kernel = p.data
    elif 'bias_ih' in pn:
        bias_ih = p.data
    else:
        bias_hh = p.data
bias = np.stack((bias_ih, bias_hh), axis=0)

In [32]:
model_keras.layers[1].set_weights([torch_params[0].T, torch_params[1].T])

# Second Conv Layer:
model_keras.layers[3].set_weights([torch_params[2].T, torch_params[3].T])

# Third Conv Layer:
model_keras.layers[5].set_weights([torch_params[4].T, torch_params[5].T])

# GRU Layer:

# kernel_input = convert_kernel(torch_params[6])
# kernel_h = convert_kernel(torch_params[7])
# bias = convert_bias(np.stack((torch_params[8], torch_params[9]), axis=0))

# model_keras.layers[8].set_weights([kernel_input, 
#                                    kernel_h, 
#                                    bias])

model_keras.layers[8].set_weights([convert_kernel(kernel), 
                    convert_kernel(recurrent_kernel), 
                    convert_bias(bias)])

# Dense Layer:
model_keras.layers[9].set_weights([torch_params[10].T, torch_params[11].T])

In [33]:
model_keras.layers[8].weights

[<tf.Variable 'gru_1/gru_cell_1/kernel:0' shape=(558, 21) dtype=float32, numpy=
 array([[-0.16189827, -0.02721401, -0.00131223, ..., -0.0224664 ,
         -0.10324092,  0.9781653 ],
        [-0.04380907,  0.07253738, -0.03930822, ..., -0.04212598,
         -0.05580707,  0.70585686],
        [-0.22128211,  0.04012021, -0.01671478, ..., -0.07443257,
         -0.06322375,  0.50437504],
        ...,
        [-0.11483102, -0.11880854, -0.21867967, ..., -0.04734865,
         -0.17091021,  0.05709471],
        [-0.38440633, -0.02999607, -0.02425925, ..., -0.0608215 ,
         -0.324488  ,  0.04215567],
        [-0.3089492 , -0.13285387,  0.00966667, ..., -0.04424573,
         -0.311438  ,  0.10411689]], dtype=float32)>,
 <tf.Variable 'gru_1/gru_cell_1/recurrent_kernel:0' shape=(7, 21) dtype=float32, numpy=
 array([[-0.25731143,  0.20413998,  0.03256913, -0.12562902, -0.09498331,
         -0.04637064,  0.57656753, -0.6201707 ,  0.11997185, -0.385144  ,
         -0.16755357, -0.01406851, -0.306

In [34]:
input_numpy = np.ones((1, 50, 54))
input_torch = torch.ones((1, 50, 54))

In [35]:
x, hn1, _, _ = model_torch(input_torch, None, None, torch.zeros(1, 1, 7), None)

DEBUG: x.shape:(50, 31, 42)
DEBUG: x.mean():0.01873699761927128
DEBUG: x:
[[[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.0898011  0.0898011  0.0898011  ... 0.0898011  0.0898011  0.0898011 ]
  ...
  [0.02003773 0.02003773 0.02003773 ... 0.02003773 0.02003773 0.02003773]
  [0.20981292 0.20981292 0.20981292 ... 0.20981292 0.20981292 0.20981292]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.0898011  0.0898011  0.0898011  ... 0.0898011  0.0898011  0.0898011 ]
  ...
  [0.02003773 0.02003773 0.02003773 ... 0.02003773 0.02003773 0.02003773]
  [0.20981292 0.20981292 0.20981292 ... 0.20981292 0.20981292 0.20981292]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[0.         0.      

In [36]:
x

tensor([[0.05004729]], grad_fn=<SigmoidBackward0>)

In [37]:
hn1

tensor([[[ 0.79664421, -0.03222394,  0.49799740, -0.05747593, -0.29114568,
          -0.09384963,  0.98452938]]], grad_fn=<StackBackward0>)

In [38]:
out_keras = model_keras(input_numpy)
print(out_keras)

(-1, 50, 558)
tf.Tensor([[0.09079182]], shape=(1, 1), dtype=float32)


In [39]:
from keras import backend as K

inp = model_keras.input                                           # input placeholder
outputs = [layer.output for layer in model_keras.layers]          # all layer outputs
functors = [K.function([inp], [out]) for out in outputs]   # evaluation function

# Testing
layer_outs = [func([input_numpy]) for func in functors]

(-1, 50, 558)
(-1, 50, 558)
(-1, 50, 558)


In [40]:
print("===")
print(np.array(layer_outs[2]).squeeze().swapaxes(1,2).shape)
print(np.array(layer_outs[2]).squeeze().swapaxes(1,2))

print("===")
print(np.array(layer_outs[4]).squeeze().swapaxes(1,2).shape)
print(np.array(layer_outs[4]).squeeze().swapaxes(1,2))

print("===")
print(np.array(layer_outs[6]).squeeze().swapaxes(1,2).shape)
print(np.array(layer_outs[6]).squeeze().swapaxes(1,2))

print("===")
print(np.array(layer_outs[7]).squeeze().shape)
print(np.array(layer_outs[7]).squeeze().mean())
print(np.array(layer_outs[7]).squeeze())

print("===")
print(np.array(layer_outs[8]).squeeze().shape)
print(np.array(layer_outs[8]).squeeze())

===
(50, 31, 42)
[[[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.0898011  0.0898011  0.0898011  ... 0.0898011  0.0898011  0.0898011 ]
  ...
  [0.02003773 0.02003773 0.02003773 ... 0.02003773 0.02003773 0.02003773]
  [0.20981297 0.20981297 0.20981297 ... 0.20981297 0.20981297 0.20981297]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.0898011  0.0898011  0.0898011  ... 0.0898011  0.0898011  0.0898011 ]
  ...
  [0.02003773 0.02003773 0.02003773 ... 0.02003773 0.02003773 0.02003773]
  [0.20981297 0.20981297 0.20981297 ... 0.20981297 0.20981297 0.20981297]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[0.         0.         0.         ... 0.         0.         0.        ]
  [0.

In [46]:
gru_tf = model_keras.layers[8]
gru_tf

<keras.layers.recurrent_v2.GRU at 0x136ab86bbe0>

In [47]:
gru_pt

GRU(558, 7, batch_first=True)

In [48]:
gru_pt(torch.ones(1, 50, 558))

(tensor([[[ 1.00000000e+00,  0.00000000e+00, -4.53531742e-04, -1.00000000e+00,
            1.00000000e+00, -9.79106963e-01,  1.00000000e+00],
          [ 1.00000000e+00,  0.00000000e+00, -9.85145569e-04, -1.00000000e+00,
            1.00000000e+00, -9.99547601e-01,  1.00000000e+00],
          [ 1.00000000e+00,  0.00000000e+00, -1.52158737e-03, -1.00000000e+00,
            1.00000000e+00, -9.99990225e-01,  1.00000000e+00],
          [ 1.00000000e+00,  0.00000000e+00, -2.05773115e-03, -1.00000000e+00,
            1.00000000e+00, -9.99999762e-01,  1.00000000e+00],
          [ 1.00000000e+00,  0.00000000e+00, -2.59357691e-03, -1.00000000e+00,
            1.00000000e+00, -1.00000000e+00,  1.00000000e+00],
          [ 1.00000000e+00,  0.00000000e+00, -3.12906504e-03, -1.00000000e+00,
            1.00000000e+00, -1.00000000e+00,  1.00000000e+00],
          [ 1.00000000e+00,  0.00000000e+00, -3.66413593e-03, -1.00000000e+00,
            1.00000000e+00, -1.00000000e+00,  1.00000000e+00],
      

In [49]:
y_tf = gru_tf(tf.ones((1, 50, 558)), training=False)

In [50]:
y_tf

<tf.Tensor: shape=(1, 7), dtype=float32, numpy=
array([[ 1.        ,  0.        , -0.02632462, -1.        ,  1.        ,
        -1.        ,  1.        ]], dtype=float32)>