# Building a convolutional neural network to predict promoter strength

## Import modules and define functions

Import the required modules:

In [1]:
import os
import numpy as np
import pandas as pd
import re
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import tensorflow as tf
from tensorflow import keras as k
import tensorflow.keras.layers as kl
from dataclasses import dataclass
from typing import Optional
from io import TextIOBase

2022-08-01 15:45:09.091607: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


Enable GPU memory growth:

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

1 Physical GPUs 1 Logical GPUs


2022-08-01 15:45:58.722623: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-08-01 15:45:58.727500: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-08-01 15:45:58.793479: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-01 15:45:58.794684: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:c3:00.0 name: Quadro RTX 5000 computeCapability: 7.5
coreClock: 1.815GHz coreCount: 48 deviceMemorySize: 15.75GiB deviceMemoryBandwidth: 417.29GiB/s
2022-08-01 15:45:58.794707: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2022-08-01 15:45:59.926723: I tensorflow/stream_executor/platform/default/d

Define a function to one-hot encode the DNA sequences (adapted from https://colab.research.google.com/drive/17E4h5aAOioh5DiTo7MZg4hpL6Z_0FyWr):

In [3]:
integer_encoder = LabelEncoder()  

one_hot_encoder = OneHotEncoder(categories='auto')

def one_hot_encoding(sequences, verbose = True): 
    one_hot_sequences = []

    if verbose:
        i = 0
        print('one-hot encoding in progress ...', flush = True)
    
    for sequence in sequences:
        integer_encoded = integer_encoder.fit_transform(list(sequence))
        integer_encoded = np.array(integer_encoded).reshape(-1, 1)
        one_hot_encoded = one_hot_encoder.fit_transform(integer_encoded)
        one_hot_sequences.append(one_hot_encoded.toarray())
    
        if verbose:
            i += 1
            if i % 1000 == 0:
                print(i, 'sequences processed', flush = True, end = '\r')
        
    if verbose:
        print('finished one-hot encoding:', i, 'sequences processed', flush = True)
    
    one_hot_sequences = np.stack(one_hot_sequences)

    return one_hot_sequences

Define a class to read in MEME files:

In [4]:
@dataclass
class Motif:
    identifier: str
    pfm: np.ndarray
    alphabet_length: int
    length: int
    name: Optional[str] = None
    source_sites: Optional[int] = None
    source_evalue: Optional[float] = None
    
    def __len__(self) -> int:
        return self.length
    
    
class MinimalMEME:
    """ http://meme-suite.org/doc/meme-format.html """
    
    __version_regex = re.compile('^MEME version ([0-9]+)$')
    __background_regex = re.compile('^Background letter frequencies(?: \(from (.+)\))?$')
    __background_sum_error = 0.00001
    __pfm_header_regex = re.compile('^letter-probability matrix:(?: alength= ([0-9]+))?(?: w= ([0-9]+))?(?: nsites= ([0-9]+))?(?: E= ([0-9.e-]+))?$')
    version = None
    alphabet = None
    strands = None
    background = None
    background_source = None
    motifs = None
    
    def __init__(self, path):
        self.motifs = {}
        
        # parse the minimal MEME file
        with open(path) as minimal_meme_file:
            line = minimal_meme_file.readline()
            # first line must be version
            self.version = self._parse_version(line)

            line = minimal_meme_file.readline()
            while line:
                if line.startswith('ALPHABET'):
                    if self.alphabet is None:
                        self.alphabet = self._parse_alphabet(line)
                        line = minimal_meme_file.readline()
                    else:
                        raise RuntimeError("Multiple alphabet definitions encountered in MEME file")
                elif line.startswith('strands: '):
                    if self.strands is None:
                        self.strands = self._parse_strands(line)
                        line = minimal_meme_file.readline()
                    else:
                        raise RuntimeError("Multiple strand definitions encountered in MEME file")
                elif line.startswith('Background letter frequencies'):
                    if self.background is None:
                        line = self._parse_background(line, minimal_meme_file)
                    else:
                        raise RuntimeError("Multiple background frequency definitions encountered in MEME file")
                elif line.startswith('MOTIF'):
                    line = self._parse_motif(line, minimal_meme_file)
                else:
                    line = minimal_meme_file.readline()
    
    def _parse_version(self, line: str) -> str:
        match = re.match(self.__version_regex, line)
        if match:
            return match.group(1)
        else:
            raise RuntimeError("Minimal MEME file missing version string on first line")
            
    def _parse_alphabet(self, line: str) -> str:
        if line.startswith('ALPHABET '):
            raise NotImplementedError("Alphabet definitions not supported")
        elif line.startswith('ALPHABET= '):
            return line.rstrip()[10:]
        else:
            raise RuntimeError('Unable to parse alphabet line')
            
    def _parse_strands(self, line: str) -> str:
        strands = line.rstrip()[9:]
        if not ((strands == '+') or (strands == '+ -')):
            raise RuntimeError("Invalid strand specification")
        else:
            return strands
        
    def _parse_background(self, line: str, handle: TextIOBase) -> str:
        match = re.match(self.__background_regex, line)
        if match:
            if match.group(1) is not None:
                self.background_source = match.group(1)
        else:
            raise RuntimeError("Unable to parse background frequency line")

        self.background = {}
        # start parsing possibly multiple lines of background frequencies
        line = handle.readline()
        while line:
            if (not line.rstrip()) or line.startswith('MOTIF'):
                if abs(1 - sum(self.background.values())) <= self.__background_sum_error:
                    return line
                else:
                    raise RuntimeError("Background frequencies do not sum to 1")
            else:
                line_freqs = line.rstrip().split(' ')
                if len(line_freqs) % 2 != 0:
                    raise RuntimeError("Invalid background frequency definition")
                for residue, freq in zip(line_freqs[0::2], line_freqs[1::2]):
                    self.background[residue] = float(freq)
            line = handle.readline()
    
    def _parse_motif(self, line: str, handle: TextIOBase) -> str:
        # parse motif identifier
        line_split = line.rstrip().split(' ')
        if (len(line_split) < 2) or (len(line_split) > 3):
            raise RuntimeError("Invalid motif name line")
        motif_identifier = line_split[1]
        motif_name = line_split[2] if len(line_split) == 3 else None
        
        line = handle.readline()
        # parse letter probability matrix header
        if not line.startswith('letter-probability matrix:'):
            raise RuntimeError("No letter-probability matrix header line in motif entry")
        match = re.match(self.__pfm_header_regex, line)
        if match:
            motif_alphabet_length = int(match.group(1)) if match.group(1) is not None else None
            motif_length = int(match.group(2)) if match.group(2) is not None else None
            motif_source_sites = int(match.group(3)) if match.group(3) is not None else None
            motif_source_evalue = float(match.group(4)) if match.group(4) is not None else None
        else:
            raise RuntimeError("Unable to parse letter-probability matrix header")
        
        # parse letter probability matrix
        line = handle.readline()
        pfm_rows = []
        while line:
            if (not line.rstrip()) or line.startswith('MOTIF'):
                if motif_identifier in self.motifs:
                    raise RuntimeError("Motif identifiers not unique within file")
                pfm = np.stack(pfm_rows)
                if motif_length is None:
                    motif_length = pfm.shape[0]
                elif motif_length != pfm.shape[0]:
                    raise RuntimeError("Provided motif length is not consistent with the letter-probability matrix shape")
                self.motifs[motif_identifier] = Motif(
                    identifier = motif_identifier,
                    pfm = pfm,
                    alphabet_length = motif_alphabet_length,
                    length = motif_length,
                    name = motif_name,
                    source_sites = motif_source_sites,
                    source_evalue = motif_source_evalue
                )
                return line
            else:
                line_split = line.rstrip().split()
                if motif_alphabet_length is None:
                    motif_alphabet_length = len(line_split)
                elif motif_alphabet_length != len(line_split):
                    raise RuntimeError("Letter-probability matrix row length doesn't equal alphabet length")
                pfm_row = np.array([float(s) for s in line_split])
                pfm_rows.append(pfm_row)
                line = handle.readline()

## Load and convert the data to the required format

Load CPE and TF motifs:

In [5]:
core_promoter_elements = MinimalMEME('/cellar/users/aklie/projects/EUGENe/tests/_data/datasets/jores21/CPEs.meme')
tf_groups = MinimalMEME('/cellar/users/aklie/projects/EUGENe/tests/_data/datasets/jores21/TF-clusters.meme')
all_motifs = {**core_promoter_elements.motifs, **tf_groups.motifs}

Load the training and test data:

In [6]:
data_test_leaf = pd.read_csv('/cellar/users/aklie/projects/EUGENe/tests/_data/datasets/jores21/CNN_test_leaf.tsv', sep = '\t', header = 0)
data_train_leaf = pd.read_csv('/cellar/users/aklie/projects/EUGENe/tests/_data/datasets/jores21/CNN_train_leaf.tsv', sep = '\t', header = 0)
data_test_proto = pd.read_csv('/cellar/users/aklie/projects/EUGENe/tests/_data/datasets/jores21/CNN_test_proto.tsv', sep = '\t', header = 0)
data_train_proto = pd.read_csv('/cellar/users/aklie/projects/EUGENe/tests/_data/datasets/jores21/CNN_train_proto.tsv', sep = '\t', header = 0)

One-hot encode the promoter sequences:

In [7]:
train_sequences_leaf = one_hot_encoding(data_train_leaf['sequence'])
test_sequences_leaf = one_hot_encoding(data_test_leaf['sequence'])
train_sequences_proto = one_hot_encoding(data_train_proto['sequence'])
test_sequences_proto = one_hot_encoding(data_test_proto['sequence'])

one-hot encoding in progress ...
finished one-hot encoding: 65004 sequences processed
one-hot encoding in progress ...
finished one-hot encoding: 7154 sequences processed
one-hot encoding in progress ...
finished one-hot encoding: 68213 sequences processed
one-hot encoding in progress ...
finished one-hot encoding: 7595 sequences processed


Convert the enrichment value to an array of the correct shape.

In [8]:
train_enrichment_leaf = np.array(data_train_leaf['enrichment']).reshape(-1, 1)
test_enrichment_leaf = np.array(data_test_leaf['enrichment']).reshape(-1, 1)
train_enrichment_proto = np.array(data_train_proto['enrichment']).reshape(-1, 1)
test_enrichment_proto = np.array(data_test_proto['enrichment']).reshape(-1, 1)

## Build the models

Define a bidirectional convolutional layer stack, inspired from DeepGMAP (https://doi.org/10.1371/journal.pone.0235748)

In [9]:
class BiConv1D(kl.Layer):
    def __init__(self, filters, kernel_size, layers = 2, stride = 1, dropout_rate = 0.15):
        super().__init__()
        self.filters = filters
        self.kernel_size = kernel_size
        if layers < 1:
            raise ValueError("At least one layer needed")
        self.layers = layers
        if (dropout_rate < 0) or (dropout_rate > 1):
            raise ValueError("Dropout rate must be a float between 0 and 1")
        self.dropout_rate = dropout_rate
        self.stride = stride
    
    def build(self, input_shape):
        self.kernels = []
        self.biases = []
        for layer in range(self.layers):
            self.kernels.append(self.add_weight(
                f"kernel{layer}",
                shape = (self.kernel_size, input_shape[-1], self.filters),
                trainable = True,
                initializer = k.initializers.GlorotUniform()
            ))
            self.biases.append(self.add_weight(
                f"bias{layer}",
                shape = (self.filters,),
                trainable = True,
                initializer = k.initializers.Zeros()
            ))

    def call(self, input):
        # first layer
        x_fwd = tf.nn.conv1d(input, self.kernels[0], stride = self.stride, padding = 'SAME')
        x_fwd = tf.add(x_fwd, self.biases[0])
        x_fwd = tf.nn.dropout(tf.nn.relu(x_fwd), rate = self.dropout_rate)
        x_rev = tf.nn.conv1d(input, tf.reverse(self.kernels[0], axis = [1, 2]), stride = self.stride, padding = 'SAME')
        x_rev = tf.add(x_fwd, self.biases[0])
        x_rev = tf.nn.dropout(tf.nn.relu(x_rev), rate = self.dropout_rate)
    
        # subsequent layers
        for layer in range(1, self.layers):
            x_fwd = tf.nn.conv1d(x_fwd, self.kernels[layer], stride = self.stride, padding = 'SAME')
            x_fwd = tf.add(x_fwd, self.biases[layer])
            x_fwd = tf.nn.dropout(tf.nn.relu(x_fwd), rate = self.dropout_rate)
            x_rev = tf.nn.conv1d(x_rev, tf.reverse(self.kernels[layer], axis = [1, 2]), stride = self.stride, padding = 'SAME')
            x_rev = tf.add(x_fwd, self.biases[layer])
            x_rev = tf.nn.dropout(tf.nn.relu(x_rev), rate = self.dropout_rate)
        
        return tf.math.add(x_fwd, x_rev)

Define a function to build the bidirectional model:

In [10]:
def build_bidirectional_model(motif_kernel: np.ndarray):
    # motif_kernel.shape[2] is filters, shape[0] is kernel size
    inputs = kl.Input((170, 4))
    x = BiConv1D(filters = motif_kernel.shape[2], kernel_size = motif_kernel.shape[0], layers = 2)(inputs)
    x = kl.Conv1D(filters = 128, kernel_size = 13, padding = 'same', activation = 'relu')(x)
    x = kl.Dropout(0.15)(x)
    x = kl.Flatten()(x)
    x = kl.Dense(64)(x)
    x = kl.BatchNormalization()(x)
    x = kl.Activation('relu')(x)
    outputs = kl.Dense(1)(x)
    model = k.Model(inputs = inputs, outputs = outputs, name = "BiDirectionalCNN")
    # initialize first layer kernel with motifs
    model.layers[1].kernels[0].assign(motif_kernel)
    return model

In [11]:
kernel = k.initializers.glorot_uniform()(shape = (13, 4, 128)).numpy()

# overwrite part of kernel with pfms from motifs
for i, motif_id in enumerate(all_motifs):
    motif = all_motifs[motif_id]
    
    # convert PFM to PWM, assume equal background frequency of 0.25
    # truncates motifs longer than 13bp to 13bp
    kernel[:min(len(motif), kernel.shape[0]), :, i] = motif.pfm[:min(len(motif), kernel.shape[0]), :] / 0.25

Build and compile the models:

In [12]:
model_leaf = build_bidirectional_model(kernel)
model_proto = build_bidirectional_model(kernel)

model_leaf.compile(
    loss = 'mean_squared_error',
    optimizer = 'Adam',
    metrics = ['mean_squared_error']
)
model_proto.compile(
    loss = 'mean_squared_error',
    optimizer = 'Adam',
    metrics = ['mean_squared_error']
)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [13]:
model_proto.summary()

Model: "BiDirectionalCNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 170, 4)]          0         
_________________________________________________________________
bi_conv1d_1 (BiConv1D)       (None, 170, 128)          13568     
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 170, 128)          213120    
_________________________________________________________________
dropout_1 (Dropout)          (None, 170, 128)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 21760)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                1392704   
_________________________________________________________________
batch_normalization_1 (Batch (None, 64)           

Define training parameters:

In [15]:
earlyStop = k.callbacks.EarlyStopping(patience = 5)
#reduceLR = k.callbacks.ReduceLROnPlateau(patience = 2)

Train the model for the tobacco leaf system:

In [16]:
if os.path.isdir('model_leaf'):
    # load previously trained model
    model_leaf = k.models.load_model('model_leaf')
else:
    # train model
    model_leaf.fit(
        train_sequences_leaf,
        train_enrichment_leaf, 
        epochs = 25,
        batch_size = 128,
        validation_split = 0.1,
        callbacks = [earlyStop], #reduceLR],
        verbose = 1
    )

2022-08-01 15:48:34.695173: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-08-01 15:48:34.739658: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2999865000 Hz


Epoch 1/25


2022-08-01 15:48:35.725587: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-08-01 15:48:37.854862: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25


In [None]:
# save model
# model_leaf.save('model_leaf')

In [29]:
print(k.backend.eval(model_leaf.optimizer.lr))

0.001


Train the model for the maize protoplast system:

In [15]:
"""
if os.path.isdir('model_proto'):
    # load previously trained model
    model_proto = k.models.load_model('model_proto')
else:
    # train model
    model_proto.fit(
        train_sequences_proto,
        train_enrichment_proto, 
        epochs = 25,
        batch_size = 128,
        validation_split = 0.1,
        callbacks = [earlyStop, reduceLR],
        verbose = 1
    )

    # save model
    model_proto.save('model_proto')
"""

## Evalutate the models

Predict enrichment for the training and test sets:

In [17]:
predicted_enrichment_train_leaf = model_leaf.predict(np.stack(train_sequences_leaf))
predicted_enrichment_test_leaf = model_leaf.predict(np.stack(test_sequences_leaf))
#predicted_enrichment_train_proto = model_proto.predict(np.stack(train_sequences_proto))
#predicted_enrichment_test_proto = model_proto.predict(np.stack(test_sequences_proto))

Add predicted values to the dataframe:

In [18]:
data_test_leaf['prediction'] = predicted_enrichment_test_leaf
data_train_leaf['prediction'] = predicted_enrichment_train_leaf
#data_test_proto['prediction'] = predicted_enrichment_test_proto
#data_train_proto['prediction'] = predicted_enrichment_train_proto

Rename column with species identifier (for compatibility with LaTeX plotting code) and shuffle data to avoid one species always being drawn on top of another:

In [19]:
data_test_leaf = data_test_leaf.rename(columns = {'sp' : 'sample.name'}).sample(frac = 1)
data_train_leaf = data_train_leaf.rename(columns = {'sp' : 'sample.name'}).sample(frac = 1)
#data_test_proto = data_test_proto.rename(columns = {'sp' : 'sample.name'}).sample(frac = 1)
#data_train_proto = data_train_proto.rename(columns = {'sp' : 'sample.name'}).sample(frac = 1)

Define a function to calculate the correlation between the measured and predicted enrichment (overall and for each species individually):

In [20]:
def get_cor(data):
    samples = ['all', 'At', 'Zm', 'Sb']

    rsquare = []
    spearman = []

    for species in samples:
        if species == 'all':
            data_filt = data
        else:
            data_filt = data[data['sample.name'] == species]
        
        rsquare.append(round(data_filt['enrichment'].corr(data_filt['prediction'])**2, 2))
        spearman.append(round(data_filt['enrichment'].corr(data_filt['prediction'], method = 'spearman'), 2))

    return pd.DataFrame({'sample.name' : samples, 'spearman' : spearman, 'rsquare' : rsquare})

Calculate correlation for test and training set:

In [21]:
correlation_test_leaf = get_cor(data_test_leaf)
correlation_train_leaf = get_cor(data_train_leaf)
#correlation_test_proto = get_cor(data_test_proto)
#correlation_train_proto = get_cor(data_train_proto)

Let's take a look at how well the model performed on the test set:

In [23]:
print('Training data in tobacco leaf system:')
print(correlation_train_leaf)
print('\nTest data in tobacco leaf system:')
print(correlation_test_leaf)
#print('\nTraining data in maize protoplast system:')
#print(correlation_train_proto)
#print('\nTest data in maize protoplast system:')
#print(correlation_test_proto)

Training data in tobacco leaf system:
  sample.name  spearman  rsquare
0         all      0.89     0.79
1          At      0.84     0.72
2          Zm      0.89     0.79
3          Sb      0.89     0.79

Test data in tobacco leaf system:
  sample.name  spearman  rsquare
0         all      0.81     0.66
1          At      0.70     0.52
2          Zm      0.82     0.67
3          Sb      0.81     0.66


Save the data to files:

In [22]:
data_test_leaf.to_csv('../figures/rawData/CNN_test_leaf_pred.tsv', sep = '\t', index = False, columns = ['sample.name', 'enrichment', 'prediction'])
data_test_proto.to_csv('../figures/rawData/CNN_test_proto_pred.tsv', sep = '\t', index = False, columns = ['sample.name', 'enrichment', 'prediction'])
# data_train_leaf.to_csv('../figures/rawData/CNN_train_leaf_pred.tsv', sep = '\t', index = False, columns = ['sample.name', 'enrichment', 'prediction'])
# data_train_proto.to_csv('../figures/rawData/CNN_train_proto_pred.tsv', sep = '\t', index = False, columns = ['sample.name', 'enrichment', 'prediction'])

correlation_test_leaf.to_csv('../figures/rawData/CNN_test_leaf_stats.tsv', sep = '\t', index = False)
correlation_test_proto.to_csv('../figures/rawData/CNN_test_proto_stats.tsv', sep = '\t', index = False)
# correlation_train_leaf.to_csv('../figures/rawData/CNN_train_leaf_stats.tsv', sep = '\t', index = False)
# correlation_train_proto.to_csv('../figures/rawData/CNN_train_proto_stats.tsv', sep = '\t', index = False)

scratch

In [None]:
model_proto.layers[1].kernels[0].shape, model_proto.layers[1].kernels[1].shape

(TensorShape([13, 4, 128]), TensorShape([13, 4, 128]))

In [None]:
model_proto.layers[1].kernels[0][:, :, 0]

<tf.Tensor: shape=(13, 4), dtype=float32, numpy=
array([[0.51 , 1.506, 0.478, 1.506],
       [0.63 , 1.594, 0.796, 0.982],
       [0.996, 1.212, 0.788, 1.004],
       [0.494, 2.62 , 0.302, 0.582],
       [0.04 , 0.008, 0.008, 3.944],
       [3.872, 0.   , 0.   , 0.128],
       [0.008, 0.056, 0.024, 3.912],
       [3.968, 0.   , 0.008, 0.024],
       [2.612, 0.048, 0.008, 1.332],
       [3.896, 0.   , 0.032, 0.072],
       [1.364, 0.112, 0.144, 2.382],
       [2.782, 0.326, 0.478, 0.414],
       [0.502, 1.728, 1.266, 0.502]], dtype=float32)>

In [None]:
tf.reverse(model_proto.layers[1].kernels[0], axis = [1, 2])[:, :, 0]

<tf.Tensor: shape=(13, 4), dtype=float32, numpy=
array([[-1.1752490e-02, -3.3738129e-02,  6.0109161e-03,  1.9622605e-02],
       [-2.2206314e-02,  4.1522197e-03,  5.1510818e-03,  3.5024669e-02],
       [-3.6124412e-02, -3.3148341e-02,  4.9235549e-02, -4.9587451e-02],
       [-2.1023933e-02,  4.6193339e-03, -5.1596034e-02, -3.1489260e-02],
       [ 1.5524324e-02, -2.6284009e-03,  8.2412772e-03,  1.4788691e-02],
       [ 2.6899632e-02, -2.5407940e-02, -5.0277077e-03, -1.8956561e-02],
       [ 5.5888068e-02,  2.1890689e-02,  1.0152947e-02,  4.3098699e-02],
       [-3.2539338e-02, -5.1493611e-02,  2.1482933e-02,  1.9928727e-02],
       [-1.3733637e-02,  4.3182302e-02,  2.1718442e-06, -3.5343777e-02],
       [ 3.0339655e-02, -5.3364597e-02,  2.1932591e-02,  1.6808707e-02],
       [-5.4198734e-02, -2.9335709e-02, -1.9956362e-02,  5.3986106e-02],
       [-4.3515462e-02,  1.3974495e-03,  2.9999938e-02,  8.1115179e-03],
       [-5.7419207e-02,  1.8064570e-02, -3.2875217e-02,  5.6787375e-02]],
 

In [None]:
model_proto.layers[1].biases[0].shape, model_proto.layers[1].biases[1].shape

(TensorShape([128]), TensorShape([128]))

In [None]:
first_out = tf.nn.conv1d(tf.ones((10, 170, 4)), model_proto.layers[1].kernels[0], stride = 1, padding = 'SAME')

In [None]:
first_out.shape

TensorShape([10, 170, 128])

In [None]:
tf.nn.conv1d(first_out, model_proto.layers[1].kernels[1], stride = 1, padding = 'SAME')

<tf.Tensor: shape=(1, 170, 128), dtype=float32, numpy=
array([[[-6.2085419e+00,  2.3372312e+00,  7.3843746e+00, ...,
         -8.6654257e-03, -7.0331623e-03,  2.3195921e-02],
        [-5.8269234e+00,  3.9527270e-01,  8.7749662e+00, ...,
         -2.0850651e-02, -2.3376362e-02,  1.1375189e-02],
        [-6.6466060e+00,  5.4851884e-01,  1.0273194e+01, ...,
         -4.3990575e-03, -9.5281871e-03,  1.5416055e-02],
        ...,
        [ 3.9359903e+00, -6.0941577e-01,  1.6216959e+01, ...,
         -7.2578099e-03, -1.5117239e-02, -2.3746926e-02],
        [ 5.4510412e+00, -5.6218451e-01,  1.5262706e+01, ...,
          2.0589863e-03,  2.5248829e-02, -2.4427205e-02],
        [ 6.9992914e+00, -3.6010320e+00,  1.2241146e+01, ...,
          1.2646944e-03,  1.7705860e-02, -1.1187748e-03]]], dtype=float32)>

In [None]:
model_leaf.layers[1].kernels[0].shape, tf.reverse(model_leaf.layers[1].kernels[0], axis = [1, 2]).shape

(TensorShape([13, 4, 128]), TensorShape([13, 4, 128]))