In [2]:
# Each time we will run an experiment, we will restart the kernel. This is based on experimental learning from my previous project 
# that without this the result becomes unecpectedly stochastics in nature despite setting the seed.

In [1]:
def test_model(ablation=1000, B=32, epochs=3, filters=64, kernel_size=3, dense_neurons=2048, 
              conv1d_set1=1, conv1d_set2=1, maxpool_1=False):
    from hdflogv1 import HDFSLogv1
    import tensorflow as tf
    import numpy as np
    tf.random.set_seed(123)
    import pickle
    with open('../data/hdfs_log_obj.pkl', 'rb') as f:
        hdfslogs = pickle.load(f)
    ablation_data = hdfslogs.get_train_test_data(ablation=ablation)
    x_train, y_train, x_test, y_test = hdfslogs.get_padded_train_test_data(ablation=ablation)
    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_data = train_data.shuffle(buffer_size=y_train.shape[0]).batch(B, drop_remainder=True)
    print(train_data)
    
    tk = hdfslogs.tk    
    vocab_size = len(tk.word_index)
    print(f'vocab_size: {vocab_size}')
    char_onehot = vocab_size
    
    embedding_weights = []
    embedding_weights.append(np.zeros(vocab_size))
    for char, i in tk.word_index.items(): # from 1 to 51
        onehot = np.zeros(vocab_size)
        onehot[i-1] = 1
        embedding_weights.append(onehot)
    embedding_weights = np.array(embedding_weights)
    
    input_size = [x_train.shape[1], x_train.shape[2]]
    embedding_size = vocab_size

    embedding_layer = tf.keras.layers.Embedding(vocab_size+1,
                                                embedding_size,
                                                input_length=input_size,
                                                weights = [embedding_weights])
    
    
    
    
    inputs = tf.keras.layers.Input(batch_shape=(B, x_train.shape[1], x_train.shape[2]), dtype='float64' )
    x = tf.keras.layers.Embedding(input_dim=vocab_size+1,
                                    output_dim=embedding_size,
                                    input_length=x_train.shape[2],
                                    weights = [embedding_weights],
                                    )(inputs)
    for _ in range(conv1d_set1):
        x = tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(x)
    if maxpool_1:
        x = tf.keras.layers.MaxPooling2D(pool_size=(1, x_train.shape[2]))(x)
        x = tf.reshape(x, (B, x_train.shape[1], filters))        
        for _ in range(conv1d_set2):
            x = tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(x)
        x = tf.keras.layers.MaxPooling1D(pool_size=(x_train.shape[1]) )(x)    
    if not maxpool_1:
        x = tf.keras.layers.Flatten()(x)       
    x = tf.keras.layers.Dense(dense_neurons)(x)
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    print(model.summary())
    model.compile(optimizer='adam', 
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])
    model.fit(train_data, epochs=epochs)    

In [None]:
# so eve with 4000 ablation which means 4000 positive and 400 negative sequences we have achived 99.36%
# Epoch 16/16
# 32/32 [==============================] - 16s 514ms/step - loss: 0.5070 - accuracy: 0.9936
# now it is time to include validation set as well, for which we will create a new notebook

In [2]:
# That was certainly encouraging result 
# We got 99.9 %  accuracy  - Epoch 16/16
# epoch 11/16
# 16/16 [==============================] - 8s 513ms/step - loss: 0.5073 - accuracy: 0.9920
# Epoch 12/16
# 16/16 [==============================] - 8s 521ms/step - loss: 0.5044 - accuracy: 0.9980
# Epoch 13/16
# 16/16 [==============================] - 8s 512ms/step - loss: 0.5040 - accuracy: 0.9985
# Epoch 14/16
# 16/16 [==============================] - 8s 513ms/step - loss: 0.5038 - accuracy: 0.9990
# Epoch 15/16
# 16/16 [==============================] - 8s 510ms/step - loss: 0.5038 - accuracy: 0.9990
# 16/16 [==============================] - 8s 508ms/step - loss: 0.5038 - accuracy: 0.9990
# #################remeber the setting ####################
# test_model(ablation=2000, B=250, kernel_size=3, epochs=16, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )
# now we will give more data
test_model(ablation=4000, B=250, kernel_size=3, epochs=16, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )


getting ablation data: 4000
4000 12838
train_test_data done: 0.015006542205810547
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.474947214126587
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((250, 32, 64), (250,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         

In [2]:
# it helped , accuracy touched 80% . the best score was 82% . LEt us increase the data further
test_model(ablation=2000, B=250, kernel_size=3, epochs=16, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )

getting ablation data: 2000
2000 14838
train_test_data done: 0.015001535415649414
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.868514776229858
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((250, 32, 64), (250,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         

In [2]:
# slight improvement, earlier we observed data to batch ratio is actually 4:1 ,
# let us see if the batch size 250 helps 
test_model(ablation=1000, B=250, kernel_size=3, epochs=16, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )

getting ablation data: 1000
1000 15838
train_test_data done: 0.015004158020019531
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 7.093010902404785
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((250, 32, 64), (250,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         

In [2]:
# no improvment even at batch size 40 , let us increase the data further
test_model(ablation=1000, B=40, kernel_size=3, epochs=16, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )

getting ablation data: 1000
1000 15838
train_test_data done: 0.015027999877929688
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.854597568511963
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((40, 32, 64), (40,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(

In [2]:
# dropped to 72.8% , let us increase the batch size
test_model(ablation=160, B=40, kernel_size=3, epochs=16, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )

getting ablation data: 160
160 16678
train_test_data done: 0.016001462936401367
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.859640836715698
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((40, 32, 64), (40,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(40

In [2]:
# loss: 0.5804 - accuracy: 0.8250
# loss: 0.5862 - accuracy: 0.7937
# So increasing data point and increasing batch_size both is helping 
#we got 79% at the 16th epochs where as at the 10th epochs it touched 82%
# Let us increase data point further
test_model(ablation=160, B=20, kernel_size=3, epochs=16, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )

getting ablation data: 160
160 16678
train_test_data done: 0.015029430389404297
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.874576091766357
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((20, 32, 64), (20,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(20

In [2]:
# no more epochs did not help , let us increase the bach size
test_model(ablation=80, B=20, kernel_size=3, epochs=16, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )

getting ablation data: 80
80 16758
train_test_data done: 0.015012025833129883
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.98826789855957
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((20, 32, 64), (20,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(20, 3

In [2]:
# with ablation 80 , accuracy dropped to 50% , althoough it touched 71.25
# 0.6129 - accuracy: 0.7125
# 0.8133 - accuracy: 0.5000
# may be more epochs wil help ?
test_model(ablation=80, B=10, kernel_size=3, epochs=16, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )

getting ablation data: 80
80 16758
train_test_data done: 0.015974998474121094
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.878019571304321
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((10, 32, 64), (10,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(10, 

In [2]:
# improved by 3 % - loss: 0.5840 - accuracy: 0.7875 - test_model(ablation=40, B=10, kernel_size=3, epochs=8, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )
# when data is doubled 20 to 40  but batch remined same as 5 accuracy drooped by 3% 
# However when batch size  doubled from 5 to 10 accuracy improved by 3%  from the best score
# Let us double the data again
test_model(ablation=80, B=10, kernel_size=3, epochs=8, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )

getting ablation data: 80
80 16758
train_test_data done: 0.015000343322753906
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.838227033615112
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((10, 32, 64), (10,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(10, 

In [2]:
# not improving ,3% accuracy dropped. Increase the batch size 
test_model(ablation=40, B=10, kernel_size=3, epochs=8, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )

getting ablation data: 40
40 16798
train_test_data done: 0.01399683952331543
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.6837499141693115
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((10, 32, 64), (10,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(10, 

In [2]:
# not improving , now let us double the data
test_model(ablation=40, B=5, kernel_size=3, epochs=8, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )

getting ablation data: 40
40 16798
train_test_data done: 0.015001535415649414
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.859712600708008
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32,

In [2]:
# increasing the epochs is not helping, let us increase the conv layer 1 more for each set1 and set 2
test_model(ablation=20, B=5, kernel_size=3, epochs=12, dense_neurons=2048, conv1d_set1=3,conv1d_set2=3, maxpool_1=True )

getting ablation data: 20
20 16818
train_test_data done: 0.016002416610717773
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.773735284805298
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32,

In [2]:
# so hierarchical conv improved the best result by 5% - 11ms/step - loss: 0.5922 - accuracy: 0.7500
# This is the setting - test_model(ablation=20, B=5, kernel_size=3, epochs=6, dense_neurons=2048, conv1d_set1=2,conv1d_set2=2, maxpool_1=True )
# let us now increase epochs 
test_model(ablation=20, B=5, kernel_size=3, epochs=12, dense_neurons=2048, conv1d_set1=2,conv1d_set2=2, maxpool_1=True )

getting ablation data: 20
20 16818
train_test_data done: 0.014966249465942383
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.63904070854187
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32, 

In [2]:
# increasing the conv1d_set did not help . lets include a maxpooling
test_model(ablation=20, B=5, kernel_size=3, epochs=6, dense_neurons=2048, conv1d_set1=2,conv1d_set2=2, maxpool_1=True )

getting ablation data: 20
20 16818
train_test_data done: 0.014998912811279297
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.645535230636597
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32,

In [2]:
# yes ,, the setting produced same result twice - test_model(ablation=20, B=5, kernel_size=3, epochs=6, dense_neurons= 2048 )
# lets increase one more layer of conv1d
test_model(ablation=20, B=5, kernel_size=3, epochs=6, dense_neurons=2048, conv1d_set1=2 )

getting ablation data: 20
20 16818
train_test_data done: 0.013999700546264648
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.768073558807373
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32,

In [3]:
# decreasing not helping , keeping it same as the best just to check consistency
test_model(ablation=20, B=5, kernel_size=3, epochs=6, dense_neurons= 2048 )

getting ablation data: 20
20 16818
train_test_data done: 0.016002178192138672
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 7.299583911895752
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(5, 32,

In [2]:
# increasing the dense_neuron did not help, now reduce 
test_model(ablation=20, B=5, kernel_size=3, epochs=6, dense_neurons= 1024 )

getting ablation data: 20
20 16818
train_test_data done: 0.016967058181762695
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.756180047988892
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32,

In [2]:
# increainsg the filers , kernel size not helping. lets try with dense neuron wuth the best result so far test_model(ablation=20, B=5, kernel_size=3, epochs=6)
test_model(ablation=20, B=5, kernel_size=3, epochs=6, dense_neurons= 4098 )

getting ablation data: 20
20 16818
train_test_data done: 0.01500082015991211
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.808279752731323
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32, 

In [3]:
# so far best score loss: 0.6172 - accuracy: 0.7000 is with test_model(ablation=20, B=5, kernel_size=3, epochs=6)
test_model(ablation=20, B=5, kernel_size=3, epochs=6, filters=82)

getting ablation data: 20
20 16818
train_test_data done: 0.015002012252807617
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.6594531536102295
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32

In [2]:
test_model(ablation=20, B=5, kernel_size=3, epochs=6, filters=128)

getting ablation data: 20
20 16818
train_test_data done: 0.015002727508544922
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.7468507289886475
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32

In [2]:
test_model(ablation=20, B=5, kernel_size=3, epochs=12)

getting ablation data: 20
20 16818
train_test_data done: 0.013999462127685547
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.646817684173584
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32,

In [2]:
test_model(ablation=20, B=5, kernel_size=3, epochs=6)

getting ablation data: 20
20 16818
train_test_data done: 0.015002012252807617
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.641399621963501
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32,

In [3]:
test_model(ablation=20, B=5, kernel_size=8, epochs=6)

getting ablation data: 20
20 16818
train_test_data done: 0.015009403228759766
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.722247838973999
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32,

In [3]:
test_model(ablation=20, B=5, kernel_size=8)

getting ablation data: 20
20 16818
train_test_data done: 0.0149993896484375
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.682743549346924
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32, 6

In [2]:
test_model(ablation=20, B=5, kernel_size=4)

getting ablation data: 20
20 16818
train_test_data done: 0.015000104904174805
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.96438455581665
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32, 

In [2]:
test_model(ablation=20, B=5, kernel_size=2)

getting ablation data: 20
20 16818
train_test_data done: 0.015002965927124023
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.9465086460113525
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32

In [2]:
test_model(ablation=20, B=5)

getting ablation data: 20
20 16818
train_test_data done: 0.014000654220581055
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 7.033799171447754
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((5, 32, 64), (5,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(5, 32,

In [3]:
test_model(ablation=6, B=1)

getting ablation data: 6
6 16832
train_test_data done: 0.015001296997070312
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.7540202140808105
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((1, 32, 64), (1,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(1, 32, 

In [3]:
test_model(ablation=100, B=10)

getting ablation data: 100
100 16738
train_test_data done: 0.014999866485595703
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.796327829360962
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((10, 32, 64), (10,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(10

In [2]:
test_model(ablation=20, B=10)

getting ablation data: 20
20 16818
train_test_data done: 0.015000343322753906
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 6.722995281219482
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((10, 32, 64), (10,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(10, 

In [16]:
test_model()

getting ablation data: 2000
2000 14838
train_test_data done: 0.028005361557006836
RAM usage train_test_data:  72
length of train  sequence original 4
length of train  sequence original 25
length of train  sequence original 33
length of train  sequence original 21
length of train  sequence original 2
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
length of train sequence padded 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
len of test seq after padding 32
padded_train_test_data done: 7.109444856643677
RAM usage padded_train_test_data:  72
<BatchDataset shapes: ((32, 32, 64), (32,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(

In [23]:
len(y_train)

4000