In [3]:
import os

from google.colab import drive

# Mount Google Drive so the pickled dataset and tokenizer become reachable
# (this cell only works inside Google Colab).
drive.mount('/content/gdrive')

# Directory on Drive that holds the training artefacts.
gpath = '/content/gdrive/My Drive/MLdata/'
# Fail early with a clear message instead of silently discarding the check.
if not os.path.isdir(gpath):
    raise FileNotFoundError(f'Data directory not found: {gpath}')
print(os.listdir(gpath))

# Pickle with the padded train/test arrays; read by test_model() as a global.
dpath = os.path.join(gpath, 'padded_4000_train_test.pkl')
print(os.path.exists(dpath))
# Pickle with the fitted tokenizer; read by test_model() as a global.
tkpath = os.path.join(gpath, 'tk.pkl')
print(os.path.exists(tkpath))

Mounted at /content/gdrive
['padded_4000_train_test.pkl', 'tk.pkl']
True
True


In [8]:
def test_model(ablation=1000, B=32, epochs=3, filters=64, kernel_size=3, dense_neurons=2048,
               conv1d_set1=1, conv1d_set2=1, maxpool_1=False):
    """Build, train and validate a character-level CNN binary classifier.

    Reads two module-level globals: ``dpath`` (pickle containing the tuple
    ``(x_train, y_train, x_test, y_test)``) and ``tkpath`` (pickled tokenizer
    exposing a ``word_index`` mapping). The feature arrays are indexed as 3-D
    (``shape[1]`` and ``shape[2]`` are used to size the model).

    Args:
        ablation: Not used anywhere in the body; kept so existing callers
            that pass it keep working.
        B: Batch size. The model is built with a fixed batch dimension, so the
            last incomplete batch of each dataset is dropped.
        epochs: Number of training epochs.
        filters: Number of filters in every Conv1D layer.
        kernel_size: Kernel size of every Conv1D layer.
        dense_neurons: Width of the hidden Dense layer.
        conv1d_set1: Number of Conv1D layers applied right after the embedding.
        conv1d_set2: Number of Conv1D layers applied after the first pooling
            step; only used when ``maxpool_1`` is True.
        maxpool_1: If True, max-pool over axis 2, run the second Conv1D stack,
            then max-pool over axis 1. If False, flatten the output of the
            first Conv1D stack instead.

    Returns:
        Tuple ``(model, hist)``: the trained ``tf.keras.Model`` and the
        ``History`` object returned by ``model.fit``.
    """
    import pickle

    import numpy as np
    import tensorflow as tf

    tf.random.set_seed(123)

    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # load artefacts you created yourself.
    with open(dpath, 'rb') as f:
        x_train, y_train, x_test, y_test = pickle.load(f)

    # drop_remainder=True is required: the model below has a fixed batch size.
    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_data = train_data.shuffle(buffer_size=y_train.shape[0]).batch(B, drop_remainder=True)
    print(train_data)

    test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    test_data = test_data.shuffle(buffer_size=y_test.shape[0]).batch(B, drop_remainder=True)
    print(test_data)

    # NOTE(security): same pickle caveat as above.
    with open(tkpath, 'rb') as f:
        tk = pickle.load(f)
    vocab_size = len(tk.word_index)
    print(f'vocab_size: {vocab_size}')
    embedding_size = vocab_size

    # One-hot initial embedding: row 0 (padding id) stays all-zero and the row
    # for token id i has a single 1 in column i-1. Rows are placed by token id
    # rather than by dict iteration order.
    embedding_weights = np.zeros((vocab_size + 1, vocab_size))
    for token_id in tk.word_index.values():
        embedding_weights[token_id, token_id - 1] = 1

    rows, cols = x_train.shape[1], x_train.shape[2]

    # Inputs are integer token ids, so declare them as such.
    inputs = tf.keras.layers.Input(batch_shape=(B, rows, cols), dtype='int32')
    x = tf.keras.layers.Embedding(input_dim=vocab_size + 1,
                                  output_dim=embedding_size,
                                  input_length=cols,
                                  weights=[embedding_weights],
                                  )(inputs)
    for _ in range(conv1d_set1):
        x = tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(x)

    if maxpool_1:
        # Collapse axis 2 entirely, then drop the resulting singleton axis.
        # The channel count is read from the tensor so conv1d_set1=0 also works.
        x = tf.keras.layers.MaxPooling2D(pool_size=(1, cols))(x)
        x = tf.reshape(x, (B, rows, x.shape[-1]))
        for _ in range(conv1d_set2):
            x = tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(x)
        # Collapse axis 1 entirely, leaving one feature vector per sample.
        x = tf.keras.layers.MaxPooling1D(pool_size=rows)(x)
        x = tf.reshape(x, (B, x.shape[-1]))
    else:
        x = tf.keras.layers.Flatten()(x)

    x = tf.keras.layers.Dense(dense_neurons)(x)
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    # summary() prints by itself and returns None; wrapping it in print()
    # only adds a stray "None" line.
    model.summary()

    # The output layer already applies a sigmoid, so the loss must treat the
    # predictions as probabilities (from_logits=False). The sigmoid is kept
    # because the accuracy/precision/recall metrics threshold probabilities.
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                  metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
    hist = model.fit(train_data, validation_data=test_data, epochs=epochs)
    return model, hist

In [9]:
# Train the variant with both pooling stages: 3 + 3 Conv1D layers,
# batch size 250, 16 epochs. The (model, history) tuple is the cell output.
test_model(
    B=250,
    epochs=16,
    kernel_size=3,
    dense_neurons=2048,
    conv1d_set1=3,
    conv1d_set2=3,
    maxpool_1=True,
)

<BatchDataset shapes: ((250, 64, 256), (250,)), types: (tf.int32, tf.int64)>
<BatchDataset shapes: ((250, 64, 256), (250,)), types: (tf.int32, tf.int64)>
vocab_size: 42
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(250, 64, 256)]          0         
                                                                 
 embedding_5 (Embedding)     (250, 64, 256, 42)        1806      
                                                                 
 conv1d_12 (Conv1D)          (250, 64, 256, 64)        8128      
                                                                 
 conv1d_13 (Conv1D)          (250, 64, 256, 64)        12352     
                                                                 
 conv1d_14 (Conv1D)          (250, 64, 256, 64)        12352     
                                                                 
 max_pooling2d_2 (MaxP

  return dispatch_target(*args, **kwargs)


Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


(<keras.engine.functional.Functional at 0x7f0eeecbbe90>,
 <keras.callbacks.History at 0x7f0eeed27650>)