In [1]:
from util import *

2023-12-10 23:53:46.931799: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-10 23:53:46.931870: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-10 23:53:46.934632: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-10 23:53:46.947248: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
train_dir = 'Dataset/train_clean.txt'
val_dir = 'Dataset/val_clean.txt'
test_dir = 'Dataset/test_clean.txt'

In [3]:
with open(train_dir, 'r', encoding='utf8') as f:
    train = f.read()
with open(val_dir, 'r', encoding='utf8') as f:
    val = f.read()
with open(test_dir, 'r', encoding='utf8') as f:
    test = f.read()

In [4]:
with open('pkl_dir/train_no_diacritics.txt', 'rb') as f:
    train_no_diacritics = pickle.load(f)
with open('pkl_dir/train_labels.txt', 'rb') as f:
    train_labels = pickle.load(f)
with open('pkl_dir/val_no_diacritics.txt', 'rb') as f:
    val_no_diacritics = pickle.load(f)
with open('pkl_dir/val_labels.txt', 'rb') as f:
    val_labels = pickle.load(f)
with open('pkl_dir/test_no_diacritics.txt', 'rb') as f:
    test_no_diacritics = pickle.load(f)
with open('pkl_dir/test_labels.txt', 'rb') as f:
    test_labels = pickle.load(f)

In [5]:
max_len = 600

train_sentences, train_diacritics = extract_sentences(train_no_diacritics, train_labels, max_len)
val_sentences, val_diacritics = extract_sentences(val_no_diacritics, val_labels, max_len)
test_sentences, test_diacritics = extract_sentences(test_no_diacritics, test_labels, max_len)

In [6]:
test_len = 10000
np.random.seed(42)
indices = np.arange(len(test_sentences))
np.random.shuffle(indices)
test_sentences = test_sentences[indices]
test_diacritics = test_diacritics[indices]
test_sentences = test_sentences[:test_len]
test_diacritics = test_diacritics[:test_len]

In [7]:
sentence_encoder = LabelEncoder().fit(train_sentences.flatten())
X_train = sentence_encoder.transform(train_sentences.flatten()).reshape(train_sentences.shape)
X_val = sentence_encoder.transform(val_sentences.flatten()).reshape(val_sentences.shape)
X_test = sentence_encoder.transform(test_sentences.flatten()).reshape(test_sentences.shape)

In [8]:
del train_sentences, val_sentences, test_sentences
del train_no_diacritics, val_no_diacritics, test_no_diacritics

In [9]:
label_encoder = LabelEncoder().fit(train_diacritics.flatten())
y_train = label_encoder.transform(train_diacritics.flatten()).reshape(train_diacritics.shape)
y_val = label_encoder.transform(val_diacritics.flatten()).reshape(val_diacritics.shape)
y_test = label_encoder.transform(test_diacritics.flatten()).reshape(test_diacritics.shape)

y_train = 9 * y_train[:, :, 0] + y_train[:, :, 1]
y_val = 9 * y_val[:, :, 0] + y_val[:, :, 1]
y_test = 9 * y_test[:, :, 0] + y_test[:, :, 1]

label_encoder2 = LabelEncoder().fit(y_train.flatten())
y_train = label_encoder2.transform(y_train.flatten()).reshape(y_train.shape)
y_val = label_encoder2.transform(y_val.flatten()).reshape(y_val.shape)
y_test = label_encoder2.transform(y_test.flatten()).reshape(y_test.shape)

In [10]:
del train_diacritics, val_diacritics, test_diacritics

In [11]:
def conv1d(inputs, kernel_size, channels, activation, is_training, scope):
    with tf.name_scope(scope):
        # Use TensorFlow 2.x Conv1D layer
        conv1d_output = Conv1D(
            filters=channels,
            kernel_size=kernel_size,
            activation=None,  # Activation will be applied separately
            padding='same')(inputs)
        
        # Batch normalization
        conv1d_output = BatchNormalization()(conv1d_output, training=is_training)
        
        # Activation function
        conv1d_output = Activation(activation)(conv1d_output)

    return conv1d_output

In [12]:
def highwaynet(inputs, scope, units=128, activation1=tf.nn.relu, activation2=tf.nn.sigmoid):
    with tf.name_scope(scope):
        H = tf.keras.layers.Dense(
            units=units,
            activation=tf.nn.relu,
            name='H')(inputs)
        T = tf.keras.layers.Dense(
            units=units,
            activation=tf.nn.sigmoid,
            name='T',
            bias_initializer=tf.constant_initializer(-1.0))(inputs)
    return H * T + inputs * (1.0 - T)

In [13]:
def cbhg(inputs, input_lengths, scope, K, projections, gru_size):
    with tf.name_scope(scope):
        # Convolution bank
        conv_outputs = tf.concat([Conv1D(gru_size, k, activation='relu', padding='same', name=f'conv1d_{k}')(inputs) for k in range(1, K + 1)], axis=-1)

        # Maxpooling
        maxpool_output = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv_outputs)

        # Two projection layers
        proj1_output = Conv1D(projections[0], 3, activation='relu', padding='same', name='proj_1')(maxpool_output)
        proj2_output = Conv1D(projections[1], 3, padding='same', name='proj_2')(proj1_output)

        # Residual connection
        highway_input = proj2_output + inputs

        # Handle dimensionality mismatch
        if highway_input.shape[2] != gru_size:
            highway_input = Dense(gru_size)(highway_input)

        # 4-layer HighwayNet
        for i in range(4):
            highway_input = highwaynet(highway_input, f'highway_{i + 1}')
        rnn_input = highway_input

        # Bidirectional RNN
        outputs = Bidirectional(GRU(gru_size, return_sequences=True))(rnn_input, mask=tf.sequence_mask(input_lengths))

        return outputs

In [14]:
from tensorflow.keras.layers import Layer, Conv1D, Dense, MaxPooling1D, Bidirectional, GRU
import tensorflow as tf
tf.config.run_functions_eagerly(True)
class CbhgLayer(Layer):
    def __init__(self, K, projections, gru_size, **kwargs):
        super(CbhgLayer, self).__init__(**kwargs)
        self.K = K
        self.projections = projections
        self.gru_size = gru_size
        self.conv_layers = [Conv1D(self.gru_size, k, activation='relu', padding='same', name=f'conv1d_{k}') for k in range(1, self.K + 1)]

    @tf.function
    def call(self, inputs, mask=None):
        # Convolution bank
        conv_outputs = tf.concat([layer(inputs) for layer in self.conv_layers], axis=-1)

        # Maxpooling
        maxpool_output = MaxPooling1D(pool_size=2, strides=1, padding='same')(conv_outputs)

        # Two projection layers
        proj1_output = Conv1D(self.projections[0], 3, activation='relu', padding='same', name='proj_1')(maxpool_output)
        proj2_output = Conv1D(self.projections[1], 3, padding='same', name='proj_2')(proj1_output)

        # Residual connection
        highway_input = proj2_output + inputs

        # Handle dimensionality mismatch
        if highway_input.shape[2] != self.gru_size:
            highway_input = Dense(self.gru_size)(highway_input)

        # 4-layer HighwayNet
        for i in range(4):
            highway_input = highwaynet(highway_input, f'highway_{i + 1}')
        rnn_input = highway_input

        # Bidirectional RNN
        outputs = Bidirectional(GRU(self.gru_size, return_sequences=True))(rnn_input, mask=mask)

        return outputs

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1], 2 * self.gru_size)  # Adjust this based on your output shape


In [15]:
model = Sequential()
model.add(Embedding(len(sentence_encoder.classes_), 512, input_length=max_len))
model.add(TimeDistributed(Dense(512, activation='relu')))
model.add(Dropout(0.5))
model.add(TimeDistributed(Dense(128, activation='relu')))
model.add(Dropout(0.5))

model.add(CbhgLayer(K=16, projections=[128, 128], gru_size=128))

model.add(TimeDistributed(Dense(256, activation='relu')))
model.add(TimeDistributed(Dense(np.unique(y_train).shape[0], activation='softmax')))

model.summary()

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

2023-12-10 23:54:10.648212: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-10 23:54:10.763153: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-10 23:54:10.763211: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-10 23:54:10.764739: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-10 23:54:10.764893: I external/local_xla/xla/stream_executor

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 600, 512)          28672     
                                                                 
 time_distributed (TimeDist  (None, 600, 512)          262656    
 ributed)                                                        
                                                                 
 dropout (Dropout)           (None, 600, 512)          0         
                                                                 
 time_distributed_1 (TimeDi  (None, 600, 128)          65664     
 stributed)                                                      
                                                                 
 dropout_1 (Dropout)         (None, 600, 128)          0         
                                                                 
 cbhg_layer (CbhgLayer)      (None, 600, 256)          2

In [16]:
history = model.fit(X_train, tf.keras.utils.to_categorical(y_train), validation_data=(X_val, tf.keras.utils.to_categorical(y_val)), batch_size=64, epochs=5)

2023-12-10 23:54:13.942649: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1469772000 exceeds 10% of free system memory.
2023-12-10 23:54:15.362027: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1469772000 exceeds 10% of free system memory.


Epoch 1/5


2023-12-10 23:54:16.848368: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
2023-12-10 23:54:17.083755: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


