In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow import nn
from tensorflow.keras.activations import softmax
from keras import layers
from tensorflow.keras.layers import Dense,LayerNormalization ## alternative for nn.linear
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

2023-12-04 18:04:46.623802: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-04 18:04:46.800214: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-04 18:04:46.800288: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-04 18:04:46.824698: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-04 18:04:46.882282: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# X_data = np.load("../../data/data_mfcc.npy")
X_data = np.load("../../data/data_40_500.npy")
X_data = np.transpose(X_data, (0, 2, 1))
# X_data=X_data[:100]
# X_data = np.expand_dims(X_data, axis=-1)
print(X_data.shape)




(13100, 500, 40)


In [3]:
data = pd.read_csv(
    "../../data/LJSpeech-1.1/metadata.csv",
    sep="|",
    header=None,
    names=["ID", "Text1", "Text2"],
)
texts = data["Text1"].to_list()
ID = data["ID"].to_list()
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
num_classes = len(tokenizer.word_index) + 1  # Add 1 for the padding token
sequences = tokenizer.texts_to_sequences(texts)
Y_data = pad_sequences(sequences, padding="post", maxlen=30)
# Y_data=Y_data[:100]
print(num_classes)
print(Y_data.shape)

14518
(13100, 30)


In [4]:
def create_self_attention_mask(sequence_length):
    mask = np.tril(np.ones((sequence_length,sequence_length)))
    mask[mask==0]=-np.inf
    mask[mask==1]=0
    return mask

In [5]:
test = create_self_attention_mask(5)
test

array([[  0., -inf, -inf, -inf, -inf],
       [  0.,   0., -inf, -inf, -inf],
       [  0.,   0.,   0., -inf, -inf],
       [  0.,   0.,   0.,   0., -inf],
       [  0.,   0.,   0.,   0.,   0.]])

In [6]:
def scaled_dot_product(q, k, v, mask):
    d_k = tf.cast(tf.shape(k)[-1], tf.float32)
    scaled_qk = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(d_k)

    if mask is not None:
        scaled_qk += mask

    attention_weights = tf.nn.softmax(scaled_qk)
    output = tf.matmul(attention_weights, v)
    return output, attention_weights

In [7]:
q=[[1.0,2.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0]]
k=[[1.0,3.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0]]
v=[[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0]]
mask_1=create_self_attention_mask(3)
test,weights = scaled_dot_product(q,k,v,mask=mask_1)
print("output is\n",test)
print("attention weights is \n",weights)

2023-12-04 18:04:49.958350: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-04 18:04:50.070588: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-04 18:04:50.070768: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

output is
 tf.Tensor(
[[1.         1.         1.        ]
 [0.99999994 0.99999994 0.99999994]
 [1.         1.         1.        ]], shape=(3, 3), dtype=float32)
attention weights is 
 tf.Tensor(
[[1.         0.         0.        ]
 [0.7603684  0.23963155 0.        ]
 [0.6133826  0.19330868 0.19330868]], shape=(3, 3), dtype=float32)


In [8]:
class PositionalEncoding(tf.keras.layers.Layer):
    def __init__(self, d_model, max_sequence_length):
        super(PositionalEncoding, self).__init__()
        self.max_sequence_length = max_sequence_length
        self.d_model = d_model

    
    def call(self, inputs):
        even_i = tf.range(0, self.d_model, 2, dtype=tf.float32)
        denominator = tf.pow(10000.0, even_i / self.d_model)
        position = tf.reshape(
            tf.range(self.max_sequence_length, dtype=tf.float32),
            (1, self.max_sequence_length, 1),
        )
        even_PE = tf.sin(position / denominator)
        odd_PE = tf.cos(position / denominator)
        stacked = tf.stack([even_PE, odd_PE], axis=2)
        PE = tf.reshape(stacked, (1, self.max_sequence_length, -1))
        print("postional encoding output shape",PE.shape)
        return PE

In [9]:
class ConvolutionalLayer(tf.keras.layers.Layer):
    def __init__(self, input_shape, filters=64, kernel_size=3, **kwargs):
        super(ConvolutionalLayer, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size

        self.conv1 = layers.Conv1D(filters=self.filters, kernel_size=self.kernel_size, padding="same", trainable=True)
        self.batch_norm1 = layers.BatchNormalization()
        self.relu1 = layers.ReLU()

        self.conv2 = layers.Conv1D(filters=self.filters, kernel_size=self.kernel_size, padding="same",trainable=True)
        self.batch_norm2 = layers.BatchNormalization()
        self.relu2 = layers.ReLU()

        self.global_avg_pooling = layers.GlobalAveragePooling1D()

    
    def call(self, inputs, training=None, mask=None):
        conv1_out = self.relu1(self.batch_norm1(self.conv1(inputs), training=training))
        conv2_out = self.relu2(self.batch_norm2(self.conv2(conv1_out), training=training))
        gap_out = self.global_avg_pooling(conv2_out)
        if training:
            print("CNN output shape is  ", gap_out.shape)
        return gap_out

In [10]:
class Transformer(tf.keras.Model):
    def __init__(self, d_model, max_input_length):
        super(Transformer, self).__init__()
        self.d_model = d_model
        self.cnn_layer = ConvolutionalLayer(input_shape=(None, 500,40))  # Adjust input shape

    def call(self, inputs, training=None):
        cnn_output = self.cnn_layer(inputs)
        return cnn_output


In [11]:
model = tf.keras.Sequential([Transformer(d_model=20,max_input_length=500),layers.Dense(30)])
# model.build(input_shape=(None, 500, 20))

In [12]:
from sklearn.model_selection import train_test_split

# Assuming X_data and Y_data are your input features and labels
X_train, X_val, Y_train, Y_val = train_test_split(X_data, Y_data, test_size=0.2, random_state=42)

print(X_train.shape)
print(X_val.shape)

print(Y_train.shape)
print(Y_val.shape)

(10480, 500, 40)
(2620, 500, 40)
(10480, 30)
(2620, 30)


In [13]:
optim = tf.keras.optimizers.Adam(learning_rate=5)
model.compile(optimizer=optim, loss='mean_squared_error',metrics=['mae'])
model.build(input_shape=(13100,500,40))  # Replace your_input_shape with the actual input shape
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 transformer (Transformer)   (13100, 64)               20608     
                                                                 
 dense (Dense)               (13100, 30)               1950      
                                                                 
Total params: 22558 (88.12 KB)
Trainable params: 22302 (87.12 KB)
Non-trainable params: 256 (1.00 KB)
_________________________________________________________________


In [14]:
def loss_function(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true - y_pred))

In [15]:
model.fit(X_train,Y_train,epochs=1,validation_data=(X_val, Y_val))

CNN output shape is   (None, 64)
CNN output shape is   (None, 64)
CNN output shape is   (None, 64)


2023-12-04 18:04:53.374550: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
2023-12-04 18:04:53.529594: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-12-04 18:04:54.215319: I external/local_xla/xla/service/service.cc:168] XLA service 0x7fb0ac2c53d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-12-04 18:04:54.215346: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 1650, Compute Capability 7.5
2023-12-04 18:04:54.224553: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1701693294.316071    4774 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.




<keras.src.callbacks.History at 0x7fb0b9bd2e80>

In [16]:
prediction = model.predict(X_val)
print("prediction ",prediction[0])
print("actual ",Y_val[0])

prediction  [ 750.16785  1041.0319   1354.0059   1569.097    1295.1432   1371.7994
  881.2695   1193.3322   1107.3003   1065.8328   1113.4763    995.64655
  908.2389   1191.3553    801.30145   952.7885    814.50995   530.91705
  261.3897    335.10663   565.9772    286.17395   182.61465   128.91176
  173.74344    62.93481   112.46759   -20.164324  -76.85158    14.321409]
actual  [  18    8  655    1 3689   19  247   60 1007   15  269   22  712    1
 1137  164   11    4 3652    3   16    3   16    0    0    0    0    0
    0    0]
