In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import os

In [None]:
# Mount the user's Google Drive inside the Colab VM at /content/drive.
# The data-loading cells below build their paths as
# os.getcwd() + "/drive/MyDrive/...", so they rely on this mount point and on
# the working directory being /content (presumably the Colab default — confirm).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def string_to_tensor(str_to_convert):
  """Encode a transcript as a tensor of character indices.

  Each character is mapped to its position in the 30-symbol alphabet below
  (lowercase Turkish letters followed by a space).

  Args:
    str_to_convert: Transcript text. Every character must belong to the
      alphabet; no case folding or whitespace stripping is applied.

  Returns:
    An int32 tensor of shape (1, 1, len(str_to_convert)).

  Raises:
    ValueError: If the text contains a character outside the alphabet.
  """
  char_set = "abcçdefgğhıijklmnoöprsştuüvyz "

  # Dictionary lookup instead of str.index: O(1) per character and lets us
  # report exactly which character was rejected.
  char_to_index = {char: index for index, char in enumerate(char_set)}

  y_list = []

  for position, char in enumerate(str_to_convert):
    if char not in char_to_index:
      raise ValueError(
          f"unsupported character {char!r} at position {position}; "
          f"allowed characters are {char_set!r}")
    y_list.append(char_to_index[char])

  # Pin the dtype: without it an empty transcript would be converted to a
  # float32 tensor, which is not a valid label tensor.
  tensor = tf.convert_to_tensor(y_list, dtype=tf.int32)

  return tf.reshape(tensor, shape=(1, 1, -1))

In [None]:
def tensor_to_string(tensor_to_convert):
  """Greedily decode the first sequence of a model output into text.

  For every timestep of `tensor_to_convert[0]` the highest-scoring class is
  taken. Class 30 is treated as "no character" and dropped; any other class
  is looked up in the alphabet and appended to the result.

  Args:
    tensor_to_convert: Indexable whose first element is a sequence of
      per-timestep score vectors.

  Returns:
    The decoded string.
  """
  alphabet = "abcçdefgğhıijklmnoöprsştuüvyz "
  blank_class = 30  # one past the last alphabet index

  decoded_chars = []

  for frame in tensor_to_convert[0]:
    best_class = tf.argmax(frame, axis=0)

    # Skip frames where the model predicts the non-character class.
    if(best_class == blank_class):
      continue

    decoded_chars.append(alphabet[best_class])

  return "".join(decoded_chars)

In [None]:
def filter_non_chars(output_tensor, max_len, blank_index=30):
  """Select `max_len` character frames from the first sequence of a model output.

  Timesteps whose arg-max class is `blank_index` are treated as "no
  character" and removed; the first `max_len` remaining timesteps are
  returned in their original order.

  If fewer than `max_len` timesteps predict a character, the result is topped
  up with the earliest blank-predicted timesteps (appended after the character
  frames) instead of failing on the final reshape. Those filler frames are
  real model outputs, so a loss computed on them still back-propagates.

  Args:
    output_tensor: Model output; only `output_tensor[0]` is used and it must
      be a 2-D (timesteps, classes) array or tensor.
    max_len: Number of frames to return.
    blank_index: Class index meaning "no character". Defaults to 30.

  Returns:
    A float32 tensor of shape (1, max_len, classes).

  Raises:
    ValueError: If the sequence has fewer than `max_len` timesteps in total.
  """
  frames = tf.cast(output_tensor[0], tf.float32)
  num_frames = int(tf.shape(frames)[0])
  num_classes = int(tf.shape(frames)[1])

  if max_len > num_frames:
    raise ValueError(
        f"cannot select {max_len} frames from a sequence of {num_frames} timesteps")

  # Vectorised replacement for the per-timestep Python loop.
  is_char = tf.not_equal(tf.argmax(frames, axis=-1), blank_index)
  result = tf.boolean_mask(frames, is_char)[:max_len]

  # Not enough character frames: fill the remainder with blank-predicted ones
  # so the output always has exactly max_len rows.
  missing = max_len - int(tf.shape(result)[0])
  if missing > 0:
    filler = tf.boolean_mask(frames, tf.logical_not(is_char))[:missing]
    result = tf.concat([result, filler], axis=0)

  return tf.reshape(result, (1, max_len, num_classes))

In [None]:
def read_input_csv(file_name):
  """Load one feature CSV from the Drive input folder as a batch-of-one tensor.

  The file is read from `<cwd>/drive/MyDrive/data/input/<file_name>` and
  reshaped to (1, rows, -1), i.e. a leading batch dimension of size one.

  NOTE(review): pd.read_csv is called with its defaults, so the first line of
  the file is consumed as a header row — confirm the CSVs actually have one.

  Args:
    file_name: Name of the CSV file inside the input folder.

  Returns:
    A tensor of shape (1, number_of_rows, remaining_size).
  """
  csv_path = os.getcwd() + f"/drive/MyDrive/data/input/{file_name}"
  frame = pd.read_csv(csv_path)
  row_count = frame.shape[0]

  return tf.reshape(frame, shape=(1, row_count, -1))

In [None]:
# Build the training set: one feature tensor and one label tensor per sample.
# Samples are numbered 0..7 and stored as "<i>_audio.csv" / "<i>_label.txt".
x_train = []
y_train = []


for i in range(8):

  x_train.append(read_input_csv(f"{i}_audio.csv"))

  # The labels contain Turkish characters, so decode explicitly as UTF-8
  # rather than relying on the platform's default encoding.
  with open(os.getcwd() + f"/drive/MyDrive/data/output/{i}_label.txt", "r", encoding="utf-8") as f:
    # readline() keeps the line terminator; strip it because string_to_tensor
    # only accepts characters from its alphabet.
    y_train.append(string_to_tensor(f.readline().rstrip("\r\n")))

In [None]:
def create_model() -> keras.Model:
  """Build the sequence-labelling network.

  Architecture: variable-length input with 32 features per timestep, then
  three LSTM stages (each returning the full sequence, each with an L1(0.01)
  kernel regularizer, each followed by dropout), and a 31-way softmax applied
  at every timestep.

  Returns:
    An uncompiled keras.Model.
  """
  # (LSTM units, dropout rate) per recurrent stage, in network order.
  recurrent_stages = (
      (1024, .1),
      (1024, .11),
      (2048, .1),
  )

  inputs = keras.Input(shape=(None, 32))

  hidden = inputs
  for units, drop_rate in recurrent_stages:
    recurrent = layers.LSTM(
        units,
        return_sequences=True,
        kernel_regularizer=keras.regularizers.L1(0.01),
    )
    hidden = recurrent(hidden)
    hidden = layers.Dropout(drop_rate)(hidden)

  outputs = layers.Dense(31, activation="softmax")(hidden)

  return keras.Model(inputs=inputs, outputs=outputs)

# Instantiate the network used by the training and inference cells below.
model = create_model()

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line).
model.summary()

# x_train = tf.constant([[[1,20,3], [4, 11, 56], [4, 5, 7], [87, 10, 0]]], dtype=tf.float32)

# y_real = tf.constant([[[11, 7]]])

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, 32)]        0         
                                                                 
 lstm (LSTM)                 (None, None, 1024)        4329472   
                                                                 
 dropout (Dropout)           (None, None, 1024)        0         
                                                                 
 lstm_1 (LSTM)               (None, None, 1024)        8392704   
                                                                 
 dropout_1 (Dropout)         (None, None, 1024)        0         
                                                                 
 lstm_2 (LSTM)               (None, None, 2048)        25174016  
                                                                 
 dropout_2 (Dropout)         (None, None, 2048)        0     

In [None]:
# Custom training loop: one gradient step per sample (batch size 1), repeated
# for a fixed number of epochs.
optimizer = keras.optimizers.Adam(learning_rate=0.0001)
loss_func = tf.losses.SparseCategoricalCrossentropy()
epochs = 300

if len(x_train) == 0:
  raise ValueError("x_train is empty; load the training data before training")

# NOTE(review): the LSTM layers declare kernel_regularizer, but a custom loop
# only applies such penalties if model.losses is added to `loss`. It is not
# added here, so the regularizers currently have no effect on the gradients —
# confirm whether that is intended before changing it.

for epoch in range(epochs):

  epoch_losses = []

  for features, labels in zip(x_train, y_train):

    with tf.GradientTape() as tape:

      y_pred = model(features, training=True)

      # Keep only as many (non-blank) output frames as the label has
      # characters so predictions and labels line up one-to-one.
      target_len = len(labels[0][0])
      loss = loss_func(labels, filter_non_chars(y_pred, target_len))

    gradients = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))

    epoch_losses.append(float(loss))

  # Report the mean over all samples; previously only the last sample's loss
  # was printed, which is not representative of the epoch.
  mean_loss = sum(epoch_losses) / len(epoch_losses)

  print(f"Epoch {epoch + 1} : loss = {mean_loss}")



Epoch 1 : loss = 3.0610334873199463
Epoch 2 : loss = 3.077976703643799
Epoch 3 : loss = 3.0357868671417236
Epoch 4 : loss = 3.0282044410705566
Epoch 5 : loss = 3.025486469268799
Epoch 6 : loss = 3.012667179107666
Epoch 7 : loss = 3.00700306892395
Epoch 8 : loss = 3.0055713653564453
Epoch 9 : loss = 3.0142390727996826
Epoch 10 : loss = 3.0189292430877686
Epoch 11 : loss = 3.013176441192627
Epoch 12 : loss = 3.007023334503174
Epoch 13 : loss = 2.9980320930480957
Epoch 14 : loss = 2.9905247688293457
Epoch 15 : loss = 3.0080766677856445
Epoch 16 : loss = 3.01528263092041
Epoch 17 : loss = 3.0198886394500732
Epoch 18 : loss = 2.9914822578430176
Epoch 19 : loss = 2.969522476196289
Epoch 20 : loss = 2.9989712238311768
Epoch 21 : loss = 2.9752888679504395
Epoch 22 : loss = 2.951422929763794
Epoch 23 : loss = 2.955664873123169
Epoch 24 : loss = 2.9417574405670166
Epoch 25 : loss = 2.9260799884796143
Epoch 26 : loss = 2.9390547275543213
Epoch 27 : loss = 2.923968553543091
Epoch 28 : loss = 2.924

In [None]:
# Run the trained model on sample 10 (not among the training samples 0-7)
# and print the greedy decoding of its output.
x_test = read_input_csv(f"{10}_audio.csv")

test_scores = model.predict(x_test)
decoded_text = tensor_to_string(test_scores)

print(decoded_text)

üsiiişşşşşııııııııııııaaaaaaaaşşşşşşşşşşıııııııııııııııııııııııııııışşşşşşşşşııııııııııışıııııııııııııııırrrrşşşşşşıııııııııışşşşşşşşşşşşşııııııırrraaaıııııııııııbbbbrrrşşaııııııııııııışşşşşşşşşıııbbbbbbraaaaaaııııııbbbbbbbrrrbbşşşşşşııııııııııııııııbbbbbbbbbbııııııııbbbbbbbaaaaaııııııııııırrrrrşşşşşşşşşııııııııııııbşşşaaaııııııbbbrrıııııııııııııırryyyyyyyyyybbbbşşşşrrrrrrşşııııııııııııbbbbrraaaaıııııııbbbbrrrrrrrbbbbbbşşaıııııırrrrrrrffşşşşşııııbbbbbbrrrrraaaııııışrrrrrrççççııııııııyyybbbbbbbbbbaaaaaaıııırrrrrrrrfbııbbbbbbbbrrrraaaaaaaybbbbbbbbbççaaaaııııbbbbffffffaaıııııbbbffrrrraaaa


1
