In [1]:
import numpy as np
import tensorflow as tf

In [2]:
import os
from sklearn.model_selection import train_test_split


In [3]:
def load_data_from_folders(folders):
    data = []
    labels = []
    for label, folder in enumerate(folders):
        for filename in os.listdir(folder):
            if filename.endswith('.txt'):
                file_path = os.path.join(folder, filename)
                with open(file_path, 'r') as file:
                    numbers = []
                    for line in file:
                        
                        line_numbers = [float(num.strip()) for num in line.split(',')]
                        numbers.extend(line_numbers)
                    
                    if len(numbers) == 255:
                        data.append(numbers)
                        labels.append(label)
    return np.array(data), np.array(labels)


In [4]:
folders = ['B:/jupyter_notebook/keywords_spotting/class_1', 'B:/jupyter_notebook/keywords_spotting/class_2', 'B:/jupyter_notebook/keywords_spotting/class_3']
data, labels = load_data_from_folders(folders)


In [5]:
train_data, val_data, train_labels, val_labels = train_test_split(data, labels, test_size=0.2, random_state=42)


In [6]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, tf.keras.utils.to_categorical(train_labels, num_classes=3)))
validation_dataset = tf.data.Dataset.from_tensor_slices((val_data, tf.keras.utils.to_categorical(val_labels, num_classes=3)))

BATCH_SIZE = 32
train_dataset = train_dataset.shuffle(buffer_size=len(train_data)).batch(BATCH_SIZE)
validation_dataset = validation_dataset.batch(BATCH_SIZE)


In [7]:

LEARNING_RATE = 0.005

In [8]:

model = tf.keras.models.Sequential([
    tf.keras.layers.Reshape((17, 15), input_shape=(255,)),  #  (17, 15)
    tf.keras.layers.Conv1D(8, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2, padding='same'),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Conv1D(16, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2, padding='same'),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(3, activation='softmax')
])


  super().__init__(**kwargs)


In [9]:

opt = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

In [10]:
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [11]:
model.summary()

In [12]:

EPOCHS = 300
model.fit(train_dataset, epochs=EPOCHS, validation_data=validation_dataset, verbose=2)
# the following printed results are just for testing

Epoch 1/300
1/1 - 1s - 1s/step - accuracy: 0.2500 - loss: 10.7065 - val_accuracy: 0.3333 - val_loss: 6.5710
Epoch 2/300
1/1 - 0s - 18ms/step - accuracy: 0.3333 - loss: 8.7831 - val_accuracy: 0.3333 - val_loss: 3.5772
Epoch 3/300
1/1 - 0s - 19ms/step - accuracy: 0.3333 - loss: 5.4139 - val_accuracy: 0.3333 - val_loss: 1.4171
Epoch 4/300
1/1 - 0s - 17ms/step - accuracy: 0.2083 - loss: 5.5237 - val_accuracy: 0.5000 - val_loss: 1.1862
Epoch 5/300
1/1 - 0s - 17ms/step - accuracy: 0.2917 - loss: 3.2244 - val_accuracy: 0.5000 - val_loss: 1.6483
Epoch 6/300
1/1 - 0s - 15ms/step - accuracy: 0.4167 - loss: 3.3841 - val_accuracy: 0.5000 - val_loss: 1.9084
Epoch 7/300
1/1 - 0s - 14ms/step - accuracy: 0.4167 - loss: 3.0302 - val_accuracy: 0.5000 - val_loss: 1.9890
Epoch 8/300
1/1 - 0s - 16ms/step - accuracy: 0.4583 - loss: 2.9884 - val_accuracy: 0.5000 - val_loss: 1.9061
Epoch 9/300
1/1 - 0s - 19ms/step - accuracy: 0.2917 - loss: 2.4481 - val_accuracy: 0.5000 - val_loss: 1.7108
Epoch 10/300
1/1 - 0

<keras.src.callbacks.history.History at 0x2a029fda880>

In [14]:

model.save('B:/jupyter_notebook/keywords_spotting/keyword_model.h5')


