Data loading from wav files into a csv file


In [1]:
#import libraries
import numpy as np
from scipy.io import wavfile
import os
import pandas as pd

#classes
classes = ["ita", "eng", "por", "esp", "ara"]
sample_length = 60 * 16000 # 16kHz sampling of 60 seconds of audio

# Load the WAV file
folder = os.getcwd() + "/../rec/"
audio_files_list = os.listdir(folder)
print(len(audio_files_list))

audio_recs = np.zeros( (len(audio_files_list), sample_length), dtype="int16")
langs = []
print(audio_recs.shape)

for i, audio in enumerate(audio_files_list):
    sample_rate, audio_rec = wavfile.read(folder + audio)
    audio_recs[i] = audio_rec
    langs.append(audio[4:7])


# Create a dictionary with the structured data
data_dictionary = {
    'audio_raw_data': audio_recs.tolist(),
    'language': langs
}

'''
# Specify the data types for each column
data_types = {
    'audio_raw_data': 'object',
    'language': 'str'
}
'''
df = pd.DataFrame(data_dictionary)
# Save the DataFrame to a CSV file
df.to_csv('dataset0.csv', index=False)

print(df.head())


53
(53, 960000)
                                      audio_raw_data language
0  [14, -11, 4, -21, -16, -37, -22, 3, -43, -20, ...      ara
1  [8, 29, -4, -33, -44, -18, -51, -80, -7, -24, ...      ara
2  [26, 15, 10, 1, 11, 17, 17, 23, 26, 21, 14, 6,...      ara
3  [2324, 2316, 2304, 2312, 2300, 2329, 2356, 234...      ara
4  [671, 690, 711, 729, 726, 727, 754, 745, 764, ...      ara


In [2]:
window = 10
hop = 2
frequency = 16000

dataset = pd.read_csv("dataset0.csv")

audio_splits = np.empty((50 // hop * len(audio_files_list), frequency * 10), dtype="int16")
langs = []
split_index = 0


for index, sample in dataset.iterrows():
    #raw_data = np.array(literal_eval(sample["audio_raw_data"]), dtype="int16")
    raw_data = np.fromstring( (sample["audio_raw_data"].replace(' ', ''))[1:-1], dtype="int16", sep=',')
    print("file n: ", index, " with length ", len(raw_data))
    for time in range(0, 50 * frequency, hop * frequency):
        audio_splits[split_index] = raw_data[time : time+window*frequency]
        langs.append(sample["language"])
        split_index = split_index + 1
        

# Create a dictionary with the structured data
data_dictionary = {
    'audio_raw_data': audio_splits.tolist(),
    'language': langs
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data_dictionary)

# save new dataframes as csv for each language in the dataset
dataset_ita = df[df["language"] == "ita"]
print("computed ita: ", dataset_ita.shape)
dataset_ita.to_csv("dataset_ita.csv", index=False)
dataset_eng = df[df["language"] == "eng"]
print("computed eng: ", dataset_eng.shape)
dataset_eng.to_csv("dataset_eng.csv", index=False)

print("done")


file n:  0  with length  960000
file n:  1  with length  960000
file n:  2  with length  960000
file n:  3  with length  960000
file n:  4  with length  960000
file n:  5  with length  960000
file n:  6  with length  960000
file n:  7  with length  960000
file n:  8  with length  960000
file n:  9  with length  960000
file n:  10  with length  960000
file n:  11  with length  960000
file n:  12  with length  960000
file n:  13  with length  960000
file n:  14  with length  960000
file n:  15  with length  960000
file n:  16  with length  960000
file n:  17  with length  960000
file n:  18  with length  960000
file n:  19  with length  960000
file n:  20  with length  960000
file n:  21  with length  960000
file n:  22  with length  960000
file n:  23  with length  960000
file n:  24  with length  960000
file n:  25  with length  960000
file n:  26  with length  960000
file n:  27  with length  960000
file n:  28  with length  960000
file n:  29  with length  960000
file n:  30  with le

## Pre-processing data wtih tensorflow

In [55]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

print("TensorFlow version:", tf.__version__)

# Function to compute e^x with a parameter `a`
def func_e_x(x, a):
    return np.exp(a * x)

# training set size
n = 100

# Length of each vector
vector_length = 50

# Parameters for the function e^x
parameters = np.random.normal(0, 1, n)

vector = np.linspace(0, 1, vector_length)

# Generate n vectors of length 50 with e^x function using different parameters
vectors = np.array([func_e_x(vector, a) for a in parameters])

# Labels to compute the average of the input vector
labels = np.array([np.sign(np.mean(vec) - 1.0) for vec in vectors])
labels_onehot = np.zeros((labels.size, 2))

for i, y in enumerate(labels):
    labels_onehot[i] = [1, 0] if y == 1 else [0, 1]

print(vectors.shape)
print(labels_onehot.shape)


TensorFlow version: 2.12.0
(100, 50)
(100, 2)


In [47]:

print(labels_onehot)
print(labels_onehot.shape)

[[0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]]
(100, 2)


In [56]:
rows = 10
cols = 5

# tutorial says: tf.Module
class CustomModel(tf.keras.Model):

	def __init__(self):
		super(CustomModel, self).__init__()

		self.model = tf.keras.Sequential([
			
				tf.keras.layers.Flatten(input_shape=(rows, cols), name='flatten'),
				tf.keras.layers.Dense(32, activation='relu', name='dense_1'),
				tf.keras.layers.Dense(2, activation='softmax', name='output')
		])

		# self.model.compile(optimizer='sgd', loss='mean_squared_error')
		self.model.summary()

	def call(self, inputs):
        # Define the forward pass of your model
		return self.model(inputs)

	@tf.function(input_signature=[
		tf.TensorSpec([None, rows, cols], dtype=tf.float32)
	])
	def infer(self, x):
		output = self.model(x)
		return {
			"output": output
		}

	@tf.function(input_signature=[
		tf.TensorSpec([None, vector_length], dtype=tf.float32)
	])
	def preprocess(self, x):
		cosines = tf.raw_ops.Cosh(x=x)
		matrix = tf.raw_ops.Reshape(tensor=cosines, shape=[-1, rows, cols])
		return { "flatten_input": matrix }


In [57]:
NUM_EPOCHS = 5
BATCH_SIZE = 5

# Convert NumPy arrays to TensorFlow Dataset
train_x = tf.convert_to_tensor(vectors, dtype=tf.float32)
train_y = tf.convert_to_tensor(labels_onehot, dtype=tf.float32)

model = CustomModel()
train_x_preprocess = model.preprocess(train_x)

model.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=['accuracy'])
model.fit(x = train_x_preprocess, y = train_y, batch_size = BATCH_SIZE, epochs = NUM_EPOCHS, validation_split=0.2)


Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 50)                0         
                                                                 
 dense_1 (Dense)             (None, 32)                1632      
                                                                 
 output (Dense)              (None, 2)                 66        
                                                                 
Total params: 1,698
Trainable params: 1,698
Non-trainable params: 0
_________________________________________________________________




Epoch 1/5


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x23bfa6b5e90>

In [65]:

tf.saved_model.save(model, "models",
    signatures = {
        'infer': model.infer.get_concrete_function(),
        'preprocess': model.preprocess.get_concrete_function(),
    })

# Convert the model
converter = tf.lite.TFLiteConverter.from_saved_model("models")
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # enable TensorFlow Lite ops.
    tf.lite.OpsSet.SELECT_TF_OPS  # enable TensorFlow ops.
]

converter.experimental_enable_resource_variables = True
tflite_model = converter.convert()




INFO:tensorflow:Assets written to: models\assets


INFO:tensorflow:Assets written to: models\assets


In [66]:
# Save the model.
with open('model.tflite', 'wb') as f:
	f.write(tflite_model)


In [67]:
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()

preprocessing = interpreter.get_signature_runner("preprocess")
inference = interpreter.get_signature_runner("infer")

input = preprocessing(x = train_x)
result = inference(x= input)

predictions = np.argmax(result["output"], axis=1)
true_labels = np.argmax(train_y)

print(predictions)
print(true_labels)

RuntimeError: 