Data loading from wav files into a csv file


In [54]:
#import libraries
import numpy as np
from scipy.io import wavfile
import os
import pandas as pd

#classes
classes = ["ita", "eng", "por", "esp", "ara"]
sample_length = 60 * 16000 # 16kHz sampling of 60 seconds of audio

# Load the WAV file
folder = os.getcwd() + "/../rec/"
audio_files_list = os.listdir(folder)
print(len(audio_files_list))

audio_recs = np.zeros( (len(audio_files_list), sample_length), dtype="int16")
langs = []
print(audio_recs.shape)

for i, audio in enumerate(audio_files_list):
    sample_rate, audio_rec = wavfile.read(folder + audio)
    audio_recs[i] = audio_rec
    langs.append(audio[4:7])


# Create a dictionary with the structured data
data_dictionary = {
    'audio_raw_data': audio_recs.tolist(),
    'language': langs
}

'''
# Specify the data types for each column
data_types = {
    'audio_raw_data': 'object',
    'language': 'str'
}
'''
df = pd.DataFrame(data_dictionary)
# Save the DataFrame to a CSV file
df.to_csv('dataset0.csv', index=False)

print(df.head())


53
(53, 960000)
                                      audio_raw_data language
0  [14, -11, 4, -21, -16, -37, -22, 3, -43, -20, ...      ara
1  [8, 29, -4, -33, -44, -18, -51, -80, -7, -24, ...      ara
2  [26, 15, 10, 1, 11, 17, 17, 23, 26, 21, 14, 6,...      ara
3  [2324, 2316, 2304, 2312, 2300, 2329, 2356, 234...      ara
4  [671, 690, 711, 729, 726, 727, 754, 745, 764, ...      ara


In [72]:
window = 10
hop = 2
frequency = 16000

dataset = pd.read_csv("dataset0.csv")

audio_splits = np.empty((50 // hop * len(audio_files_list), frequency * 10), dtype="int16")
langs = []
split_index = 0

for index, sample in dataset.iterrows():
    #raw_data = np.array(literal_eval(sample["audio_raw_data"]), dtype="int16")
    raw_data = np.fromstring( (sample["audio_raw_data"].replace(' ', ''))[1:-1], dtype="int16", sep=',')
    print(index)
    print(len(raw_data))
    for time in range(0, 50 * frequency, hop * frequency):
        audio_splits[split_index] = raw_data[time : time+window*frequency]
        langs.append(sample["language"])
        split_index = split_index + 1
        

# Create a dictionary with the structured data
data_dictionary = {
    'audio_raw_data': audio_splits.tolist(),
    'language': langs
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data_dictionary)

# Save the DataFrame to a CSV file
df.to_csv('dataset0_splits.csv', index=False)

print(df.head())
print(df.shape)

0
960000
1
960000
2
960000
3
960000
4
960000
5
960000
6
960000
7
960000
8
960000
9
960000
10
960000
11
960000
12
960000
13
960000
14
960000
15
960000
16
960000
17
960000
18
960000
19
960000
20
960000
21
960000
22
960000
23
960000
24
960000
25
960000
26
960000
27
960000
28
960000
29
960000
30
960000
31
960000
32
960000
33
960000
34
960000
35
960000
36
960000
37
960000
38
960000
39
960000
40
960000
41
960000
42
960000
43
960000
44
960000
45
960000
46
960000
47
960000
48
960000
49
960000
50
960000
51
960000
52
960000
                                      audio_raw_data language
0  [14, -11, 4, -21, -16, -37, -22, 3, -43, -20, ...      ara
1  [273, 242, 279, 280, 282, 318, 315, 284, 287, ...      ara
2  [-227, -223, -225, -206, -202, -160, -170, -13...      ara
3  [764, 761, 760, 725, 669, 702, 652, 453, 293, ...      ara
4  [-120, -149, -171, -163, -172, -118, -131, -10...      ara
(1325, 2)


Pre-processing data 

In [51]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

print("TensorFlow version:", tf.__version__)

# Function to compute e^x with a parameter `a`
def func_e_x(x, a):
    return np.exp(a * x)

# training set size
n = 10

# Length of each vector
vector_length = 50

# Parameters for the function e^x
parameters = [1, 2, -1, 0.5, -0.5, -0.3, 0.1, 0.6, -0.4, -1.3]

vector = np.linspace(0, 1, vector_length)

# Generate n vectors of length 50 with e^x function using different parameters
vectors = np.array([func_e_x(vector, a) for a in parameters])

# Labels to compute the average of the input vector
labels = np.array([np.mean(vec) for vec in vectors])

print(vectors.shape)
print(labels.shape)


TensorFlow version: 2.12.0
(10, 50)
(10,)


In [52]:
rows = 10
cols = 5

# tutorial says: tf.Module
class CustomModel(tf.keras.Model):

	def __init__(self):
		super(CustomModel, self).__init__()

		self.model = tf.keras.Sequential([
			
				tf.keras.layers.Flatten(input_shape=(rows, cols), name='flatten'),
				tf.keras.layers.Dense(16, activation='relu', name='dense_1'),
				tf.keras.layers.Dense(1, activation='linear', name='dense_2')
		])

		# self.model.compile(optimizer='sgd', loss='mean_squared_error')
		self.model.summary()

	def call(self, inputs):
        # Define the forward pass of your model
		return self.model(inputs)

	@tf.function(input_signature=[
			tf.TensorSpec([None, rows, cols], dtype=tf.float32)
	])
	def infer(self, x):
		output = self.model(x)
		return {
			"output": output
		}

	@tf.function(input_signature=[
			tf.TensorSpec([None, vector_length], dtype=tf.float32)
	])
	def preprocess(self, x):
		cosines = tf.raw_ops.Cosh(x=x)
		matrix = tf.raw_ops.Reshape(tensor=cosines, shape=[-1, rows, cols])
		return { "flatten_input": matrix }


In [53]:
NUM_EPOCHS = 20
BATCH_SIZE = 2

# Convert NumPy arrays to TensorFlow Dataset
train_x = tf.convert_to_tensor(vectors, dtype=tf.float32)
train_y = tf.convert_to_tensor(labels, dtype=tf.float32)

model = CustomModel()
train_x = model.preprocess(train_x)
model.compile(optimizer="sgd", loss="mean_squared_error", metrics=['accuracy'])
model.fit(x = train_x, y = train_y, batch_size = BATCH_SIZE, epochs = NUM_EPOCHS)


Model: "sequential_25"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 50)                0         
                                                                 
 dense_1 (Dense)             (None, 16)                816       
                                                                 
 dense_2 (Dense)             (None, 1)                 17        
                                                                 
Total params: 833
Trainable params: 833
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x23451a53d90>