In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import librosa
import librosa.display
import IPython.display as ipd
import os

In [2]:
def feature_extraction(audio_path):
    x, sample_rate = librosa.load(audio_path, res_type="kaiser_fast")
    mfcc = np.mean(librosa.feature.mfcc(y=x, sr=sample_rate, n_mfcc=100).T, axis=0)
    return mfcc

In [3]:
features = {}
i = 0
directory = "E:\\ML project\\codes\\data\\LJSpeech-1.1\\wavs\\"
for audio in os.listdir(directory):
    audio_path = directory + audio
    features[i] = feature_extraction(audio_path)
    if i % 100 == 0:
        print(i)
    i += 1

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400
8500
8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900
10000
10100
10200
10300
10400
10500
10600
10700
10800
10900
11000
11100
11200
11300
11400
11500
11600
11700
11800
11900
12000
12100
12200
12300
12400
12500
12600
12700
12800
12900
13000


In [4]:
# Function to create a 10x10 matrix from an array
def create_10x10_matrix(array):
    if len(array) >= 100:
        return array[:100].reshape(10, 10)
    else:
        return None  # Not enough elements to create a 10x10 matrix


# Create a list to store the matrices
matrix_list = []

# Iterate through the keys and create matrices
for key in features:
    matrix = create_10x10_matrix(features[key])
    if matrix is not None:
        matrix_list.append(matrix)

# Convert the list of matrices to a NumPy array
matrix_array = np.array(matrix_list)

# Example: Print the shape of the resulting array
print("Shape of the matrix array:", matrix_array.shape)
matrix_array[0]
np.save("mfcc2_data", matrix_array)

Shape of the matrix array: (13100, 10, 10)


In [5]:
# Assuming you have a 3D NumPy array 'matrix_array' containing your 10x10 matrices
# The shape of matrix_array should be (num_matrices, 10, 10)

# Reshape to add the channel dimension
matrix_array = matrix_array.reshape(matrix_array.shape[0], 10, 10, 1)
y = np.load("E:\\ML project\\codes\\nlp\\cnn_output.npy")  # normailized
y = y.reshape(-1, 30)

In [6]:
# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    matrix_array, y, test_size=0.2, random_state=42
)

# Reshape to add the channel dimension
X_train_reshaped = X_train.reshape(X_train.shape[0], 10, 10, 1)
X_val_reshaped = X_val.reshape(X_val.shape[0], 10, 10, 1)


# Define a learning rate scheduler function
def lr_scheduler(epoch, lr):
    return lr * 0.95  # Adjust the multiplier as needed


# Build a simple CNN model
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation=None, input_shape=(10, 10, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation=None))
model.add(Dense(30, activation=None))

# Use the Adam optimizer with the learning rate scheduler
optimizer = Adam(learning_rate=0.02)  # Set the initial learning rate
model.compile(optimizer=optimizer, loss="mse", metrics=["mse"])

# Define the learning rate scheduler callback
lr_scheduler_callback = LearningRateScheduler(lr_scheduler, verbose=1)

In [7]:
# Train the model with the learning rate scheduler callback
history = model.fit(
    X_train_reshaped,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_val_reshaped, y_val),
    callbacks=[lr_scheduler_callback],
)


Epoch 1: LearningRateScheduler setting learning rate to 0.018999999575316905.
Epoch 1/100

Epoch 2: LearningRateScheduler setting learning rate to 0.018049999419599772.
Epoch 2/100

Epoch 3: LearningRateScheduler setting learning rate to 0.017147500067949295.
Epoch 3/100

Epoch 4: LearningRateScheduler setting learning rate to 0.016290125064551828.
Epoch 4/100

Epoch 5: LearningRateScheduler setting learning rate to 0.015475618280470371.
Epoch 5/100

Epoch 6: LearningRateScheduler setting learning rate to 0.01470183772034943.
Epoch 6/100

Epoch 7: LearningRateScheduler setting learning rate to 0.013966745790094137.
Epoch 7/100

Epoch 8: LearningRateScheduler setting learning rate to 0.013268408412113785.
Epoch 8/100

Epoch 9: LearningRateScheduler setting learning rate to 0.012604987947270274.
Epoch 9/100

Epoch 10: LearningRateScheduler setting learning rate to 0.011974738771095872.
Epoch 10/100

Epoch 11: LearningRateScheduler setting learning rate to 0.011376001965254545.
Epoch 11/

In [8]:
# Make predictions
predictions = model.predict(X_val_reshaped)

# Calculate MSE on validation set
mse_val = mean_squared_error(y_val, predictions)
print("Mean Squared Error on Validation Set:", mse_val)

Mean Squared Error on Validation Set: 0.0376098087706962
