In [27]:
import os
import re
import numpy as np
import pandas as pd
import librosa
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, MaxPooling1D, BatchNormalization, LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Define the model
model = Sequential()
model.add(Conv1D(2048, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(50, 1)))
model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))
model.add(BatchNormalization())

model.add(Conv1D(1024, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))
model.add(BatchNormalization())

model.add(Conv1D(512, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))
model.add(BatchNormalization())

model.add(LSTM(256, return_sequences=True))
model.add(LSTM(128))

model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='linear'))

# Load and process data
root = '../own_data_train'
file_list = os.listdir(root)  # List all files in the directory

# Initialize lists to store data
data = []

# Extract integers from filenames
for filename in file_list:
    match = re.search(r'\d+', filename)
    if match:
        integer = int(match.group())
        data.append({'cycles': integer, 'filename': filename[:-4]})

# Convert list to DataFrame
df = pd.DataFrame(data, columns=['cycles', 'filename'])

# Parameters
duplication_factor = 1  # Amount of data duplication
max_length = 50  # Maximum length for MFCC features

# Define data augmentation functions
def add_noise(data, x):
    noise = np.random.randn(len(data))
    return data + x * noise

def shift(data, x):
    return np.roll(data, x)

def stretch(data, rate):
    return librosa.effects.time_stretch(data, rate)

def pitch_shift(data, rate):
    return librosa.effects.pitch_shift(data, sr=22050, n_steps=rate)

def pad_mfcc(mfcc, max_length):
    """Pad or truncate MFCC to ensure it has a consistent length."""
    if mfcc.shape[1] < max_length:
        # Pad with zeros
        pad_width = max_length - mfcc.shape[1]
        mfcc = np.pad(mfcc, ((0, 0), (0, pad_width)), mode='constant')
    elif mfcc.shape[1] > max_length:
        # Truncate
        mfcc = mfcc[:, :max_length]
    return mfcc

def mfcc_feature_extraction_rr(dir_):
    X_ = []
    y_ = []
    features = 50
    for sound_file in os.listdir(dir_):
        if sound_file.endswith('.wav'):
            label = list(df[df['filename'] == sound_file[:-4]]['cycles'])[0]
            data_x, sampling_rate = librosa.load(os.path.join(dir_, sound_file), sr=22050, res_type='kaiser_fast')
            mfccs = librosa.feature.mfcc(y=data_x, sr=sampling_rate, n_mfcc=features, n_fft=1024, hop_length=400)
            mfccs = pad_mfcc(mfccs, max_length=mfccs.shape[1])
            X_.append(mfccs)
            y_.append(label)
            
            data_shift = shift(data_x, 1600)
            mfccs_shift = librosa.feature.mfcc(y=data_shift, sr=sampling_rate, n_mfcc=features, n_fft=1024, hop_length=400)
            mfccs_shift = pad_mfcc(mfccs_shift, max_length=mfccs.shape[1])
            X_.append(mfccs_shift)
            y_.append(label)
            
            data_noise = add_noise(data_x, 0.005)
            mfccs_noise = librosa.feature.mfcc(y=data_noise, sr=sampling_rate, n_mfcc=features, n_fft=1024, hop_length=400)
            mfccs_noise = pad_mfcc(mfccs_noise, max_length=mfccs.shape[1])
            X_.append(mfccs_noise)
            y_.append(label)
    
    # Duplicate data
    X_data = np.tile(np.array(X_), (duplication_factor, 1, 1))
    y_data = np.tile(np.array(y_), duplication_factor)
    
    return X_data, y_data

# Extract features
audio_data = root + "/"
res_data, res_y = mfcc_feature_extraction_rr(audio_data)

# Split data into training and validation sets
x_train, x_val, y_train, y_val = train_test_split(res_data, res_y, test_size=0.2, random_state=10)

# Prepare data for LSTM
x_train_lstm = np.expand_dims(x_train, axis=2)
x_val_lstm = np.expand_dims(x_val, axis=2)

# Compile and train the model
optimiser = tf.keras.optimizers.Adam(learning_rate=0.00001)
model.compile(optimizer=optimiser, loss='mean_squared_error', metrics=['mae'])

history = model.fit(
    x_train_lstm, y_train, 
    batch_size=8, 
    epochs=10, 
    validation_data=(x_val_lstm, y_val),
)

# Save the trained model and history
model.save("windowed.h5")
model.save_weights('windowed.weights.h5')

import pickle
with open('windowed_history', 'wb') as file_pi:
    pickle.dump(history.history, file_pi)

# Plot training history
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (114, 50) + inhomogeneous part.