In [7]:
import os
import numpy as np
import tensorflow as tf
import librosa
import soundfile as sf
from sklearn.model_selection import train_test_split


In [8]:
# Paths
base_path = os.path.abspath("..")
main_project_path = os.path.join(base_path, 'Main_Project')
raw_sick_cow_path = os.path.join(main_project_path, 'RAW_Sick_Cow')
noise_path = os.path.join(main_project_path, 'Noise')
output_path = os.path.join(main_project_path, 'Autoencoders_Denoised_Sick_Cow')

# Create output directory if it doesn't exist
os.makedirs(output_path, exist_ok=True)


In [9]:
def load_audio_files(directory, sample_rate=16000):
    audio_files = sorted([f for f in os.listdir(directory) if f.endswith('.wav')])
    audio_data = []

    max_length = 0  # Track the maximum length of the mel-spectrograms

    for file in audio_files:
        path = os.path.join(directory, file)
        audio, _ = librosa.load(path, sr=sample_rate)
        mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=128)
        
        max_length = max(max_length, mel_spectrogram.shape[1])  # Update the max length
        audio_data.append(mel_spectrogram)

    # Now pad or truncate the spectrograms to the same length
    padded_audio_data = []
    for mel_spectrogram in audio_data:
        if mel_spectrogram.shape[1] < max_length:
            # Pad with zeros
            padded_mel = np.pad(mel_spectrogram, ((0, 0), (0, max_length - mel_spectrogram.shape[1])), mode='constant')
        else:
            # Truncate to the max length
            padded_mel = mel_spectrogram[:, :max_length]
        padded_audio_data.append(padded_mel)

    return np.array(padded_audio_data), audio_files

# Load raw sick cow audio data
raw_audio_data, raw_audio_files = load_audio_files(raw_sick_cow_path)
# Normalize data
raw_audio_data = raw_audio_data / np.max(raw_audio_data)
# Add a channel dimension
raw_audio_data = raw_audio_data[..., np.newaxis]


Fixing audio padding errors:

In [21]:
#Preprocessing the data once again
# Define a function to pad the spectrograms to a target shape
def pad_spectrograms(spectrograms, target_shape):
    padded_spectrograms = []
    for spectrogram in spectrograms:
        if spectrogram.shape[1] < target_shape[1]:
            # Pad the time dimension to match the target shape
            pad_width = target_shape[1] - spectrogram.shape[1]
            padded_spectrogram = np.pad(spectrogram, ((0, 0), (0, pad_width)), 'constant')
        else:
            # Crop if larger
            padded_spectrogram = spectrogram[:, :target_shape[1]]
        padded_spectrograms.append(padded_spectrogram)
    return np.array(padded_spectrograms)

# Determine the target shape
max_length = max([spectrogram.shape[1] for spectrogram in raw_audio_data])
target_shape = (128, max_length)

# Pad all spectrograms to have the same shape
raw_audio_data_padded = pad_spectrograms(raw_audio_data, target_shape)
raw_audio_data_padded = raw_audio_data_padded[..., np.newaxis]  # Add the channel dimension



In [22]:
#Adjusting autoencoder model

def autoencoder_model(input_shape):
    inputs = tf.keras.Input(shape=input_shape)
    
    # Encoder
    x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = tf.keras.layers.MaxPooling2D((2, 2), padding='same')(x)
    x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = tf.keras.layers.MaxPooling2D((2, 2), padding='same')(x)
    
    # Bottleneck
    x = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    
    # Decoder
    x = tf.keras.layers.UpSampling2D((2, 2))(x)
    x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = tf.keras.layers.UpSampling2D((2, 2))(x)
    x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    
    outputs = tf.keras.layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
    
    model = tf.keras.Model(inputs, outputs)
    return model

input_shape = raw_audio_data_padded.shape[1:]  # This should now be (128, max_length, 1)
autoencoder = autoencoder_model(input_shape)
autoencoder.compile(optimizer='adam', loss='mean_squared_error')


ValueError: Kernel shape must have the same length as input, but received kernel of shape (3, 3, 1, 32) and input of shape (None, 128, 313, 1, 1).

In [20]:
# # Define the Autoencoder Model
# def autoencoder_model(input_shape):
#     inputs = tf.keras.Input(shape=input_shape)
    
#     # Encoder
#     x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
#     x = tf.keras.layers.MaxPooling2D((2, 2), padding='same')(x)
#     x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
#     x = tf.keras.layers.MaxPooling2D((2, 2), padding='same')(x)
    
#     # Bottleneck
#     x = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    
#     # Decoder
#     x = tf.keras.layers.Conv2DTranspose(64, (3, 3), strides=(1, 1), activation='relu', padding='same')(x)
#     x = tf.keras.layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), activation='relu', padding='same')(x)
    
#     outputs = tf.keras.layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
    
#     model = tf.keras.Model(inputs, outputs)
#     return model


# input_shape = raw_audio_data.shape[1:]  # This should be (128, max_length, 1)
# autoencoder = autoencoder_model(input_shape)
# autoencoder.compile(optimizer='adam', loss='mean_squared_error')

# # Split the data
# from sklearn.model_selection import train_test_split

# X_train, X_val = train_test_split(raw_audio_data, test_size=0.1, random_state=42)

# # Train the model
# autoencoder.fit(X_train, X_train, 
#                 epochs=50, 
#                 batch_size=16, 
#                 validation_data=(X_val, X_val))


Epoch 1/50


ValueError: Dimensions must be equal, but are 128 and 32 for '{{node compile_loss/mean_squared_error/sub}} = Sub[T=DT_FLOAT](data_1, functional_11_1/conv2d_47_1/Sigmoid)' with input shapes: [?,128,313,1], [?,32,79,1].

In [None]:
autoencoder.summary()

In [None]:
## Loading and Preprocessing Data;

# def load_audio_files(directory, sample_rate=16000):
#     audio_files = sorted([f for f in os.listdir(directory) if f.endswith('.wav')])
#     audio_data = []

#     for file in audio_files:
#         path = os.path.join(directory, file)
#         audio, _ = librosa.load(path, sr=sample_rate)
#         mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=128)
#         audio_data.append(mel_spectrogram)

#     return np.array(audio_data), audio_files

# # Load raw sick cow audio data
# raw_audio_data, raw_audio_files = load_audio_files(raw_sick_cow_path)
# # Normalize data
# raw_audio_data = raw_audio_data / np.max(raw_audio_data)
# # Add a channel dimension
# raw_audio_data = raw_audio_data[..., np.newaxis]


In [None]:
# def build_autoencoder(input_shape):
#     inputs = tf.keras.Input(shape=input_shape)

#     # Encoder
#     x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
#     x = tf.keras.layers.MaxPooling2D((2, 2), padding='same')(x)
#     x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
#     encoded = tf.keras.layers.MaxPooling2D((2, 2), padding='same')(x)

#     # Decoder
#     x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(encoded)
#     x = tf.keras.layers.UpSampling2D((2, 2))(x)
#     x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(x)
#     x = tf.keras.layers.UpSampling2D((2, 2))(x)
#     decoded = tf.keras.layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

#     autoencoder = tf.keras.Model(inputs, decoded)
#     return autoencoder

# # Define model input shape
# input_shape = (raw_audio_data.shape[1], raw_audio_data.shape[2], 1)
# autoencoder = build_autoencoder(input_shape)
# autoencoder.compile(optimizer='adam', loss='mean_squared_error')
# autoencoder.summary()

# # Split data for training and validation
# X_train, X_val = train_test_split(raw_audio_data, test_size=0.1, random_state=42)

# # Train the model
# autoencoder.fit(X_train, X_train, 
#                 epochs=50, 
#                 batch_size=16, 
#                 validation_data=(X_val, X_val))
