In [2]:
import os
import librosa
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers


In [3]:
from zipfile import BadZipFile, ZipFile

file_name = 'ESC50.zip'

# Check if the file is a valid ZIP file by opening it and listing its members.
try:
    # Bind the archive to `archive`, not `zip`: a notebook cell runs at module
    # level, so `as zip` would replace the built-in zip() for every later cell.
    with ZipFile(file_name, 'r') as archive:
        print(archive.namelist())
except (BadZipFile, OSError) as e:
    # BadZipFile: the file exists but is not a ZIP archive.
    # OSError: the file is missing or unreadable.
    # Anything else is unexpected and is allowed to surface.
    print(e)


File is not a zip file


In [5]:

# Function to load an audio file and extract features
def load_audio_file(file_path):
    """Load an audio file and return its MFCC feature matrix.

    Args:
        file_path: Path to an audio file that ``librosa.load`` can decode.

    Returns:
        The array produced by ``librosa.feature.mfcc`` for the decoded signal.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    signal, sr = librosa.load(file_path, sr=None)
    # Pass the signal by keyword: librosa >= 0.10 makes `y` keyword-only, so the
    # positional form raises TypeError there. The keyword form works on older
    # releases as well.
    return librosa.feature.mfcc(y=signal, sr=sr)

# Function to load a dataset
def load_dataset(path):
    """Load every ``.wav`` file in a directory as MFCC features plus a label.

    The label is the text after the last ``-`` in the file name, with the
    extension removed (e.g. ``1-100-A-3.wav`` -> ``"3"``).

    Args:
        path: Directory to scan. Only its direct entries are considered.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays. ``features`` stacks the
        per-file MFCC matrices and ``labels`` holds the matching label strings,
        both ordered by file name.
    """
    files = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any later split or shuffle) reproducible.
    for filename in sorted(os.listdir(path)):
        if not filename.endswith(".wav"):
            continue

        # Extract the class label from the filename.
        stem, _ext = os.path.splitext(filename)
        class_label = stem.split("-")[-1]

        # Load the audio file and extract features.
        files.append(load_audio_file(os.path.join(path, filename)))
        labels.append(class_label)

    return np.array(files), np.array(labels)

# Load the ESC-50 dataset
#ESC50_PATH = "/path/to/ESC-50"
#X_noise, y_noise = load_dataset(ESC50_PATH)

# Convert the ESC-50 dataset into a TensorFlow Dataset
#noise_dataset = tf.data.Dataset.from_tensor_slices((X_noise, y_noise))

# Sampling rate used for the MFCC computation.
# NOTE(review): LibriSpeech is distributed as 16 kHz audio -- confirm.
LIBRISPEECH_SAMPLE_RATE = 16000
# Number of MFCC coefficients (librosa's default), spelled out so the static
# shape of the mapped tensors can be declared below.
N_MFCC = 20


def _waveform_to_mfcc(samples):
    """Compute MFCCs for one waveform given as a NumPy array of samples."""
    # assumes integer 16-bit PCM samples; scale to [-1, 1) floats -- TODO confirm
    waveform = samples.astype(np.float32) / 32768.0
    mfcc = librosa.feature.mfcc(y=waveform, sr=LIBRISPEECH_SAMPLE_RATE, n_mfcc=N_MFCC)
    return mfcc.astype(np.float32)


def speech_to_mfcc(example):
    """Map one dataset example to the MFCC matrix of its 'speech' waveform."""
    # The dataset already yields decoded waveforms, not file paths, so
    # load_audio_file (which calls librosa.load on a path) cannot be used here.
    # librosa is NumPy code, so it is wrapped in tf.numpy_function to run
    # inside the traced Dataset.map function.
    mfcc = tf.numpy_function(_waveform_to_mfcc, [example['speech']], tf.float32)
    # numpy_function drops static shape information; restore what is known.
    mfcc.set_shape([N_MFCC, None])
    return mfcc


# Load the LibriSpeech dataset
# NOTE(review): the TFDS catalog lists LibriSpeech splits such as
# 'train_clean100'; confirm that 'train' resolves to an existing split.
voice_dataset = tfds.load('librispeech', split='train')

# Preprocess the LibriSpeech dataset
voice_dataset = voice_dataset.map(speech_to_mfcc)


OSError: ignored

In [None]:

# Function to create a model
def create_model(input_shape=(None, None, 1)):
    """Build a small CNN that maps a single-channel feature map to one scalar.

    Args:
        input_shape: Shape of one input sample as (height, width, channels).
            The default leaves both spatial dimensions undefined so inputs of
            any size are accepted.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential()

    # Add convolutional layers
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

    # Collapse the spatial dimensions by averaging rather than flattening.
    # With undefined height/width, Flatten yields an undefined feature size and
    # the following Dense layer cannot be built. Global pooling always yields
    # one value per filter.
    model.add(layers.GlobalAveragePooling2D())

    # Add dense layers
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))

    return model

# Create the models
# Only the voice model is instantiated here. The noise model line is left
# commented out, so `noise_model` is not defined by this cell.
#noise_model = create_model()
voice_model = create_model()


In [None]:

# Compile the models
# `noise_model` is never created (its creation is commented out in the model
# cell), so compiling it would raise NameError. Keep it disabled to match.
#noise_model.compile(optimizer='adam', loss='mse')
voice_model.compile(optimizer='adam', loss='mse')


In [None]:

# Train the models
# `noise_model` and `noise_dataset` are never created (both definitions are
# commented out earlier), so the noise training step stays disabled as well.
#noise_model.fit(noise_dataset.batch(32), epochs=10)
# NOTE(review): voice_dataset is mapped to MFCC features only; fit() with an
# 'mse' loss also needs a target per example -- confirm what it should be.
voice_model.fit(voice_dataset.batch(32), epochs=10)


In [None]:

# Save the models
# `noise_model` is never created (its creation is commented out in the model
# cell), so saving it would raise NameError. Keep it disabled to match.
#noise_model.save('noise_model.h5')
voice_model.save('voice_model.h5')


In [None]:

# Evaluate the models
# `noise_model` and `noise_dataset` are never created (both definitions are
# commented out earlier), so the noise evaluation and its report stay disabled.
#noise_evaluation = noise_model.evaluate(noise_dataset.batch(32))
voice_evaluation = voice_model.evaluate(voice_dataset.batch(32))

#print('Noise model evaluation:', noise_evaluation)
print('Voice model evaluation:', voice_evaluation)


In [None]:

# Path of the clip to score
new_audio_file = "/path/to/new/audio/file.wav"

# Extract the MFCC matrix, then add a leading batch axis and a trailing channel
# axis so the array is laid out as (1, *mfcc.shape, 1) for the model
new_audio_mfcc = load_audio_file(new_audio_file)[np.newaxis, ..., np.newaxis]

# Run the voice model on the single-item batch and show its raw output
prediction = voice_model.predict(new_audio_mfcc)
print('Prediction:', prediction)
