In [13]:
import os
import numpy as np
import pandas as pd

import librosa
import soundfile as sf

from IPython.display import Audio, display

In [2]:
# Check Specie in csv and if its audio, then take the line and save the Path in a new csv: backgrounds.txt
ROOT_PATH = "../"
dataset_csv = ROOT_PATH + 'Data/Dataset/CSVs/dataset.csv'
backgrounds_txt = ROOT_PATH + 'Data/Dataset/TXTs/backgrounds.txt'
backgrounds = []

dataset_df = pd.read_csv(dataset_csv)
for index, row in dataset_df.iterrows():
    if row['specie'] == 'No audio':
        backgrounds.append(row['path'])

with open(backgrounds_txt, 'w') as f:
    for item in backgrounds:
        f.write("%s\n" % item)

In [3]:
def add_noise(data, mean=0, standard_deviation=0.005):
    noise = np.random.normal(mean, standard_deviation, len(data))
    data_noise = data + noise
    return data_noise

def change_intensity(data, intensity_factor=0.5):
    return data * intensity_factor

def pitch_shift(data, sr, n_steps=2):
    return librosa.effects.pitch_shift(data, sr, n_steps)

In [6]:
def augment(data):
    # Apply data augmentations
    data = add_noise(data)
    data = change_intensity(data, intensity_factor=1.25)
    # data = pitch_shift(data, sr)

    return data

def augment_and_play(audio_path):
    # Load the audio file
    data, sr = librosa.load(audio_path)

    # Play the original audio
    print("Original Audio:")
    display(Audio(data, rate=sr))

    data = augment(data)

    # Play the augmented audio
    print("Augmented Audio:")
    display(Audio(data, rate=sr))

    return data

# Choose a random audio file from the dataset
audio_path = np.random.choice(backgrounds)
audio_path = "../Data/Dataset/Audios/" + audio_path
print(audio_path)
augment_and_play(audio_path)

../Data/Dataset/Audios/AM2/2023_07_18/AM02_20230718_064000.WAV
Original Audio:


Augmented Audio:


array([ 0.00484298,  0.00488723, -0.00735518, ..., -0.00138978,
       -0.00050392, -0.00889453])

In [7]:
txt_with_paths_to_augment = backgrounds_txt

In [15]:
def augment_and_save(audio_path):
    # Load the audio file
    data, sr = librosa.load(audio_path)

    # Play the original audio
    # print("Original Audio:")
    # display(Audio(data, rate=sr))

    # Apply data augmentations
    data = augment(data)

    # Play the augmented audio
    # print("Augmented Audio:")
    # display(Audio(data, rate=sr))

    # Create the new path
    old_root = os.path.dirname(audio_path)
    new_root = old_root.replace('Audios', "Audios/Data Augmentation")
    relative_path = os.path.relpath(audio_path, old_root)
    new_path = os.path.join(new_root, relative_path)

    # Create the new directory if it doesn't exist
    new_dir = os.path.dirname(new_path)
    os.makedirs(new_dir, exist_ok=True)

    # Save the data as a wav file in new_path
    sf.write(new_path, data, sr)

# Read the txt file and apply data augmentations to each audio
with open(txt_with_paths_to_augment, 'r') as f:
    for line in f:
        audio_path = line.strip()
        audio_path = "../Data/Dataset/Audios/" + audio_path
        augment_and_save(audio_path)

In [22]:
# Edit paths of backgrounds.txt and add Data Augmentation to the paths and save it in backgrounds_augmented.txt
backgrounds_augmented_txt = ROOT_PATH + 'Data/Dataset/TXTs/backgrounds_augmented.txt'
backgrounds_augmented = []

with open(backgrounds_txt, 'r') as f:
    for line in f:
        audio_path = line.strip()
        audio_path = "Data Augmentation/" + audio_path
        backgrounds_augmented.append(audio_path)

# Generate empty background_augmented file
with open(backgrounds_augmented_txt, 'w') as f:
    for item in backgrounds_augmented:
        f.write("%s\n" % item)

## Generate Mel Spectogram

In [23]:
import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tqdm import tqdm
from io import BytesIO
from PIL import Image

In [27]:
ROOT_PATH = "../"

DATASET_FOLDER = ROOT_PATH + "Data/Dataset/Audios/Data Augmentation"

In [29]:
def create_spectrogram(audio_file):
    y, sr = librosa.load(audio_file, sr=None)
    
    # Create the output path for the image
    output_image_path = audio_file.replace('Audios', 'images').replace(".WAV", ".PNG")
    
    # Ensure the output folder exists
    os.makedirs(os.path.dirname(output_image_path), exist_ok=True)
    
    # Define the frequency range
    fmin = 1  # Minimum frequency (0 Hz)
    fmax = 16000  # Maximum frequency (32000 Hz)

    fig, ax = plt.subplots(figsize=(12, 6))  # Set the background color to black
    D = librosa.amplitude_to_db(librosa.stft(y), ref=np.max)
    librosa.display.specshow(D, sr=sr, x_axis="time", y_axis="log", fmin=fmin, fmax=fmax, ax=ax)  # Specify frequency range
    ax.axis('off')  # Remove axes

    # Save the figure using the output_image_path
    fig.savefig(output_image_path, bbox_inches='tight', pad_inches=0, transparent=True)
    
    # Close the figure to release memory resources
    plt.close(fig)

# Recursively iterate through all subdirectories and audio files
for root, _, files in os.walk(DATASET_FOLDER):
    for file in files:
        if file.endswith('.WAV'):
            audio_file = os.path.join(root, file)
            output_image_path = audio_file.replace('Audios', 'images').replace(".WAV", ".PNG")
            if not os.path.exists(output_image_path): # Skip if the image already exists
                create_spectrogram(audio_file)

  D = librosa.amplitude_to_db(librosa.stft(y), ref=np.max)


In [30]:
# Count number of .WAV files in Dataset Folder and Count number of .PNG files in Images Folder
audio_files = sum([len(files) for _, _, files in os.walk(DATASET_FOLDER)])
image_files = sum([len(files) for _, _, files in os.walk(DATASET_FOLDER.replace('Audios', 'images'))])

print(f"Number of audio files: {audio_files}")
print(f"Number of image files: {image_files}")

Number of audio files: 170
Number of image files: 170
