<a href="https://colab.research.google.com/github/Bitang-Melyen-Tanulok/Csip_Csip/blob/main/Audio_augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')

path = '/content/drive/MyDrive/DeepLearning/train_audio'

# One row per class folder: [ID, folder name, file count, total size in KB]
data = []

for i, folder in enumerate(os.listdir(path)):
  folder_path = os.path.join(path, folder)
  # Skip stray files lying next to the class folders; os.listdir() would raise on them
  if not os.path.isdir(folder_path):
    continue
  # List the folder once and reuse the listing for both the count and the size
  files = os.listdir(folder_path)
  size_kb = sum(os.path.getsize(os.path.join(folder_path, file)) for file in files) / 1024
  data.append([i, folder, len(files), size_kb])

# sort rows into ascending order by file count (3rd column)
data.sort(key=lambda x: x[2])

# create pandas DataFrame for better visualization
df = pd.DataFrame(data, columns=["ID", "Folder", "File count", "File size(KB)"])

# display DataFrame
display(df)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,ID,Folder,File count,File size(KB)
0,5,asiope1,5,521.342773
1,92,integr,5,579.404297
2,119,niwpig1,5,389.730469
3,16,blaeag1,6,944.285156
4,178,wynlau1,6,2686.714844
...,...,...,...,...
177,82,houspa,500,319008.625000
178,105,lirplo,500,127083.743164
179,107,litgre1,500,94257.537109
180,177,woosan,500,108092.727539


In [2]:
import numpy as np
# Median of the per-folder file counts collected in `df`
file_counts = df["File count"].to_numpy()
median_file_count = np.median(file_counts)
print(f"Median audio count: {median_file_count}")

Median audio count: 60.5


The median number of audio files per folder is 60.5. Folders that contain fewer files than this (i.e. fewer than 61) are extended with augmented copies of their recordings, to increase their number of audio files and better balance the dataset.


In [3]:
!pip install torch
!pip install librosa
!pip install albumentations
!pip install --upgrade albumentations
!pip install soundfile



In [4]:
import librosa
import librosa.display
import torch
import torch.nn as nn
import albumentations as A
import random
import soundfile as sf

# Load audio file and generate spectrogram
def load_audio_and_spectrogram(file_path, sr=16000):
    """Read an audio file and compute its Mel spectrogram on the dB scale.

    Args:
        file_path: path of the audio file, handed to ``librosa.load``.
        sr: sample rate requested from ``librosa.load`` (default 16000).

    Returns:
        A tuple ``(audio, spectrogram_db)``: the loaded waveform and its Mel
        spectrogram converted to dB relative to the spectrogram's own maximum.
    """
    waveform, sample_rate = librosa.load(file_path, sr=sr)

    # Power Mel spectrogram, computed at the rate librosa.load reported back
    mel_power = librosa.feature.melspectrogram(y=waveform, sr=sample_rate)

    # ref=np.max expresses every bin in dB relative to the loudest bin
    return waveform, librosa.power_to_db(mel_power, ref=np.max)


# Augmentation pipeline
def augment_spectrogram(spectrogram):
    """Randomly flip a spectrogram and/or blank out rectangular blocks of it.

    Args:
        spectrogram: spectrogram array, passed to albumentations as the ``image``.

    Returns:
        The augmented spectrogram.
    """
    # Value written into dropped blocks: the quietest bin of this spectrogram.
    # CoarseDropout's default fill is 0; on a dB spectrogram referenced to its own
    # maximum (every value <= 0) that would paint the blocks with the loudest
    # possible value instead of removing energy.
    quietest = float(spectrogram.min())

    # Create an augmentation composition using the 'albumentations' library
    augmentations = A.Compose([
        # HorizontalFlip reverses the column order of the spectrogram with a 50% probability.
        A.HorizontalFlip(p=0.5),

        # Choose one augmentation option (Cutout is not available in this
        # albumentations version, so CoarseDropout is the only candidate)
        A.OneOf([
            # CoarseDropout randomly removes blocks from the spectrogram
            A.CoarseDropout(max_holes=8, max_height=5, max_width=16,
                            fill_value=quietest, p=1.0),
        ], p=0.5)  # Choose from augmentation options with a 50% probability
    ])

    # Apply augmentation to the spectrogram
    augmented = augmentations(image=spectrogram)

    # Return the augmented (modified) spectrogram
    return augmented["image"]


# Apply Mixup
class MixupV2(nn.Module):
    """Blend every sample of a batch with a randomly chosen sample of the same batch.

    Args:
        mix_range: ``(low, high)`` bounds of the uniform distribution from which the
            per-sample blend weight is drawn.
        add_label: if True, the labels of the two blended samples are summed and
            clamped to [0, 1]; if False, the labels are blended with the same
            weight as the inputs.
    """

    def __init__(self, mix_range=(0.3, 0.7), add_label=True):
        super().__init__()
        # Uniform distribution for determining the Mixup weight
        self.distribution = torch.distributions.Uniform(low=mix_range[0], high=mix_range[1])
        # Decision to add the labels (True) or to blend them (False)
        self.add_label = add_label

    @staticmethod
    def _per_sample(coeffs, like):
        """Reshape ``(bs,)`` weights to ``(bs, 1, ..., 1)`` so they broadcast over ``like``."""
        return coeffs.view(-1, *([1] * (like.dim() - 1)))

    def forward(self, X, Y):
        """Mix inputs and labels along the first (batch) axis.

        Args:
            X: input batch of any rank >= 1; axis 0 is the batch axis.
            Y: labels, indexed along axis 0 with the same permutation as ``X``.

        Returns:
            Tuple ``(X, Y)`` of mixed inputs and mixed labels.
        """
        # Batch size
        bs = X.shape[0]

        # Random partner for every sample of the batch
        perm = torch.randperm(bs)

        # One blend weight per sample, drawn from the configured range
        coeffs = self.distribution.rsample(torch.Size((bs,))).to(X.device)

        # Blend each sample with its partner. The weight is reshaped to the rank of X,
        # so inputs of any dimensionality are mixed per sample.
        weight_x = self._per_sample(coeffs, X)
        X = weight_x * X + (1 - weight_x) * X[perm]

        if self.add_label:
            # Union of both label sets, capped at 1
            Y = Y + Y[perm]
            Y = torch.clamp(Y, 0, 1.0)
        else:
            # Blend the labels with the same per-sample weight as the inputs
            weight_y = self._per_sample(coeffs, Y)
            Y = weight_y * Y + (1 - weight_y) * Y[perm]

        return X, Y


In [None]:
# Augment and save files
def augment_and_save_files(folder_path, min_file_count=61, sr=16000):
    """Write one augmented copy of every .ogg file in under-populated folders.

    Every directory below ``folder_path`` (walked recursively) that holds fewer
    than ``min_file_count`` .ogg files is processed: each file is loaded, its dB
    Mel spectrogram is augmented, the augmented spectrogram is converted back to
    a waveform and saved next to the source file as
    ``<folder name>_<number of .ogg files>_<original file name>``.

    Args:
        folder_path: root directory to walk.
        min_file_count: directories with at least this many .ogg files are skipped.
        sr: sample rate used for loading, spectrogram inversion and saving.

    Notes:
        - The waveform is rebuilt from a Mel spectrogram by phase estimation, so the
          saved audio is an approximation and the inversion is much slower than loading.
        - Files written by an earlier run also end in ".ogg", so they count towards
          ``min_file_count`` and are used as sources if the function is run again.
    """
    # MixupV2 keeps no per-file state, so a single instance serves every file
    mixup = MixupV2()

    for root, _, files in os.walk(folder_path):
        ogg_files = [f for f in files if f.endswith(".ogg")]

        # Only process folders that hold fewer files than the threshold
        if len(ogg_files) >= min_file_count:
            continue

        folder_name = os.path.basename(root)
        for file_name in ogg_files:
            file_path = os.path.join(root, file_name)
            audio, spectrogram = load_audio_and_spectrogram(file_path, sr=sr)

            # Spectrogram augmentations
            augmented_spectrogram = augment_spectrogram(spectrogram)

            # Apply Mixup on a batch of one. The only possible partner of the sample is
            # the sample itself, so this leaves the spectrogram unchanged up to rounding;
            # the call is kept so the pipeline follows the same steps as before.
            augmented_spectrogram_tensor = torch.tensor(augmented_spectrogram).unsqueeze(0)  # add batch axis
            labels = torch.tensor([1.0])  # Example label
            mixed_spectrogram, _ = mixup(augmented_spectrogram_tensor, labels)

            # Convert the augmented dB Mel spectrogram back to audio; previously the
            # augmented result was dropped and the unmodified waveform was saved.
            augmented_power = librosa.db_to_power(mixed_spectrogram.squeeze(0).detach().cpu().numpy())
            augmented_audio = librosa.feature.inverse.mel_to_audio(
                augmented_power, sr=sr, length=len(audio)
            )

            # The dB spectrogram is relative to its own maximum, so the absolute level
            # is lost; rescale the result to the peak amplitude of the source audio.
            peak = float(np.max(np.abs(audio))) if audio.size else 0.0
            augmented_audio = librosa.util.normalize(augmented_audio) * peak

            # Save augmented files
            new_file_name = f"{folder_name}_{len(ogg_files)}_{file_name}"
            new_file_path = os.path.join(root, new_file_name)
            sf.write(new_file_path, augmented_audio, int(sr))  # Save the audio
            print(f'New file has been created with name: {new_file_name}')

# Run the augmentation for every directory directly below the dataset root
for folder in os.listdir(path):
    folder_path = os.path.join(path, folder)
    if not os.path.isdir(folder_path):
        continue  # plain files next to the class folders are ignored
    augment_and_save_files(folder_path)

print("Augmentation completed.")

New file has been created with name: ashwoo2_45_XC169810.ogg
New file has been created with name: ashwoo2_45_XC125152.ogg
New file has been created with name: ashwoo2_45_XC178326.ogg
New file has been created with name: ashwoo2_45_XC186765.ogg
New file has been created with name: ashwoo2_45_XC341867.ogg
New file has been created with name: ashwoo2_45_XC319072.ogg
New file has been created with name: ashwoo2_45_XC21842.ogg
New file has been created with name: ashwoo2_45_XC318460.ogg
New file has been created with name: ashwoo2_45_XC396949.ogg
New file has been created with name: ashwoo2_45_XC399855.ogg
New file has been created with name: ashwoo2_45_XC381367.ogg
New file has been created with name: ashwoo2_45_XC402514.ogg
New file has been created with name: ashwoo2_45_XC424487.ogg
New file has been created with name: ashwoo2_45_XC402826.ogg
New file has been created with name: ashwoo2_45_XC447333.ogg
New file has been created with name: ashwoo2_45_XC442123.ogg
New file has been created