# Table of Contents

1. Denoising the Dataset
2. Refining the Dataset
   * Categorizing the dataset
3. Augmentation of Dataset

# Denoising the Dataset

*Performs noise reduction on the UrbanSound8K dataset and saves the denoised audio files.*

In [None]:
import os
import librosa
import noisereduce as nr
import soundfile as sf
from tqdm import tqdm  


# Denoise every .wav file found in the sub-folders of input_dir and write the
# result to the same relative location under output_dir.
input_dir = "/kaggle/input/urbansound8k"
output_dir = "/kaggle/working/denoised_urbansound8k"

# Creating output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Paths of files that raised during processing. They are summarised once at
# the end so failures are not lost among the progress-bar output.
failed_files = []

# Processing each folder in the dataset
for folder in tqdm(sorted(os.listdir(input_dir))):
    folder_path = os.path.join(input_dir, folder)

    # Only directories are processed; loose files at the top level are skipped.
    if not os.path.isdir(folder_path):
        continue

    output_folder = os.path.join(output_dir, folder)
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so that the processing order is reproducible between runs.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(".wav"):
            continue
        file_path = os.path.join(folder_path, file)

        try:
            # sr=None asks librosa to keep the file's own sampling rate.
            y, sr = librosa.load(file_path, sr=None)

            # Noise profile: the first 0.5 seconds are assumed to contain
            # only noise. Slicing clamps to the signal length, so for clips
            # shorter than 0.5 s the profile is the entire clip.
            noise_profile = y[:int(sr * 0.5)]

            # NOTE(review): noisereduce >= 2.0 defaults to non-stationary
            # gating; verify that `y_noise` is honoured without passing
            # `stationary=True` -- TODO confirm against the installed version.
            y_denoised = nr.reduce_noise(y=y, sr=sr, y_noise=noise_profile)

            # Saving the denoised audio under the same file name
            sf.write(os.path.join(output_folder, file), y_denoised, sr)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run.
            failed_files.append(file_path)
            print(f"Error processing {file_path}: {e}")

print(f"✅ Noise reduction complete! Denoised files are saved in: {output_dir}")
if failed_files:
    print(f"⚠️ {len(failed_files)} file(s) could not be processed and were skipped.")

# Refining the Dataset

*Processes the denoised UrbanSound8K audio files: generates a Mel spectrogram for each clip, masks its lowest-intensity values, and saves it as a **grayscale image (224x224)** for use in models like ResNet50.*

In [None]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Progress marker for the spectrogram-generation stage.
print("Started")

# Spectrogram images are written below output_dir; the denoised audio they
# are generated from is read from input_dir.
output_dir = "/kaggle/working/spectrograms"
input_dir = "/kaggle/working/denoised_urbansound8k"

# exist_ok=True makes this a no-op when the directory is already present.
os.makedirs(name=output_dir, exist_ok=True)

# Function to apply noise masking (floors the quietest bins; nothing is cropped)
def apply_mask(spectrogram_db):
    """Flatten the quietest 5% of a spectrogram to its minimum value.

    Every value strictly below the 5th percentile of the array is replaced
    by the array's minimum; all other values are kept as they are.

    Args:
        spectrogram_db: Array-like of spectrogram values (any shape).

    Returns:
        A new array with the same shape as the input. The input itself is
        not modified.
    """
    values = np.asarray(spectrogram_db)

    # np.percentile / np.min are undefined on empty input; nothing to mask.
    if values.size == 0:
        return values.copy()

    threshold = np.percentile(values, 5)  # Remove lowest 5% intensity values

    # np.where builds a fresh array, so the caller's data is left untouched.
    return np.where(values < threshold, values.min(), values)

# Rendered figures go through an in-memory PNG buffer instead of a shared
# temp file on disk (stdlib import kept local so this section runs on its own).
from io import BytesIO

# Target from fold1->fold10
for i in range(1, 11):
    folder = f'fold{i}'
    folder_path = os.path.join(input_dir, folder)

    # Folds that do not exist under input_dir are skipped.
    if not os.path.isdir(folder_path):
        continue

    output_folder = os.path.join(output_dir, folder)
    os.makedirs(output_folder, exist_ok=True)

    # Process each audio file in the folder (sorted for a reproducible order)
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(".wav"):
            continue
        file_path = os.path.join(folder_path, file)
        try:
            # Loading the audio file at its own sampling rate
            y, sr = librosa.load(file_path, sr=None)

            # Creating a Mel spectrogram and converting it to dB relative
            # to its own maximum
            spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512)
            spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)

            # Applying noise masking
            spectrogram_db = apply_mask(spectrogram_db)

            # Plotting the spectrogram on a square, axis-free canvas
            fig, ax = plt.subplots(figsize=(10, 10))  # Square size for ResNet
            try:
                ax.set_axis_off()  # Remove axes
                librosa.display.specshow(
                    spectrogram_db, sr=sr, x_axis=None, y_axis=None,
                    cmap='gray_r', fmax=8000, ax=ax,
                )
                png_buffer = BytesIO()
                fig.savefig(png_buffer, format="png", bbox_inches='tight', pad_inches=0, dpi=100)
            finally:
                # Close the figure even when drawing/saving fails; otherwise
                # matplotlib keeps it alive and memory grows with each error.
                plt.close(fig)

            # Resizing for ResNet50 (224x224)
            png_buffer.seek(0)
            with Image.open(png_buffer) as rendered:
                img = rendered.convert("L")  # Convert to greyscale
            img = img.resize((224, 224), Image.Resampling.LANCZOS)  # Use LANCZOS for resampling

            # Output name: "<audio file stem>_spectrogram.png"
            output_path = os.path.join(output_folder, f"{os.path.splitext(file)[0]}_spectrogram.png")
            img.save(output_path)

            print(f"Spectrogram saved: {output_path}")
        except Exception as e:
            # Best effort: report the file and continue with the next one.
            print(f"Error processing {file_path}: {e}")

print("✅ All spectrograms in fold have been processed and saved.")

# Categorizing the Dataset

In [None]:
import shutil
import pandas as pd

# Loading the DataFrame with class information
csv_file = "/kaggle/input/urbansound8k/UrbanSound8K.csv"
df = pd.read_csv(csv_file)

# Clip name without the ".wav" extension; used to look up the clip's image.
df['slice_prefix'] = df['slice_file_name'].str.replace('.wav', '', regex=False)

# Output folder for sorted images (one sub-folder per class is created below)
output_folder = "/kaggle/working/sorted_images/folder1"

# Suffix the spectrogram step appends to the clip name ("<clip>_spectrogram.png").
spectrogram_suffix = "_spectrogram"

# targeting folders (1 to 10 inclusive)
for i in range(1, 11):
    # Define the folder path for the current fold
    image_folder = f"/kaggle/working/spectrograms/fold{i}"

    # Checking if the image folder exists
    if not os.path.exists(image_folder):
        print(f"Image folder not found: {image_folder}")
        continue

    os.makedirs(output_folder, exist_ok=True)

    # Index the fold's images by exact clip name. A prefix test is not enough:
    # clip "...-0-1" is also a prefix of "...-0-117_spectrogram.png", so
    # startswith() could pick a different slice and never copy the real one.
    images_by_clip = {}
    for file in os.listdir(image_folder):
        stem = os.path.splitext(file)[0]
        if stem.endswith(spectrogram_suffix):
            stem = stem[:-len(spectrogram_suffix)]
        images_by_clip[stem] = file

    # Only look at the clips that belong to this fold; otherwise every clip
    # from the other folds is reported as "No matching image found".
    # Falls back to all rows if the CSV has no 'fold' column.
    fold_rows = df[df['fold'] == i] if 'fold' in df.columns else df

    for base_name, class_label in zip(fold_rows['slice_prefix'], fold_rows['class']):
        file = images_by_clip.get(base_name)
        if file is None:
            print(f"No matching image found for {base_name}")
            continue

        class_folder = os.path.join(output_folder, class_label)
        os.makedirs(class_folder, exist_ok=True)

        # Copying the image to the corresponding class folder
        shutil.copy(os.path.join(image_folder, file), os.path.join(class_folder, file))
        print(f"Copied {file} to {class_folder}")

print("Relabeling and sorting complete!")

# Augmentation of Dataset

*To increase the number of samples, we augment the dataset.*

In [None]:
import os
import numpy as np
from PIL import Image
import random
import matplotlib.pyplot as plt
import tensorflow as tf


# Progress marker for the augmentation stage.
print("started")

# Augmented images are written below augmented_dir; the class-sorted images
# they are derived from are read from input_dir.
augmented_dir = "/kaggle/working/augmented_spectrograms"
input_dir = "/kaggle/working/sorted_images/folder1"

# exist_ok=True makes this a no-op when the directory is already present.
os.makedirs(name=augmented_dir, exist_ok=True)

# Time-shift augmentation
def time_shift(img, max_shift=5):
    """Roll an image sideways by a random number of columns.

    The offset is drawn uniformly from [-max_shift, max_shift] (inclusive).
    Columns pushed past one edge wrap around to the opposite edge.

    Args:
        img: Array with at least two dimensions; axis 1 is the horizontal
            (time) axis.
        max_shift: Largest shift, in columns, in either direction.

    Returns:
        A rolled copy of ``img`` with the same shape.
    """
    offset = random.randint(-max_shift, max_shift)
    return np.roll(img, shift=offset, axis=1)

# Frequency-shift augmentation (vertical axis)
def frequency_shift(img, max_shift=5):
    """Roll an image vertically by a random number of rows.

    The offset is drawn uniformly from [-max_shift, max_shift] (inclusive).
    Rows pushed past one edge wrap around to the opposite edge.

    Args:
        img: Array whose axis 0 is the vertical (frequency) axis.
        max_shift: Largest shift, in rows, in either direction.

    Returns:
        A rolled copy of ``img`` with the same shape.
    """
    offset = random.randint(-max_shift, max_shift)
    return np.roll(img, shift=offset, axis=0)

# Function to apply random cropping
def random_crop(img, crop_size=(180, 180)):
    """Cut a randomly positioned window out of an image.

    Args:
        img: Array of shape (height, width) or (height, width, channels).
        crop_size: ``(crop_height, crop_width)`` of the window.

    Returns:
        The ``crop_height`` x ``crop_width`` window (trailing axes, if any,
        are kept whole). The result is a slice of ``img``, not a copy.

    Raises:
        ValueError: If the window does not fit inside the image.
    """
    # Only the first two axes are cropped, so colour images work as well.
    height, width = img.shape[:2]
    crop_height, crop_width = crop_size

    if crop_height > height or crop_width > width:
        raise ValueError(
            f"crop_size {(crop_height, crop_width)} does not fit inside "
            f"an image of size {(height, width)}"
        )

    # randint is inclusive at both ends, so the window may touch the far edge.
    top = random.randint(0, height - crop_height)
    left = random.randint(0, width - crop_width)
    return img[top:top + crop_height, left:left + crop_width]

# Additive-noise augmentation
def add_noise(img, noise_factor=0.1):
    """Add zero-mean Gaussian noise to an image and clamp it to [0, 1].

    Args:
        img: Array of pixel values, expected in the range [0, 1].
        noise_factor: Scale applied to the standard-normal samples.

    Returns:
        A new array with the same shape as ``img``, values clipped to [0, 1].
    """
    perturbation = noise_factor * np.random.randn(*img.shape)
    noisy = img + perturbation
    # Clip to [0, 1] so the later conversion to 8-bit cannot overflow.
    return np.clip(noisy, 0., 1.)

# Function to augment and save the images
def augment_images():
    """Write augmented variants of every PNG found under ``input_dir``.

    Each sub-directory of ``input_dir`` is treated as one class and mirrored
    under ``augmented_dir``. For every ``<name>.png`` five images are saved
    as ``<name>_aug<idx>.png``:

        0: the unchanged image
        1: time_shift
        2: frequency_shift
        3: random_crop (saved at the crop size, 180x180 by default, so it is
           smaller than the other variants)
        4: add_noise

    Reads the module-level ``input_dir`` and ``augmented_dir``; returns None.
    """
    # Sorted so that the processing order is reproducible between runs.
    for class_name in sorted(os.listdir(input_dir)):
        class_path = os.path.join(input_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Create the corresponding folder in the augmented directory
        augmented_class_path = os.path.join(augmented_dir, class_name)
        os.makedirs(augmented_class_path, exist_ok=True)

        # Process each image in the class folder
        for image_name in sorted(os.listdir(class_path)):
            if not image_name.endswith('.png'):
                continue

            image_path = os.path.join(class_path, image_name)
            # The context manager releases the file handle as soon as the
            # pixels have been copied into the array.
            with Image.open(image_path) as source:
                img = np.array(source.convert("L")) / 255.0  # Normalize to [0, 1]

            # Index in this list becomes the "_aug<idx>" suffix.
            augmented_images = [
                img,                    # Keep original image
                time_shift(img),
                frequency_shift(img),
                random_crop(img),
                add_noise(img),
            ]

            # Save augmented images
            stem = os.path.splitext(image_name)[0]
            for idx, augmented_img in enumerate(augmented_images):
                # Round before casting: astype() truncates, which darkens
                # noisy pixels and may shift values whose float round-trip
                # lands just below an integer.
                pixels = np.rint(augmented_img * 255).astype(np.uint8)  # Denormalize to [0, 255]
                Image.fromarray(pixels).save(
                    os.path.join(augmented_class_path, f"{stem}_aug{idx}.png")
                )

            print(f"Augmented images for {image_name} saved in {augmented_class_path}")

    print("✅ Augmentation completed.")


augment_images()