In [None]:
import os
import librosa
import json
import math
import numpy as np

print("Step 1 completed: Necessary libraries imported.")

In [None]:
# Audio preprocessing settings read by the feature-extraction function below
SAMPLE_RATE, DURATION = 22050, 30  # sr handed to librosa.load; DURATION is multiplied by it below
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE  # minimum number of samples a track must have
N_MFCC, N_FFT, HOP_LENGTH = 13, 2048, 512  # forwarded to librosa.feature.mfcc
NUM_SEGMENTS = 5  # same value the extraction call passes as num_segments

print("Step 2 completed: Preprocessing parameters defined.")

In [None]:
# Function to initialize the data structure
def initialize_data_structure():
    """
    Build the empty container used to collect extraction results.

    :return: A dictionary with the keys "mapping", "mfcc" and "labels" (in that order),
             each bound to its own new empty list.
    """
    # A comprehension creates a separate list object per key on every call.
    return {field: [] for field in ("mapping", "mfcc", "labels")}

# Create the empty container and echo it so its layout can be checked by eye
data = initialize_data_structure()
print("Step 3 completed: Data structure initialized.", data, sep="\n")

In [None]:
import os
import librosa
import numpy as np
import math

def save_mfcc_to_npy(dataset_path, output_dir, num_segments=5):
    """
    Extracts MFCCs from audio dataset and saves them into .npy files.

    Every directory found below ``dataset_path`` is treated as one genre and receives the
    integer label ``walk_index - 1``. Directories and files are visited in sorted name
    order so the labels and the sample order are identical on every run and platform.
    Each file is loaded at SAMPLE_RATE; files with fewer than SAMPLES_PER_TRACK samples
    are skipped, and the leading samples of the remaining files are cut into
    ``num_segments`` equal-length segments, one MFCC matrix (frames x N_MFCC) per segment.

    Writes ``mfccs.npy`` and ``labels.npy`` into ``output_dir`` and, when at least one file
    raised during loading/processing, ``failed_files.txt`` listing those paths.

    :param dataset_path: Path to the dataset containing subfolders for each genre.
    :param output_dir: Directory to save the extracted MFCCs and labels as .npy files.
    :param num_segments: Number of segments to divide each track into.
    """
    # Accumulators for features, labels and files that could not be processed
    all_mfccs = []
    all_labels = []
    failed_files = []

    # Segment length in samples
    samples_per_segment = SAMPLES_PER_TRACK // num_segments
    # With librosa's default centred framing a signal of n samples yields 1 + n // hop
    # frames. (math.ceil(n / hop) is one frame short whenever n is a multiple of the hop
    # length, which would make every segment fail the shape check below.)
    num_mfcc_vectors_per_segment = 1 + samples_per_segment // HOP_LENGTH

    # Walk through all subdirectories and files in the dataset
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        # Sorting in place fixes the order in which os.walk descends, so the
        # enumeration index (and therefore each genre's label) is deterministic.
        dirnames.sort()

        # The dataset root itself holds no genre
        if dirpath == dataset_path:
            continue

        # Genre name is the folder name; it is only used for progress output
        genre_label = os.path.basename(dirpath)
        print(f"Processing genre: {genre_label}")

        # Process each file in the genre subdirectory, in a stable order
        for file_name in sorted(filenames):
            file_path = os.path.join(dirpath, file_name)

            try:
                # Load the audio file, resampled to SAMPLE_RATE
                signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

                # Ensure the audio file is long enough
                if len(signal) < SAMPLES_PER_TRACK:
                    print(f"Skipped {file_name}: Audio too short.")
                    continue

                for s in range(num_segments):
                    start_sample = samples_per_segment * s
                    end_sample = start_sample + samples_per_segment

                    try:
                        # Extract MFCC for the segment
                        mfcc = librosa.feature.mfcc(
                            y=signal[start_sample:end_sample],
                            sr=sr,
                            n_mfcc=N_MFCC,
                            n_fft=N_FFT,
                            hop_length=HOP_LENGTH
                        )
                        mfcc = mfcc.T  # Transpose to have time steps as rows

                        # Keep only matrices with the expected number of frames so
                        # that the list stacks into one rectangular array
                        if len(mfcc) == num_mfcc_vectors_per_segment:
                            all_mfccs.append(mfcc)
                            all_labels.append(i - 1)  # Subtract 1 for zero-based indexing
                            print(f"Processed file: {file_name}, segment: {s+1}")
                        else:
                            print(f"Skipped segment {s+1} of file {file_name}: Unexpected MFCC shape.")
                    except Exception as e:
                        # A failing segment is reported but does not abort the file
                        print(f"Error processing segment {s+1} of file {file_name}: {e}")
            except Exception as e:
                # Unreadable / non-audio files are recorded and the run continues
                print(f"Error processing {file_path}: {e}")
                failed_files.append(file_path)

    # Convert lists to NumPy arrays
    all_mfccs = np.array(all_mfccs)
    all_labels = np.array(all_labels)

    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save as .npy files
    np.save(os.path.join(output_dir, "mfccs.npy"), all_mfccs)
    np.save(os.path.join(output_dir, "labels.npy"), all_labels)

    print(f"MFCCs and labels saved to {output_dir}")

    # Log failed files
    if failed_files:
        failed_log_path = os.path.join(output_dir, "failed_files.txt")
        with open(failed_log_path, "w") as log_file:
            for failed_file in failed_files:
                log_file.write(f"{failed_file}\n")
        print(f"Failed files log saved to {failed_log_path}")

In [None]:
# Where the genre folders live and where the .npy output is written
dataset_path = r"C:\Users\schnuller\Desktop\ECE1513H\Project\Data\genres_original"
output_dir = r"C:\Users\schnuller\Desktop\ECE1513H\Project\Saved NPY Files"

# Run the extraction, cutting every track into five segments
save_mfcc_to_npy(
    dataset_path=dataset_path,
    output_dir=output_dir,
    num_segments=5,
)

In [None]:
import numpy as np  # Ensure the NumPy module is correctly imported
import os  # Used for file and directory operations

# Read back the MFCC features and labels from the saved .npy files
X = np.load(r"C:\Users\schnuller\Desktop\ECE1513H\Project\Saved NPY Files\mfccs.npy")
y = np.load(r"C:\Users\schnuller\Desktop\ECE1513H\Project\Saved NPY Files\labels.npy")

print(f"Loaded dataset: X shape = {X.shape}, y shape = {y.shape}")

# Store an unaugmented copy of both arrays under separate file names
original_output_dir = r"C:\Users\schnuller\Desktop\ECE1513H\Project\Saved NPY Files"
os.makedirs(original_output_dir, exist_ok=True)

for file_name, array in (("X_original.npy", X), ("y_original.npy", y)):
    np.save(os.path.join(original_output_dir, file_name), array)

print(f"Original dataset saved to: {original_output_dir}")

In [None]:
# Import required libraries
import os
import librosa
import json
import math
import numpy as np

print("Step 1 completed: Necessary libraries imported.")

In [None]:
# Audio preprocessing settings read by the feature-extraction function below
SAMPLE_RATE, DURATION = 22050, 30  # sr handed to librosa.load; DURATION is multiplied by it below
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE  # minimum number of samples a track must have
N_MFCC, N_FFT, HOP_LENGTH = 13, 2048, 512  # forwarded to librosa.feature.mfcc
NUM_SEGMENTS = 5  # same value the extraction call passes as num_segments

print("Step 2 completed: Preprocessing parameters defined.")

In [None]:
# Function to initialize the data structure
def initialize_data_structure():
    """
    Build the empty container used to collect extraction results.

    :return: A dictionary with the keys "mapping", "mfcc" and "labels" (in that order),
             each bound to its own new empty list.
    """
    # A comprehension creates a separate list object per key on every call.
    return {field: [] for field in ("mapping", "mfcc", "labels")}

# Create the empty container and echo it so its layout can be checked by eye
data = initialize_data_structure()
print("Step 3 completed: Data structure initialized.", data, sep="\n")

In [None]:
import os
import librosa
import numpy as np
import math

def save_mfcc_to_npy(dataset_path, output_dir, num_segments=5):
    """
    Extracts MFCCs from audio dataset and saves them into .npy files.

    Every directory found below ``dataset_path`` is treated as one genre and receives the
    integer label ``walk_index - 1``. Directories and files are visited in sorted name
    order so the labels and the sample order are identical on every run and platform.
    Each file is loaded at SAMPLE_RATE; files with fewer than SAMPLES_PER_TRACK samples
    are skipped, and the leading samples of the remaining files are cut into
    ``num_segments`` equal-length segments, one MFCC matrix (frames x N_MFCC) per segment.

    Writes ``mfccs.npy`` and ``labels.npy`` into ``output_dir`` and, when at least one file
    raised during loading/processing, ``failed_files.txt`` listing those paths.

    :param dataset_path: Path to the dataset containing subfolders for each genre.
    :param output_dir: Directory to save the extracted MFCCs and labels as .npy files.
    :param num_segments: Number of segments to divide each track into.
    """
    # Accumulators for features, labels and files that could not be processed
    all_mfccs = []
    all_labels = []
    failed_files = []

    # Segment length in samples
    samples_per_segment = SAMPLES_PER_TRACK // num_segments
    # With librosa's default centred framing a signal of n samples yields 1 + n // hop
    # frames. (math.ceil(n / hop) is one frame short whenever n is a multiple of the hop
    # length, which would make every segment fail the shape check below.)
    num_mfcc_vectors_per_segment = 1 + samples_per_segment // HOP_LENGTH

    # Walk through all subdirectories and files in the dataset
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        # Sorting in place fixes the order in which os.walk descends, so the
        # enumeration index (and therefore each genre's label) is deterministic.
        dirnames.sort()

        # The dataset root itself holds no genre
        if dirpath == dataset_path:
            continue

        # Genre name is the folder name; it is only used for progress output
        genre_label = os.path.basename(dirpath)
        print(f"Processing genre: {genre_label}")

        # Process each file in the genre subdirectory, in a stable order
        for file_name in sorted(filenames):
            file_path = os.path.join(dirpath, file_name)

            try:
                # Load the audio file, resampled to SAMPLE_RATE
                signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

                # Ensure the audio file is long enough
                if len(signal) < SAMPLES_PER_TRACK:
                    print(f"Skipped {file_name}: Audio too short.")
                    continue

                for s in range(num_segments):
                    start_sample = samples_per_segment * s
                    end_sample = start_sample + samples_per_segment

                    try:
                        # Extract MFCC for the segment
                        mfcc = librosa.feature.mfcc(
                            y=signal[start_sample:end_sample],
                            sr=sr,
                            n_mfcc=N_MFCC,
                            n_fft=N_FFT,
                            hop_length=HOP_LENGTH
                        )
                        mfcc = mfcc.T  # Transpose to have time steps as rows

                        # Keep only matrices with the expected number of frames so
                        # that the list stacks into one rectangular array
                        if len(mfcc) == num_mfcc_vectors_per_segment:
                            all_mfccs.append(mfcc)
                            all_labels.append(i - 1)  # Subtract 1 for zero-based indexing
                            print(f"Processed file: {file_name}, segment: {s+1}")
                        else:
                            print(f"Skipped segment {s+1} of file {file_name}: Unexpected MFCC shape.")
                    except Exception as e:
                        # A failing segment is reported but does not abort the file
                        print(f"Error processing segment {s+1} of file {file_name}: {e}")
            except Exception as e:
                # Unreadable / non-audio files are recorded and the run continues
                print(f"Error processing {file_path}: {e}")
                failed_files.append(file_path)

    # Convert lists to NumPy arrays
    all_mfccs = np.array(all_mfccs)
    all_labels = np.array(all_labels)

    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save as .npy files
    np.save(os.path.join(output_dir, "mfccs.npy"), all_mfccs)
    np.save(os.path.join(output_dir, "labels.npy"), all_labels)

    print(f"MFCCs and labels saved to {output_dir}")

    # Log failed files
    if failed_files:
        failed_log_path = os.path.join(output_dir, "failed_files.txt")
        with open(failed_log_path, "w") as log_file:
            for failed_file in failed_files:
                log_file.write(f"{failed_file}\n")
        print(f"Failed files log saved to {failed_log_path}")

In [None]:
# Where the genre folders live and where the .npy output is written
dataset_path = r"C:\Users\schnuller\Desktop\ECE1513H\Project\Data\genres_original"
output_dir = r"C:\Users\schnuller\Desktop\ECE1513H\Project\Saved NPY Files"

# Run the extraction, cutting every track into five segments
save_mfcc_to_npy(
    dataset_path=dataset_path,
    output_dir=output_dir,
    num_segments=5,
)

In [None]:

# 1. Time Shifting
def time_shift(mfcc, shift=10):
    """
    Circularly shift an array along its first axis.

    :param mfcc: Array to shift; the input is not modified.
    :param shift: Number of positions to roll along axis 0; entries pushed past
                  the end wrap around to the start.
    :return: A new array with the rolled contents.
    """
    rolled = np.roll(mfcc, shift=shift, axis=0)
    return rolled

# 2. Frequency Masking
def frequency_mask(mfcc, mask_factor=5):
    """
    Zero out one randomly placed band of consecutive columns (axis 1).

    For the frames-by-coefficients matrices used in this notebook, axis 1 is the
    MFCC-coefficient axis. The input array is left untouched; a masked copy is returned.

    :param mfcc: 2-D array to mask.
    :param mask_factor: Width of the band in columns. Values larger than the number of
                        columns mask every column; values <= 0 mask nothing.
    :return: Copy of ``mfcc`` with the chosen columns set to 0.
    """
    mfcc = mfcc.copy()
    num_columns = mfcc.shape[1]

    # Clamp the band so an over-wide mask blanks everything instead of raising
    width = min(mask_factor, num_columns)
    if width <= 0:
        return mfcc

    # randint's upper bound is exclusive, so +1 keeps the last valid start
    # position (the band that ends on the final column) reachable.
    freq_start = np.random.randint(0, num_columns - width + 1)
    mfcc[:, freq_start:freq_start + width] = 0
    return mfcc

# 3. Time Masking
def time_mask(mfcc, mask_factor=5):
    """
    Zero out one randomly placed run of consecutive rows (axis 0).

    For the frames-by-coefficients matrices used in this notebook, axis 0 is the
    time-frame axis. The input array is left untouched; a masked copy is returned.

    :param mfcc: 2-D array to mask.
    :param mask_factor: Length of the run in rows. Values larger than the number of
                        rows mask every row; values <= 0 mask nothing.
    :return: Copy of ``mfcc`` with the chosen rows set to 0.
    """
    mfcc = mfcc.copy()
    num_rows = mfcc.shape[0]

    # Clamp the run so an over-long mask blanks everything instead of raising
    length = min(mask_factor, num_rows)
    if length <= 0:
        return mfcc

    # randint's upper bound is exclusive, so +1 keeps the last valid start
    # position (the run that ends on the final row) reachable.
    time_start = np.random.randint(0, num_rows - length + 1)
    mfcc[time_start:time_start + length, :] = 0
    return mfcc

# 4. Add Noise
def add_noise(mfcc, noise_factor=0.01):
    """
    Return the input plus scaled standard-normal noise.

    :param mfcc: Array to perturb; the input is not modified.
    :param noise_factor: Multiplier applied to the ``np.random.randn`` draw.
    :return: New array of the same shape, ``mfcc + noise_factor * noise``.
    """
    perturbation = np.random.randn(*mfcc.shape) * noise_factor
    return mfcc + perturbation

In [None]:
# List of data augmentation methods; one is drawn at random for every augmented copy
augmentations = [time_shift, frequency_mask, time_mask, add_noise]

# Augmented versions generated per original sample, and the seed that makes the
# random choices (and the random masks / noise) repeatable across runs
AUGMENTED_COPIES_PER_SAMPLE = 2
AUGMENTATION_SEED = 42

# Load preprocessed data (.npy files)
mfccs_path = r"C:\Users\schnuller\Desktop\ECE1513H\Project\Saved NPY Files\mfccs.npy"
labels_path = r"C:\Users\schnuller\Desktop\ECE1513H\Project\Saved NPY Files\labels.npy"

X = np.load(mfccs_path)  # Load MFCC features
y = np.load(labels_path)  # Load labels

# Confirm the data shape
print(f"Loaded dataset: X shape = {X.shape}, y shape = {y.shape}")

# Every sample needs exactly one label before the two arrays are walked in lockstep
assert len(X) == len(y), "Feature and label counts differ."

# The augmentation functions draw from NumPy's global random state, so seeding it
# once here makes the whole augmented dataset reproducible.
np.random.seed(AUGMENTATION_SEED)

# Expand the dataset
augmented_X = []
augmented_y = []

for sample, label in zip(X, y):
    # Original sample
    augmented_X.append(sample)
    augmented_y.append(label)

    # Augmented samples: each copy applies one randomly chosen method to the original
    for _ in range(AUGMENTED_COPIES_PER_SAMPLE):
        augmentation = np.random.choice(augmentations)
        augmented_X.append(augmentation(sample))
        augmented_y.append(label)

# Convert to NumPy arrays
augmented_X = np.array(augmented_X)
augmented_y = np.array(augmented_y)

print(f"Original dataset size: {X.shape}")
print(f"Augmented dataset size: {augmented_X.shape}")

# Ensure the augmented dataset dimensions are correct (original + copies per sample)
expected_factor = 1 + AUGMENTED_COPIES_PER_SAMPLE
assert augmented_X.shape[0] == X.shape[0] * expected_factor, "Enhanced dataset size mismatch."
assert augmented_y.shape[0] == y.shape[0] * expected_factor, "Enhanced label size mismatch."

# Add a channel dimension to adapt to CNN input
augmented_X = augmented_X[..., np.newaxis]

# Print the final shape
print(f"Augmented dataset shape (with channel): {augmented_X.shape}")

In [None]:
# Destination folder for the augmented arrays; created if it does not exist yet
output_dir = r"C:\Users\schnuller\Desktop\ECE1513H\Project\Saved NPY Files"
os.makedirs(output_dir, exist_ok=True)

# Write features first, then labels, each under its own file name
for file_name, array in (("augmented_X.npy", augmented_X), ("augmented_y.npy", augmented_y)):
    np.save(os.path.join(output_dir, file_name), array)

print(f"Augmented dataset saved to: {output_dir}")