In [11]:
import os
import subprocess
import shutil
# Function to convert any audio file to mp3 and save all mp3 files in a new directory, preserving folder structure
def convert_all_to_mp3(directory, output_directory):
    supported_extensions = ('.aac', '.opus', '.m4a', '.wav', '.flac')

    for root, dirs, files in os.walk(directory):
        for file_name in files:
            input_file = os.path.join(root, file_name)

            # Preserve the relative path in the output folder
            relative_path = os.path.relpath(root, directory)
            output_dir_with_structure = os.path.join(output_directory, relative_path)

            # Create the folder structure in the output directory if it doesn't exist
            if not os.path.exists(output_dir_with_structure):
                os.makedirs(output_dir_with_structure)

            # Destination for the converted or copied mp3 file in the new directory
            output_file = os.path.join(output_dir_with_structure, file_name.rsplit('.', 1)[0] + '.mp3')

            # If the file is already an mp3, copy it to the new directory with folder structure
            if file_name.lower().endswith('.mp3'):
                print(f"Copying {file_name} to {output_dir_with_structure}")
                shutil.copy(input_file, output_file)
                continue

            # Check if the file has a supported non-mp3 extension
            if file_name.lower().endswith(supported_extensions):
                # Temporary file for conversion
                temp_output_file = os.path.join(root, file_name.rsplit('.', 1)[0] + '_temp.mp3')

                try:
                    # Construct the ffmpeg command to convert any file to .mp3
                    command = ['ffmpeg', '-i', input_file, '-codec:a', 'libmp3lame', temp_output_file]
                    # Run the command
                    subprocess.run(command, check=True)
                    print(f"Converted {input_file} to MP3")

                    # Move the converted mp3 file to the output directory, preserving folder structure
                    shutil.move(temp_output_file, output_file)
                    print(f"Saved {output_file} to {output_dir_with_structure}")

                except subprocess.CalledProcessError as e:
                    print(f"Error converting {input_file}: {e}")

# Path to the dataset folder
dataset_folder = './data-set/DATASET_AUDIO'

# Path to the output folder for converted mp3 files, preserving folder structure
output_folder = './data-set/DATASET_AUDIO_CONVERTED'

# Convert all supported audio files in the dataset to mp3 and save to the output folder with structure
convert_all_to_mp3(dataset_folder, output_folder)

Converted ./data-set/DATASET_AUDIO\baby shark\Audio 1.m4a to MP3
Saved ./data-set/DATASET_AUDIO_CONVERTED\baby shark\Audio 1.mp3 to ./data-set/DATASET_AUDIO_CONVERTED\baby shark
Converted ./data-set/DATASET_AUDIO\baby shark\audio 10.m4a to MP3
Saved ./data-set/DATASET_AUDIO_CONVERTED\baby shark\audio 10.mp3 to ./data-set/DATASET_AUDIO_CONVERTED\baby shark
Converted ./data-set/DATASET_AUDIO\baby shark\audio 11.m4a to MP3
Saved ./data-set/DATASET_AUDIO_CONVERTED\baby shark\audio 11.mp3 to ./data-set/DATASET_AUDIO_CONVERTED\baby shark
Converted ./data-set/DATASET_AUDIO\baby shark\audio 12.m4a to MP3
Saved ./data-set/DATASET_AUDIO_CONVERTED\baby shark\audio 12.mp3 to ./data-set/DATASET_AUDIO_CONVERTED\baby shark
Converted ./data-set/DATASET_AUDIO\baby shark\audio 13.m4a to MP3
Saved ./data-set/DATASET_AUDIO_CONVERTED\baby shark\audio 13.mp3 to ./data-set/DATASET_AUDIO_CONVERTED\baby shark
Converted ./data-set/DATASET_AUDIO\baby shark\Audio 2.m4a to MP3
Saved ./data-set/DATASET_AUDIO_CONVER

In [41]:
# Imports
import os
import librosa
import numpy as np
import resampy
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D,Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical

In [42]:
# Dataset path
dataset_folder = './data-set/DATASET_AUDIO_CONVERTED'

# Extract audio features using MFCC, Chroma, and Mel Spectrogram
def extract_features(file_name):
    audio, sample_rate = librosa.load(file_name, res_type='scipy')  # Load audio file
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=60)  # Increase MFCC coefficients to 60
    mfccs_scaled = np.mean(mfccs.T, axis=0)

    # Extract additional features
    chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
    chroma_scaled = np.mean(chroma.T, axis=0)

    mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
    mel_scaled = np.mean(mel.T, axis=0)

    # Concatenate all features
    return np.hstack((mfccs_scaled, chroma_scaled, mel_scaled))

In [43]:
# Prepare dataset
def prepare_dataset(dataset_folder):
    features = []
    labels = []

    for folder in os.listdir(dataset_folder):
        folder_path = os.path.join(dataset_folder, folder)
        if os.path.isdir(folder_path):  # Check if it's a directory
            for file in os.listdir(folder_path):
                if file.endswith(".mp3"):  # Assuming files are in .mp3 format
                    file_path = os.path.join(folder_path, file)
                    # Extract features and store them with their labels
                    features.append(extract_features(file_path))
                    labels.append(folder)  # Use the folder name as the label

    return np.array(features), np.array(labels)

In [None]:
# Load the dataset
X, y = prepare_dataset(dataset_folder)

# Encode labels to numerical values
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split data into training, validation, and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [20]:
# Feature Scaling (Normalization)
from sklearn.preprocessing import StandardScaler
import joblib

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Save the scaler
joblib.dump(scaler, "scaler.pkl")
print("Scaler saved as scaler.pkl")

# Support Vector Machine (SVM) Classifier
svm_model = SVC(kernel='linear', random_state=42)  # You can experiment with different kernels like 'rbf'
svm_model.fit(X_train, y_train)

# Evaluate SVM on validation set
y_val_pred_svm = svm_model.predict(X_val)
val_accuracy_svm = accuracy_score(y_val, y_val_pred_svm)
print(f"Validation Accuracy with SVM: {val_accuracy_svm * 100:.2f}%")

# Evaluate SVM on test set
y_test_pred_svm = svm_model.predict(X_test)
test_accuracy_svm = accuracy_score(y_test, y_test_pred_svm)
print(f"Test Accuracy with SVM: {test_accuracy_svm * 100:.2f}%")

Scaler saved as scaler.pkl
Validation Accuracy with SVM: 36.88%
Test Accuracy with SVM: 33.52%
