In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
! pip install pydub



In [None]:
!apt-get install -y ffmpeg

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [None]:
!sudo apt update
!sudo apt install ffmpeg libopus0 opus-tools

[33m0% [Working][0m            Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
[33m0% [Connecting to archive.ubuntu.com] [1 InRelease 12.7 kB/129 kB 10%] [Connect[0m                                                                               Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Ign:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Get:5 https://r2u.stat.illinois.edu/ubuntu jammy Release [5,713 B]
Get:6 https://r2u.stat.illinois.edu/ubuntu jammy Release.gpg [793 B]
Hit:7 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:8 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:9 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [998 kB]
Get:10 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]
Get:11 http://security.

In [None]:
import os
import subprocess
import shutil

# Function to convert any audio file to mp3 and save all mp3 files in a new directory, preserving folder structure
def convert_all_to_mp3(directory, output_directory):
    # Supported audio file extensions except mp3
    supported_extensions = ('.aac', '.opus', '.m4a', '.wav', '.flac')

    for root, dirs, files in os.walk(directory):
        for file_name in files:
            input_file = os.path.join(root, file_name)

            # Preserve the relative path in the output folder
            relative_path = os.path.relpath(root, directory)
            output_dir_with_structure = os.path.join(output_directory, relative_path)

            # Create the folder structure in the output directory if it doesn't exist
            if not os.path.exists(output_dir_with_structure):
                os.makedirs(output_dir_with_structure)

            # Destination for the converted or copied mp3 file in the new directory
            output_file = os.path.join(output_dir_with_structure, file_name.rsplit('.', 1)[0] + '.mp3')

            # If the file is already an mp3, copy it to the new directory with folder structure
            if file_name.lower().endswith('.mp3'):
                print(f"Copying {file_name} to {output_dir_with_structure}")
                shutil.copy(input_file, output_file)
                continue

            # Check if the file has a supported non-mp3 extension
            if file_name.lower().endswith(supported_extensions):
                # Temporary file for conversion
                temp_output_file = os.path.join(root, file_name.rsplit('.', 1)[0] + '_temp.mp3')

                try:
                    # Construct the ffmpeg command to convert any file to .mp3
                    command = ['ffmpeg', '-i', input_file, '-codec:a', 'libmp3lame', temp_output_file]
                    # Run the command
                    subprocess.run(command, check=True)
                    print(f"Converted {input_file} to MP3")

                    # Move the converted mp3 file to the output directory, preserving folder structure
                    shutil.move(temp_output_file, output_file)
                    print(f"Saved {output_file} to {output_dir_with_structure}")

                except subprocess.CalledProcessError as e:
                    print(f"Error converting {input_file}: {e}")

# Path to the dataset folder
dataset_folder = '/content/drive/MyDrive/DataSet Of Sounds'

# Path to the output folder for converted mp3 files, preserving folder structure
output_folder = '/content/drive/MyDrive/DataSet Of Sounds/converted_mp3'

# Convert all supported audio files in the dataset to mp3 and save to the output folder with structure
convert_all_to_mp3(dataset_folder, output_folder)


Copying wb9.mp3 to /content/drive/MyDrive/DataSet Of Sounds/converted_mp3/Wheels On The Bus
Converted /content/drive/MyDrive/DataSet Of Sounds/Wheels On The Bus/wb8.m4a to MP3
Saved /content/drive/MyDrive/DataSet Of Sounds/converted_mp3/Wheels On The Bus/wb8.mp3 to /content/drive/MyDrive/DataSet Of Sounds/converted_mp3/Wheels On The Bus
Converted /content/drive/MyDrive/DataSet Of Sounds/Wheels On The Bus/wb6.m4a to MP3
Saved /content/drive/MyDrive/DataSet Of Sounds/converted_mp3/Wheels On The Bus/wb6.mp3 to /content/drive/MyDrive/DataSet Of Sounds/converted_mp3/Wheels On The Bus
Converted /content/drive/MyDrive/DataSet Of Sounds/Wheels On The Bus/wheels on bus.aac to MP3
Saved /content/drive/MyDrive/DataSet Of Sounds/converted_mp3/Wheels On The Bus/wheels on bus.mp3 to /content/drive/MyDrive/DataSet Of Sounds/converted_mp3/Wheels On The Bus
Copying wb10.mp3 to /content/drive/MyDrive/DataSet Of Sounds/converted_mp3/Wheels On The Bus
Converted /content/drive/MyDrive/DataSet Of Sounds/Whe

In [None]:
# Imports
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D,Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical

In [None]:
dataset_folder = '/content/drive/MyDrive/DataSet Of Sounds/converted_mp3'


# Extract audio features using MFCC, Chroma, and Mel Spectrogram
def extract_features(file_name):
    audio, sample_rate = librosa.load(file_name, res_type='scipy')  # Load audio file
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=60)  # Increase MFCC coefficients to 60
    mfccs_scaled = np.mean(mfccs.T, axis=0)

    # Extract additional features
    chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
    chroma_scaled = np.mean(chroma.T, axis=0)

    mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
    mel_scaled = np.mean(mel.T, axis=0)

    # Concatenate all features
    return np.hstack((mfccs_scaled, chroma_scaled, mel_scaled))

In [None]:
# Prepare dataset
def prepare_dataset(dataset_folder):
    features = []
    labels = []

    for folder in os.listdir(dataset_folder):
        folder_path = os.path.join(dataset_folder, folder)
        if os.path.isdir(folder_path):  # Check if it's a directory
            for file in os.listdir(folder_path):
                if file.endswith(".mp3"):  # Assuming files are in .mp3 format
                    file_path = os.path.join(folder_path, file)
                    # Extract features and store them with their labels
                    features.append(extract_features(file_path))
                    labels.append(folder)  # Use the folder name as the label

    return np.array(features), np.array(labels)

In [None]:
# Load the dataset
X, y = prepare_dataset(dataset_folder)

# Encode labels to numerical values
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split data into training, validation, and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [None]:
# Feature Scaling (Normalization)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Support Vector Machine (SVM) Classifier
svm_model = SVC(kernel='linear', random_state=42)  # You can experiment with different kernels like 'rbf'
svm_model.fit(X_train, y_train)

# Evaluate SVM on validation set
y_val_pred_svm = svm_model.predict(X_val)
val_accuracy_svm = accuracy_score(y_val, y_val_pred_svm)
print(f"Validation Accuracy with SVM: {val_accuracy_svm * 100:.2f}%")

# Evaluate SVM on test set
y_test_pred_svm = svm_model.predict(X_test)
test_accuracy_svm = accuracy_score(y_test, y_test_pred_svm)
print(f"Test Accuracy with SVM: {test_accuracy_svm * 100:.2f}%")


Validation Accuracy with SVM: 42.86%
Test Accuracy with SVM: 44.44%


In [None]:
# Random Forest Classifier (a strong classifier for structured data)
rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate on validation set
y_val_pred = rf_model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy with RandomForest: {val_accuracy * 100:.2f}%")

# Evaluate on test set
y_test_pred = rf_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy with RandomForest: {test_accuracy * 100:.2f}%")

Validation Accuracy with RandomForest: 42.86%
Test Accuracy with RandomForest: 44.44%


In [None]:
# Random Forest Classifier (a strong classifier for structured data)
rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate on validation set
y_val_pred = rf_model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy with RandomForest: {val_accuracy * 100:.2f}%")

# Evaluate on test set
y_test_pred = rf_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy with RandomForest: {test_accuracy * 100:.2f}%")

Validation Accuracy with RandomForest: 42.86%
Test Accuracy with RandomForest: 44.44%


In [None]:
#MLP Classifier (Neural Network)
mlp_model = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=500, random_state=42)
mlp_model.fit(X_train, y_train)

# Evaluate MLP on validation set
y_val_pred_mlp = mlp_model.predict(X_val)
val_accuracy_mlp = accuracy_score(y_val, y_val_pred_mlp)
print(f"Validation Accuracy with MLP: {val_accuracy_mlp * 100:.2f}%")

# Evaluate MLP on test set
y_test_pred_mlp = mlp_model.predict(X_test)
test_accuracy_mlp = accuracy_score(y_test, y_test_pred_mlp)
print(f"Test Accuracy with MLP: {test_accuracy_mlp * 100:.2f}%")

Validation Accuracy with MLP: 57.14%
Test Accuracy with MLP: 44.44%


In [None]:
# Reshape data for 1D CNN
X_train_cnn = np.expand_dims(X_train, axis=-1)  # Reshape into 3D array for 1D CNN (samples, time_steps, features)
X_val_cnn = np.expand_dims(X_val, axis=-1)
X_test_cnn = np.expand_dims(X_test, axis=-1)

# One-hot encode the labels for CNN classification
y_train_cnn = to_categorical(y_train)
y_val_cnn = to_categorical(y_val)
y_test_cnn = to_categorical(y_test)

# Create 1D CNN model
def create_1d_cnn_model(input_shape, num_classes):
    model = Sequential()

    # 1st Conv Layer (1D)
    model.add(Conv1D(32, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.3))

    # 2nd Conv Layer (1D)
    model.add(Conv1D(64, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.3))

    # 3rd Conv Layer (1D)
    model.add(Conv1D(128, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.3))

    # Flatten the output
    model.add(Flatten())

    # Fully connected Dense layer
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    # Output Layer
    model.add(Dense(num_classes, activation='softmax'))

    return model

# Define input shape and number of classes
input_shape_cnn = (X_train.shape[1], 1)  # (time_steps, features)
num_classes_cnn = y_train_cnn.shape[1]

# Build CNN model
cnn_model = create_1d_cnn_model(input_shape_cnn, num_classes_cnn)

# Compile the model
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
cnn_model.fit(X_train_cnn, y_train_cnn, validation_data=(X_val_cnn, y_val_cnn), epochs=30, batch_size=32)

# Evaluate CNN on the test set
test_loss_cnn, test_accuracy_cnn = cnn_model.evaluate(X_test_cnn, y_test_cnn)
print(f"Test Accuracy with CNN: {test_accuracy_cnn * 100:.2f}%")

# Make predictions on validation set
y_val_pred_cnn = cnn_model.predict(X_val_cnn)
val_accuracy_cnn = accuracy_score(np.argmax(y_val_cnn, axis=1), np.argmax(y_val_pred_cnn, axis=1))
print(f"Validation Accuracy with CNN: {val_accuracy_cnn * 100:.2f}%")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11s/step - accuracy: 0.3704 - loss: 1.7706 - val_accuracy: 0.2857 - val_loss: 1.4252
Epoch 2/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step - accuracy: 0.4444 - loss: 1.2508 - val_accuracy: 0.4286 - val_loss: 1.4542
Epoch 3/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step - accuracy: 0.4815 - loss: 1.2760 - val_accuracy: 0.2857 - val_loss: 1.4894
Epoch 4/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step - accuracy: 0.6667 - loss: 1.0278 - val_accuracy: 0.2857 - val_loss: 1.4999
Epoch 5/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step - accuracy: 0.7037 - loss: 0.7828 - val_accuracy: 0.2857 - val_loss: 1.4828
Epoch 6/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.7037 - loss: 0.6251 - val_accuracy: 0.1429 - val_loss: 1.4722
Epoch 7/30
[1m1/1[0m [32m━━━━━━━━━━━━━

In [None]:
# Function to predict a single audio file
def predict_single_audio(file_path, model, scaler, label_encoder):
    # Extract features from the new audio file
    features = extract_features(file_path)

    # Reshape for SVM, RandomForest, and MLP (since they expect 2D inputs)
    features = features.reshape(1, -1)

    # Scale the features (apply the same scaler used during training)
    features_scaled = scaler.transform(features)

    # Predict the class using the model
    prediction = model.predict(features_scaled)

    # Decode the prediction to the original label
    predicted_label = label_encoder.inverse_transform(prediction)

    return predicted_label[0]

# Function to predict a single audio file using CNN
def predict_single_audio_cnn(file_path, cnn_model, scaler, label_encoder):
    # Extract features from the new audio file
    features = extract_features(file_path)

    # Scale the features (apply the same scaler used during training)
    features_scaled = scaler.transform(features.reshape(1, -1))  # Reshape to 2D for scaling (1, n_features)

    # Reshape for CNN (since CNN expects 3D inputs)
    features_scaled_cnn = features_scaled.reshape(1, features_scaled.shape[1], 1)

    # Predict the class using the CNN model
    prediction_cnn = cnn_model.predict(features_scaled_cnn)

    # Decode the prediction to the original label
    predicted_label_cnn = label_encoder.inverse_transform([np.argmax(prediction_cnn)])

    return predicted_label_cnn[0]

In [None]:
# Example usage
audio_file_path = "/content/drive/MyDrive/DataSet Of Sounds/converted_mp3/Wheels On The Bus/wb4.mp3"

# Predict using SVM model
svm_prediction = predict_single_audio(audio_file_path, svm_model, scaler, le)
print(f"SVM Prediction: {svm_prediction}")

# Predict using Random Forest model
rf_prediction = predict_single_audio(audio_file_path, rf_model, scaler, le)
print(f"Random Forest Prediction: {rf_prediction}")

# Predict using MLP model
mlp_prediction = predict_single_audio(audio_file_path, mlp_model, scaler, le)
print(f"MLP Prediction: {mlp_prediction}")

# Predict using CNN model
cnn_prediction = predict_single_audio_cnn(audio_file_path, cnn_model, scaler, le)
print(f"CNN Prediction: {cnn_prediction}")


SVM Prediction: Wheels On The Bus
Random Forest Prediction: Wheels On The Bus
MLP Prediction: Wheels On The Bus
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 358ms/step
CNN Prediction: Wheels On The Bus


In [5]:
#using kaggle dataset
! mkdir ~/.kaggle
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [6]:
! chmod 600 ~/.kaggle/kaggle.json

In [7]:
! kaggle datasets download jesusrequena/mlend-hums-and-whistles

Dataset URL: https://www.kaggle.com/datasets/jesusrequena/mlend-hums-and-whistles
License(s): unknown
Downloading mlend-hums-and-whistles.zip to /content
100% 11.1G/11.1G [02:29<00:00, 88.2MB/s]
100% 11.1G/11.1G [02:29<00:00, 79.8MB/s]


In [8]:
import zipfile
import os

# Path to the zip file
zip_path = '/content/mlend-hums-and-whistles.zip'  # Adjust the path if needed

# Directory where the contents will be extracted
extract_to = '/content/ML_dataset'  # You can specify another directory here

# Create the directory if it doesn't exist
if not os.path.exists(extract_to):
    os.makedirs(extract_to)

# Opening the zip file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    # Extracting all the files to the specified directory
    zip_ref.extractall(extract_to)

print("Files extracted successfully!")


Files extracted successfully!


In [9]:
!pip uninstall resampy -y  # Uninstall it first
!pip install resampy      # Reinstall it


[0mCollecting resampy
  Downloading resampy-0.4.3-py3-none-any.whl.metadata (3.0 kB)
Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: resampy
Successfully installed resampy-0.4.3


In [10]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
import resampy
import pickle
print(resampy.__version__)  # This will print the version if it's correctly installed


0.4.3


In [11]:
# Directory of the dataset
dataset_folder = '/content/ML_dataset'

# Define labels based on your dataset structure
labels = {
    'MLEndHWD_Frozen_Audio_Files': 0,
    'MLEndHWD_Hakuna_Audio_Files': 1,
    'MLEndHWD_Mamma_Audio_Files': 2,
    'MLEndHWD_Panther_Audio_Files': 3,
    'MLEndHWD_Potter_Audio_Files': 4,
    'MLEndHWD_Rain_Audio_Files': 5,
    'MLEndHWD_Showman_Audio_Files': 6,
    'MLEndHWD_StarWars_Audio_Files': 7
}

# Audio processing parameters
sample_rate = 22050  # Standard sample rate for audio
n_mfcc = 40          # Number of MFCCs to extract
max_pad_len = 862    # Max padding length

def extract_features(file_name, sample_rate=22050, n_mfcc=40, max_pad_len=862):
    # Load audio file with librosa
    audio, _ = librosa.load(file_name, sr=sample_rate, res_type='kaiser_fast')
    # Extract MFCCs from the audio
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

    # Calculate the padding width
    # Check if the current MFCC has fewer frames than max_pad_len
    if mfccs.shape[1] < max_pad_len:
        # Pad width is the difference if it's less than max_pad_len
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
    elif mfccs.shape[1] > max_pad_len:
        # If more frames than max_pad_len, truncate the excess
        mfccs = mfccs[:, :max_pad_len]

    return mfccs



In [12]:
import os
import numpy as np
import pickle

# Function to process batches of files and labels
def process_batch(batch_files, batch_labels, batch_num):
    X_batch, y_batch = [], []

    for file_path, label in zip(batch_files, batch_labels):
        features = extract_features(file_path)  # Assuming extract_features is defined elsewhere
        X_batch.append(features)
        y_batch.append(label)

    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)

    # Save batches to disk
    with open(f'X_batch_{batch_num}.pkl', 'wb') as f:
        pickle.dump(X_batch, f)
    with open(f'y_batch_{batch_num}.pkl', 'wb') as f:
        pickle.dump(y_batch, f)

    print(f'Batch {batch_num} processed and saved.')

# Assuming you have a dictionary of folder labels
labels = {'MLEndHWD_Frozen_Audio_Files': 0, 'MLEndHWD_Hakuna_Audio_Files': 1, 'MLEndHWD_Mamma_Audio_Files': 2,
          'MLEndHWD_Panther_Audio_Files': 3, 'MLEndHWD_Potter_Audio_Files': 4,
          'MLEndHWD_Rain_Audio_Files': 5, 'MLEndHWD_Showman_Audio_Files':6, 'MLEndHWD_StarWars_Audio_Files':7}  # Example labels

dataset_folder = "/content/ML_dataset"  # Ensure this is correct

all_files, all_labels = [], []

# Loop through each folder and label, collecting files
for folder, label in labels.items():
    folder_path = os.path.join(dataset_folder, folder)

    try:
        # Check if folder exists
        if not os.path.exists(folder_path):
            raise FileNotFoundError(f"Folder not found: {folder_path}")

        # Get files in the folder
        files = [os.path.join(folder_path, f) for f in os.listdir(folder_path)]
        all_files.extend(files)
        all_labels.extend([label] * len(files))

    except FileNotFoundError as e:
        print(e)  # Log the error message but continue execution

# Batch size and batch processing
batch_size = 100
for batch_num in range(0, len(all_files), batch_size):
    batch_files = all_files[batch_num:batch_num + batch_size]
    batch_labels = all_labels[batch_num:batch_num + batch_size]
    process_batch(batch_files, batch_labels, batch_num // batch_size)


Batch 0 processed and saved.
Batch 1 processed and saved.
Batch 2 processed and saved.
Batch 3 processed and saved.
Batch 4 processed and saved.
Batch 5 processed and saved.
Batch 6 processed and saved.
Batch 7 processed and saved.
Batch 8 processed and saved.
Batch 9 processed and saved.
Batch 10 processed and saved.
Batch 11 processed and saved.
Batch 12 processed and saved.
Batch 13 processed and saved.
Batch 14 processed and saved.
Batch 15 processed and saved.
Batch 16 processed and saved.
Batch 17 processed and saved.
Batch 18 processed and saved.
Batch 19 processed and saved.
Batch 20 processed and saved.
Batch 21 processed and saved.
Batch 22 processed and saved.
Batch 23 processed and saved.
Batch 24 processed and saved.
Batch 25 processed and saved.
Batch 26 processed and saved.
Batch 27 processed and saved.
Batch 28 processed and saved.
Batch 29 processed and saved.
Batch 30 processed and saved.
Batch 31 processed and saved.
Batch 32 processed and saved.
Batch 33 processed a

In [13]:
import pickle
import numpy as np

def load_batch(batch_num):
    # Load the X (features) and y (labels) batches from disk
    with open(f'X_batch_{batch_num}.pkl', 'rb') as f:
        X_batch = pickle.load(f)
    with open(f'y_batch_{batch_num}.pkl', 'rb') as f:
        y_batch = pickle.load(f)
    return np.array(X_batch), np.array(y_batch)


In [16]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.utils import to_categorical
import pickle

# Fit LabelEncoder before processing
label_encoder = LabelEncoder()
label_encoder.fit(list(labels.values()))
num_classes = len(labels)

# Preprocess batches with the initialized label encoder and scaler
scaler = StandardScaler()

def preprocess_batch(X_batch, y_batch):
    n_samples, n_features, _ = X_batch.shape
    X_batch_reshaped = X_batch.reshape((n_samples, n_features * max_pad_len))
    X_scaled = scaler.fit_transform(X_batch_reshaped)  # Standardize features
    X_scaled = X_scaled.reshape((n_samples, n_features, max_pad_len))

    print(f"Processing labels in batch: {np.unique(y_batch)}")

    try:
        y_encoded = label_encoder.transform(y_batch)
    except ValueError as e:
        print(f"Error encoding labels: {e}")
        print(f"All possible labels: {label_encoder.classes_}")
        raise e

    y_categorical = to_categorical(y_encoded, num_classes=num_classes)
    return X_scaled, y_categorical

print(f"Labels used for fitting: {label_encoder.classes_}")


Labels used for fitting: [0 1 2 3 4 5 6 7]


In [17]:
# Print all unique labels that LabelEncoder was fit on
print(f"Labels used for fitting: {label_encoder.classes_}")


Labels used for fitting: [0 1 2 3 4 5 6 7]


In [20]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras import Input

# Define paths and parameters
dataset_folder = '/content/ML_dataset'
sample_rate = 22050
n_mfcc = 40
max_pad_len = 862
batch_size = 100
epochs = 10

# Gather all files and labels using folder names
all_files, all_labels = [], []
for folder in os.listdir(dataset_folder):
    folder_path = os.path.join(dataset_folder, folder)
    if os.path.isdir(folder_path):
        files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".wav")]
        all_files.extend(files)
        all_labels.extend([folder] * len(files))

# Step 1: Fit LabelEncoder on the original folder names
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)
num_classes = len(label_encoder.classes_)

# Define the model with Input layer
input_shape = (n_mfcc, max_pad_len)
model = Sequential([
    Input(shape=input_shape),
    Conv1D(64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Conv1D(128, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Function to preprocess a batch
scaler = StandardScaler()


In [21]:
def process_batch(batch_files, batch_labels, batch_num):
    X_batch, y_batch = [], []

    for file_path, label in zip(batch_files, batch_labels):
        features = extract_features(file_path)
        X_batch.append(features)
        y_batch.append(label)  # Store the string label here, not an integer

    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)

    # Save batches to disk
    with open(f'X_batch_{batch_num}.pkl', 'wb') as f:
        pickle.dump(X_batch, f)
    with open(f'y_batch_{batch_num}.pkl', 'wb') as f:
        pickle.dump(y_batch, f)

    print(f'Batch {batch_num} processed and saved.')


In [22]:
def preprocess_batch(X_batch, y_batch):
    # Check that y_batch has the correct format
    print(f"Batch labels before encoding: {y_batch[:5]}")  # Debugging line to confirm labels

    n_samples, n_features, _ = X_batch.shape
    X_batch_reshaped = X_batch.reshape((n_samples, n_features * max_pad_len))
    X_scaled = scaler.fit_transform(X_batch_reshaped)
    X_scaled = X_scaled.reshape((n_samples, n_features, max_pad_len))

    # Encode labels and one-hot encode them
    y_encoded = label_encoder.transform(y_batch)  # Convert to integers based on strings
    y_categorical = to_categorical(y_encoded, num_classes=num_classes)

    return X_scaled, y_categorical


In [26]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import Input
from sklearn.model_selection import train_test_split
import pickle

# Define paths and parameters
dataset_folder = '/content/ML_dataset'
sample_rate = 22050
n_mfcc = 40
max_pad_len = 862
batch_size = 100
epochs = 10

# Collect all files and labels using folder names
all_files, all_labels = [], []
for folder in os.listdir(dataset_folder):
    folder_path = os.path.join(dataset_folder, folder)
    if os.path.isdir(folder_path):
        files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".wav")]
        all_files.extend(files)
        all_labels.extend([folder] * len(files))

# Step 1: Fit LabelEncoder on original folder names
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)
num_classes = len(label_encoder.classes_)

# Split data into training and validation sets
train_files, val_files, train_labels, val_labels = train_test_split(
    all_files, all_labels, test_size=0.2, stratify=all_labels, random_state=42
)

# Define the model
input_shape = (n_mfcc, max_pad_len)
model = Sequential([
    Input(shape=input_shape),
    Conv1D(64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Conv1D(128, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


# Callbacks for early stopping and model checkpointing
callbacks = [
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    ModelCheckpoint('best_model.keras', monitor='val_loss', save_best_only=True)  # Use .keras extension
]


# Function to extract MFCC features and preprocess the data
def extract_features(file_name, sample_rate=22050, n_mfcc=40, max_pad_len=862):
    audio, _ = librosa.load(file_name, sr=sample_rate, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    if mfccs.shape[1] < max_pad_len:
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mfccs = mfccs[:, :max_pad_len]
    return mfccs

# Preprocess batch function
scaler = StandardScaler()

def preprocess_batch(file_paths, labels):
    X_batch = np.array([extract_features(file) for file in file_paths])
    n_samples, n_features, _ = X_batch.shape
    X_batch_reshaped = X_batch.reshape((n_samples, n_features * max_pad_len))
    X_scaled = scaler.fit_transform(X_batch_reshaped)
    X_scaled = X_scaled.reshape((n_samples, n_features, max_pad_len))

    y_encoded = label_encoder.transform(labels)
    y_categorical = to_categorical(y_encoded, num_classes=num_classes)

    return X_scaled, y_categorical

# Prepare the validation data
X_val, y_val = preprocess_batch(val_files, val_labels)

# Training loop
for epoch in range(epochs):
    print(f'Epoch {epoch + 1}/{epochs}')
    for batch_num in range(0, len(train_files), batch_size):
        batch_files = train_files[batch_num:batch_num + batch_size]
        batch_labels = train_labels[batch_num:batch_num + batch_size]

        X_batch, y_batch = preprocess_batch(batch_files, batch_labels)
        model.train_on_batch(X_batch, y_batch)
        print(f'Processed batch {batch_num // batch_size + 1} for epoch {epoch + 1}')

    # Evaluate on validation set after each epoch
    val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
    print(f'Validation loss: {val_loss}, Validation accuracy: {val_accuracy}')

    # Use callbacks to handle early stopping and checkpointing
    for callback in callbacks:
        callback.on_epoch_end(epoch, logs={'val_loss': val_loss})

# Save the scaler and label encoder for future use
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)

print("Training complete and model saved.")


Epoch 1/10
Processed batch 1 for epoch 1
Processed batch 2 for epoch 1
Processed batch 3 for epoch 1
Processed batch 4 for epoch 1
Processed batch 5 for epoch 1
Processed batch 6 for epoch 1
Processed batch 7 for epoch 1
Processed batch 8 for epoch 1
Processed batch 9 for epoch 1
Processed batch 10 for epoch 1
Processed batch 11 for epoch 1
Processed batch 12 for epoch 1
Processed batch 13 for epoch 1
Processed batch 14 for epoch 1
Processed batch 15 for epoch 1
Processed batch 16 for epoch 1
Processed batch 17 for epoch 1
Processed batch 18 for epoch 1
Processed batch 19 for epoch 1
Processed batch 20 for epoch 1
Processed batch 21 for epoch 1
Processed batch 22 for epoch 1
Processed batch 23 for epoch 1
Processed batch 24 for epoch 1
Processed batch 25 for epoch 1
Processed batch 26 for epoch 1
Processed batch 27 for epoch 1
Processed batch 28 for epoch 1
Processed batch 29 for epoch 1
Processed batch 30 for epoch 1
Processed batch 31 for epoch 1
Processed batch 32 for epoch 1
Proces

AttributeError: 'NoneType' object has no attribute 'get_weights'