<a href="https://colab.research.google.com/github/Hazzd12/CASA0018_coursework/blob/main/Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Audio preprocessing
First, we need to convert the original audio file to a Mel spectrogram, a common representation of audio features that is particularly suitable as input for training convolutional neural networks (CNNs).

In [None]:
import numpy as np
import librosa
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from skimage.transform import resize

def load_and_segment_audio(audio_path, target_length=1.5):
    """Load an audio file and cut it into equal-length, non-overlapping chunks.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.
        target_length: Duration of each chunk in seconds.

    Returns:
        A ``(segments, sr)`` tuple: the list of full-length chunks and the
        sample rate returned by ``librosa.load``. A trailing remainder shorter
        than one chunk is dropped, so a clip shorter than ``target_length``
        yields an empty list.
    """
    samples, sample_rate = librosa.load(audio_path)
    chunk_size = int(sample_rate * target_length)

    segments = []
    for start in range(0, len(samples), chunk_size):
        stop = start + chunk_size
        if stop > len(samples):
            # Incomplete tail: not enough samples left for a full chunk.
            continue
        segments.append(samples[start:stop])
    return segments, sample_rate



def add__noise(data_segment, noise_level=0.005):
    """Return ``data_segment`` with scaled standard-normal noise added.

    Args:
        data_segment: NumPy array of samples; its ``shape`` sizes the noise.
        noise_level: Multiplier applied to the noise before it is added.

    Returns:
        A new array equal to ``data_segment + noise_level * noise``. The input
        array is not modified.
    """
    # Drawn from NumPy's global random state, so results depend on np.random.seed.
    gaussian = np.random.randn(*data_segment.shape)
    scaled = noise_level * gaussian
    return data_segment + scaled


def resize_melspectrogram(mels, target_shape=(128, 128)):
    """Rescale a spectrogram array to ``target_shape`` with skimage's ``resize``.

    Args:
        mels: Array to rescale.
        target_shape: Output shape passed to ``resize``.

    Returns:
        The array produced by ``resize`` using constant-mode padding and
        anti-aliasing.
    """
    resized = resize(
        mels,
        output_shape=target_shape,
        mode='constant',
        anti_aliasing=True,
    )
    return resized

def extract_melspectrogram(y, sr, n_fft=2048, hop_length=512, n_mels=128):
    """Turn an audio segment into a 128x128 single-channel dB mel spectrogram.

    Args:
        y: Audio samples passed to ``librosa.feature.melspectrogram``.
        sr: Sample rate of ``y``.
        n_fft: FFT window size forwarded to librosa.
        hop_length: Hop length forwarded to librosa.
        n_mels: Number of mel bands forwarded to librosa.

    Returns:
        The dB-scaled spectrogram resized to ``(128, 128)`` with a trailing
        channel axis appended.
    """
    mel_power = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )
    # dB values are expressed relative to this segment's own maximum power.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    image = resize_melspectrogram(mel_db, target_shape=(128, 128))
    # Append the channel axis expected by the Conv2D input.
    return np.expand_dims(image, axis=-1)


In [None]:
import zipfile
import os
import shutil

def unzip_audio_files(zip_path, extract_path):
    """Unpack every member of a zip archive into ``extract_path``.

    Args:
        zip_path: Path of the zip archive to read.
        extract_path: Directory the archive contents are extracted into.

    Prints a confirmation line once extraction has finished.
    """
    archive = zipfile.ZipFile(zip_path, 'r')
    try:
        archive.extractall(extract_path)
    finally:
        # Always release the file handle, even if extraction fails.
        archive.close()
    print(f"Extracted audio files to {extract_path}")

def delete_directory(directory_path):
    """Recursively delete a directory, reporting the outcome on stdout.

    Args:
        directory_path: Directory tree to remove.

    An ``OSError`` from the removal (for example when the directory does not
    exist) is reported with its ``strerror`` text instead of being raised.
    """
    try:
        shutil.rmtree(directory_path)
        message = f"Directory '{directory_path}' deleted successfully."
    except OSError as err:
        message = f"Error: {directory_path} : {err.strerror}"
    print(message)

# Hard-coded locations of the dataset archive and of the folder it is unpacked into.
zip_path = '/content/dataset/Data.zip'
extract_path = '/content/dataset/data'

# Remove any earlier extraction before unpacking, so the folder holds only the archive's contents.
delete_directory(extract_path)
unzip_audio_files(zip_path, extract_path)


Directory '/content/dataset/data' deleted successfully.
Extracted audio files to /content/dataset/data


In [None]:


def process_and_visualize(audio_path, target_length=1.5, noise_level=0.005):
    """Convert an audio file into a list of noisy mel spectrograms.

    Despite the name, nothing is plotted: the file is segmented, noise is
    added to each segment, and each noisy segment becomes a spectrogram.

    Args:
        audio_path: Audio file to process.
        target_length: Segment duration in seconds.
        noise_level: Noise multiplier forwarded to ``add__noise``.

    Returns:
        One spectrogram per full-length segment, in segment order.
    """
    segments, sr = load_and_segment_audio(audio_path, target_length=target_length)
    return [
        extract_melspectrogram(add__noise(segment, noise_level=noise_level), sr)
        for segment in segments
    ]


#melspectrogram = process_and_visualize(str(audio_file))


In [None]:
from pathlib import Path

def resize_melspectrogram(mels, target_shape=(128, 128)):
    """Rescale ``mels`` to ``target_shape`` (constant-mode padding, anti-aliased).

    NOTE(review): this name is already defined, with the same behaviour, in the
    preprocessing cell earlier in the notebook; this later definition shadows it
    once the cell runs. Consider keeping a single definition.
    """
    resize_options = {'mode': 'constant', 'anti_aliasing': True}
    return resize(mels, target_shape, **resize_options)

def load_data_and_labels(audio_dir):
    """Build the spectrogram dataset from a directory of per-class sub-folders.

    Every immediate sub-directory of ``audio_dir`` is treated as one class.
    Each ``*.ogg`` file inside it is segmented, noise-augmented and converted
    to a mel spectrogram; every segment becomes one sample.

    Args:
        audio_dir: Directory whose sub-directories are the class folders.

    Returns:
        ``(X, y, categories)``: the stacked spectrograms, the integer label of
        each sample, and the class names ordered so that ``categories[i]`` is
        the class with label ``i``.

    Files that fail to process are reported on stdout and skipped.
    """
    root = Path(audio_dir)
    # Sort the class folders: directory listing order is filesystem dependent,
    # and an unsorted listing would give a different class -> index mapping
    # from one run or machine to the next.
    categories = sorted(entry.name for entry in root.iterdir() if entry.is_dir())
    labels_dict = {category: i for i, category in enumerate(categories)}
    print(labels_dict)
    X, y = [], []
    for category, label in labels_dict.items():
        category_dir = root / category
        # Sorted as well so the sample order (and any seeded split) is reproducible.
        for audio_file in sorted(category_dir.glob('*.ogg')):
            try:
                segments, sr = load_and_segment_audio(str(audio_file))
                for segment in segments:
                    segment = add__noise(segment)
                    spectrogram = extract_melspectrogram(segment, sr)
                    X.append(spectrogram)
                    y.append(label)
            except Exception as e:
                # Best effort: one unreadable file should not abort the whole load.
                print(f"Error processing {audio_file}: {e}")
    return np.array(X), np.array(y), categories

# Build the dataset from the 'Data' folder inside the extraction directory.
X, y, categories = load_data_and_labels(extract_path+'/Data')
# Save the sample and label arrays as .npy files in the working directory.
np.save('X.npy', X)
np.save('y.npy', y)
print(categories)

{'102 - Rooster': 0, '105 - Frog': 1, '101 - Dog': 2, '104 - Cow': 3, '103 - Pig': 4}
['102 - Rooster', '105 - Frog', '101 - Dog', '104 - Cow', '103 - Pig']


In [None]:
from tensorflow.keras.regularizers import l2
def build_model(input_shape, num_classes):
    """Assemble the (uncompiled) CNN classifier.

    Args:
        input_shape: Shape of one input sample, given to the first Conv2D layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A ``Sequential`` model: two Conv2D/MaxPooling2D stages, a flatten, an
        L2-regularised 128-unit dense layer with dropout, and a softmax output.
    """
    feature_layers = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
    ]
    head_layers = [
        Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]
    return Sequential(feature_layers + head_layers)

In [None]:

def calculate_similarity(feature1, feature2):
    """Cosine similarity between two feature arrays.

    Args:
        feature1: First array; flattened to a single row before comparison.
        feature2: Second array; flattened to a single row before comparison.

    Returns:
        The single similarity value taken from the 1x1 result matrix.
    """
    row_a = feature1.reshape(1, -1)
    row_b = feature2.reshape(1, -1)
    similarity_matrix = cosine_similarity(row_a, row_b)
    return similarity_matrix[0][0]

In [None]:

# One sample is a 128x128 spectrogram with a single channel axis.
input_shape = (128, 128, 1)
# Take the class count from the loaded category list rather than hard-coding it,
# so the output layer always matches the number of dataset folders.
num_classes = len(categories)
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = build_model(input_shape, num_classes)
# Labels are integer class indices, hence the sparse categorical loss.
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:

# Train for 15 epochs, holding out 20% of the training data for validation (validation_split=0.2).
history = model.fit(X_train, y_train, epochs=15, validation_split=0.2)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


1232

In [None]:
def extract_features(model, audio_path):
    """Run ``model`` on the first spectrogram derived from an audio file.

    Args:
        model: Object exposing ``predict``; it is called with a one-sample batch.
        audio_path: Audio file passed to ``process_and_visualize``.

    Returns:
        Whatever ``model.predict`` returns for that one-sample batch.

    Raises:
        ValueError: If no spectrograms were produced for the file.
    """
    spectrograms = process_and_visualize(audio_path)
    if not spectrograms:
        raise ValueError("No spectrograms generated from the audio processing.")

    # Only the first segment is used; any later segments are ignored.
    first = spectrograms[0]
    # Prepend a batch axis of size 1 so the array matches the model's input rank.
    batch = first.reshape((1,) + tuple(first.shape))
    return model.predict(batch)


In [None]:

# Create a TensorFlow Lite converter from the in-memory Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)


# Run the conversion; the result is written to model.tflite in a later cell.
tflite_model = converter.convert()

In [None]:

# Write the converted TFLite model to model.tflite in the working directory.
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

# NOTE(review): `category_features` is not assigned in any cell visible in this notebook,
# so on a fresh kernel this line would raise NameError. Confirm where it should be computed.
np.save('features.npy',category_features)