In [71]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import os

In [72]:
# TF Hub handle of the YAMNet audio model (version 1 of the published module).
yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
# Load the model once at notebook scope; extract_embedding() calls this object
# directly on each waveform.
yamnet = hub.load(yamnet_model_handle)

In [89]:
def extract_embedding(audio_file):
    """Return one fixed-length YAMNet embedding for an audio file.

    The file is decoded as 16 kHz mono audio, passed through the
    notebook-level ``yamnet`` model, and the per-frame embeddings are
    averaged over the time (frame) axis.

    Args:
        audio_file: Path to an audio file that ``librosa.load`` can decode.

    Returns:
        A 1-D NumPy array holding the time-averaged embedding. If anything
        goes wrong (unreadable file, no embedding frames, NaN/inf values) the
        error is printed and ``np.zeros(1024)`` is returned instead, so an
        all-zero vector is the failure sentinel callers can check for.
    """
    try:
        # sr=16000 resamples on load; mono=True mixes multi-channel audio down.
        y, sr = librosa.load(audio_file, sr=16000, mono=True)
        waveform = tf.convert_to_tensor(y, dtype=tf.float32)
        _scores, embeddings, _spectrogram = yamnet(waveform)

        frames = embeddings.numpy()
        # Normally frames is (num_frames, features). Tolerate a leading batch
        # axis by keeping only the first item, as the previous code did.
        if frames.ndim == 3:
            frames = frames[0]

        # Averaging zero frames would silently yield NaNs (NumPy only warns),
        # so treat it as a failure and fall through to the zero-vector path.
        if frames.shape[0] == 0:
            raise ValueError("no embedding frames produced (audio empty or too short)")

        # Mean over the frame axis (axis 0) -> one vector per file.
        mean_embedding = np.mean(frames, axis=0)
        if not np.all(np.isfinite(mean_embedding)):
            raise ValueError("embedding contains NaN or infinite values")
        return mean_embedding
    except Exception as e:
        # Deliberately best-effort: report the problem and return the sentinel
        # so one bad file does not abort a whole dataset scan.
        print(f"Error processing {audio_file}: {e}")
        return np.zeros(1024)  # Return a zero vector if extraction fails

In [90]:
# Dataset locations.
# Set the AUDIO_DATASET_ROOT environment variable to point the notebook at a
# different copy of the dataset; when it is unset, the original local path is
# used, so existing behaviour is unchanged.
audio_dir = ""  # not used by the cells shown here; kept in case other cells reference it
dataset_root = os.environ.get(
    "AUDIO_DATASET_ROOT",
    "C:/Users/5A_Traders/Downloads/FYP_ON_DEV/FYP_IntelliTrain/AudioClassification/Dataset/archive",
).rstrip("/\\")
# Each directory is expected to contain one sub-folder per class (see load_data).
train_audio_dir = f"{dataset_root}/DataTrain"
test_audio_dir = f"{dataset_root}/DataTest"


In [91]:
def load_data(directory):
    """Build feature and label arrays from a directory of per-class sub-folders.

    Every immediate sub-directory of ``directory`` is treated as one class
    (its name is the label) and every regular file inside it as one sample,
    converted to a feature vector with ``extract_embedding``.

    Files whose extraction fails are reported and skipped rather than added to
    the dataset. ``extract_embedding`` handles its own errors and signals
    failure by returning an all-zero vector, so that sentinel is checked here
    in addition to catching exceptions.

    Args:
        directory: Path of the dataset root containing one folder per class.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays with one row / entry
        per successfully processed file, in sorted class and file order.
    """
    labels, features = [], []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order reproducible between runs and machines.
    for class_label in sorted(os.listdir(directory)):
        class_path = os.path.join(directory, class_label)
        if not os.path.isdir(class_path):
            continue  # stray files next to the class folders are not classes
        for audio_file in sorted(os.listdir(class_path)):
            file_path = os.path.join(class_path, audio_file)
            if not os.path.isfile(file_path):
                continue  # nested directories are not audio samples
            try:
                feature = extract_embedding(file_path)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")
                continue
            # An all-zero vector is the extractor's failure sentinel; keeping
            # it would train the classifier on a meaningless, labelled sample.
            if not np.any(feature):
                print(f"Skipping {file_path}: embedding extraction failed")
                continue
            features.append(feature)
            labels.append(class_label)
    return np.array(features), np.array(labels)


In [92]:
# Load the two splits from their separate directories:
# DataTrain is used for training, DataTest for evaluation.
X_train, y_train = load_data(train_audio_dir)
X_test, y_test = load_data(test_audio_dir)

# Fail fast with a clear message instead of building and fitting a model on
# an empty array, which only surfaces later as a cryptic shape error.
if len(X_train) == 0:
    raise ValueError(f"No training samples were loaded from {train_audio_dir!r}")

# Encode labels using LabelEncoder (fit on ALL labels to cover all classes)
le = LabelEncoder()
le.fit(np.concatenate([y_train, y_test]))  # Fit on all possible labels

# Transform string labels to integers
y_train = le.transform(y_train)
y_test = le.transform(y_test)

# Make sure the features are NumPy arrays (no copy if they already are)
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

# Each split is checked on its own: a 1-D array in one split says nothing
# about the other. 1-D training features are reshaped to (n_samples, 1024)
# as before; the test split is then matched to the training feature width.
if X_train.ndim == 1:
    X_train = X_train.reshape(-1, 1024)
if X_test.ndim == 1:
    X_test = X_test.reshape(-1, X_train.shape[1])

print("X_train shape:", X_train.shape)  # Should be (n_samples, 1024)

X_train shape: (10, 1024)


In [93]:
# Sanity check on the feature matrix: overall shape first, then one row.
train_shape = X_train.shape
print("X_train shape:", train_shape)  # Should be (n_samples, 1024)
first_feature = X_train[0]
print("Example feature shape:", first_feature.shape)  # Should be (1024,)

X_train shape: (10, 1024)
Example feature shape: (1024,)


In [94]:
# Define the Model
# A small fully connected classifier on top of the fixed-length embeddings:
# two hidden ReLU layers with dropout, then a softmax over the known classes.
n_features = X_train.shape[1]  # Shape (1024,)
n_classes = len(le.classes_)

classifier_layers = [
    Input(shape=(n_features,)),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(n_classes, activation='softmax'),
]
model = Sequential(classifier_layers)

In [96]:
# Configure training. The sparse loss variant is used because the labels are
# integer class ids (LabelEncoder output), not one-hot vectors.
compile_options = {
    'optimizer': 'adam',
    'loss': 'sparse_categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

In [97]:
# Train Model
# Keep the returned History object so the per-epoch loss/accuracy values stay
# available (history.history) for inspection or plotting after training.
# NOTE: the DataTest split is used as validation data here, so the reported
# val_* metrics are not an independent estimate if they are used for tuning.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=16,
)
history

Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.0000e+00 - loss: 2.1761 - val_accuracy: 0.3077 - val_loss: 1.5369
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step - accuracy: 0.3000 - loss: 1.6143 - val_accuracy: 0.5385 - val_loss: 1.2988
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step - accuracy: 0.7000 - loss: 1.1691 - val_accuracy: 0.7692 - val_loss: 1.2058
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step - accuracy: 0.8000 - loss: 0.9770 - val_accuracy: 0.7692 - val_loss: 1.0701
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step - accuracy: 0.9000 - loss: 0.7414 - val_accuracy: 0.6923 - val_loss: 1.0446
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step - accuracy: 0.8000 - loss: 0.6795 - val_accuracy: 0.6154 - val_loss: 1.0347
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x2cd3aa41ed0>

In [None]:
# Save Model
# Writes the trained classifier to the current working directory.
model_output_path = "audio_classification_model.h5"
model.save(model_output_path)
print("Model training complete and saved!")