In [None]:
import os
import numpy as np
import seaborn as sns
import librosa
import librosa.display
import tensorflow as tf
from tensorflow.keras.utils import to_categorical, plot_model
import cv2
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential

In [None]:


# Root folder of the dataset; it holds one sub-folder per class
data_dir = "/content/drive/MyDrive/PhysioNet2021/Data"
# Class sub-folder names; a class's position in this list is its integer label
classes = ["Abnormal", "Normal"]

# Function to load and preprocess audio files
def load_and_preprocess_data(data_dir, classes, target_shape=(128, 128)):
    """Load every ``.wav`` file under ``data_dir/<class>`` as a 3-channel log-mel image.

    Args:
        data_dir: Root directory containing one sub-directory per class.
        classes: Sequence of class sub-directory names. A file's label is the
            index of its class in this sequence.
        target_shape: ``(height, width)`` of each output spectrogram, in NumPy
            shape order.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays. ``data`` stacks one
        ``(height, width, 3)`` array per file (the same spectrogram repeated
        in all three channels); ``labels`` holds the matching class indices.
    """
    data, labels = [], []

    # cv2.resize takes dsize as (width, height) -- the reverse of a NumPy
    # shape -- so swap the order; otherwise non-square targets come out transposed.
    target_height, target_width = target_shape
    dsize = (target_width, target_height)

    for i, class_name in enumerate(classes):
        class_path = os.path.join(data_dir, class_name)

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split made later) reproducible.
        for filename in sorted(os.listdir(class_path)):
            if not filename.endswith('.wav'):
                continue
            file_path = os.path.join(class_path, filename)

            # sr=None keeps the file's native sampling rate
            audio_data, sample_rate = librosa.load(file_path, sr=None)

            # Mel power spectrogram, converted to dB relative to its own peak
            mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
            mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

            # Bring every spectrogram to the same spatial size
            resized_spectrogram = cv2.resize(mel_spectrogram_db, dsize)

            # Repeat the single channel three times to get an RGB-like layout
            spectrogram_rgb = np.stack([resized_spectrogram] * 3, axis=-1)

            data.append(spectrogram_rgb)
            labels.append(i)

    return np.array(data), np.array(labels)

# Build the spectrogram array and the integer class ids from the files on disk
data, labels = load_and_preprocess_data(data_dir=data_dir, classes=classes)

# Replace the integer ids with one-hot rows (one column per class)
labels = to_categorical(labels, num_classes=len(classes))

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.applications.inception_v3 import preprocess_input
# Stratified 5-fold split; the fixed seed keeps fold membership repeatable.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Integer class id per sample, recovered once from the one-hot labels.
class_ids = np.argmax(labels, axis=1)

# Best fold so far, ranked by weighted F1. Starting below the smallest possible
# F1 (0) guarantees the first fold is recorded even if it scores exactly 0, so
# the best_* variables are never None after the loop.
best_f1 = -1.0
best_model = None
best_history = None
best_y_true = None
best_y_pred = None
best_fold = None
fold_accuracies = []
fold_precisions = []
fold_recalls = []
fold_f1s = []

fold = 1
for train_index, test_index in skf.split(data, class_ids):
    print(f"\nTraining Fold {fold}...")

    # Hold out 20% of the training portion for early stopping / LR scheduling.
    # Monitoring the test fold instead would pick the stopping epoch (and the
    # restored weights) on the very samples used for the reported metrics.
    fit_index, val_index = train_test_split(
        train_index,
        test_size=0.2,
        stratify=class_ids[train_index],
        random_state=42,
    )
    X_train, X_val, X_test = data[fit_index], data[val_index], data[test_index]
    y_train, y_val, y_test = labels[fit_index], labels[val_index], labels[test_index]

    # InceptionV3's preprocess_input expects pixel values in [0, 255], but the
    # spectrograms hold dB values. Min-max scale to [0, 255] using statistics
    # from the fitting subset only, clipping the held-out subsets to that range.
    db_min, db_max = float(X_train.min()), float(X_train.max())
    db_span = (db_max - db_min) or 1.0  # guard against a constant-valued subset
    X_train, X_val, X_test = (
        preprocess_input(np.clip((subset - db_min) / db_span, 0.0, 1.0) * 255.0)
        for subset in (X_train, X_val, X_test)
    )

    # Load InceptionV3 pretrained on ImageNet, without its classification head
    base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

    # Freeze the whole convolutional base; only the new head below is trained
    base_model.trainable = False

    # Classification head on top of the frozen base
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    output_layer = Dense(len(classes), activation='softmax')(x)

    # Create the model
    model = Model(inputs=base_model.input, outputs=output_layer)

    # The architecture is identical in every fold, so draw the head diagram once
    if fold == 1:
        classifier = Model(inputs=base_model.output, outputs=output_layer)
        plot_model(classifier, to_file='inceptionv3_top_only.png', show_shapes=True)

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=1e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Callbacks for early stopping & learning rate decay
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

    # Train on the fitting subset, validating on the held-out validation subset
    history = model.fit(X_train, y_train,
                        epochs=50,
                        batch_size=32,
                        validation_data=(X_val, y_val),
                        callbacks=[early_stopping, reduce_lr], verbose=0)

    # Score the untouched test fold
    y_pred = np.argmax(model.predict(X_test), axis=1)
    y_true = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    print(f"Fold {fold} Weighted F1-score: {f1:.4f}")
    fold_accuracies.append(accuracy)
    fold_precisions.append(precision)
    fold_recalls.append(recall)
    fold_f1s.append(f1)

    # Save best fold
    if f1 > best_f1:
        best_f1 = f1
        best_fold = fold
        best_model = model
        best_history = history
        best_y_pred = y_pred
        best_y_true = y_true

    fold += 1

# === After all folds === #
print("\n Cross-validation complete.")
print("=== Average Metrics Across Folds ===")
print(f"Accuracy: {np.mean(fold_accuracies):.4f}")
print(f"Precision: {np.mean(fold_precisions):.4f}")
print(f"Recall: {np.mean(fold_recalls):.4f}")
print(f"F1 Score: {np.mean(fold_f1s):.4f}")

# === Best Fold Results === #
print(f"\n Best fold: {best_fold} (weighted F1 = {best_f1:.4f})")
print("\n Classification Report (Best Fold):")
print(classification_report(best_y_true, best_y_pred, target_names=classes))

print(" Confusion Matrix (Best Fold):")
cm = confusion_matrix(best_y_true, best_y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix – Best Fold")
plt.show()




In [None]:
# Persist the best-scoring fold's model (tracked as best_model by the
# cross-validation loop) rather than whichever fold happened to train last.
best_model.save('inceptionv3_model.keras')

In [None]:


def plot_training_history(history):
    """
    Draw loss and accuracy curves (training vs. validation) side by side.

    Args:
        history: object whose ``history`` attribute is a mapping with the keys
            'loss', 'val_loss', 'accuracy' and 'val_accuracy', e.g. the value
            returned by Keras ``model.fit()``.
    """
    curves = history.history

    # One panel per metric: loss on the left, accuracy on the right
    fig, (loss_axis, accuracy_axis) = plt.subplots(1, 2, figsize=(12, 5))
    panels = (
        (loss_axis, 'loss', 'Loss'),
        (accuracy_axis, 'accuracy', 'Accuracy'),
    )

    for axis, key, metric_name in panels:
        # Training curve in blue, validation curve in red
        axis.plot(curves[key], label=f'Training {metric_name}', color='blue')
        axis.plot(curves[f'val_{key}'], label=f'Validation {metric_name}', color='red')
        axis.set_xlabel('Epochs')
        axis.set_ylabel(metric_name)
        axis.set_title(f'Training & Validation {metric_name}')
        axis.legend()

    # Render both panels
    plt.show()


In [None]:
# Plot the training curves of the best-scoring fold (best_history is recorded
# by the cross-validation loop) so they match the reported best-fold metrics,
# instead of the curves of whichever fold ran last.
plot_training_history(best_history)

**YAMNet**

In [None]:
!pip install tensorflow-io



In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import librosa
import numpy as np
import tensorflow_io as tfio

# Load YAMNet Pretrained Model
# Bound to a global because extract_yamnet_features() calls yamnet_model
# directly; the model is loaded once here and reused for every file.
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')

# Function to Convert Audio to YAMNet Features
def extract_yamnet_features(file_path):
    """Run one audio file through YAMNet and return its embeddings as a NumPy array.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The model's ``embeddings`` output converted with ``.numpy()``.
        Presumably one row per analysis frame (callers average over axis 0) --
        confirm against the YAMNet documentation.
    """
    # librosa resamples to 16 kHz on load; the returned rate is not needed
    samples, _ = librosa.load(file_path, sr=16000)
    waveform = tf.convert_to_tensor(samples, dtype=tf.float32)

    # The model returns (scores, embeddings, spectrogram); keep the embeddings only
    _, frame_embeddings, _ = yamnet_model(waveform)

    return frame_embeddings.numpy()


In [None]:
import os

data_dir = '/content/drive/MyDrive/PhysioNet2021/Data'
classes = ['Abnormal', 'Normal']

X, y = [], []

for class_idx, class_name in enumerate(classes):
    class_path = os.path.join(data_dir, class_name)

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order -- and with it the seeded cross-validation folds -- reproducible.
    for filename in sorted(os.listdir(class_path)):
        if not filename.endswith('.wav'):
            continue
        file_path = os.path.join(class_path, filename)

        # Extract YAMNet embeddings
        embedding = extract_yamnet_features(file_path)

        # Average over axis 0 to get one fixed-length feature vector per file
        X.append(embedding.mean(axis=0))
        y.append(class_idx)  # Label is the class's position in `classes`

# Convert to NumPy arrays (labels become one-hot rows)
X = np.array(X)
y = tf.keras.utils.to_categorical(y, num_classes=len(classes))


In [None]:
# Integer class id per sample, recovered once from the one-hot labels.
class_ids = np.argmax(y, axis=1)

# Best fold so far, ranked by weighted F1. Starting below the smallest possible
# F1 (0) guarantees the first fold is recorded even if it scores exactly 0, so
# the best_* variables are never None after the loop.
best_f1 = -1.0
best_model = None
best_history = None
best_y_true = None
best_y_pred = None
best_fold = None
fold_accuracies = []
fold_precisions = []
fold_recalls = []
fold_f1s = []

fold = 1
# `skf` is the StratifiedKFold splitter created in the InceptionV3 section
for train_index, test_index in skf.split(X, class_ids):
    print(f"\nTraining Fold {fold}...")

    # Hold out 20% of the training portion for early stopping / LR scheduling.
    # Monitoring the test fold instead would pick the stopping epoch (and the
    # restored weights) on the very samples used for the reported metrics.
    fit_index, val_index = train_test_split(
        train_index,
        test_size=0.2,
        stratify=class_ids[train_index],
        random_state=42,
    )
    X_train, X_val, X_test = X[fit_index], X[val_index], X[test_index]
    y_train, y_val, y_test = y[fit_index], y[val_index], y[test_index]

    # Small fully connected classifier on top of the averaged embeddings
    model = Sequential([
        Dense(128, activation='relu', input_shape=(X.shape[1],)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(len(classes), activation='softmax'),  # Output layer
    ])

    # The architecture is identical in every fold, so draw the diagram once
    if fold == 1:
        plot_model(model, to_file='yamnet.png', show_shapes=True, show_layer_names=True)

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.0005),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Callbacks for early stopping & learning rate decay
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

    # Train on the fitting subset, validating on the held-out validation subset
    history = model.fit(X_train, y_train,
                        epochs=50,
                        batch_size=32,
                        validation_data=(X_val, y_val),
                        callbacks=[early_stopping, reduce_lr], verbose=0)

    # Score the untouched test fold
    y_pred = np.argmax(model.predict(X_test), axis=1)
    y_true = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    print(f"Fold {fold} Weighted F1-score: {f1:.4f}")
    fold_accuracies.append(accuracy)
    fold_precisions.append(precision)
    fold_recalls.append(recall)
    fold_f1s.append(f1)

    # Save best fold
    if f1 > best_f1:
        best_f1 = f1
        best_fold = fold
        best_model = model
        best_history = history
        best_y_pred = y_pred
        best_y_true = y_true

    fold += 1

# === After all folds === #
print("\n Cross-validation complete.")
print("=== Average Metrics Across Folds ===")
print(f"Accuracy: {np.mean(fold_accuracies):.4f}")
print(f"Precision: {np.mean(fold_precisions):.4f}")
print(f"Recall: {np.mean(fold_recalls):.4f}")
print(f"F1 Score: {np.mean(fold_f1s):.4f}")

# === Best Fold Results === #
print(f"\n Best fold: {best_fold} (weighted F1 = {best_f1:.4f})")
print("\n Classification Report (Best Fold):")
print(classification_report(best_y_true, best_y_pred, target_names=classes))

print(" Confusion Matrix (Best Fold):")
cm = confusion_matrix(best_y_true, best_y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix – Best Fold")
plt.show()


In [None]:
# Persist the best-scoring fold's model (tracked as best_model by the
# cross-validation loop) rather than whichever fold happened to train last.
best_model.save('yamnet_model.keras')

In [None]:
# Plot the training curves of the best-scoring fold (best_history is recorded
# by the cross-validation loop) so they match the reported best-fold metrics,
# instead of the curves of whichever fold ran last.
plot_training_history(best_history)

In [None]:
# Load VGGish Pretrained Model from TensorFlow Hub
# Bound to a global because extract_vggish_features() calls vggish_model directly.
# Relies on `hub` (tensorflow_hub) having been imported by an earlier cell.
vggish_model = hub.load("https://tfhub.dev/google/vggish/1")

# Function to Extract VGGish Features from Audio
def extract_vggish_features(file_path):
    """Run one audio file through VGGish and return its embeddings as a NumPy array.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The model output converted with ``.numpy()``. Presumably one row per
        analysis frame (callers average over axis 0) -- confirm against the
        VGGish documentation.
    """
    # librosa resamples to 16 kHz on load; the returned rate is not needed
    samples, _ = librosa.load(file_path, sr=16000)

    # Hand the model a float32 tensor of the raw samples
    waveform = tf.convert_to_tensor(samples, dtype=tf.float32)

    return vggish_model(waveform).numpy()

In [None]:
# Define dataset directory
data_dir = '/content/drive/MyDrive/PhysioNet2021/Data'
classes = ['Abnormal', 'Normal']

X, y = [], []

# Loop through dataset and extract VGGish features
for class_idx, class_name in enumerate(classes):
    class_path = os.path.join(data_dir, class_name)

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order -- and with it the seeded cross-validation folds -- reproducible.
    for filename in sorted(os.listdir(class_path)):
        if not filename.endswith('.wav'):
            continue
        file_path = os.path.join(class_path, filename)

        # Extract VGGish embeddings
        embedding = extract_vggish_features(file_path)

        # Average over axis 0 to get one fixed-length feature vector per file
        X.append(embedding.mean(axis=0))
        y.append(class_idx)  # Label is the class's position in `classes`

# Convert to NumPy arrays (labels become one-hot rows)
X = np.array(X)
y = to_categorical(y, num_classes=len(classes))

In [None]:
# Integer class id per sample, recovered once from the one-hot labels.
class_ids = np.argmax(y, axis=1)

# Best fold so far, ranked by weighted F1. Starting below the smallest possible
# F1 (0) guarantees the first fold is recorded even if it scores exactly 0, so
# the best_* variables are never None after the loop.
best_f1 = -1.0
best_model = None
best_history = None
best_y_true = None
best_y_pred = None
best_fold = None
fold_accuracies = []
fold_precisions = []
fold_recalls = []
fold_f1s = []

fold = 1
# `skf` is the StratifiedKFold splitter created in the InceptionV3 section
for train_index, test_index in skf.split(X, class_ids):
    print(f"\nTraining Fold {fold}...")

    # Hold out 20% of the training portion for early stopping / LR scheduling.
    # Monitoring the test fold instead would pick the stopping epoch (and the
    # restored weights) on the very samples used for the reported metrics.
    fit_index, val_index = train_test_split(
        train_index,
        test_size=0.2,
        stratify=class_ids[train_index],
        random_state=42,
    )
    X_train, X_val, X_test = X[fit_index], X[val_index], X[test_index]
    y_train, y_val, y_test = y[fit_index], y[val_index], y[test_index]

    # Small fully connected classifier on top of the averaged embeddings
    model = Sequential([
        Dense(128, activation='relu', input_shape=(X.shape[1],)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(len(classes), activation='softmax'),  # Output layer for classification
    ])

    # The architecture is identical in every fold, so draw the diagram once
    if fold == 1:
        plot_model(model, to_file='vggish.png', show_shapes=True, show_layer_names=True)

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.0005),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Callbacks for early stopping & learning rate decay
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

    # Train on the fitting subset, validating on the held-out validation subset
    history = model.fit(X_train, y_train,
                        epochs=50,
                        batch_size=32,
                        validation_data=(X_val, y_val),
                        callbacks=[early_stopping, reduce_lr], verbose=0)

    # Score the untouched test fold
    y_pred = np.argmax(model.predict(X_test), axis=1)
    y_true = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    print(f"Fold {fold} Weighted F1-score: {f1:.4f}")
    fold_accuracies.append(accuracy)
    fold_precisions.append(precision)
    fold_recalls.append(recall)
    fold_f1s.append(f1)

    # Save best fold
    if f1 > best_f1:
        best_f1 = f1
        best_fold = fold
        best_model = model
        best_history = history
        best_y_pred = y_pred
        best_y_true = y_true

    fold += 1

# === After all folds === #
print("\n Cross-validation complete.")
print("=== Average Metrics Across Folds ===")
print(f"Accuracy: {np.mean(fold_accuracies):.4f}")
print(f"Precision: {np.mean(fold_precisions):.4f}")
print(f"Recall: {np.mean(fold_recalls):.4f}")
print(f"F1 Score: {np.mean(fold_f1s):.4f}")

# === Best Fold Results === #
print(f"\n Best fold: {best_fold} (weighted F1 = {best_f1:.4f})")
print("\n Classification Report (Best Fold):")
print(classification_report(best_y_true, best_y_pred, target_names=classes))

print(" Confusion Matrix (Best Fold):")
cm = confusion_matrix(best_y_true, best_y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix – Best Fold")
plt.show()


In [None]:
# Persist the best-scoring fold's model (tracked as best_model by the
# cross-validation loop) rather than whichever fold happened to train last.
best_model.save('vggish_model.keras')

In [None]:
# Plot the training curves of the best-scoring fold (best_history is recorded
# by the cross-validation loop) so they match the reported best-fold metrics,
# instead of the curves of whichever fold ran last.
plot_training_history(best_history)

In [None]:
# Persist the best-scoring VGGish fold (best_model), not the last fold trained
best_model.save('audio_classification_model.keras')

**Inference attempt: classify a single recording with the trained VGGish classifier**

In [None]:
# Load the trained model
from tensorflow.keras.models import load_model
model_path = "/content/audio_classification_model.keras"  # Path where the model is saved
# Write out the best-scoring fold's model (best_model from the cross-validation
# loop) -- `model` would be whichever fold trained last -- then read it back so
# that inference below runs on exactly what was saved to disk.
best_model.save(model_path)
model = load_model(model_path)

# Function to classify a new audio file
def classify_audio(file_path, model):
    """Predict the class of one audio file and print the result.

    Args:
        file_path: Path of the audio file to classify.
        model: Object with a ``predict`` method that accepts a ``(1, n_features)``
            array and returns class probabilities.

    Returns:
        Tuple ``(predicted_class, prediction_probs)``: the arg-max index over
        the returned probabilities and the probabilities themselves.
    """
    # VGGish embeddings, averaged over axis 0 as during training, then shaped
    # into a one-row batch for the model
    frame_embeddings = extract_vggish_features(file_path)
    pooled = np.mean(frame_embeddings, axis=0)
    batch = pooled.reshape(1, -1)

    # Class probabilities and the index of the most likely class
    prediction_probs = model.predict(batch)
    predicted_class = np.argmax(prediction_probs)

    # Report the class name (looked up in the global `classes`) and the probabilities
    print(f"Predicted Class: {classes[predicted_class]}")
    print(f"Class Probabilities: {prediction_probs}")

    return predicted_class, prediction_probs

# Classify a single audio file (placeholder path) with the reloaded model
test_audio_file = "/content/c0002.wav"  # Replace with actual file path
predicted_class, prediction_probs = classify_audio(file_path=test_audio_file, model=model)


In [None]:
import librosa
import numpy as np
import tensorflow as tf
import librosa.display
import matplotlib.pyplot as plt
import os

# Function to extract Mel spectrogram features
def extract_mel_spectrogram(file_path, sr=16000, n_mels=64, hop_length=512):
    """Return the dB-scaled mel spectrogram of an audio file.

    Args:
        file_path: Path of the audio file to load.
        sr: Sampling rate the audio is loaded at.
        n_mels: Number of mel bands passed to the spectrogram computation.
        hop_length: Hop length passed to the spectrogram computation.

    Returns:
        The mel power spectrogram converted to dB relative to its own maximum.
    """
    # The rate returned by librosa equals the requested `sr`, so it is dropped
    waveform, _ = librosa.load(file_path, sr=sr)

    power_spectrogram = librosa.feature.melspectrogram(
        y=waveform,
        sr=sr,
        n_mels=n_mels,
        hop_length=hop_length,
    )

    # dB scale, referenced to the spectrogram's peak value
    return librosa.power_to_db(power_spectrogram, ref=np.max)

# Example usage: render the mel spectrogram of two example recordings,
# one figure per file, each with its own title
for file_path, figure_title in (
    ("/content/c0002.wav", "Mel Spectrogram for Abnormal"),
    ("/content/c0003.wav", "Mel Spectrogram for Normal"),
):
    mel_spec = extract_mel_spectrogram(file_path)

    # sr / hop_length match the defaults used by extract_mel_spectrogram
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(mel_spec, sr=16000, hop_length=512, cmap='viridis')
    plt.colorbar(label='dB')
    plt.title(figure_title)
    plt.show()

