In [1]:
!pip install pandas numpy librosa tensorflow scikit-learn



In [3]:
import os

import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import (
    BatchNormalization,
    Conv1D,
    Dense,
    Dropout,
    GlobalAveragePooling1D,
    Input,
    Lambda,
    MaxPooling1D,
    ReLU,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD

In [4]:
# Dataset location. Set the AUDIO_DATASET_PATH environment variable to point the
# notebook at a different copy of the data; when it is unset the original local
# path is used, so existing setups keep working unchanged.
dataset_path = os.environ.get("AUDIO_DATASET_PATH", r"C:/Users/avixa/Desktop/Madhav Lab")
train_metadata_path = os.path.join(dataset_path, "metadata_train.csv")
test_metadata_path = os.path.join(dataset_path, "metadata_test.csv")
train_folder = os.path.join(dataset_path, "train")
test_folder = os.path.join(dataset_path, "test")

# Load metadata (one row per audio file)
train_metadata = pd.read_csv(train_metadata_path)
test_metadata = pd.read_csv(test_metadata_path)

# Audio parameters
SAMPLE_RATE = 8000  # Hz; every clip is resampled to this rate when loaded
AUDIO_LENGTH = 4 * SAMPLE_RATE  # 4 seconds * 8000 Hz
# Number of distinct class IDs in the training metadata; used as the width of the
# model's softmax output.
# NOTE(review): this matches the integer labels only if 'Class ID' takes the
# values 0..NUM_CLASSES-1 -- confirm against the metadata file.
NUM_CLASSES = train_metadata['Class ID'].nunique()

In [5]:
def _fit_to_length(audio_signal, target_length):
    """Zero-pad the end of `audio_signal`, or truncate it, to `target_length` samples."""
    missing = target_length - len(audio_signal)
    if missing > 0:
        return np.pad(audio_signal, (0, missing))
    return audio_signal[:target_length]


def _load_audio(folder, file_name):
    """Load `file_name` from `folder` as a fixed-length column vector.

    The file is read with librosa at SAMPLE_RATE, padded or truncated to
    AUDIO_LENGTH samples, and reshaped to (AUDIO_LENGTH, 1) for Conv1D.
    """
    file_path = os.path.join(folder, file_name)
    audio_signal, _ = librosa.load(file_path, sr=SAMPLE_RATE)
    return _fit_to_length(audio_signal, AUDIO_LENGTH).reshape(-1, 1)  # Reshape for Conv1D


def load_train_audio(file_name):
    """Load and preprocess one audio file from the training folder.

    Args:
        file_name: File name relative to `train_folder`.

    Returns:
        Array of shape (AUDIO_LENGTH, 1).
    """
    return _load_audio(train_folder, file_name)


def load_test_audio(file_name):
    """Load and preprocess one audio file from the test folder.

    Args:
        file_name: File name relative to `test_folder`.

    Returns:
        Array of shape (AUDIO_LENGTH, 1).
    """
    return _load_audio(test_folder, file_name)


# Prepare dataset
def _process_data(meta_data, loader, label_column):
    """Load every file listed in `meta_data` and pair it with its label.

    Args:
        meta_data: DataFrame with a 'Filename' column and a `label_column` column.
        loader: Callable that maps a file name to its preprocessed audio array.
        label_column: Name of the column holding the class label.

    Returns:
        Tuple (features, labels) of NumPy arrays, in the row order of `meta_data`.
    """
    # Read the two needed columns directly instead of building a Series per row.
    features = [loader(file_name) for file_name in meta_data['Filename']]
    labels = np.array(meta_data[label_column])
    return np.array(features), labels


def process_train_data(meta_data):
    """Build (features, labels) for the training metadata (labels from 'Class ID')."""
    return _process_data(meta_data, load_train_audio, 'Class ID')


def process_test_data(meta_data):
    """Build (features, labels) for the test metadata (labels from 'Class_id').

    NOTE(review): the label column is spelled 'Class_id' here but 'Class ID' for
    the training metadata -- presumably the two CSV headers differ; verify.
    """
    return _process_data(meta_data, load_test_audio, 'Class_id')

X_train, y_train = process_train_data(train_metadata)
X_test, y_test = process_test_data(test_metadata)

In [6]:
# Train-validation split (80% train, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.20, random_state=42, stratify=y_train)

In [7]:
# Define the model
def build_model():
    """Build the 1-D convolutional classifier for fixed-length raw audio.

    The network takes inputs of shape (AUDIO_LENGTH, 1), applies a stack of
    Conv1D -> BatchNormalization -> ReLU units (64, 64, 128, 256 and 512
    filters) separated by max-pooling and dropout, averages the last feature
    map over the time axis, and ends in a softmax over NUM_CLASSES classes.

    Implementation notes:
      * The input shape is declared with an explicit Input layer rather than
        an `input_shape` argument on the first Conv1D, which avoids the Keras
        warning about passing `input_shape` to a layer.
      * Global average pooling uses the built-in GlobalAveragePooling1D layer
        instead of a Lambda around a Python lambda. It computes the same mean
        over axis 1, but Lambda layers have documented (de)serialization
        limitations, which matters because the best checkpoint is reloaded
        with `load_model` later in the notebook.
      * Neither change alters the order or shapes of the trainable weights.
        NOTE(review): a "best_model.h5" written by a run that still used the
        Lambda layer should be regenerated by retraining.

    Returns:
        An uncompiled `Sequential` model.
    """

    def conv_bn_relu(filters, kernel_size=3, strides=1):
        # One convolutional unit; the activation is applied after batch norm.
        return [
            Conv1D(filters, kernel_size=kernel_size, strides=strides, padding='same', activation=None),
            BatchNormalization(),
            ReLU(),
        ]

    model = Sequential([
        Input(shape=(AUDIO_LENGTH, 1)),

        # Wide, strided first convolution over the raw waveform
        *conv_bn_relu(64, kernel_size=80, strides=4),
        MaxPooling1D(pool_size=4), Dropout(0.1),

        *conv_bn_relu(64),
        *conv_bn_relu(64),
        MaxPooling1D(pool_size=4), Dropout(0.1),

        *conv_bn_relu(128),
        *conv_bn_relu(128),
        MaxPooling1D(pool_size=4), Dropout(0.1),

        *conv_bn_relu(256),
        *conv_bn_relu(256),
        *conv_bn_relu(256),
        MaxPooling1D(pool_size=4), Dropout(0.1),

        *conv_bn_relu(512),
        *conv_bn_relu(512),
        Dropout(0.1),

        *conv_bn_relu(512),
        GlobalAveragePooling1D(),  # Global average pooling over the time axis
        Dropout(0.2),

        Dense(NUM_CLASSES, activation='softmax')
    ])
    return model

In [8]:
# Build the network and configure it for integer-labelled multi-class training
model = build_model()
model.compile(
    optimizer=SGD(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Checkpoint callback: write the model to disk whenever validation loss reaches a new minimum
checkpoint = ModelCheckpoint(
    "best_model.h5",
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)

# Fit on the training split, validating on the held-out split after every epoch
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=500,
    batch_size=32,
    callbacks=[checkpoint],
    verbose=1,
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Epoch 1/500


KeyboardInterrupt: 

In [9]:
# Load best model. The checkpoint callback saves the full model, so load_model
# restores the architecture together with its weights; a separate load_weights
# call on the same file is redundant and has been dropped.
model = tf.keras.models.load_model("best_model.h5")

# Helper that prints a classification report for one dataset
def evaluate_model(model, X, y, dataset_name="Dataset"):
    """Print a classification report for `model` on the given data.

    Args:
        model: Object whose `predict(X)` returns one row of class scores per sample.
        X: Input features passed to `model.predict`.
        y: True class labels.
        dataset_name: Label used in the printed heading.
    """
    class_scores = model.predict(X)
    # The predicted class is the index of the highest score in each row.
    predicted_labels = np.argmax(class_scores, axis=1)
    print(f"\nClassification Report for {dataset_name}:")
    report = classification_report(y, predicted_labels, digits=4)
    print(report)

# Report metrics for the training, validation and test data, in that order
for split_features, split_labels, split_name in (
    (X_train, y_train, "Training Set"),
    (X_val, y_val, "Validation Set"),
    (X_test, y_test, "Test Set"),
):
    evaluate_model(model, split_features, split_labels, split_name)

ValueError: Layer count mismatch when loading weights from file. Model expected 22 layers, found 23 saved layers.

In [10]:
# Load the trained model
model = tf.keras.models.load_model("best_model.h5")

# Get model predictions for the test set (X_test / y_test come from the data-loading cell)
y_pred_probs = model.predict(X_test)

# Convert probabilities to class labels
y_pred = np.argmax(y_pred_probs, axis=1)  # If multi-class
y_true = np.argmax(y_test, axis=1) if y_test.ndim > 1 else y_test  # Convert one-hot labels if needed

# Compute metrics. accuracy_score and f1_score are provided by sklearn.metrics
# and are imported with the other dependencies in the imports cell.
print("Accuracy:", accuracy_score(y_true, y_pred))
print("Classification Report:\n", classification_report(y_true, y_pred))

# Compute F1-score
f1 = f1_score(y_true, y_pred, average="weighted")  # Can also use "macro", "micro", or "binary"
print("F1 Score:", f1)

ValueError: Layer count mismatch when loading weights from file. Model expected 22 layers, found 23 saved layers.