<a href="https://colab.research.google.com/github/Apples-17/Decibel_duel_PS/blob/main/Deciduel_final_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab runtime; every dataset path used below lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
# Sanity check that the mount worked: print the top-level entries of MyDrive.
print("Listing everything under MyDrive:\n")
print(os.listdir('/content/drive/MyDrive'))

In [None]:
# Project locations on Drive.
# train_dir is iterated later as one sub-folder per class; test_dir holds the clips to predict.
base_dir = '/content/drive/MyDrive/Decibel Duel'
train_dir = f'{base_dir}/train/train'
test_dir = f'{base_dir}/test/test'
# Destination path for the submission CSV.
out_submission=f'{base_dir}/submission.csv'

In [None]:
import os

# Confirm the test folder is reachable before the long-running cells depend on it.
print("Test folder exists:", os.path.exists(test_dir))

# List the folder once and reuse the result: the count and the sample then come
# from the same snapshot, and the (slow) Drive listing is not repeated.
test_dir_listing = os.listdir(test_dir)
print("Number of test files:", len(test_dir_listing))

# Show a few filenames
print("Sample files:", test_dir_listing[:5])

In [None]:
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
import librosa.display
import pandas as pd
import os
import numpy as np
from glob import glob
import random
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import pandas as pd

In [None]:
# Single example clip (from the dog_bark class folder) used by the exploratory cells below.
audio_file_path=f'{train_dir}/dog_bark/344-3-1-0.wav'

In [None]:
# Load the example clip with librosa's default settings and draw its waveform.
plt.figure(figsize=(12,4))
librosa_audio_data,librosa_audio_sample_rate=librosa.load(audio_file_path)
librosa.display.waveshow(librosa_audio_data, sr=librosa_audio_sample_rate)
# Last expression of the cell: renders an inline audio player for the same file.
ipd.Audio(audio_file_path)

In [None]:
# Sample rate librosa returned for the example clip.
librosa_audio_sample_rate

In [None]:
# Sample values of the example clip as loaded by librosa.
librosa_audio_data

In [None]:
# Read the same file with SciPy for comparison with the librosa result above.
# NOTE(review): scipy is expected to return the samples as stored in the file
# (no resampling or rescaling), unlike librosa.load - confirm against the output.
from scipy.io import wavfile as wav
wave_sample_rate, wave_audio=wav.read(audio_file_path)
wave_audio

In [None]:
# Plot the SciPy-loaded samples against their sample index.
plt.figure(figsize=(12,4))
plt.plot(wave_audio)

In [None]:
### MFCC-Mel Frequency Cepstral Coefficients
# Compute MFCCs for the example clip. n_mfcc is not set here, so librosa's default
# applies; features_extractor below explicitly requests 40 coefficients.
mfccs=librosa.feature.mfcc(y=librosa_audio_data, sr=librosa_audio_sample_rate)
mfccs.shape

In [None]:
# Inspect the MFCC matrix computed above.
mfccs

In [None]:
# Training root with a trailing slash. Not referenced again by the cells visible in this notebook.
audio_dataset_path=f'{train_dir}/'

In [None]:
def features_extractor(file_name):
  """Summarise one audio file as a fixed-length MFCC vector.

  The file is loaded through librosa using the 'kaiser_fast' resampler, 40 MFCCs
  are computed for it, and each coefficient is averaged over all frames.

  Args:
    file_name: Path of the audio file to read.

  Returns:
    A 1-D array with one averaged value per MFCC coefficient.
  """
  signal, rate = librosa.load(file_name, res_type='kaiser_fast')
  coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
  # Transpose so frames run along axis 0, then collapse that axis by averaging.
  return np.mean(coefficients.T, axis=0)

In [None]:
!pip install resampy

In [None]:
import resampy

In [None]:
from tqdm import tqdm

# Walk train_dir (one sub-folder per class) and collect [mfcc_vector, class_name]
# pairs. Entries that are not directories are ignored; files that cannot be
# processed are reported and skipped.
extracted_features = []
for class_label in os.listdir(train_dir):
    class_path = os.path.join(train_dir, class_label)
    if os.path.isdir(class_path):
        for file_name in tqdm(os.listdir(class_path), desc=f"Processing {class_label}"):
            file_path = os.path.join(class_path, file_name)
            try:
                data = features_extractor(file_path)
            except Exception as e:
                print(f"Error with file {file_name}: {e}")
            else:
                extracted_features.append([data, class_label])

In [None]:
# One row per successfully processed clip: its MFCC vector and its class-folder name.
extracted_features_df=pd.DataFrame(extracted_features, columns=['features', 'class'])
extracted_features_df.head()

In [None]:
# Turn the two DataFrame columns into arrays: X stacks the MFCC vectors, y holds the string labels.
X=np.array(extracted_features_df['features'].tolist())
y=np.array(extracted_features_df['class'].tolist())

In [None]:
# Expected to be (number of clips, number of MFCC coefficients).
X.shape

In [None]:
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# Map class names to integer ids, then to one-hot rows.
# The labels are read from the DataFrame instead of from `y` itself: the original
# `y = f(y)` form overwrote its own input, so running this cell a second time fed
# one-hot rows back into the encoder and failed.
labelencoder=LabelEncoder()
y=to_categorical(labelencoder.fit_transform(np.array(extracted_features_df['class'].tolist())))

In [None]:
# Hold out 20% of the MFCC feature rows; random_state pins the shuffle.
# These four arrays are only inspected below - the CNN cells later in the notebook do not reference them.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

In [None]:
# Shape of the training part of the MFCC split.
X_train.shape

In [None]:
# Shape of the held-out part of the MFCC split.
X_test.shape

In [None]:
# Shape of the one-hot training labels.
y_train.shape

In [None]:
# Shape of the one-hot held-out labels.
y_test.shape

In [None]:
# Inspect the training feature values.
X_train

In [None]:
# Inspect the one-hot training labels.
y_train

In [None]:
def extract_enhanced_melspec(file_path, sr=22050, duration=5, n_mels=128, augment=False):
    """Turn one audio file into a fixed-size, normalised log-mel spectrogram.

    The clip is loaded at `sr`, limited to `duration` seconds, optionally
    augmented, zero-padded or cut to exactly `sr * duration` samples, converted
    to a mel spectrogram in dB relative to the clip's own peak, and finally
    standardised to zero mean / unit variance over the whole clip.

    Args:
        file_path: Path of the audio file.
        sr: Sample rate requested from librosa.load.
        duration: Clip length in seconds; fractional values are accepted.
        n_mels: Number of mel bands.
        augment: When True, random augmentation *may* be applied. A first coin
            flip decides whether to augment at all; time-stretch (rate in
            [0.9, 1.1]) and pitch-shift (-2..2 semitones) are then each applied
            with probability 0.5. The clip therefore comes back unchanged at
            least half of the time even with augment=True.

    Returns:
        A 2-D array (mel bands x frames), or None if the file could not be
        processed. Failures are printed so skipped files are visible.
    """
    try:
        y, sr = librosa.load(file_path, sr=sr, duration=duration)

        if augment and np.random.random() > 0.5:
            if np.random.random() > 0.5:
                rate = np.random.uniform(0.9, 1.1)
                y = librosa.effects.time_stretch(y, rate=rate)

            if np.random.random() > 0.5:
                n_steps = np.random.randint(-2, 3)
                y = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

        # Must be an int: np.pad and slicing reject a float length, which made
        # every call with a fractional `duration` fail.
        target_length = int(sr * duration)
        if len(y) < target_length:
            y = np.pad(y, (0, target_length - len(y)), mode='constant')
        else:
            # Time-stretching can lengthen the clip, so always cut back to size.
            y = y[:target_length]

        mel_spec = librosa.feature.melspectrogram(
            y=y, sr=sr, n_mels=n_mels, fmax=8000, n_fft=2048, hop_length=512
        )
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # Epsilon keeps the division finite for constant (e.g. silent) clips.
        mel_spec_db = (mel_spec_db - mel_spec_db.mean()) / (mel_spec_db.std() + 1e-6)

        return mel_spec_db
    except Exception as e:
        # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit through, so a long extraction loop can still be stopped.
        print(f"Error with file {file_path}: {e}")
        return None

X_train_all = []
y_train_all = []

# Build the CNN training set: for every clip, one clean spectrogram plus one
# spectrogram extracted with augment=True.
#
# Directory listings are sorted because os.listdir order is filesystem-dependent;
# a stable sample order is what makes the seeded fold split downstream repeatable.
#
# NOTE(review): the clean and augmented versions of a clip land in the same pool.
# A per-sample split can therefore put them on opposite sides of a train/validation
# boundary; a split grouped by source file would avoid that.
for class_label in sorted(os.listdir(train_dir)):
    class_path = os.path.join(train_dir, class_label)
    if not os.path.isdir(class_path):
        continue

    # Case-insensitive match so files such as 'clip.WAV' are not silently ignored.
    files = sorted(f for f in os.listdir(class_path) if f.lower().endswith(('.wav', '.mp3')))

    for file_name in tqdm(files, desc=f"Processing {class_label}"):
        file_path = os.path.join(class_path, file_name)

        # Clean pass first, augmented pass second (same order as before).
        for use_augmentation in (False, True):
            mel_spec = extract_enhanced_melspec(file_path, augment=use_augmentation)
            if mel_spec is not None:
                X_train_all.append(mel_spec)
                y_train_all.append(class_label)

# Fail here with a clear message rather than later with an opaque shape error.
if not X_train_all:
    raise RuntimeError(f"No audio features could be extracted from {train_dir}")

X_train_all = np.array(X_train_all)
# Add the trailing channel axis expected by the Conv2D layers.
X_train_all = X_train_all[..., np.newaxis]

print(f"\nTotal samples with augmentation: {len(X_train_all)}")

# Encode labels
le = LabelEncoder()
y_encoded = to_categorical(le.fit_transform(y_train_all))

In [None]:
#MODEL
def build_enhanced_model(input_shape, num_classes):
    """Assemble the (uncompiled) CNN classifier for spectrogram inputs.

    Layout: four convolutional stages (32, 64, 128, 256 filters; two 3x3 convs
    with batch norm each, then 2x2 max-pooling and dropout), a single 512-filter
    conv with batch norm and global average pooling, three dense stages
    (1024, 512, 256 units with batch norm and dropout; the first two carry an
    L2 kernel penalty of 0.001), and a softmax output layer.

    Args:
        input_shape: Shape of one input sample, passed to layers.Input.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A models.Sequential instance; compiling is left to the caller.
    """

    def conv_stage(filters, drop_rate):
        # conv -> BN -> conv -> BN -> pool -> dropout
        stage = []
        for _ in range(2):
            stage.append(layers.Conv2D(filters, (3, 3), activation='relu', padding='same'))
            stage.append(layers.BatchNormalization())
        stage.append(layers.MaxPooling2D((2, 2)))
        stage.append(layers.Dropout(drop_rate))
        return stage

    def dense_stage(units, drop_rate, regularized):
        # dense -> BN -> dropout, with an optional L2 penalty on the dense kernel
        dense_kwargs = {'activation': 'relu'}
        if regularized:
            dense_kwargs['kernel_regularizer'] = tf.keras.regularizers.l2(0.001)
        return [
            layers.Dense(units, **dense_kwargs),
            layers.BatchNormalization(),
            layers.Dropout(drop_rate),
        ]

    stack = [layers.Input(shape=input_shape)]

    # 1-4
    for filters, drop_rate in ((32, 0.2), (64, 0.2), (128, 0.3), (256, 0.3)):
        stack.extend(conv_stage(filters, drop_rate))

    # 5
    stack.append(layers.Conv2D(512, (3, 3), activation='relu', padding='same'))
    stack.append(layers.BatchNormalization())
    stack.append(layers.GlobalAveragePooling2D())

    # classifier head
    for units, drop_rate, regularized in ((1024, 0.5, True), (512, 0.5, True), (256, 0.4, False)):
        stack.extend(dense_stage(units, drop_rate, regularized))

    stack.append(layers.Dense(num_classes, activation='softmax'))

    return models.Sequential(stack)

In [None]:
# 5-fold stratified cross-validation. One model is trained per fold, kept in
# models_list for the ensemble used at prediction time, and saved to Drive.
#
# NOTE(review): X_train_all holds both the clean and the augmented spectrogram of
# every clip, and the folds below are drawn per sample. A clip's augmented copy can
# therefore sit in the training fold while its clean version is validated, so the
# accuracies printed here are likely optimistic. Splitting by source file (for
# example with a group-aware splitter such as StratifiedGroupKFold) would remove
# the leak; that needs a per-sample file id recorded where the dataset is built.
n_splits = 5
models_list = []
fold_scores = []

skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
# StratifiedKFold needs integer class ids, not one-hot rows.
y_labels = np.argmax(y_encoded, axis=1)

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_all, y_labels), 1):
    print(f"\n{'='*70}")
    print(f"FOLD {fold}/{n_splits}")
    print(f"{'='*70}")

    X_tr, X_val = X_train_all[train_idx], X_train_all[val_idx]
    y_tr, y_val = y_encoded[train_idx], y_encoded[val_idx]

    # Fresh, untrained network for every fold.
    model = build_enhanced_model(X_train_all.shape[1:], y_encoded.shape[1])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
        metrics=['accuracy']
    )

    # Stop after 20 epochs without a better validation accuracy and roll back
    # to the best weights seen.
    early_stop = callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=20,
        restore_best_weights=True,
        verbose=0
    )

    # Halve the learning rate after 7 epochs without a better validation loss.
    reduce_lr = callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=7,
        min_lr=1e-7,
        verbose=0
    )

    history = model.fit(
        X_tr, y_tr,
        validation_data=(X_val, y_val),
        epochs=100,
        batch_size=32,
        callbacks=[early_stop, reduce_lr],
        verbose=0
    )

    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    print(f"Fold {fold} Validation Accuracy: {val_acc:.4f}")

    models_list.append(model)
    fold_scores.append(val_acc)

    # Persist each fold so the ensemble can be rebuilt without retraining.
    model.save(f'{base_dir}/model_fold_{fold}.h5')

print(f"Individual fold accuracies: {[f'{s:.4f}' for s in fold_scores]}")
# The separator was mis-encoded as 'Â±'; the intended character is '±'.
print(f"Mean CV Accuracy: {np.mean(fold_scores):.4f} (±{np.std(fold_scores):.4f})")

In [None]:
# This cell is only for loading the saved models. I ran out of GPU time, so I used the saved models instead of retraining. I am not sure this is fully correct, but it is my best attempt.

# from tensorflow.keras.models import load_model
# import os
# import numpy as np


# base_dir = '/content/drive/MyDrive/Decibel Duel'
# n_splits = 5

# model_paths = [os.path.join(base_dir, f'model_fold_{i}.h5') for i in range(1, n_splits+1)]
# missing = [p for p in model_paths if not os.path.exists(p)]
# if missing:
#     raise FileNotFoundError(f"Missing model files: {missing}")

# class_names = sorted([d for d in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, d))])

# le = LabelEncoder()
# le.fit(class_names)

# models_list = []
# for p in model_paths:
#     print(f"Loading {p} ...")
#     model = load_model(p)
#     models_list.append(model)

# print(f"Loaded {len(models_list)} models. Ready for ensemble + TTA.")


In [None]:
test_files = sorted([f for f in os.listdir(test_dir) if f.endswith(('.wav', '.mp3'))])
predictions = []

print(f"\nProcessing {len(test_files)} test files...")

for file_name in tqdm(test_files):
    file_path = os.path.join(test_dir, file_name)

    all_preds = []

    for model in models_list:
        mel_spec = extract_enhanced_melspec(file_path, augment=False)
        if mel_spec is not None:
            mel_input = mel_spec[np.newaxis, ..., np.newaxis]
            pred = model.predict(mel_input, verbose=0)
            all_preds.append(pred[0])

        for _ in range(2):
            mel_spec_aug = extract_enhanced_melspec(file_path, augment=True)
            if mel_spec_aug is not None:
                mel_input = mel_spec_aug[np.newaxis, ..., np.newaxis]
                pred = model.predict(mel_input, verbose=0)
                all_preds.append(pred[0])

    if len(all_preds) > 0:
        avg_pred = np.mean(all_preds, axis=0)
        pred_class_idx = np.argmax(avg_pred)
        pred_class = le.classes_[pred_class_idx]

        predictions.append({
            'ID': file_name,
            'Class': pred_class
        })

# Save submission
submission_df = pd.DataFrame(predictions)
output_file = f'{base_dir}/submission.csv'
submission_df.to_csv(output_file, index=False)