<a href="https://colab.research.google.com/github/MayankKhoria2007/Decibel-Duel-solution/blob/main/Audioclassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Open Colab's file-upload widget. The next cell copies a file named
# kaggle.json from the working directory, so that file must be uploaded here.
from google.colab import files
uploaded = files.upload()


In [None]:
# Place the Kaggle API token in ~/.kaggle so the kaggle CLI can authenticate.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
# Download the training and test dataset archives with the kaggle CLI.
!kaggle datasets download -d mayankkhoria/frequencytrain
!kaggle datasets download -d mayankkhoria/frequencytest


In [None]:
!unzip /content/frequencytrain.zip -d /content/frequencytrain
!unzip /content/frequencytrain.zip -d /content/frequencytest



In [None]:
# =============================================================
# Trains the classifier, reports validation accuracy, and writes
# submission.csv (test-set predictions only)
# =============================================================

import os
import math
import random
import warnings
from glob import glob
from tqdm import tqdm

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchaudio
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Silence every warning for the rest of the run.
warnings.filterwarnings("ignore")

# Seed the PyTorch, NumPy and stdlib random generators with one shared value.
SEED = 42
for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(SEED)

# =============================================================
# Dataset paths
# =============================================================

TRAIN_DIR = "/content/frequencytrain/train"
# NOTE(review): this points at a "train" sub-folder inside the test archive's
# extraction directory -- confirm against the archive's actual layout.
TEST_DIR  = "/content/frequencytest/train"

# =============================================================
# Audio settings and hyperparameters
# =============================================================
SAMPLE_RATE = 22050                          # target sample rate in Hz
DURATION = 2.5                               # clip length in seconds
NUM_SAMPLES = int(SAMPLE_RATE * DURATION)    # samples per fixed-length clip
N_MELS = 128                                 # number of mel filterbank bins
N_FFT = 2048                                 # FFT window size
HOP_LENGTH = 512                             # hop between successive frames

BATCH_SIZE = 32
EPOCHS = 30
LEARNING_RATE = 1e-3
# Use the GPU when one is present; otherwise fall back to the CPU so the
# script still runs on a CPU-only runtime instead of failing at `.to(DEVICE)`.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_WORKERS = 0                              # DataLoader worker processes

# =============================================================
# Helper functions
# =============================================================
def load_audio(path, sr=SAMPLE_RATE, duration=DURATION):
    """Load an audio file as a mono waveform of exactly ``int(sr * duration)`` samples.

    The file is read with ``torchaudio.load``, resampled to ``sr`` when its
    native rate differs, averaged across channels, and finally truncated or
    right-padded with zeros to the target length.

    Args:
        path: Audio file path handed to ``torchaudio.load``.
        sr: Target sample rate in Hz.
        duration: Target clip length in seconds.

    Returns:
        The waveform tensor with a single channel (channel axis kept) and
        ``int(sr * duration)`` samples along the last axis.
    """
    signal, native_sr = torchaudio.load(path)
    if native_sr != sr:
        signal = torchaudio.functional.resample(signal, orig_freq=native_sr, new_freq=sr)

    # Collapse all channels into one while keeping the channel axis.
    mono = signal.mean(dim=0, keepdim=True)

    wanted = int(sr * duration)
    missing = wanted - mono.shape[-1]
    if missing > 0:
        # Too short: append zeros on the right of the time axis.
        return torch.nn.functional.pad(mono, (0, missing))
    if missing < 0:
        # Too long: keep only the leading samples.
        return mono[..., :wanted]
    return mono

# =============================================================
# Dataset class
# =============================================================
class AudioDatasetTorch(Dataset):
    """Dataset yielding normalised log-mel spectrograms for audio files.

    Every entry of ``file_label_list`` is a ``(path, label)`` pair. Labelled
    entries yield ``(spectrogram, class_index)``; entries whose label is
    ``None`` yield ``(spectrogram, file_basename)`` instead.
    """

    def __init__(self, file_label_list, encoder=None, is_train=True,
                 n_mels=N_MELS, n_fft=N_FFT, hop_length=HOP_LENGTH, sr=SAMPLE_RATE):
        """Store the items and build the spectrogram / augmentation transforms.

        Args:
            file_label_list: Sequence of ``(path, label)`` pairs.
            encoder: Fitted label encoder; required when labels are not ``None``.
            is_train: When true, random frequency/time masking is applied.
            n_mels: Number of mel bins.
            n_fft: FFT size.
            hop_length: Hop between frames.
            sr: Sample rate passed to the mel transform.
        """
        self.items = file_label_list
        self.encoder = encoder
        self.is_train = is_train

        # Waveform -> mel power spectrogram -> decibel scale.
        self.mel_transform = torchaudio.transforms.MelSpectrogram(
            sample_rate=sr,
            n_fft=n_fft,
            hop_length=hop_length,
            n_mels=n_mels,
        )
        self.db_transform = torchaudio.transforms.AmplitudeToDB()

        # Masking transforms used as training-time augmentation.
        self.freq_mask = torchaudio.transforms.FrequencyMasking(freq_mask_param=18)
        self.time_mask = torchaudio.transforms.TimeMasking(time_mask_param=25)

    def __len__(self):
        """Return the number of ``(path, label)`` items."""
        return len(self.items)

    def __getitem__(self, idx):
        """Return the spectrogram for item ``idx`` with its target or file name."""
        path, label = self.items[idx]

        spec = self.db_transform(self.mel_transform(load_audio(path)))
        # Standardise each spectrogram on its own; the epsilon avoids division by zero.
        spec = (spec - spec.mean()) / (spec.std() + 1e-6)

        if self.is_train:
            # Each mask is applied independently with probability 0.5,
            # frequency masking first and time masking second.
            for mask in (self.freq_mask, self.time_mask):
                if random.random() < 0.5:
                    spec = mask(spec)

        if label is not None:
            return spec, int(self.encoder.transform([label])[0])
        return spec, os.path.basename(path)

# =============================================================
# Data preparation
# =============================================================
# Class names are the sub-directory names of TRAIN_DIR; sorting keeps the
# label order stable between runs.
labels = sorted([d for d in os.listdir(TRAIN_DIR) if os.path.isdir(os.path.join(TRAIN_DIR, d))])
print("Labels:", labels)
encoder = LabelEncoder()
encoder.fit(labels)

AUDIO_EXTENSIONS = (".wav", ".flac", ".mp3")

# os.listdir returns entries in arbitrary order, so each folder listing is
# sorted: with a fixed input order the seeded split below is reproducible.
train_files = []
for lab in labels:
    folder = os.path.join(TRAIN_DIR, lab)
    for f in sorted(os.listdir(folder)):
        if f.lower().endswith(AUDIO_EXTENSIONS):
            train_files.append((os.path.join(folder, f), lab))

if not train_files:
    # Fail early with a clear message instead of an opaque error from the split.
    raise RuntimeError(f"No audio files found in the class folders of {TRAIN_DIR}")

# Stratified hold-out split: 15% of the labelled files become the validation set.
train_paths, val_paths = train_test_split(
    train_files, test_size=0.15, stratify=[l for (_, l) in train_files], random_state=42
)

# Test files carry no label, so each item is paired with None.
test_files = sorted([f for f in os.listdir(TEST_DIR) if f.lower().endswith(AUDIO_EXTENSIONS)])
test_items = [(os.path.join(TEST_DIR, f), None) for f in test_files]
if not test_items:
    print(f"WARNING: no audio files found directly in {TEST_DIR}; submission.csv will be empty.")

print(f"Train samples: {len(train_paths)}, Val samples: {len(val_paths)}, Test samples: {len(test_items)}")

# Augmentation (is_train=True) is enabled for the training split only.
train_dataset = AudioDatasetTorch(train_paths, encoder=encoder, is_train=True)
val_dataset   = AudioDatasetTorch(val_paths, encoder=encoder, is_train=False)
test_dataset  = AudioDatasetTorch(test_items, encoder=None, is_train=False)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_loader  = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=NUM_WORKERS)

# =============================================================
# Model definition
# =============================================================
class EEGStyleCNN(nn.Module):
    """Four-stage 2-D CNN classifier for single-channel inputs.

    ``features`` stacks four Conv(3x3) -> BatchNorm -> ReLU stages with channel
    widths 32, 64, 128 and 256. The first three stages end in 2x2 max pooling,
    the last in global average pooling followed by dropout. ``classifier``
    flattens the 256 pooled features and maps them to ``n_classes`` logits
    through one hidden layer of 128 units.
    """

    def __init__(self, n_classes):
        """Build the network.

        Args:
            n_classes: Number of output logits.
        """
        super().__init__()

        widths = (1, 32, 64, 128, 256)
        final_stage = len(widths) - 2
        stages = []
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:])):
            stages.append(nn.Conv2d(c_in, c_out, 3, padding=1))
            stages.append(nn.BatchNorm2d(c_out))
            stages.append(nn.ReLU())
            # Intermediate stages halve the spatial size; the last one pools
            # everything down to a single value per channel.
            if stage == final_stage:
                stages.append(nn.AdaptiveAvgPool2d((1, 1)))
            else:
                stages.append(nn.MaxPool2d(2))
        stages.append(nn.Dropout(0.35))
        self.features = nn.Sequential(*stages)

        head = [
            nn.Flatten(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, n_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return class logits for a batch ``x`` of single-channel 2-D inputs."""
        return self.classifier(self.features(x))

# One output logit per class folder; parameters are moved to the target device.
model = EEGStyleCNN(n_classes=len(labels))
model = model.to(DEVICE)
print(model)

# =============================================================
# Training setup
# =============================================================
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=1e-5,
)
# mode="max": the scheduler is stepped with validation accuracy and halves the
# learning rate when that metric stops improving.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="max",
    factor=0.5,
    patience=3,
)

# Early-stopping bookkeeping: best accuracy so far, the number of consecutive
# epochs without improvement, and how many such epochs are tolerated.
best_val_acc, stalled = 0.0, 0
patience = 6

# =============================================================
# Training loop
# =============================================================
# Copy of the weights from the epoch with the best validation accuracy. They
# are restored after training so that later inference does not use a worse
# last-epoch model (early stopping ends several epochs past the best one).
best_state = None
val_acc = 0.0  # keeps the summary below valid even if no epoch runs

for epoch in range(EPOCHS):
    # ---- Training pass ----
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [train]", leave=False)
    for inputs, targets in pbar:
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch figure is a per-sample average.
        running_loss += loss.item() * inputs.size(0)
        _, preds = outputs.max(1)
        correct += (preds == targets).sum().item()
        total += targets.size(0)

        pbar.set_postfix({"loss": f"{running_loss / total:.4f}", "acc": f"{100*correct/total:.2f}"})

    # max(..., 1) avoids a ZeroDivisionError when a loader yields no samples.
    train_loss = running_loss / max(total, 1)
    train_acc = 100 * correct / max(total, 1)

    # ---- Validation pass ----
    model.eval()
    val_correct, val_total, val_loss = 0, 0, 0.0
    with torch.no_grad():
        for inputs, targets in tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [val]", leave=False):
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * inputs.size(0)
            _, preds = outputs.max(1)
            val_correct += (preds == targets).sum().item()
            val_total += targets.size(0)

    val_loss = val_loss / max(val_total, 1)
    val_acc = 100 * val_correct / max(val_total, 1)

    print(f"Epoch {epoch+1:02d} -> Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% "
          f"|| Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

    # The scheduler monitors validation accuracy.
    scheduler.step(val_acc)

    if val_acc > best_val_acc + 1e-4:
        best_val_acc = val_acc
        stalled = 0
        # state_dict() returns references to the live tensors, so clone them.
        best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
    else:
        stalled += 1
        if stalled >= patience:
            print(f"Early stopping: no improvement in val acc for {patience} epochs.")
            break

if best_state is not None:
    # Continue with the best-performing weights rather than the last epoch's.
    model.load_state_dict(best_state)

# =============================================================
# Final validation accuracy
# =============================================================
# "Final" is the accuracy of the last epoch that ran; "Best" is the accuracy
# of the epoch whose weights were restored above.
print("\n==========================")
print(f"✅ Final Validation Accuracy: {val_acc:.2f}%")
print(f"🏆 Best Validation Accuracy:  {best_val_acc:.2f}%")
print("==========================\n")

# =============================================================
# Prediction on test data
# =============================================================
# Inference mode: disables dropout and uses BatchNorm running statistics.
model.eval()
filenames, preds_labels = [], []

with torch.no_grad():
    # Unlabelled items yield (spectrogram, file name); after batching, `fnames`
    # holds one name per sample. Handling the whole batch keeps this loop
    # correct for any test_loader batch size, not only batch_size=1.
    for mel_db, fnames in tqdm(test_loader, desc="Predicting on test set"):
        mel_db = mel_db.to(DEVICE)
        outputs = model(mel_db)
        _, pred = outputs.max(1)
        # Map predicted class indices back to the original label strings.
        batch_labels = encoder.inverse_transform(pred.cpu().numpy())
        filenames.extend(fnames)
        preds_labels.extend(batch_labels)

# One row per test file: its base file name and the predicted class label.
submission = pd.DataFrame({"ID": filenames, "Class": preds_labels})
out_path = "/content/submission.csv"
submission.to_csv(out_path, index=False)
print(f"✅ submission.csv created at: {out_path}")
