# The Last Frequency: Final Winning Ensemble

This notebook combines 15 models (5 x ResNet-18, 5 x ResNet-34, 5 x EfficientNet-B0) to achieve the highest possible accuracy on the leaderboard.

### Strategy:
1. **Load weights** for all folds of all three architectures.
2. **Predict Probabilities**: Calculate Softmax probabilities for each model on the test sets.
3. **Weighted Combination**: Combine the per-model probabilities using fixed per-architecture weights (ResNet-18: 1.0, ResNet-34: 1.2, EfficientNet-B0: 1.1).
4. **Final Decision**: Take the `argmax` of the combined probabilities for each test clip.

In [None]:
import os, json, numpy as np, pandas as pd, torch, torch.nn as nn, torch.nn.functional as F, torchaudio, torchvision.models as models
from tqdm.auto import tqdm
from torch.utils.data import DataLoader, Dataset

# Kaggle input directories: trained checkpoints and competition data.
MODEL_PATH = '/kaggle/input/the-last-frequency-models'
DATA_DIR = '/kaggle/input/the-last-frequency'

# Run on the GPU when PyTorch reports one as available; otherwise use the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class CFG:
    """Constants shared by the spectrogram front-end, the models and the loader."""

    # Arguments forwarded to torchaudio's MelSpectrogram.
    sample_rate = 16000
    n_fft = 1024
    hop_length = 256
    n_mels = 128
    # Spectrograms are cropped or padded to exactly this many time frames.
    target_frames = 64

    # DataLoader batch size used during inference.
    batch_size = 128
    # Width of every model's final linear layer.
    num_classes = 35

# Load the class-index -> command-name table. JSON object keys are always
# strings, so they are converted to int to match the argmax indices used for
# lookup later. The encoding is pinned to UTF-8 so decoding does not depend on
# the platform's locale default.
with open(f'{DATA_DIR}/label_map.json', encoding='utf-8') as f:
    label_map = {int(k): v for k, v in json.load(f).items()}

class SpecTransform(nn.Module):
    """Turn raw waveforms into fixed-width mel spectrograms in decibels.

    All front-end settings are read from ``CFG``. Whatever the input length,
    the last axis of the output is exactly ``CFG.target_frames`` long.
    """

    def __init__(self):
        super().__init__()
        mel_kwargs = dict(
            sample_rate=CFG.sample_rate,
            n_fft=CFG.n_fft,
            hop_length=CFG.hop_length,
            n_mels=CFG.n_mels,
        )
        self.mel_spec = torchaudio.transforms.MelSpectrogram(**mel_kwargs)
        self.amp_to_db = torchaudio.transforms.AmplitudeToDB()

    def forward(self, x):
        """Return the dB mel spectrogram of ``x`` with a new axis at position 1.

        ``x`` is presumably a ``(batch, samples)`` waveform tensor -- confirm
        against the caller. Spectrograms that are too long are cropped on the
        right; ones that are too short are right-padded using ``F.pad``'s
        default fill.
        """
        spec = self.mel_spec(x)
        spec = self.amp_to_db(spec)

        missing = CFG.target_frames - spec.shape[-1]
        if missing < 0:
            # Too many frames: keep only the leading target_frames.
            spec = spec[..., :CFG.target_frames]
        elif missing > 0:
            # Too few frames: extend the last axis on the right.
            spec = F.pad(spec, (0, missing))

        # Insert the single-channel axis expected by the 1-channel conv stems.
        return spec.unsqueeze(1)

class AudioResNet(nn.Module):
    """ResNet classifier that takes raw waveforms as input.

    The waveform goes through ``SpecTransform`` and the resulting
    single-channel spectrogram is fed to a torchvision ResNet whose first
    convolution and final linear layer have been replaced.

    Args:
        arch: ``'resnet18'`` selects ResNet-18. Any other value selects
            ResNet-34.
    """

    def __init__(self, arch='resnet18'):
        super().__init__()
        build = models.resnet18 if arch == 'resnet18' else models.resnet34
        net = build(weights=None)

        # The stock stem expects 3 input channels; spectrograms have one.
        net.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Resize the classification head to the number of target classes.
        net.fc = nn.Linear(net.fc.in_features, CFG.num_classes)

        front_end = SpecTransform()
        self.backbone = net
        self.spec_layer = front_end

    def forward(self, x):
        """Return the backbone's output for the spectrogram of ``x``."""
        spectrogram = self.spec_layer(x)
        return self.backbone(spectrogram)

class AudioEffNet(nn.Module):
    """EfficientNet-B0 classifier that takes raw waveforms as input.

    The waveform goes through ``SpecTransform`` and the resulting
    single-channel spectrogram is fed to a torchvision EfficientNet-B0 whose
    first convolution and classifier layer have been replaced.
    """

    def __init__(self):
        super().__init__()
        net = models.efficientnet_b0(weights=None)

        # Rebuild the stem convolution with one input channel, keeping the
        # original layer's output width and geometry.
        stem = net.features[0][0]
        net.features[0][0] = nn.Conv2d(
            1,
            stem.out_channels,
            kernel_size=stem.kernel_size,
            stride=stem.stride,
            padding=stem.padding,
            bias=False,
        )

        # Resize the classification head to the number of target classes.
        head_in = net.classifier[1].in_features
        net.classifier[1] = nn.Linear(head_in, CFG.num_classes)

        front_end = SpecTransform()
        self.backbone = net
        self.spec_layer = front_end

    def forward(self, x):
        """Return the backbone's output for the spectrogram of ``x``."""
        spectrogram = self.spec_layer(x)
        return self.backbone(spectrogram)

class TestDataset(Dataset):
    """Unlabelled dataset that serves each stored waveform as a float32 tensor.

    Args:
        waveforms: Indexable collection whose items are NumPy arrays (each
            item is passed to ``torch.from_numpy``).
    """

    def __init__(self, waveforms):
        self.waveforms = waveforms

    def __len__(self):
        return len(self.waveforms)

    def __getitem__(self, idx):
        sample = torch.from_numpy(self.waveforms[idx])
        return sample.to(torch.float32)

### Inference Loop

In [None]:
def get_probs(waveforms):
    """Return the ensemble's weighted-average class probabilities.

    Every checkpoint found under ``MODEL_PATH`` (5 folds for each of the three
    architectures) is run over ``waveforms``. Each model's softmax output is
    scaled by its architecture weight, the scaled outputs are accumulated, and
    the total is divided by the sum of the weights actually used.

    Args:
        waveforms: Indexable collection of NumPy waveforms, wrapped in
            ``TestDataset`` and iterated in order (no shuffling).

    Returns:
        NumPy array with one row per waveform and one column per class. Each
        row is the weighted mean of the per-model probabilities, so its argmax
        is the same as that of the plain weighted sum.

    Raises:
        FileNotFoundError: If none of the expected checkpoint files exist.
    """
    loader = DataLoader(TestDataset(waveforms), batch_size=CFG.batch_size, shuffle=False)

    # Architecture config: (model factory, ensemble weight, checkpoint prefix)
    models_to_run = [
        (AudioResNet, 1.0, 'best_model_fold'),                         # ResNet-18
        (lambda: AudioResNet(arch='resnet34'), 1.2, 'resnet34_fold'),  # ResNet-34 (higher weight)
        (AudioEffNet, 1.1, 'effnet_fold'),                             # EffNet-B0
    ]

    # Keep a running sum rather than one array per model, so peak memory does
    # not grow with the number of checkpoints.
    weighted_sum = None
    total_weight = 0.0

    for model_class, weight, prefix in models_to_run:
        for fold in range(5):
            path = f'{MODEL_PATH}/{prefix}_{fold}.pth'
            if not os.path.exists(path):
                print(f"Warning: {path} not found, skipping.")
                continue

            print(f"Predicting with {prefix} Fold {fold} (Weight: {weight})...")
            model = model_class().to(device)
            # NOTE(review): torch.load unpickles the file; only load
            # checkpoints from a trusted source.
            model.load_state_dict(torch.load(path, map_location=device))
            model.eval()

            probs = []
            with torch.no_grad():
                for x in tqdm(loader):
                    out = model(x.to(device))
                    probs.append(F.softmax(out, dim=1).cpu().numpy())

            fold_probs = np.concatenate(probs) * weight
            weighted_sum = fold_probs if weighted_sum is None else weighted_sum + fold_probs
            total_weight += weight

            # Drop the reference so the model's memory can be reclaimed before
            # the next checkpoint is loaded.
            del model

    if weighted_sum is None:
        # Summing an empty list would silently yield the scalar 0.0 and fail
        # much later in the caller; report the real problem here instead.
        raise FileNotFoundError(
            f"No model checkpoints were found under {MODEL_PATH}; cannot build the ensemble."
        )

    return weighted_sum / total_weight

# --- Load both test splits -------------------------------------------------
print("Loading test data...")
pub = np.load(f'{DATA_DIR}/public_test_waveforms.npy')
priv = np.load(f'{DATA_DIR}/private_test_waveforms.npy')

# --- Run the full ensemble on each split -----------------------------------
print("Starting Grand Ensemble Inference...")
final_pub_probs = get_probs(pub)
final_priv_probs = get_probs(priv)

# --- Decode predictions -----------------------------------------------------
# Take the arg-max class per clip, public split first and private split second,
# then translate each class index to its command string via label_map.
per_split_indices = [probs.argmax(axis=1) for probs in (final_pub_probs, final_priv_probs)]
final_indices = np.concatenate(per_split_indices)
final_cmds = list(map(label_map.__getitem__, final_indices))

# --- Write the submission file ----------------------------------------------
# Ids are consecutive row numbers starting at 0 over the concatenated splits.
submission = pd.DataFrame({'Id': range(len(final_cmds)), 'Command': final_cmds})
submission.to_csv('submission.csv', index=False)
print("\nFinal submission.csv saved!")