# The Last Frequency: Final Winning Ensemble (1-Channel Fix)

Этот блокнот настроен на работу с твоими текущими 1-канальными весами. 
Если ты переобучишь модели с Delta-признаками, нужно будет вернуть 3 канала.

In [None]:
import os, json, numpy as np, pandas as pd, torch, torch.nn as nn, torch.nn.functional as F, torchaudio, torchvision.models as models
from tqdm.auto import tqdm
from torch.utils.data import DataLoader, Dataset

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Kaggle input mounts: the competition data (label map, test waveforms) and
# the dataset that holds the trained checkpoint files.
DATA_DIR = '/kaggle/input/the-last-frequency'
MODEL_PATH = '/kaggle/input/the-last-frequency-models'

class CFG:
    """Inference-time settings for the spectrogram front end and the networks."""

    # Mel-spectrogram parameters.
    sample_rate = 16000
    n_fft = 1024
    hop_length = 256
    n_mels = 128
    target_frames = 64

    # Inference loop and network shape.
    batch_size = 128
    num_classes = 35
    in_channels = 1  # set to 1 so the first convolution matches the saved weights

# Build the class-index -> label lookup. JSON object keys are strings, so they
# are converted back to int. The encoding is pinned to UTF-8 (the encoding JSON
# files are expected to use) instead of relying on the platform locale default.
with open(f'{DATA_DIR}/label_map.json', encoding='utf-8') as f:
    label_map = {int(k): v for k, v in json.load(f).items()}

class SpecTransform(nn.Module):
    """Convert waveforms into fixed-width, dB-scaled mel spectrograms."""

    def __init__(self):
        super().__init__()
        # The attribute names feed into state_dict key prefixes, so they must
        # stay `mel_spec` / `amp_to_db` for saved checkpoints to keep loading.
        self.mel_spec = torchaudio.transforms.MelSpectrogram(
            sample_rate=CFG.sample_rate,
            n_fft=CFG.n_fft,
            hop_length=CFG.hop_length,
            n_mels=CFG.n_mels,
        )
        self.amp_to_db = torchaudio.transforms.AmplitudeToDB()

    def forward(self, x):
        """Return the spectrogram of ``x`` with a new axis inserted at dim 1.

        The last (time) axis is cropped, or right-padded with zeros, to exactly
        ``CFG.target_frames`` frames.
        """
        mel = self.mel_spec(x)
        spec = self.amp_to_db(mel)

        deficit = CFG.target_frames - spec.shape[-1]
        if deficit < 0:
            # Too long: keep only the leading frames.
            spec = spec[..., :CFG.target_frames]
        elif deficit > 0:
            # Too short: pad the end of the time axis.
            spec = F.pad(spec, (0, deficit))

        # Add the channel axis; for batched input this yields (B, 1, F, T).
        return spec.unsqueeze(1)

class AudioResNet(nn.Module):
    """ResNet-18/34 classifier that takes raw waveforms as input.

    The waveform is turned into a spectrogram by ``SpecTransform`` and passed
    to a torchvision ResNet whose first convolution and final layer are
    replaced to fit ``CFG.in_channels`` and ``CFG.num_classes``.

    Args:
        arch: ``'resnet18'`` (default) or ``'resnet34'``.

    Raises:
        ValueError: If ``arch`` is not one of the supported names.
    """

    _SUPPORTED_ARCHS = ('resnet18', 'resnet34')

    def __init__(self, arch='resnet18'):
        super().__init__()
        if arch not in self._SUPPORTED_ARCHS:
            # Fail early: silently building a resnet34 for a mistyped name
            # would only surface later as a confusing state_dict mismatch.
            raise ValueError(
                f"Unsupported arch {arch!r}; expected one of {self._SUPPORTED_ARCHS}"
            )
        model = getattr(models, arch)(weights=None)

        # The first convolution takes CFG.in_channels inputs so that it matches
        # the saved conv1 weights ([64, 1, 7, 7] for the 1-channel checkpoints).
        model.conv1 = nn.Conv2d(CFG.in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)

        # Classification head: dropout followed by a linear layer. It stays an
        # nn.Sequential so the parameter names remain `fc.1.*`.
        model.fc = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(model.fc.in_features, CFG.num_classes),
        )
        self.backbone, self.spec_layer = model, SpecTransform()

    def forward(self, x):
        """Convert ``x`` to a spectrogram and return the backbone's output."""
        return self.backbone(self.spec_layer(x))

class AudioEffNet(nn.Module):
    """EfficientNet-B0 classifier that takes raw waveforms via ``SpecTransform``."""

    def __init__(self):
        super().__init__()
        net = models.efficientnet_b0(weights=None)

        # Swap the first convolution for one that accepts CFG.in_channels
        # inputs while keeping the original number of output filters.
        stem_out = net.features[0][0].out_channels
        net.features[0][0] = nn.Conv2d(
            CFG.in_channels, stem_out, 3, stride=2, padding=1, bias=False
        )

        # Replace the final linear layer with dropout + a linear layer that
        # outputs CFG.num_classes values.
        head_in = net.classifier[1].in_features
        net.classifier[1] = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(head_in, CFG.num_classes),
        )

        self.backbone = net
        self.spec_layer = SpecTransform()

    def forward(self, x):
        """Convert ``x`` to a spectrogram and return the backbone's output."""
        spec = self.spec_layer(x)
        return self.backbone(spec)

class TestDataset(Dataset):
    """Dataset wrapper that serves stored waveforms as float32 tensors."""

    def __init__(self, waveforms):
        self.waveforms = waveforms

    def __len__(self):
        return len(self.waveforms)

    def __getitem__(self, idx):
        # from_numpy expects a NumPy array; the cast makes every item float32.
        sample = self.waveforms[idx]
        return torch.from_numpy(sample).to(torch.float32)

In [None]:
def get_probs(waveforms):
    """Return the weighted sum of softmax outputs over all available checkpoints.

    For every architecture and every fold 0-4, the checkpoint
    ``{MODEL_PATH}/{prefix}_{fold}.pth`` is loaded if it exists and run over
    ``waveforms`` in batches. Its class probabilities are multiplied by the
    architecture weight and added to a running total. The total is not
    normalised, which does not change a subsequent argmax.

    Args:
        waveforms: Indexable collection of NumPy waveforms, as accepted by
            ``TestDataset``.

    Returns:
        ``np.ndarray`` of shape ``(len(waveforms), CFG.num_classes)``.

    Raises:
        FileNotFoundError: If none of the expected checkpoint files exists.
    """
    loader = DataLoader(TestDataset(waveforms), batch_size=CFG.batch_size, shuffle=False)

    # (model factory, ensemble weight, checkpoint filename prefix)
    models_to_run = [
        (lambda: AudioResNet(arch='resnet18'), 1.0, 'best_model_fold'),
        (lambda: AudioResNet(arch='resnet34'), 1.2, 'resnet34_fold'),
        (AudioEffNet, 1.1, 'effnet_fold'),
    ]

    # Running sum instead of a list of per-model arrays: only one model's
    # probabilities are held in memory next to the total.
    total = None

    for model_factory, weight, prefix in models_to_run:
        for fold in range(5):
            path = f'{MODEL_PATH}/{prefix}_{fold}.pth'
            if not os.path.exists(path):
                # Best effort: folds without a checkpoint are simply skipped.
                continue

            print(f"Predicting with {prefix}_{fold}...")
            model = model_factory().to(device)
            # NOTE(security): torch.load unpickles the file, so only
            # checkpoints from a trusted source should be placed in MODEL_PATH.
            model.load_state_dict(torch.load(path, map_location=device))
            model.eval()

            with torch.no_grad():
                batches = [
                    F.softmax(model(x.to(device)), dim=1).cpu().numpy()
                    for x in tqdm(loader, leave=False)
                ]

            if batches:
                fold_probs = np.concatenate(batches) * weight
            else:
                # Empty input: np.concatenate([]) would raise, so return an
                # empty result of the right width instead.
                fold_probs = np.zeros((0, CFG.num_classes), dtype=np.float32)

            total = fold_probs if total is None else total + fold_probs

    if total is None:
        # Without this check np.sum([]) would hand the caller a scalar 0.0,
        # which only fails later with an unrelated axis error.
        raise FileNotFoundError(f"No model checkpoints found in {MODEL_PATH!r}")
    return total

# --- Load both test splits, run the ensemble, write the submission file ---
print("Loading test data...")
pub = np.load(f'{DATA_DIR}/public_test_waveforms.npy')
priv = np.load(f'{DATA_DIR}/private_test_waveforms.npy')

print("Starting inference...")
final_pub_probs = get_probs(pub)
final_priv_probs = get_probs(priv)

# Predicted class per sample; public-split rows come first, then private-split rows.
final_indices = np.concatenate(
    [split_probs.argmax(axis=1) for split_probs in (final_pub_probs, final_priv_probs)]
)
final_cmds = [label_map[class_idx] for class_idx in final_indices]

# Ids are the 0-based row positions in the concatenated (public + private) order.
submission = pd.DataFrame({'Id': range(len(final_cmds)), 'Command': final_cmds})
submission.to_csv('submission.csv', index=False)
print("Done! Fixed submission.csv ready.")