# Step 1: Pseudo-Labeling Generator (ResNet-18 Only)

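Этот блокнот использует только ResNet-18 (ансамбль из 5 фолдов), чтобы сгенерировать псевдометки для тех тестовых сэмплов, в предсказании которых модель уверена (максимальная вероятность выше порога `THRESHOLD`).
Модель настроена на одноканальный (1-канальный) вход для совместимости с твоими текущими весами.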

In [None]:
import os, json, numpy as np, pandas as pd, torch, torch.nn as nn, torch.nn.functional as F, torchaudio, torchvision.models as models
from tqdm.auto import tqdm
from torch.utils.data import DataLoader, Dataset

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Directory searched for the per-fold checkpoint files.
MODEL_PATH = '/kaggle/input/the-last-frequency-models'
# Directory holding the train / test .npy arrays.
DATA_DIR = '/kaggle/input/the-last-frequency'
# Confidence threshold: a test sample is pseudo-labelled only when its
# top averaged class probability is strictly greater than this value.
THRESHOLD = 0.98

class CFG:
    """Static configuration shared by the spectrogram front end and inference."""

    # Mel-spectrogram parameters (passed to torchaudio's MelSpectrogram).
    sample_rate = 16000
    n_fft = 1024
    hop_length = 256
    n_mels = 128
    # Number of time frames every spectrogram is cropped or padded to.
    target_frames = 64

    # DataLoader batch size and size of the classifier output layer.
    batch_size = 128
    num_classes = 35

class SpecTransform(nn.Module):
    """Turn waveforms into dB-scaled mel spectrograms of a fixed frame count.

    The last axis of the spectrogram is cropped or right-padded with zeros so
    that it always has exactly ``CFG.target_frames`` entries, and a singleton
    axis is inserted at position 1 (presumably the channel axis for a
    ``(batch, samples)`` input -- confirm against the caller).
    """

    def __init__(self):
        super().__init__()
        mel_kwargs = dict(
            sample_rate=CFG.sample_rate,
            n_fft=CFG.n_fft,
            hop_length=CFG.hop_length,
            n_mels=CFG.n_mels,
        )
        self.mel_spec = torchaudio.transforms.MelSpectrogram(**mel_kwargs)
        self.amp_to_db = torchaudio.transforms.AmplitudeToDB()

    def forward(self, x):
        """Return the fixed-width dB mel spectrogram of ``x`` with an extra axis 1."""
        spec = self.amp_to_db(self.mel_spec(x))
        n_frames = spec.shape[-1]
        if n_frames > CFG.target_frames:
            # Too long: keep only the leading frames.
            spec = spec[..., :CFG.target_frames]
        elif n_frames < CFG.target_frames:
            # Too short: zero-pad on the right of the last axis.
            spec = F.pad(spec, (0, CFG.target_frames - n_frames))
        return spec.unsqueeze(1)

class AudioResNet(nn.Module):
    """ResNet-18 classifier operating on spectrograms computed on the fly.

    The torchvision ResNet-18 (no pretrained weights) is modified in two
    places: the first convolution takes 1 input channel, and the final fully
    connected layer is replaced by dropout followed by a linear layer with
    ``CFG.num_classes`` outputs.
    """

    def __init__(self):
        super().__init__()
        net = models.resnet18(weights=None)
        # Single-channel stem instead of the default 3-channel one.
        net.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        head_inputs = net.fc.in_features
        net.fc = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(head_inputs, CFG.num_classes),
        )
        self.backbone = net
        self.spec_layer = SpecTransform()

    def forward(self, x):
        """Convert ``x`` to a spectrogram and return the backbone's output."""
        spectrogram = self.spec_layer(x)
        return self.backbone(spectrogram)

class TestDataset(Dataset):
    """Dataset that serves unlabeled waveforms as float32 tensors."""

    def __init__(self, waveforms):
        # Indexable collection whose items are NumPy arrays.
        self.waveforms = waveforms

    def __len__(self):
        return len(self.waveforms)

    def __getitem__(self, idx):
        """Return the ``idx``-th waveform converted to a float32 tensor."""
        sample = torch.from_numpy(self.waveforms[idx])
        return sample.float()

def get_resnet_probs(waveforms):
    """Average the softmax outputs of the available ResNet-18 fold checkpoints.

    Checkpoints are looked up as ``{MODEL_PATH}/best_model_fold_{fold}.pth``
    for folds 0-4. Folds whose file is missing are skipped with a printed
    notice; the remaining folds are averaged with equal weight.

    Args:
        waveforms: Indexable collection of NumPy waveforms (wrapped in
            ``TestDataset``). Presumably a 2-D array of shape
            ``(n_samples, n_audio_samples)`` -- confirm against the data files.

    Returns:
        NumPy array holding, for every input sample, the class probabilities
        averaged over the loaded folds (one row per sample). For empty input
        an empty ``(0, CFG.num_classes)`` float32 array is returned.

    Raises:
        FileNotFoundError: If none of the fold checkpoints exists. Without
            this check ``np.mean`` of an empty list would yield NaN and the
            failure would only surface later as an unrelated axis error.
    """
    prefix = 'best_model_fold'
    candidates = [(fold, f'{MODEL_PATH}/{prefix}_{fold}.pth') for fold in range(5)]

    available = []
    for fold, path in candidates:
        if os.path.exists(path):
            available.append((fold, path))
        else:
            # Best-effort ensemble: a missing fold is tolerated but reported.
            print(f"Skipping ResNet-18 Fold {fold}: checkpoint not found at {path}")

    if not available:
        expected = ', '.join(path for _, path in candidates)
        raise FileNotFoundError(
            f"No ResNet-18 checkpoints found; expected at least one of: {expected}"
        )

    if len(waveforms) == 0:
        # Nothing to predict; np.concatenate would fail on an empty batch list.
        return np.empty((0, CFG.num_classes), dtype=np.float32)

    loader = DataLoader(TestDataset(waveforms), batch_size=CFG.batch_size, shuffle=False)
    all_probs = []
    for fold, path in available:
        print(f"Loading ResNet-18 Fold {fold}...")
        m = AudioResNet().to(device)
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source (consider weights_only=True where supported).
        m.load_state_dict(torch.load(path, map_location=device))
        m.eval()  # disable dropout / use running batch-norm statistics
        with torch.no_grad():
            probs = [
                F.softmax(m(x.to(device)), dim=1).cpu().numpy()
                for x in tqdm(loader, leave=False)
            ]
        all_probs.append(np.concatenate(probs))
    # Equal-weight average over the folds that were actually loaded.
    return np.mean(all_probs, axis=0)

# Score the concatenated public + private test waveforms with the fold ensemble.
print("Predicting probabilities for test set (ResNet-18 Only)... ")
pub = np.load(f'{DATA_DIR}/public_test_waveforms.npy')
priv = np.load(f'{DATA_DIR}/private_test_waveforms.npy')
test_waveforms = np.concatenate((pub, priv), axis=0)
test_probs = get_resnet_probs(test_waveforms)

# Per sample: the top averaged probability and the class index it belongs to.
pseudo_labels = test_probs.argmax(axis=1)
confidences = test_probs.max(axis=1)
# Keep only samples whose top probability is strictly above the threshold.
mask = np.greater(confidences, THRESHOLD)

print(f"Found {mask.sum()} high-confidence samples out of {len(test_waveforms)}")

# Load the original labelled training set.
train_labels = np.load(f'{DATA_DIR}/train_labels.npy')
train_wavs = np.load(f'{DATA_DIR}/train_waveforms.npy')

# Append the confidently predicted test samples and their pseudo-labels
# after the original training data (same order in both arrays).
expanded_wavs = np.concatenate((train_wavs, test_waveforms[mask]), axis=0)
expanded_labels = np.concatenate((train_labels, pseudo_labels[mask]), axis=0)

# Write both arrays to the current working directory.
np.save('expanded_train_waveforms.npy', expanded_wavs)
np.save('expanded_train_labels.npy', expanded_labels)
print("Expanded dataset saved as expanded_train_waveforms.npy and expanded_train_labels.npy")