# In [18]:


import torch
import torchaudio
from torchvision.models import resnet34


import numpy as np


def load_sample(path):
    """Load an audio file and return its STFT as a two-plane real tensor.

    The waveform is read with ``torchaudio.load``, forced to a fixed length
    by ``prepare_sample`` and transformed with ``torch.stft``.  The real and
    imaginary parts of the complex spectrogram are then concatenated along
    the first dimension.

    Args:
        path: Location of the audio file; passed unchanged to
            ``torchaudio.load``.

    Returns:
        A real ``torch.Tensor`` of shape ``(2, N_FFT // 2 + 1, frames)``:
        index 0 of the first dimension holds the real part and index 1 the
        imaginary part of the spectrogram.
    """
    SAMPLE_RATE = 48000
    # 64 ms analysis window and 16 ms hop expressed in samples, each with an
    # extra 4 samples.
    # NOTE(review): the reason for the "+ 4" offset is not visible here —
    # presumably it matches the preprocessing used at training time; confirm.
    N_FFT = SAMPLE_RATE * 64 // 1000 + 4
    HOP_LENGTH = SAMPLE_RATE * 16 // 1000 + 4

    # NOTE(review): the file's actual sample rate is discarded and no
    # resampling is done, so SAMPLE_RATE is only used to size the STFT.
    signal, _ = torchaudio.load(path)
    signal = prepare_sample(signal)

    # return_complex must be given explicitly for real inputs on current
    # PyTorch; the legacy real-valued layout (trailing dimension of size 2)
    # is deprecated.  No window is passed, so a rectangular window is used.
    spec = torch.stft(
        input=signal,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        normalized=True,
        return_complex=True,
    )

    # Same values the legacy layout exposed as spec[..., 0] and spec[..., 1].
    return torch.cat([spec.real, spec.imag], dim=0)

def prepare_sample(waveform, target_len=165000):
    """Force the first channel of a waveform to a fixed number of samples.

    Only channel 0 of ``waveform`` is used.  Inputs longer than
    ``target_len`` keep their first ``target_len`` samples; shorter inputs
    are left-padded with zeros so the audio ends at the last sample of the
    result.  An empty waveform yields an all-zero result.

    Args:
        waveform: 2-D tensor indexed as ``[channel, sample]``.
        target_len: Number of samples in the result.

    Returns:
        A ``float32`` tensor of shape ``(1, target_len)``.
    """
    output = torch.zeros((1, target_len), dtype=torch.float32)

    clip = waveform[0, :target_len]
    kept = clip.shape[0]
    # Compute the start index explicitly: a negative slice start of "-0"
    # would select the whole buffer and fail for an empty waveform.
    if kept:
        output[0, target_len - kept:] = clip

    return output

def main():
    """Classify one user-supplied audio file and print whether it has noise.

    Builds a ResNet-34 whose first convolution takes 2 input channels and
    whose final layer has a single output, loads weights from a hard-coded
    checkpoint path onto the CPU, prompts on stdin for an audio file path,
    runs the model on the spectrogram from ``load_sample`` and prints the
    verdict.
    """
    import warnings

    model = resnet34()
    # Two input channels: the real and imaginary planes built by load_sample.
    model.conv1 = torch.nn.Conv2d(2, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    model.fc = torch.nn.Linear(in_features=512, out_features=1, bias=True)
    model = model.cpu()

    state_dict = torch.load('/content/drive/MyDrive/weigths_final.pth', map_location=torch.device('cpu'))
    # strict=False tolerates checkpoint/model key mismatches, which would
    # otherwise leave layers at their random initialisation without any
    # notice -- report them instead of ignoring them silently.
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        warnings.warn(
            'Checkpoint does not fully match the model: '
            f'missing keys={list(load_result.missing_keys)}, '
            f'unexpected keys={list(load_result.unexpected_keys)}'
        )

    path = input('Введите путь до вашего файла: ')
    # Add the leading batch dimension the model expects.
    spec = load_sample(path).unsqueeze(0)

    model.eval()
    with torch.no_grad():
        preds = model(spec)

    # Round with torch itself rather than passing a tensor through numpy;
    # both round half to even, so the result is unchanged.
    # NOTE(review): the raw output of the final Linear layer is rounded, so
    # any value that does not round to 0 (including negative ones) is
    # reported as noise -- confirm this matches how the model was trained.
    score = torch.round(preds[0, 0]).item()

    print('Шума нет' if score == 0 else 'Шум есть')

# Run the classifier only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

# Example session (recorded notebook cell output):
#   Введите путь до вашего файла: /content/00041967-8eec-4faa-9997-dff2b35a5f7b.wav
#   Шума нет
