In [1]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
from torch import tensor

import torchaudio
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm
import numpy as np

import librosa

In [2]:
# 定义一个自定义数据集类
class AudioDataset(Dataset):
    def __init__(self, file_list):
        self.file_list = file_list
        #self.label_list = label_list

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        file_path = self.file_list[idx]
        #label = self.label_list[idx]

        # 使用 Librosa 加载音频文件，并将其转换为梅尔频谱图
        y, sr = librosa.load(file_path, sr=16000)
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=1024, hop_length=512, n_mels=80)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
        mel_spec_db = np.expand_dims(mel_spec_db, axis=0)

        # 返回梅尔频谱图
        return mel_spec_db


In [3]:
idx_to_label = "Red-billed Starling,Intermediate Egret,Blue-and-white Flycatcher,Pin-tailed Snipe,Eastern Marsh-Harrier,Manchurian Reed Warbler,Chinese Pond-Heron,Rock Bunting,Isabelline Shrike,Japanese Scops-Owl,Red-backed Shrike,Bronzed Drongo,Claudia's Leaf Warbler,Common Myna,Koklass Pheasant,Barred Warbler,Besra,Pallid Harrier,Tickell's Leaf Warbler,Gray-cheeked Warbler".split(',')

NUM_CLASSES = len(idx_to_label)

label_to_idx = {idx_to_label[i]: i for i in range(NUM_CLASSES)}

test_data_path = 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard'

label_to_idx = {value: key for key, value in label_to_idx.items()}

In [4]:
label_to_idx

{0: 'Red-billed Starling',
 1: 'Intermediate Egret',
 2: 'Blue-and-white Flycatcher',
 3: 'Pin-tailed Snipe',
 4: 'Eastern Marsh-Harrier',
 5: 'Manchurian Reed Warbler',
 6: 'Chinese Pond-Heron',
 7: 'Rock Bunting',
 8: 'Isabelline Shrike',
 9: 'Japanese Scops-Owl',
 10: 'Red-backed Shrike',
 11: 'Bronzed Drongo',
 12: "Claudia's Leaf Warbler",
 13: 'Common Myna',
 14: 'Koklass Pheasant',
 15: 'Barred Warbler',
 16: 'Besra',
 17: 'Pallid Harrier',
 18: "Tickell's Leaf Warbler",
 19: 'Gray-cheeked Warbler'}

In [5]:
test_files=[]
#train_labels =[]
for wav_file in tqdm(os.listdir(test_data_path)):
    test_files.append(test_data_path + f'/{wav_file}')

100%|████████████████████████████████████████████████████████████████████████████████████████| 186/186 [00:00<?, ?it/s]


In [6]:
test_files

['C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1001.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1002.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1003.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1004.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1005.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1006.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1007.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1008.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1009.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1010.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1011.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1012.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1013.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1014.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1015.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1016.ogg',
 'C:/Users/maple/研究生杂学/声音检测/data/test2_hard/TEST1017.ogg

In [7]:
test_dataset = AudioDataset(test_files)
test_loader = DataLoader(test_dataset, shuffle=False)

In [8]:
len(test_loader)

186

In [26]:
model = torch.load('models/ResNet50-30.pt')

In [27]:
model

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [10]:
import pandas as pd
result = pd.read_csv(r'C:\Users\maple\研究生杂学\声音检测\data\test2_FileList.csv')

In [11]:
result

Unnamed: 0,FileName
0,TEST1001.ogg
1,TEST1002.ogg
2,TEST1003.ogg
3,TEST1004.ogg
4,TEST1005.ogg
...,...
181,TEST1182.ogg
182,TEST1183.ogg
183,TEST1184.ogg
184,TEST1185.ogg


In [12]:
result['Predicted_ID'] = None

In [13]:
result

Unnamed: 0,FileName,Predicted_ID
0,TEST1001.ogg,
1,TEST1002.ogg,
2,TEST1003.ogg,
3,TEST1004.ogg,
4,TEST1005.ogg,
...,...,...
181,TEST1182.ogg,
182,TEST1183.ogg,
183,TEST1184.ogg,
184,TEST1185.ogg,


In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [28]:
model.eval()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [29]:
for idx, inputs in enumerate(test_loader):
    inputs = inputs.to(device)
    outputs = model(inputs)
    pred = outputs.argmax(dim=1, keepdim=True)
    pred_int = pred.item()
    result.iloc[idx,1] = pred_int
result

Unnamed: 0,FileName,Predicted_ID
0,TEST1001.ogg,9
1,TEST1002.ogg,7
2,TEST1003.ogg,19
3,TEST1004.ogg,14
4,TEST1005.ogg,8
...,...,...
181,TEST1182.ogg,13
182,TEST1183.ogg,16
183,TEST1184.ogg,17
184,TEST1185.ogg,13


In [30]:
for i in range(186):
    if(result.iloc[i,1]==0):
        result.iloc[i,1]=20
result

Unnamed: 0,FileName,Predicted_ID
0,TEST1001.ogg,9
1,TEST1002.ogg,7
2,TEST1003.ogg,19
3,TEST1004.ogg,14
4,TEST1005.ogg,8
...,...,...
181,TEST1182.ogg,13
182,TEST1183.ogg,16
183,TEST1184.ogg,17
184,TEST1185.ogg,13


In [31]:
result.to_csv('test_hard1.csv', index=False)

In [32]:
model = torch.load('models/ResNetSE-50.pt')
model.eval()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act2): ReLU(inplace=True)
      (aa): Identity()
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     

In [33]:
for idx, inputs in enumerate(test_loader):
    inputs = inputs.to(device)
    #print(inputs.shape)
    outputs = model(inputs)
    pred = outputs.argmax(dim=1, keepdim=True)
    print(pred)
    pred_int = pred.item()
    result.iloc[idx,1] = pred_int
result

tensor([[1]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[19]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[9]], device='cuda:0')
tensor([[12]], device='cuda:0')
tensor([[17]], device='cuda:0')
tensor([[11]], device='cuda:0')
tensor([[17]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[11]], device='cuda:0')
tensor([[0]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[17]], device='cuda:0')
tensor([[1]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[18]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[0]], devic

Unnamed: 0,FileName,Predicted_ID
0,TEST1001.ogg,1
1,TEST1002.ogg,14
2,TEST1003.ogg,19
3,TEST1004.ogg,4
4,TEST1005.ogg,4
...,...,...
181,TEST1182.ogg,0
182,TEST1183.ogg,16
183,TEST1184.ogg,13
184,TEST1185.ogg,14


In [24]:
for i in range(186):
    if(result.iloc[i,1]==0):
        result.iloc[i,1]=20
result

Unnamed: 0,FileName,Predicted_ID
0,TEST1001.ogg,1
1,TEST1002.ogg,14
2,TEST1003.ogg,19
3,TEST1004.ogg,4
4,TEST1005.ogg,4
...,...,...
181,TEST1182.ogg,20
182,TEST1183.ogg,16
183,TEST1184.ogg,13
184,TEST1185.ogg,14


In [34]:
result.to_csv('test_hard2.csv', index=False)

In [9]:
model = torch.load('models/ResNetSE-100.pt')
model.eval()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act2): ReLU(inplace=True)
      (aa): Identity()
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     

In [15]:
for idx, inputs in enumerate(test_loader):
    inputs = inputs.to(device)
    #print(inputs.shape)
    outputs = model(inputs)
    pred = outputs.argmax(dim=1, keepdim=True)
    print(pred)
    pred_int = pred.item()
    result.iloc[idx,1] = pred_int
result

tensor([[14]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[19]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[8]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[12]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[12]], device='cuda:0')
tensor([[1]], device='cuda:0')
tensor([[11]], device='cuda:0')
tensor([[19]], device='cuda:0')
tensor([[12]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[15]], device='cuda:0')
tensor([[19]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[8]], device='cuda:0')
tensor([[0]], device='cuda:0')
tensor([[1]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[8]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[13]], devic

Unnamed: 0,FileName,Predicted_ID
0,TEST1001.ogg,14
1,TEST1002.ogg,4
2,TEST1003.ogg,19
3,TEST1004.ogg,14
4,TEST1005.ogg,4
...,...,...
181,TEST1182.ogg,19
182,TEST1183.ogg,12
183,TEST1184.ogg,3
184,TEST1185.ogg,4


In [16]:
for i in range(186):
    if(result.iloc[i,1]==0):
        result.iloc[i,1]=20
result.to_csv('test_hard3.csv', index=False)

In [17]:
result.to_csv('test_hard4.csv', index=False)

In [21]:
model = torch.load('ResNet18.pt')
model.eval()
for idx, inputs in enumerate(test_loader):
    inputs = inputs.to(device)
    #print(inputs.shape)
    outputs = model(inputs)
    pred = outputs.argmax(dim=1, keepdim=True)
    print(pred)
    pred_int = pred.item()
    result.iloc[idx,1] = pred_int
for i in range(186):
    if(result.iloc[i,1]==0):
        result.iloc[i,1]=20

print(result)
result.to_csv('test_hard.csv', index=False)

tensor([[18]], device='cuda:0')
tensor([[0]], device='cuda:0')
tensor([[19]], device='cuda:0')
tensor([[1]], device='cuda:0')
tensor([[8]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[19]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[11]], device='cuda:0')
tensor([[3]], device='cuda:0')
tensor([[9]], device='cuda:0')
tensor([[17]], device='cuda:0')
tensor([[1]], device='cuda:0')
tensor([[11]], device='cuda:0')
tensor([[9]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[14]], device='cuda:0')
tensor([[19]], device='cuda:0')
tensor([[6]], device='cuda:0')
tensor([[19]], device='cuda:0')
tensor([[7]], device='cuda:0')
tensor([[1]], device='cuda:0')
tensor([[9]], device='cuda:0')
tensor([[5]], device='cuda:0')
tensor([[4]], device='cuda:0')
tensor([[19]], device='cuda:0')
tensor([[12]], device='cuda:0')
tensor([[10]], device='cuda:0')
tensor([[13]], device='cuda:0')
tensor([[0]], device='