In [1]:
# Install the packages listed in requirements.txt from the local wheel directory
# ../input/openvino-wheels (--no-index disables any package-index lookup).
! python -m pip install --no-index --find-links=../input/openvino-wheels -r ../input/openvino-wheels/requirements.txt
# onnxsim is installed without --no-index and without a version pin.
# NOTE(review): presumably this requires internet access in the kernel -- confirm.
!pip install onnxsim

Looking in links: ../input/openvino-wheels
Processing /kaggle/input/openvino-wheels/openvino_dev-2024.6.0-17404-py3-none-any.whl (from openvino-dev[onnx]==2024.6.0->-r ../input/openvino-wheels/requirements.txt (line 1))
Processing /kaggle/input/openvino-wheels/networkx-3.1-py3-none-any.whl (from openvino-dev==2024.6.0->openvino-dev[onnx]==2024.6.0->-r ../input/openvino-wheels/requirements.txt (line 1))
Processing /kaggle/input/openvino-wheels/openvino_telemetry-2025.1.0-py3-none-any.whl (from openvino-dev==2024.6.0->openvino-dev[onnx]==2024.6.0->-r ../input/openvino-wheels/requirements.txt (line 1))
Processing /kaggle/input/openvino-wheels/openvino-2024.6.0-17404-cp311-cp311-manylinux2014_x86_64.whl (from openvino-dev==2024.6.0->openvino-dev[onnx]==2024.6.0->-r ../input/openvino-wheels/requirements.txt (line 1))
Processing /kaggle/input/openvino-wheels/fastjsonschema-2.17.1-py3-none-any.whl (from openvino-dev[onnx]==2024.6.0->-r ../input/openvino-wheels/requirements.txt (line 1))

In [2]:
import os
import gc
import warnings
import logging
import time
import math
import cv2
from pathlib import Path
import joblib


import numpy as np
import pandas as pd
import librosa
import soundfile as sf
from soundfile import SoundFile 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda.amp import autocast, GradScaler
import timm
from tqdm.auto import tqdm
from glob import glob
import torchaudio
import random
import itertools
from typing import Union

import pickle
import torchaudio
import torchaudio.transforms as AT
from contextlib import contextmanager
import concurrent.futures

import onnx # 用于加载和保存 ONNX 模型
import onnxsim # 用于简化 ONNX 模型
from openvino.tools import mo # 用于模型转换
import openvino as ov
from openvino.runtime import Core # 用于模型加载和推理




# Suppress every Python warning and limit root-logger output to ERROR and above,
# so the conversion cells below print only their own progress messages.
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.ERROR)

Collecting onnxruntime
  Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.4 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.4/16.4 MB 76.2 MB/s eta 0:00:00
Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 46.0/46.0 kB 1.9 MB/s eta 0:00:00
Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 86.8/86.8 kB 3.7 MB/s eta 0:00:00
Installing collected packages: humanfriendly, coloredlogs, onnxruntime
Successfully installed coloredlogs-15.0.1 humanfriendly-10.0 onnxruntime-1.22.0


In [3]:
class CFG:
    """Static configuration for the checkpoint-to-OpenVINO conversion notebook."""

    # CSV read later via pandas; its 'primary_label' column lists the species ids.
    taxonomy_csv = '/kaggle/input/birdclef-2025/taxonomy.csv'

    # Maps a timm backbone name to the checkpoint files trained with that backbone.
    model_dicts = {
        "eca_nfnet_l0": [
            '/kaggle/input/birdclef-2025-sed-models-p/sed0.pth',
            '/kaggle/input/birdclef-2025-sed-models-p/sed1.pth',
            '/kaggle/input/birdclef-2025-sed-models-p/sed2.pth',
        ],
    }

    # Model construction arguments forwarded to TimmSED.
    num_classes = 206
    n_mels = 128
    in_channels = 3
    device = 'cpu'
    pretrained = False

    # Inference parameters.
    # NOTE(review): batch_size and threshold are not read by any visible cell --
    # presumably consumed by a separate inference notebook; confirm.
    batch_size = 32
    threshold = 0.5

    # Debug switches (True / False).
    # NOTE(review): not read by any visible cell -- confirm they are still needed.
    debug = True
    debug_count = 3

# Instantiate the configuration once; later cells read settings through `cfg`.
cfg = CFG()
print(f"Using device: {cfg.device}")

# Read the taxonomy table and keep the 'primary_label' column as a plain list.
print("Loading taxonomy data...")
taxonomy_df = pd.read_csv(cfg.taxonomy_csv)
species_ids = taxonomy_df.loc[:, 'primary_label'].tolist()

Using device: cpu
Loading taxonomy data...


In [4]:
def init_layer(layer):
    """Xavier-uniform initialise ``layer.weight`` and zero its bias, if it has one.

    Args:
        layer: A module exposing a ``weight`` tensor and, optionally, a ``bias``.
    """
    nn.init.xavier_uniform_(layer.weight)
    # A missing attribute and an explicit ``bias=None`` are treated the same way.
    bias = getattr(layer, "bias", None)
    if bias is not None:
        bias.data.fill_(0.)
def init_bn(bn):
    """Reset a batch-norm layer's affine parameters to weight 1 and bias 0.

    Args:
        bn: A module with non-None ``weight`` and ``bias`` tensors.
    """
    for param, value in ((bn.bias, 0.), (bn.weight, 1.0)):
        param.data.fill_(value)


def init_weights(model):
    """Initialise one module according to its class name.

    Intended to be used per-module (e.g. via ``Module.apply``). The class name
    is matched by substring, so subclasses whose names contain ``Conv2d``,
    ``BatchNorm``, ``GRU`` or ``Linear`` are handled as well:

    * ``Conv2d``    -- Xavier-uniform weight with gain sqrt(2), zero bias.
    * ``BatchNorm`` -- weight ~ N(1.0, 0.02), zero bias.
    * ``GRU``       -- orthogonal init for every parameter with more than one dim.
    * ``Linear``    -- weight ~ N(0, 0.01), zero bias.

    Modules created without a bias (``bias=False``) or without affine
    parameters (``affine=False``) are handled: missing tensors are skipped
    instead of raising ``AttributeError``.

    Args:
        model: The module to initialise in place.
    """
    classname = model.__class__.__name__
    weight = getattr(model, "weight", None)
    bias = getattr(model, "bias", None)

    if "Conv2d" in classname:
        nn.init.xavier_uniform_(model.weight, gain=np.sqrt(2))
        if bias is not None:
            bias.data.fill_(0)
    elif "BatchNorm" in classname:
        # weight/bias are None when the layer was built with affine=False.
        if weight is not None:
            weight.data.normal_(1.0, 0.02)
        if bias is not None:
            bias.data.fill_(0)
    elif "GRU" in classname:
        for param in model.parameters():
            # Only matrices are orthogonalised; 1-D bias vectors are left untouched.
            if param.dim() > 1:
                # Fixed typo: the original called the non-existent `orghogonal_`.
                nn.init.orthogonal_(param.data)
    elif "Linear" in classname:
        model.weight.data.normal_(0, 0.01)
        if bias is not None:
            bias.data.zero_()


def interpolate(x, ratio):
    """Upsample a 3-D tensor along dim 1 by repeating every step ``ratio`` times.

    Args:
        x: Tensor of shape ``(batch, time_steps, classes)``.
        ratio: Integer repetition factor.

    Returns:
        Tensor of shape ``(batch, time_steps * ratio, classes)`` in which each
        original step appears ``ratio`` times consecutively.
    """
    n_batch, n_steps, n_classes = x.shape
    # Insert a singleton axis after time, tile it, then fold it back into time.
    tiled = x.unsqueeze(2).repeat(1, 1, ratio, 1)
    return tiled.reshape(n_batch, n_steps * ratio, n_classes)


def pad_framewise_output(framewise_output, frames_num):
    """Resize a framewise output to ``frames_num`` steps with bilinear interpolation.

    Args:
        framewise_output: Tensor of shape ``(batch, steps, classes)``.
        frames_num: Target number of steps along dim 1.

    Returns:
        Tensor of shape ``(batch, frames_num, classes)``; the class dimension
        keeps its size.
    """
    target_size = (frames_num, framewise_output.size(2))
    # F.interpolate in bilinear mode needs a 4-D input, so a dummy channel axis
    # is added and removed around the call.
    as_image = framewise_output.unsqueeze(1)
    resized = F.interpolate(
        as_image, size=target_size, mode="bilinear", align_corners=True)
    return resized.squeeze(1)


class AttBlockV2(nn.Module):
    """Attention pooling head built from two kernel-size-1 ``Conv1d`` layers.

    ``att`` produces attention scores and ``cla`` produces per-step class
    scores; ``forward`` combines them into one vector per sample.

    Args:
        in_features: Number of input channels of both convolutions.
        out_features: Number of output channels of both convolutions.
        activation: ``"linear"`` (identity) or ``"sigmoid"``; applied to the
            output of ``cla``.
    """

    def __init__(self,
                 in_features: int,
                 out_features: int,
                 activation="linear"):
        super().__init__()

        self.activation = activation
        self.att = nn.Conv1d(
            in_channels=in_features,
            out_channels=out_features,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=True)
        self.cla = nn.Conv1d(
            in_channels=in_features,
            out_channels=out_features,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=True)

        self.init_weights()

    def init_weights(self):
        """Xavier-initialise both convolutions and zero their biases."""
        init_layer(self.att)
        init_layer(self.cla)

    def forward(self, x):
        """Pool ``x`` over its last dimension using learned attention weights.

        Args:
            x: Tensor of shape ``(batch, in_features, steps)``.

        Returns:
            Tuple ``(pooled, norm_att, cla)`` where ``norm_att`` is the softmax
            (over the last dim) of ``tanh(att(x))``, ``cla`` is the activated
            class map, and ``pooled`` is ``sum(norm_att * cla)`` over dim 2.
        """
        norm_att = torch.softmax(torch.tanh(self.att(x)), dim=-1)
        cla = self.nonlinear_transform(self.cla(x))
        x = torch.sum(norm_att * cla, dim=2)
        return x, norm_att, cla

    def nonlinear_transform(self, x):
        """Apply the configured activation to ``x``.

        Raises:
            ValueError: If ``self.activation`` is neither ``"linear"`` nor
                ``"sigmoid"``. (Previously this case silently returned
                ``None``, which only failed later inside ``forward``.)
        """
        if self.activation == 'linear':
            return x
        if self.activation == 'sigmoid':
            return torch.sigmoid(x)
        raise ValueError(
            f"Unsupported activation {self.activation!r}; "
            "expected 'linear' or 'sigmoid'")


class TimmSED(nn.Module):
    """Sound-event-detection model: timm backbone followed by an attention head.

    Args:
        base_model_name: Name passed to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``.
        num_classes: Number of output channels of the attention head.
        in_channels: Forwarded to ``timm.create_model`` as ``in_chans``.
        n_mels: Feature count of the ``bn0`` batch-norm layer.
    """

    def __init__(self, base_model_name: str, pretrained=False, num_classes=24, in_channels=1, n_mels=24):
        super().__init__()

        # bn0 is not used by forward(); it is still registered so the module's
        # parameter/state-dict layout stays as it was.
        self.bn0 = nn.BatchNorm2d(n_mels)

        # Backbone with its last two child modules dropped.
        backbone = timm.create_model(
            base_model_name, pretrained=pretrained, in_chans=in_channels)
        self.encoder = nn.Sequential(*list(backbone.children())[:-2])

        feature_dim = backbone.num_features

        self.fc1 = nn.Linear(feature_dim, feature_dim, bias=True)
        self.att_block2 = AttBlockV2(
            feature_dim, num_classes, activation="sigmoid")

        self.init_weight()

    def init_weight(self):
        """Initialise the layers added on top of the backbone."""
        init_bn(self.bn0)
        init_layer(self.fc1)

    def forward(self, input_data):
        """Return one attention-pooled logit vector per sample.

        Args:
            input_data: 4-D tensor. It is concatenated with itself three times
                along dim 1 before entering the encoder.
                NOTE(review): presumably (batch, 1, n_mels, frames) -- confirm.

        Returns:
            Tensor of shape ``(batch, num_classes)``: the attention-weighted sum
            over dim 2 of the raw ``att_block2.cla`` output (no sigmoid applied).
        """
        # Swap dims 2/3, replicate along the channel dim, then swap back.
        stacked = torch.cat([input_data.transpose(2, 3)] * 3, 1)
        stacked = stacked.transpose(2, 3)

        feats = self.encoder(stacked)

        # Collapse dim 2 of the encoder output, leaving a 3-D tensor.
        feats = feats.mean(dim=2)

        # Sum of max- and average-pooling over a window of 3 (length preserved).
        pooled_max = F.max_pool1d(feats, kernel_size=3, stride=1, padding=1)
        pooled_avg = F.avg_pool1d(feats, kernel_size=3, stride=1, padding=1)
        feats = pooled_max + pooled_avg

        # fc1 acts on the last dim, so channels are moved there and back.
        hidden = F.relu_(self.fc1(feats.transpose(1, 2))).transpose(1, 2)

        # Only the attention weights of the head's own output are used here.
        _, attention, _ = self.att_block2(hidden)
        return torch.sum(attention * self.att_block2.cla(hidden), dim=2)

In [5]:
def convert_pytorch_to_openvino(pytorch_model, model_name, output_dir, example_input_shape, i, dynamic_batch=True):
    """
    Convert a PyTorch model to OpenVINO IR, going through ONNX and ONNX Simplifier.

    The function writes three artefacts into ``output_dir``:
    ``{model_name}_{i}_original.onnx``, ``{model_name}_{i}_simplified.onnx`` and
    ``{model_name}_{i}.xml`` (plus the matching ``.bin``).

    Args:
        pytorch_model: The ``torch.nn.Module`` to export. It is switched to
            eval mode and moved to CPU in place.
        model_name: Used in the output file names and log messages.
        output_dir: Directory for all artefacts; created if missing.
        example_input_shape: Shape of the random dummy input used for export.
        i: Index appended to the file names.
        dynamic_batch: If True (default), dim 0 of the ONNX input and output is
            exported as a dynamic ``batch_size`` axis. Pass False to bake the
            batch size of ``example_input_shape`` into the model.

    Returns:
        Path of the saved ``.xml`` file, or ``None`` if any stage failed (the
        failure is printed, not raised).
    """
    os.makedirs(output_dir, exist_ok=True)

    # Path of the ONNX model exported directly from PyTorch.
    onnx_original_path = os.path.join(output_dir, f"{model_name}_{i}_original.onnx")
    # Path of the ONNX model after ONNX Simplifier.
    onnx_simplified_path = os.path.join(output_dir, f"{model_name}_{i}_simplified.onnx")
    # Path of the OpenVINO IR .xml file.
    xml_path = os.path.join(output_dir, f"{model_name}_{i}.xml")

    # 1. Export in eval mode on CPU.
    pytorch_model.eval()
    pytorch_model.cpu()

    print(f"--- 阶段 1/3: 将 PyTorch 模型 '{model_name}' 导出为 ONNX ---")
    dummy_input = torch.randn(example_input_shape)

    # The keys must match input_names/output_names. The original code used
    # 'input_audio', which matches no declared name, so no axis was made dynamic.
    dynamic_axes = None
    if dynamic_batch:
        dynamic_axes = {'input0': {0: 'batch_size'},
                        'output0': {0: 'batch_size'}}

    try:
        # Gradients are not needed for tracing the graph.
        with torch.no_grad():
            torch.onnx.export(pytorch_model,
                              dummy_input,
                              onnx_original_path,
                              verbose=False,
                              opset_version=13,
                              input_names=['input0'],
                              output_names=['output0'],
                              dynamic_axes=dynamic_axes)
        print(f"原始 ONNX 模型已保存到: {onnx_original_path}")
    except Exception as e:
        print(f"导出 ONNX 失败: {e}")
        return None

    # 2. Simplify the exported ONNX graph.
    if not os.path.exists(onnx_original_path):
        print("原始 ONNX 文件不存在，无法进行简化。")
        return None

    print(f"--- 阶段 2/3: 使用 ONNX Simplifier 优化 ONNX 模型 ---")
    try:
        # onnxsim.simplify accepts a file path directly.
        simplified_model, check_ok = onnxsim.simplify(onnx_original_path)

        # The simplified model is saved and used even if the check failed.
        onnx.save(simplified_model, onnx_simplified_path)
        if check_ok:
            print(f"ONNX 模型已简化并保存到: {onnx_simplified_path}")
        else:
            print("警告: ONNX Simplifier 报告简化过程中存在问题，但仍将尝试使用简化后的模型进行转换。")
    except Exception as e:
        print(f"ONNX Simplifier 失败: {e}")
        # Deliberately no fallback to the un-simplified ONNX file; the message
        # now says so (the old text claimed a fallback that never happened).
        print("注意: 无法简化 ONNX 模型，已中止转换（不会回退到原始 ONNX）。")
        return None

    # 3. Convert the simplified ONNX model to OpenVINO IR.
    print(f"--- 阶段 3/3: 使用 OpenVINO Model Optimizer 将简化的 ONNX 模型转换为 OpenVINO IR ---")
    try:
        ov_model = ov.convert_model(onnx_simplified_path)
        # Weights are stored compressed to FP16.
        ov.save_model(ov_model, xml_path, compress_to_fp16=True)
        print(f"OpenVINO IR 模型已保存到: {xml_path} 和 {Path(xml_path).with_suffix('.bin')}")
        return xml_path
    except Exception as e:
        print(f"转换为 OpenVINO IR 失败: {e}")
        return None


In [6]:
def load_openvino_model(xml_path, device="CPU"):
    """
    Read an OpenVINO IR model and compile it for a device.

    Args:
        xml_path (str): Path to the IR model's .xml file.
        device (str): Device name passed to ``compile_model``.
    Returns:
        The compiled model object returned by ``Core.compile_model``.
    """
    runtime = Core()
    # Read the IR and compile it for the requested device in one step.
    compiled = runtime.compile_model(
        model=runtime.read_model(model=xml_path),
        device_name=device,
    )
    model_label = Path(xml_path).stem
    print(f"OpenVINO 模型 '{model_label}' 已编译到设备: {device}")
    return compiled

In [7]:
if __name__ == "__main__":
    # Convert every configured checkpoint to OpenVINO IR, then smoke-test the
    # compiled model with one random batch.

    # Single source of truth for the export dummy input and the smoke-test input.
    example_input_shape = (12, 1, 128, 313)

    for model_name, model_paths in cfg.model_dicts.items():
        for i, model_path in enumerate(model_paths):

            dummy_model = TimmSED(base_model_name=model_name,
                                  pretrained=cfg.pretrained,
                                  num_classes=cfg.num_classes,
                                  in_channels=cfg.in_channels,
                                  n_mels=cfg.n_mels)
            # Load the trained weights.
            # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
            # objects; only use it with checkpoints from a trusted source.
            dummy_model.load_state_dict(torch.load(model_path,
                                                   map_location='cpu',
                                                   weights_only=False))

            output_ir_path = convert_pytorch_to_openvino(
                dummy_model,
                model_name,
                output_dir=f"/kaggle/working/model_{i}",
                example_input_shape=example_input_shape,
                i=i)
            if not output_ir_path:
                # Conversion returned None: there is no IR file to load, so skip
                # the smoke test instead of crashing in load_openvino_model.
                print("模型转换失败。")
                continue
            print(f"模型转换成功，IR路径: {output_ir_path}")

            model = load_openvino_model(output_ir_path, device="CPU")

            # np.random.rand yields float64; the model was exported with a
            # float32 dummy input, so cast explicitly.
            eginput = np.random.rand(*example_input_shape).astype(np.float32)
            print(eginput.shape)
            y = model(eginput)
            print(y['output0'].shape)
            print("...............................")
            

--- 阶段 1/3: 将 PyTorch 模型 'eca_nfnet_l0' 导出为 ONNX ---
原始 ONNX 模型已保存到: /kaggle/working/model_0/eca_nfnet_l0_0_original.onnx
--- 阶段 2/3: 使用 ONNX Simplifier 优化 ONNX 模型 ---
ONNX 模型已简化并保存到: /kaggle/working/model_0/eca_nfnet_l0_0_simplified.onnx
--- 阶段 3/3: 使用 OpenVINO Model Optimizer 将简化的 ONNX 模型转换为 OpenVINO IR ---
OpenVINO IR 模型已保存到: /kaggle/working/model_0/eca_nfnet_l0_0.xml 和 /kaggle/working/model_0/eca_nfnet_l0_0.bin
模型转换成功，IR路径: /kaggle/working/model_0/eca_nfnet_l0_0.xml
OpenVINO 模型 'eca_nfnet_l0_0' 已编译到设备: CPU
(12, 1, 128, 313)
(12, 206)
...............................
--- 阶段 1/3: 将 PyTorch 模型 'eca_nfnet_l0' 导出为 ONNX ---
原始 ONNX 模型已保存到: /kaggle/working/model_1/eca_nfnet_l0_1_original.onnx
--- 阶段 2/3: 使用 ONNX Simplifier 优化 ONNX 模型 ---
ONNX 模型已简化并保存到: /kaggle/working/model_1/eca_nfnet_l0_1_simplified.onnx
--- 阶段 3/3: 使用 OpenVINO Model Optimizer 将简化的 ONNX 模型转换为 OpenVINO IR ---
OpenVINO IR 模型已保存到: /kaggle/working/model_1/eca_nfnet_l0_1.xml 和 /kaggle/working/model_1/eca_nfnet_l0_1.bin
模型转换成