In [1]:
#import torch
#import torchaudio
from torchvision import transforms
from torch.utils.data import  Dataset, DataLoader
import copy
#model = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)
#model.eval()

In [2]:
def batchSize():
    return 64
def epocCount():
    return 50

In [1]:
!python3 -c "import torchvision; print(torchvision.__version__)"

0.10.1+cu102


In [3]:
import warnings
from collections import namedtuple
from functools import partial
from typing import Any, Callable, List, Optional, Tuple

import torch
import torch.nn.functional as F
from torch import nn, Tensor

import torchvision
from torchvision.transforms._presets import ImageClassification
from torchvision.utils import _log_api_usage_once
from torchvision.models._api import register_model, Weights, WeightsEnum
from torchvision.models._meta import _IMAGENET_CATEGORIES
from torchvision.models._utils import _ovewrite_named_param, handle_legacy_interface


__all__ = ["Inception3", "InceptionOutputs", "_InceptionOutputs", "Inception_V3_Weights", "inception_v3"]


InceptionOutputs = namedtuple("InceptionOutputs", ["logits", "aux_logits"])
InceptionOutputs.__annotations__ = {"logits": Tensor, "aux_logits": Optional[Tensor]}

# Script annotations failed with _GoogleNetOutputs = namedtuple ...
# _InceptionOutputs set here for backwards compat
_InceptionOutputs = InceptionOutputs


class Inception3(nn.Module):
    def __init__(
        self,
        num_classes: int = 152,
        aux_logits: bool = True,
        transform_input: bool = False,
        inception_blocks: Optional[List[Callable[..., nn.Module]]] = None,
        init_weights: Optional[bool] = None,
        dropout: float = 0.5,
    ) -> None:
        super().__init__()
        _log_api_usage_once(self)
        if inception_blocks is None:
            inception_blocks = [BasicConv2d, InceptionA, InceptionB, InceptionC, InceptionD, InceptionE, InceptionAux]
        if init_weights is None:
            warnings.warn(
                "The default weight initialization of inception_v3 will be changed in future releases of "
                "torchvision. If you wish to keep the old behavior (which leads to long initialization times"
                " due to scipy/scipy#11299), please set init_weights=True.",
                FutureWarning,
            )
            init_weights = True
        if len(inception_blocks) != 7:
            raise ValueError(f"length of inception_blocks should be 7 instead of {len(inception_blocks)}")
        conv_block = inception_blocks[0]
        inception_a = inception_blocks[1]
        inception_b = inception_blocks[2]
        inception_c = inception_blocks[3]
        inception_d = inception_blocks[4]
        inception_e = inception_blocks[5]
        inception_aux = inception_blocks[6]

        self.aux_logits = aux_logits
        self.transform_input = transform_input
        self.Conv2d_1a_3x3 = conv_block(3, 32, kernel_size=3, stride=2)
        self.Conv2d_2a_3x3 = conv_block(32, 32, kernel_size=3)
        self.Conv2d_2b_3x3 = conv_block(32, 64, kernel_size=3, padding=1)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.Conv2d_3b_1x1 = conv_block(64, 80, kernel_size=1)
        self.Conv2d_4a_3x3 = conv_block(80, 192, kernel_size=3)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.Mixed_5b = inception_a(192, pool_features=32)
        self.Mixed_5c = inception_a(256, pool_features=64)
        self.Mixed_5d = inception_a(288, pool_features=64)
        self.Mixed_6a = inception_b(288)
        self.Mixed_6b = inception_c(768, channels_7x7=128)
        self.Mixed_6c = inception_c(768, channels_7x7=160)
        self.Mixed_6d = inception_c(768, channels_7x7=160)
        self.Mixed_6e = inception_c(768, channels_7x7=192)
        self.AuxLogits: Optional[nn.Module] = None
        if aux_logits:
            self.AuxLogits = inception_aux(768, num_classes)
        self.Mixed_7a = inception_d(768)
        self.Mixed_7b = inception_e(1280)
        self.Mixed_7c = inception_e(2048)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=dropout)
        self.fc = nn.Linear(2048, num_classes)
        if init_weights:
            for m in self.modules():
                if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                    stddev = float(m.stddev) if hasattr(m, "stddev") else 0.1  # type: ignore
                    torch.nn.init.trunc_normal_(m.weight, mean=0.0, std=stddev, a=-2, b=2)
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)

    def _transform_input(self, x: Tensor) -> Tensor:
        if self.transform_input:
            x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
            x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
            x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
            x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
        return x

    def _forward(self, x: Tensor) -> Tuple[Tensor, Optional[Tensor]]:
        # N x 3 x 299 x 299
        x = self.Conv2d_1a_3x3(x)
        # N x 32 x 149 x 149
        x = self.Conv2d_2a_3x3(x)
        # N x 32 x 147 x 147
        x = self.Conv2d_2b_3x3(x)
        # N x 64 x 147 x 147
        x = self.maxpool1(x)
        # N x 64 x 73 x 73
        x = self.Conv2d_3b_1x1(x)
        # N x 80 x 73 x 73
        x = self.Conv2d_4a_3x3(x)
        # N x 192 x 71 x 71
        x = self.maxpool2(x)
        # N x 192 x 35 x 35
        x = self.Mixed_5b(x)
        # N x 256 x 35 x 35
        x = self.Mixed_5c(x)
        # N x 288 x 35 x 35
        x = self.Mixed_5d(x)
        # N x 288 x 35 x 35
        x = self.Mixed_6a(x)
        # N x 768 x 17 x 17
        x = self.Mixed_6b(x)
        # N x 768 x 17 x 17
        x = self.Mixed_6c(x)
        # N x 768 x 17 x 17
        x = self.Mixed_6d(x)
        # N x 768 x 17 x 17
        x = self.Mixed_6e(x)
        # N x 768 x 17 x 17
        aux: Optional[Tensor] = None
        if self.AuxLogits is not None:
            if self.training:
                aux = self.AuxLogits(x)
        # N x 768 x 17 x 17
        x = self.Mixed_7a(x)
        # N x 1280 x 8 x 8
        x = self.Mixed_7b(x)
        # N x 2048 x 8 x 8
        x = self.Mixed_7c(x)
        # N x 2048 x 8 x 8
        # Adaptive average pooling
        x = self.avgpool(x)
        # N x 2048 x 1 x 1
        x = self.dropout(x)
        # N x 2048 x 1 x 1
        x = torch.flatten(x, 1)
        # N x 2048
        x = self.fc(x)
        # N x 1000 (num_classes)
        return x, aux

    @torch.jit.unused
    def eager_outputs(self, x: Tensor, aux: Optional[Tensor]) -> InceptionOutputs:
        if self.training and self.aux_logits:
            return InceptionOutputs(x, aux)
        else:
            return x  # type: ignore[return-value]

    def forward(self, x: Tensor) -> InceptionOutputs:
        x = self._transform_input(x)
        x, aux = self._forward(x)
        aux_defined = self.training and self.aux_logits
        if torch.jit.is_scripting():
            if not aux_defined:
                warnings.warn("Scripted Inception3 always returns Inception3 Tuple")
            return InceptionOutputs(x, aux)
        else:
            return self.eager_outputs(x, aux)


class InceptionA(nn.Module):
    def __init__(
        self, in_channels: int, pool_features: int, conv_block: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.branch1x1 = conv_block(in_channels, 64, kernel_size=1)

        self.branch5x5_1 = conv_block(in_channels, 48, kernel_size=1)
        self.branch5x5_2 = conv_block(48, 64, kernel_size=5, padding=2)

        self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1)
        self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1)
        self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, padding=1)

        self.branch_pool = conv_block(in_channels, pool_features, kernel_size=1)

    def _forward(self, x: Tensor) -> List[Tensor]:
        branch1x1 = self.branch1x1(x)

        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)

        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        outputs = self._forward(x)
        return torch.cat(outputs, 1)


class InceptionB(nn.Module):
    def __init__(self, in_channels: int, conv_block: Optional[Callable[..., nn.Module]] = None) -> None:
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.branch3x3 = conv_block(in_channels, 384, kernel_size=3, stride=2)

        self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1)
        self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1)
        self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, stride=2)

    def _forward(self, x: Tensor) -> List[Tensor]:
        branch3x3 = self.branch3x3(x)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)

        branch_pool = F.max_pool2d(x, kernel_size=3, stride=2)

        outputs = [branch3x3, branch3x3dbl, branch_pool]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        outputs = self._forward(x)
        return torch.cat(outputs, 1)


class InceptionC(nn.Module):
    def __init__(
        self, in_channels: int, channels_7x7: int, conv_block: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.branch1x1 = conv_block(in_channels, 192, kernel_size=1)

        c7 = channels_7x7
        self.branch7x7_1 = conv_block(in_channels, c7, kernel_size=1)
        self.branch7x7_2 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3))
        self.branch7x7_3 = conv_block(c7, 192, kernel_size=(7, 1), padding=(3, 0))

        self.branch7x7dbl_1 = conv_block(in_channels, c7, kernel_size=1)
        self.branch7x7dbl_2 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0))
        self.branch7x7dbl_3 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3))
        self.branch7x7dbl_4 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0))
        self.branch7x7dbl_5 = conv_block(c7, 192, kernel_size=(1, 7), padding=(0, 3))

        self.branch_pool = conv_block(in_channels, 192, kernel_size=1)

    def _forward(self, x: Tensor) -> List[Tensor]:
        branch1x1 = self.branch1x1(x)

        branch7x7 = self.branch7x7_1(x)
        branch7x7 = self.branch7x7_2(branch7x7)
        branch7x7 = self.branch7x7_3(branch7x7)

        branch7x7dbl = self.branch7x7dbl_1(x)
        branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)

        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        outputs = self._forward(x)
        return torch.cat(outputs, 1)


class InceptionD(nn.Module):
    def __init__(self, in_channels: int, conv_block: Optional[Callable[..., nn.Module]] = None) -> None:
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.branch3x3_1 = conv_block(in_channels, 192, kernel_size=1)
        self.branch3x3_2 = conv_block(192, 320, kernel_size=3, stride=2)

        self.branch7x7x3_1 = conv_block(in_channels, 192, kernel_size=1)
        self.branch7x7x3_2 = conv_block(192, 192, kernel_size=(1, 7), padding=(0, 3))
        self.branch7x7x3_3 = conv_block(192, 192, kernel_size=(7, 1), padding=(3, 0))
        self.branch7x7x3_4 = conv_block(192, 192, kernel_size=3, stride=2)

    def _forward(self, x: Tensor) -> List[Tensor]:
        branch3x3 = self.branch3x3_1(x)
        branch3x3 = self.branch3x3_2(branch3x3)

        branch7x7x3 = self.branch7x7x3_1(x)
        branch7x7x3 = self.branch7x7x3_2(branch7x7x3)
        branch7x7x3 = self.branch7x7x3_3(branch7x7x3)
        branch7x7x3 = self.branch7x7x3_4(branch7x7x3)

        branch_pool = F.max_pool2d(x, kernel_size=3, stride=2)
        outputs = [branch3x3, branch7x7x3, branch_pool]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        outputs = self._forward(x)
        return torch.cat(outputs, 1)


class InceptionE(nn.Module):
    def __init__(self, in_channels: int, conv_block: Optional[Callable[..., nn.Module]] = None) -> None:
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.branch1x1 = conv_block(in_channels, 320, kernel_size=1)

        self.branch3x3_1 = conv_block(in_channels, 384, kernel_size=1)
        self.branch3x3_2a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1))
        self.branch3x3_2b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0))

        self.branch3x3dbl_1 = conv_block(in_channels, 448, kernel_size=1)
        self.branch3x3dbl_2 = conv_block(448, 384, kernel_size=3, padding=1)
        self.branch3x3dbl_3a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1))
        self.branch3x3dbl_3b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0))

        self.branch_pool = conv_block(in_channels, 192, kernel_size=1)

    def _forward(self, x: Tensor) -> List[Tensor]:
        branch1x1 = self.branch1x1(x)

        branch3x3 = self.branch3x3_1(x)
        branch3x3 = [
            self.branch3x3_2a(branch3x3),
            self.branch3x3_2b(branch3x3),
        ]
        branch3x3 = torch.cat(branch3x3, 1)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = [
            self.branch3x3dbl_3a(branch3x3dbl),
            self.branch3x3dbl_3b(branch3x3dbl),
        ]
        branch3x3dbl = torch.cat(branch3x3dbl, 1)

        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        outputs = self._forward(x)
        return torch.cat(outputs, 1)


class InceptionAux(nn.Module):
    def __init__(
        self, in_channels: int, num_classes: int, conv_block: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super().__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.conv0 = conv_block(in_channels, 128, kernel_size=1)
        self.conv1 = conv_block(128, 768, kernel_size=5)
        self.conv1.stddev = 0.01  # type: ignore[assignment]
        self.fc = nn.Linear(768, num_classes)
        self.fc.stddev = 0.001  # type: ignore[assignment]

    def forward(self, x: Tensor) -> Tensor:
        # N x 768 x 17 x 17
        x = F.avg_pool2d(x, kernel_size=5, stride=3)
        # N x 768 x 5 x 5
        x = self.conv0(x)
        # N x 128 x 5 x 5
        x = self.conv1(x)
        # N x 768 x 1 x 1
        # Adaptive average pooling
        x = F.adaptive_avg_pool2d(x, (1, 1))
        # N x 768 x 1 x 1
        x = torch.flatten(x, 1)
        # N x 768
        x = self.fc(x)
        # N x 1000
        return x


class BasicConv2d(nn.Module):
    def __init__(self, in_channels: int, out_channels: int, **kwargs: Any) -> None:
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x: Tensor) -> Tensor:
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x, inplace=True)


class Inception_V3_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth",
        transforms=partial(ImageClassification, crop_size=299, resize_size=342),
        meta={
            "num_params": 27161264,
            "min_size": (75, 75),
            "categories": _IMAGENET_CATEGORIES,
            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#inception-v3",
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 77.294,
                    "acc@5": 93.450,
                }
            },
            "_ops": 5.713,
            "_file_size": 103.903,
            "_docs": """These weights are ported from the original paper.""",
        },
    )
    DEFAULT = IMAGENET1K_V1


#@register_model()
#@handle_legacy_interface(weights=("pretrained", Inception_V3_Weights.IMAGENET1K_V1))
def inception_v3(*, weights: Optional[Inception_V3_Weights] = None, progress: bool = True, **kwargs: Any) -> Inception3:
    """
    Inception v3 model architecture from
    `Rethinking the Inception Architecture for Computer Vision <http://arxiv.org/abs/1512.00567>`_.
    .. note::
        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
        N x 3 x 299 x 299, so ensure your images are sized accordingly.
    Args:
        weights (:class:`~torchvision.models.Inception_V3_Weights`, optional): The
            pretrained weights for the model. See
            :class:`~torchvision.models.Inception_V3_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.Inception3``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/inception.py>`_
            for more details about this class.
    .. autoclass:: torchvision.models.Inception_V3_Weights
        :members:
    """
    weights = Inception_V3_Weights.verify(weights)

    original_aux_logits = kwargs.get("aux_logits", True)
    if weights is not None:
        if "transform_input" not in kwargs:
            _ovewrite_named_param(kwargs, "transform_input", True)
        _ovewrite_named_param(kwargs, "aux_logits", True)
        _ovewrite_named_param(kwargs, "init_weights", False)
        _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))

    model = Inception3(**kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress))
        if not original_aux_logits:
            model.aux_logits = False
            model.AuxLogits = None

    return model


# The dictionary below is internal implementation detail and will be removed in v0.15
from torchvision.models._utils import _ModelURLs


model_urls = _ModelURLs(
    {
        # Inception v3 ported from TensorFlow
        "inception_v3_google": Inception_V3_Weights.IMAGENET1K_V1.url,
    }
)

In [4]:
batch_size = batchSize()
newmodel = Inception3(init_weights=True, transform_input=[batch_size,3,299,299])
newmodel.eval()
#torch.nn.Sequential(*(list(model.children())[:-1]))
#newmodel.fc = torch.nn.Linear(in_features=2048,out_features=152)
#print(newmodel)
#model.features[branch_pool] =  torch.nn.AdaptiveAvgPool2d(512)


Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [5]:
#print(newmodel.15.fc)

In [6]:
import os
import warnings

warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import soundfile as sf

import librosa
import librosa.display
import IPython.display as ipd

import time
import random

plt.style.use("ggplot")

#%load_ext lab_black
#%load_ext autoreload
#%autoreload 2

In [7]:
librosa.__version__

'0.9.2'

In [8]:
import cv2
from PIL import Image

In [9]:
#path = 'BirdCLEF2022/train_audio/afrsil1/XC125458.ogg'
#filename_og, sample_r = torchaudio.load('/home/coglab/miniconda3/etc/BirdCLEF2022/train_audio/afrsil1/XC125458.ogg')
#rel_name = 'afrsil1'


#filename = torch.Tensor.numpy(filename_og)
#filename = filename[:1,:160000]
#waveform, sample_rate
#print_stats(waveform, sample_rate=sample_rate)
#plot_waveform(waveform, sample_rate)
#plot_specgram(waveform, sample_rate)
#play_audio(waveform, sample_rate)

In [10]:
def create_mel_spectrogram(audio_file, **spec_params):
    sr, hop_length, n_fft, n_mels, fmin, fmax = [
        spec_params[k] for k in ["sr", "hop_length", "n_fft", "n_mels", "fmin", "fmax"]
    ]
    audio, _ = librosa.core.load(audio_file, sr=sr, mono=True)
    melspec = librosa.feature.melspectrogram(
        audio,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
        fmin=fmin,
        fmax=fmax,
        power=1,
    )
    return melspec


def pcen_bird(melspec, **spec_params):
    """
    parameters are taken from [1]:
        - [1] Lostanlen, et. al. Per-Channel Energy Normalization: Why and How. IEEE Signal Processing Letters, 26(1), 39-43.
    """
    sr, hop_length = [spec_params[k] for k in ["sr", "hop_length"]]
    return librosa.pcen(
        melspec * (2 ** 31),
        time_constant=0.06,
        eps=1e-6,
        gain=0.8,
        power=0.25,
        bias=10,
        sr=sr,
        hop_length=hop_length,
    )


def mel2audio(melspec, **spec_params):
    n_fft, sr, hop_length = [spec_params[k] for k in ["n_fft", "sr", "hop_length"]]
    return librosa.feature.inverse.mel_to_audio(
        melspec, sr=sr, n_fft=n_fft, hop_length=hop_length, power=1
    )


def get_fullpath(filename, audio_path="/home/coglab/miniconda3/etc/BirdCLEF2022/train_audio/"):
    return f"{audio_path}/{filename}"


def play_audio(audio_file):
    display(ipd.Audio(audio_file))


def gen_spec_and_audio(audio_name):
    out_file = audio_name.replace("/", "_")[:-4] + ".wav"
    img = plot_spectrograms(audio_name)
    ##print("source audio:")
    play_audio(get_fullpath(audio_name))
    return img

In [11]:
def plot_spectrograms(
    audio_name,
    spec_params=dict(
        sr=32_000, hop_length=320, n_fft=1024, n_mels=128, fmin=20, fmax=14_000
    ),
):
    sr, hop_length, fmin, fmax, n_mels = [
        spec_params[k] for k in ["sr", "hop_length", "fmin", "fmax", "n_mels"]
    ]
    #############print(f"parameters: {spec_params}")
    audio_file = get_fullpath(audio_name)
    if not os.path.isfile(audio_file):
        raise FileNotFoundError
    melspec = create_mel_spectrogram(audio_file, **spec_params)
    log_melspec = librosa.amplitude_to_db(melspec, ref=np.max)
    pcen_melspec = pcen_bird(melspec, **spec_params)

    return pcen_melspec

In [12]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs, is_inception=True):

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    loopcount = 0
    
    since = time.time()
    batch = batchSize()
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in (dataloaders): #dataloaders[phase]
                #print("full set", inputs.shape)
                breaker = False
                loopcount +=1
                print("loop number :",loopcount)
                
                if  inputs.shape != torch.Size([batch, 3, 299, 299]):
                    print("Epoc end, batch smaller ", inputs.shape)
                    breaker = True
                    break
                for x in inputs:
                    if x.shape != torch.Size([3, 299, 299]):
                        print("Epoc end, missing shape ", x.shape)
                        breaker = True
                        break
                if breaker != False:
                    break
                inputs = inputs.to(device)
                labels = labels.type(torch.LongTensor)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.
                    if is_inception and phase == 'train':
                        #print(inputs.size())
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        #outs = torch.tensor([4,152], dtype=torch.float)
                        #outs = outs.to(device)
                        outputs, aux_outputs = model(inputs)#outs,outs#model(inputs)#torchvision.models.inception_v3(inputs) #model(inputs)#152,152
                        #print(labels)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len((dataloaders).dataset) #dataloaders[phase]
            epoch_acc = running_corrects.double() / len((dataloaders).dataset) #dataloaders[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [13]:
# import pandas as pd

# df = pd.read_csv('miniconda3/etc/BirdCLEF2022/train_metadata.csv', low_memory=False)

# from sklearn.model_selection import train_test_split

# print(df.size)

# # Let's say we want to split the data in 80:10:10 for train:valid:test dataset
# train_size=0.8

# X = df.drop(columns = ['primary_label']).copy()
# y = df['primary_label']

# # In the first step we will split the data in training and remaining dataset
# X_train, X_rem, y_train, y_rem = train_test_split(X,y, train_size=0.8)

# # Now since we want the valid and test size to be equal (10% each of overall data). 
# # we have to define valid_size=0.5 (that is 50% of remaining data)
# test_size = 0.5
# X_valid, X_test, y_valid, y_test = train_test_split(X_rem,y_rem, test_size=0.5)

# print(X_train.shape), print(y_train.shape)
# print(X_valid.shape), print(y_valid.shape)
# print(X_test.shape), print(y_test.shape)

In [14]:
# Top level data directory. Here we assume the format of the directory conforms
#   to the ImageFolder structure
data_dir = "/home/coglab/miniconda3/etc/BirdCLEF2022/train_audio/"

# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = "inception"

# Number of classes in the dataset
num_classes = 152


# Flag for feature extracting. When False, we finetune the whole model,
#   when True we only update the reshaped layer params
feature_extract = True

In [15]:
from sklearn import preprocessing
csv_file = '/home/coglab/miniconda3/etc/BirdCLEF2022/train_metadata.csv'
dest = '/home/coglab/miniconda3/etc/BirdCLEF2022/train_use.csv'
le = preprocessing.LabelEncoder()
our_csv = pd.read_csv(csv_file)
le.fit(our_csv.primary_label)
le.classes_
our_csv['primary_label'] = le.transform(our_csv.primary_label) 
our_csv.to_csv(dest)

In [16]:
# class BirdsDataset(Dataset):
#     def __init__(self, csv_file, root_dir, transform=None):
#         self.annotations = pd.read_csv(csv_file)
#         self.root_dir = root_dir
#         self.transform = transform

#     def __len__(self):
#         return len(self.annotations)

#     def __getitem__(self, index):
#         path = self.annotations.iloc[index,13]
#         print(path)
#         img = plot_spectrograms(path)
#         img3 = cv2.cvtColor(np.float32(img), cv2.COLOR_GRAY2RGB) 
#         formatted = (img3 * 255 / np.max(img3)).astype('uint8')
#         PIL_image = Image.fromarray(formatted).convert('RGB')
#         #image = PIL_image
#         input_image = PIL_image
        
        
#         preprocess = transforms.Compose([
#                transforms.Resize(299),
#                transforms.CenterCrop(299),
#                transforms.ToTensor(),
#                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#            ])
#         image = preprocess(input_image)
#         #print(type(image))
#         #input_tensor = preprocess(input_image)
#         #image_big = input_tensor.unsqueeze(0)
#         #image = torch.squeeze(image_big)
#         y_label = torch.tensor(int(self.annotations.iloc[index, 1]),dtype = torch.float)

#         if self.transform:
#             image = self.transform(img)
                
#         #print(image.size())
#         #print(y_label.size())
#         #print(y_label)
#         return (image, y_label)


# # Set device
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")#("cpu")#("cuda" if torch.cuda.is_available() else "cpu")

# # # Hyperparameters
# # in_channel = 3
# # num_classes = 2
# learning_rate = 3e-4
# batch_size = batchSize()
# # num_epochs = 10

# # Load Data
# dataset = BirdsDataset(
#     csv_file="/home/coglab/miniconda3/etc/BirdCLEF2022/train_use.csv", #train_metadata.csv"
#     root_dir="/home/coglab/miniconda3/etc/BirdCLEF2022/train_audio",
#     #transform=transforms.ToTensor(),
# )

# # Dataset is actually a lot larger ~25k images, just took out 10 pictures
# # to upload to Github. It's enough to understand the structure and scale
# # if you got more images.

# proportions = [.010, .090]
# lengths = [int(p * len(dataset)) for p in proportions]
# lengths[-1] = len(dataset) - sum(lengths[:-1])
# #tr_dataset, vl_dataset, = random_split(dataset, lengths)


# train_set, test_set = torch.utils.data.random_split(dataset, lengths) # train 80% 154 460 valid 20% 38 616
# train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
# test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)

# # Model
# #model = torchvision.models.googlenet(weights="DEFAULT")

# ############################ freeze all layers, change final linear layer with num_classes
# for param in newmodel.parameters():
#     param.requires_grad = False

# ########################## final layer is not frozen
# newmodel.fc = nn.Linear(in_features=2048, out_features=num_classes)
# newmodel.to(device)

# # Loss and optimizer
# criterion = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(newmodel.parameters(), lr=learning_rate, weight_decay=1e-5)

In [2]:
class BirdsDataset_betterTrain(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        path = self.annotations.iloc[index,13]
        print(path)
        img = plot_spectrograms(path)
        
        
        maximum_start = len(img[0])       #3 second block
        if maximum_start>300:
            maximum_start = maximum_start-300
            #print(maximum_start)
            s = random.randint(0,maximum_start)   
            img = img[:,s:s+300]                  #3 second block end
            #maximum_start = len(img[0])

        
        img3 = cv2.cvtColor(np.float32(img), cv2.COLOR_GRAY2RGB) 
        formatted = (img3 * 255 / np.max(img3)).astype('uint8')
        PIL_image = Image.fromarray(formatted).convert('RGB')

        #path_img = "/home/coglab/miniconda3/etc/images_pcen/" + path[-11:-4] + ".jpg" 
        #PIL_image.save(path_img)#saving imagesimg
        
        #image = PIL_image
        input_image = PIL_image
        
        
        preprocess = transforms.Compose([
               transforms.Resize(299),
               transforms.CenterCrop(299),
               transforms.ToTensor(),
               transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
           ])
        image = preprocess(input_image)
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]),dtype = torch.float)
        
        
        
        
        if self.transform:
            image = self.transform(img)
                
        #print(image.size())
        #print(y_label.size())
        #print(y_label)
        return (image, y_label)


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")#("cpu")#("cuda" if torch.cuda.is_available() else "cpu")

# # Hyperparameters
# in_channel = 3
# num_classes = 2
learning_rate = 3e-4
batch_size = batchSize()
# num_epochs = 10

# Load Data
dataset = BirdsDataset_betterTrain(
    csv_file="/home/coglab/miniconda3/etc/BirdCLEF2022/train_use.csv", #train_metadata.csv"
    root_dir="/home/coglab/miniconda3/etc/BirdCLEF2022/train_audio",
    #transform=transforms.ToTensor(),
)

# Dataset is actually a lot larger ~25k images, just took out 10 pictures
# to upload to Github. It's enough to understand the structure and scale
# if you got more images.

proportions = [.8, .2]
lengths = [int(p * len(dataset)) for p in proportions]
lengths[-1] = len(dataset) - sum(lengths[:-1])
#tr_dataset, vl_dataset, = random_split(dataset, lengths)


train_set, test_set = torch.utils.data.random_split(dataset, lengths) # train 80% 154 460 valid 20% 38 616
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)

# Model
#model = torchvision.models.googlenet(weights="DEFAULT")

############################ freeze all layers, change final linear layer with num_classes
for param in newmodel.parameters():
    param.requires_grad = False

########################## final layer is not frozen
newmodel.fc = nn.Linear(in_features=2048, out_features=num_classes)
newmodel.to(device)

# Loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(newmodel.parameters(), lr=learning_rate, weight_decay=1e-5)

NameError: name 'Dataset' is not defined

In [55]:
# transform = transforms.Compose(
#     [transforms.ToTensor(),
#      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# transform = transforms.Compose([
#     transforms.Resize(299),
#     transforms.CenterCrop(299),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
# ])

# trainset = torchvision.datasets(root=data_dir, train=True,
#                                         download=True, transform=transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
#                                           shuffle=True, num_workers=2)

# testset = torchvision.datasets.CIFAR10(root=data_dir, train=False,
#                                        download=True, transform=transform)
# testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
#                                          shuffle=False, num_workers=2)

#batch_size = 1

classes = 'afrsil1 akekee akepa1 akiapo akikik amewig aniani apapan arcter \
                      barpet bcnher belkin1 bkbplo bknsti bkwpet blkfra blknod bongul \
                      brant brnboo brnnod brnowl brtcur bubsan buffle bulpet burpar buwtea \
                      cacgoo1 calqua cangoo canvas caster1 categr chbsan chemun chukar cintea \
                      comgal1 commyn compea comsan comwax coopet crehon dunlin elepai ercfra eurwig \
                      fragul gadwal gamqua glwgul gnwtea golphe grbher3 grefri gresca gryfra gwfgoo \
                      hawama hawcoo hawcre hawgoo hawhaw hawpet1 hoomer houfin houspa hudgod iiwi incter1 \
                      jabwar japqua kalphe kauama laugul layalb lcspet leasan leater1 lessca lesyel lobdow lotjae \
                      madpet magpet1 mallar3 masboo mauala maupar merlin mitpar moudov norcar norhar2 normoc norpin \
                      norsho nutman oahama omao osprey pagplo palila parjae pecsan peflov perfal pibgre pomjae puaioh \
                      reccar redava redjun redpha1 refboo rempar rettro ribgul rinduc rinphe rocpig rorpar rudtur ruff \
                      saffin sander semplo sheowl shtsan skylar snogoo sooshe sooter1 sopsku1 sora spodov sposan \
                      towsol wantat1 warwhe1 wesmea wessan wetshe whfibi whiter whttro wiltur yebcar yefcan zebdov'.split()

#classes = ('plane', 'car', 'bird', 'cat',
#           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [56]:
#img = plot_spectrograms("wesmea/XC452089.ogg")
 
# img = plot_spectrograms("wesmea/XC452089.ogg")
# img3 = cv2.cvtColor(np.float32(img), cv2.COLOR_GRAY2RGB) 
# formatted = (img3 * 255 / np.max(img3)).astype('uint8')
# PIL_image = Image.fromarray(formatted).convert('RGB')
# #image = PIL_image
# input_image = PIL_image
        
        
# preprocess = transforms.Compose([
#         transforms.Resize(299),
#         transforms.CenterCrop(299),
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#     ])
# image = preprocess(input_image)
# inputs = image
# outputs, aux_outputs = newmodel(inputs)

In [57]:
#newmodel(inputs)

In [58]:
#newmodel(inputs) = [batch_size,3,299,299]
#output, aux_output = model(input)
since = time.time()

torch.backends.cudnn.enabled = False
newmodel.to(device)
bird_model, hist = train_model(newmodel, train_loader, criterion, optimizer, num_epochs=epocCount(), is_inception=True)

path_weight = '/home/coglab/miniconda3/etc/BirdCLEF2022/'

torch.save(bird_model.state_dict(), '/home/coglab/miniconda3/etc/BirdCLEF2022/bird_weights.pt')
torch.save(bird_model, '/home/coglab/miniconda3/etc/BirdCLEF2022/bird_model.pt')

print("model saved. ", path_weight)

torch.save(hist,'/home/coglab/miniconda3/etc/BirdCLEF2022/bird_model_logm.txt')

Epoch 0/2
----------
osprey/XC466204.ogg
chukar/XC505423.ogg
norcar/XC184580.ogg
comsan/XC556785.ogg
commyn/XC294325.ogg
rinphe/XC422829.ogg
bcnher/XC235426.ogg
sposan/XC498129.ogg
rinduc/XC143031.ogg
norcar/XC142500.ogg
norcar/XC336998.ogg
rorpar/XC541890.ogg
skylar/XC594472.ogg
gwfgoo/XC595870.ogg
lobdow/XC161342.ogg
bcnher/XC642133.ogg
wesmea/XC459756.ogg
pibgre/XC596469.ogg
hawcre/XC122132.ogg
normoc/XC321905.ogg
afrsil1/XC317039.ogg
sora/XC174359.ogg
comsan/XC569699.ogg
norsho/XC645672.ogg
houfin/XC465584.ogg
canvas/XC169220.ogg
skylar/XC182197.ogg
skylar/XC638042.ogg
comsan/XC484232.ogg
redjun/XC447977.ogg
eurwig/XC362197.ogg
cangoo/XC378732.ogg
mallar3/XC634632.ogg
wiltur/XC365158.ogg
gwfgoo/XC465612.ogg
bcnher/XC587880.ogg
skylar/XC319034.ogg
comwax/XC202499.ogg
mallar3/XC443150.ogg
snogoo/XC143622.ogg
comsan/XC650270.ogg
dunlin/XC526646.ogg
gamqua/XC636288.ogg
bcnher/XC361996.ogg
cangoo/XC177509.ogg
lobdow/XC161341.ogg
omao/XC175493.ogg
hoomer/XC372597.ogg
arcter/XC660413.ogg


KeyboardInterrupt: 

In [53]:
#import matplotlib.image as mpimg
#image = mpimg.imread("chelsea-the-cat.png")

#class BirdsDataset(Dataset):
#    def __init__(self, csv_file, root_dir, transform=None):
##        self.annotations = pd.read_csv(csv_file)
#        self.root_dir = root_dir
#        self.transform = transform

# df = our_csv#pd.read_csv("/home/coglab/miniconda3/etc/BirdCLEF2022/train_use.csv"), #train_metadata.csv"
# root_dir="/home/coglab/miniconda3/etc/BirdCLEF2022/train_audio",

# path = our_csv.iloc[index,13]
# print(path)
# img = plot_spectrograms(path)
# img3 = cv2.cvtColor(np.float32(img), cv2.COLOR_GRAY2RGB) 
# formatted = (img3 * 255 / np.max(img3)).astype('uint8')
# PIL_image = Image.fromarray(formatted).convert('RGB')
#         #image = PIL_image
# input_image = PIL_image
        
        
# preprocess = transforms.Compose([
#                transforms.Resize(299),
#                transforms.CenterCrop(299),
#                transforms.ToTensor(),
#                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#            ])
# image = preprocess(input_image)
#         #print(type(image))
#         #input_tensor = preprocess(input_image)
#         #image_big = input_tensor.unsqueeze(0)
#         #image = torch.squeeze(image_big)
# y_label = torch.tensor(int(self.annotations.iloc[index, 0]),dtype = torch.float)

# if self.transform:
#     image = self.transform(img)
                
#         #print(image.size())
#         #print(y_label.size())
# print(y_label)


In [39]:
# img = plot_spectrograms('/brnowl/XC138613.ogg')
# img = np.array(img)
# img2 = np.float32(img)
# img3 = cv2.cvtColor(img2, cv2.COLOR_GRAY2RGB) 
# PIL_image = Image.fromarray(img3)
# img.pcen_melspec
# img3 = cv2.cvtColor(np.float32(img), cv2.COLOR_GRAY2RGB) 
# print(img3)

img = plot_spectrograms('brnowl/XC138613.ogg')
print(img.shape)

# img3 = cv2.cvtColor(np.float32(img), cv2.COLOR_GRAY2RGB) 
# formatted = (img3 * 255 / np.max(img3)).astype('uint8')
# PIL_image = Image.fromarray(formatted).convert('RGB')
# print(PIL_image)

(128, 1790)


In [40]:
#PIL_image = Image.fromarray(np.uint8(img3)).convert('RGB')
#print(img.size)
#PIL_image = Image.fromarray(img3.astype('uint8'),  mode="RGB")
#PIL_image.sh
#cv2.imshow("kuva",img3)

# img3 = cv2.cvtColor(np.float32(img), cv2.COLOR_GRAY2RGB) 
# formatted = (img3 * 255 / np.max(img3)).astype('uint8')
# PIL_image = Image.fromarray(formatted).convert('RGB')

In [41]:
# Download an example image from the pytorch website
# The thing here thats troubling is that I dont see the connection from the PCEN 
# to the CNN

#TODO: copy the colab sample and go try use the audio as an input :thinking:


In [42]:
# # sample execution (requires torchvision)
# from torchvision import transforms
# input_image = PIL_image
# preprocess = transforms.Compose([
#     transforms.Resize(299),
#     transforms.CenterCrop(299),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
# ])
# input_tensor = preprocess(input_image)
# input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

# # move the input and model to GPU for speed if available
# if torch.cuda.is_available():
#     input_batch = input_batch.to('cuda')
#     model.to('cuda')

# with torch.no_grad():
#   output = model(input_batch)
# # Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
# #print(output[0])
# # The output has unnormalized scores. To get probabilities, you can run a softmax on it.
# probabilities = torch.nn.functional.softmax(output[0], dim=0)
# #print(probabilities)

In [29]:
# Download ImageNet labels
#!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

In [30]:
# # Read the categories
# with open("imagenet_classes.txt", "r") as f:
#     categories = [s.strip() for s in f.readlines()]
# # Show top categories per image
# top5_prob, top5_catid = torch.topk(probabilities, 5)
# for i in range(top5_prob.size(0)):
#     print(categories[top5_catid[i]], top5_prob[i].item())

In [31]:
import time
since = time.time()
time_elapsed = time.time() - since

print(time_elapsed)

2.5033950805664062e-05


In [32]:
import random
img = plot_spectrograms('brant/XC588758.ogg')
print(img[1].shape)
maximum_start = len(img[0])
maximum_start = maximum_start-300
print(maximum_start)
s = random.randint(1,maximum_start)
print(s, s+300, maximum_start)
print(img.shape)
img = img[:,s:s+300]
print(img.shape)

(2472,)
2172
1296 1596 2172
(128, 2472)
(128, 300)


In [None]:

maximum_start = len(img[0])-300
s = random.randint(0,maximum_start)
img = img[:,s:s+300]

In [None]:
print(img[0].shape)

In [None]:
print(img[:,0:1].shape)

In [None]:
H = torch.load('miniconda3/etc/BirdCLEF2022/bird_model.pt')

#print("Model's state_dict:")
#for param_tensor in H.state_dict():
#    print(param_tensor, "\t", H.state_dict()[param_tensor].size())

#print()


#H = H.load_state_dict()#(state_dic, strict=False)
#load_state_dict(sd, strict=False)

#hist
hist = Tensor.cpu(hist) 
plt.style.use("ggplot")
plt.figure()
#plt.plot(np.arange(0, len(H.history["loss"])), H.history["loss"], label="train_loss")
#plt.plot(np.arange(0, len(H.history["val_loss"])), H.history["val_loss"], label="val_loss")
#plt.plot(np.arange(0, len(H.history["accuracy"])), H.history["accuracy"], label="train_acc")
plt.plot(np.arange(0, len(hist)), hist, label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.show()