In [1]:
# --- Run control and persistence ---
ABRIDGED_RUN = False  # True => train and validate on 10% of the data, for quick functionality tests etc.
SAVE_AFTER_TRAINING = True  # Persist the model once training has finished
SAVE_CHECKPOINTS = True  # Persist the model at the end of every epoch
REPORT_TRAINING_LOSS_PER_EPOCH = True  # Record the training loss per epoch and write it to a file after training
REPORT_VALIDATION_LOSS_PER_EPOCH = True  # Record the validation loss per epoch so a learning curve can be drawn

# --- Training hyperparameters ---
BATCH_SIZE = 256  # Samples per batch during training
NUM_EPOCHS = 20  # Epochs to train the network for
LEARNING_RATE = 1e-3  # Learning rate passed to the optimizer

# --- Directories ---
DATA_DIR = "../data/"
AUDIO_DIR_DCASE = f"{DATA_DIR}ff1010bird_wav/"
CHECKPOINT_DIR = "checkpoints/"  # Checkpoints, models, and training data are written here
MODEL_NAME = None

# --- Preprocessing ---
SAMPLE_RATE = 32_000  # Sample rate shared by all of the audio
SAMPLE_LENGTH = 5  # Duration each audio clip is cropped to

In [2]:
# Basic imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import librosa
import os
import IPython.display as ipd
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from pathlib import Path

In [3]:
# Torch imports
import torch
import torchaudio
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, random_split
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchaudio.transforms import MelSpectrogram, Resample
from IPython.display import Audio
import torch.optim as optim

In [4]:
# Read the ff1010bird metadata CSV located in DATA_DIR into a DataFrame.
dcase = pd.read_csv(f"{DATA_DIR}ff1010bird_metadata_2018.csv")

In [5]:
# Add a 'filepath' column: AUDIO_DIR_DCASE + <itemid as string> + '.wav'
dcase['filepath'] = (
    AUDIO_DIR_DCASE
    + dcase['itemid'].astype(str)
    + '.wav'
)

In [7]:
# Hold out 20% of the rows as a test set, stratified on the 'hasbird' column;
# the fixed random_state makes the split repeatable across runs.
dcase_train, dcase_test = train_test_split(
    dcase,
    test_size=0.2,
    stratify=dcase['hasbird'],
    random_state=123,
)

In [11]:
# Load the saved checkpoint onto the CPU. 'final.pt' holds a state_dict (an
# OrderedDict mapping parameter names such as 'conv1.0.weight' to tensors),
# not an nn.Module, so `model` here is a dictionary of weights.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
model = torch.load('final.pt', map_location=torch.device('cpu'), weights_only=True)

In [12]:
# Display the object loaded from the checkpoint.
# NOTE(review): this prints every weight tensor in full; showing only the
# parameter names and tensor shapes would keep the cell output short.
model

OrderedDict([('conv1.0.weight',
              tensor([[[[ 0.1472,  0.2955,  0.3132],
                        [ 0.2458,  0.3542,  0.0345],
                        [ 0.0153,  0.1927,  0.2010]]],
              
              
                      [[[ 0.1636, -0.0446, -0.0544],
                        [ 0.0392,  0.2912, -0.2556],
                        [ 0.2675, -0.0944,  0.2696]]],
              
              
                      [[[-0.1810, -0.0748, -0.0716],
                        [-0.0143,  0.3172, -0.2197],
                        [ 0.2394, -0.0991, -0.2853]]],
              
              
                      [[[ 0.3158, -0.1057,  0.0962],
                        [ 0.1851, -0.1559, -0.1963],
                        [ 0.0252,  0.1295,  0.1347]]],
              
              
                      [[[-0.2474,  0.2031,  0.1768],
                        [ 0.0752, -0.0866,  0.1966],
                        [-0.0155,  0.1701,  0.1982]]],
              
              
             