In [53]:
# import the necessary packages
from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import LogSoftmax
from torch import flatten
import numpy as np
import pandas as pd
import torch
import os
from torch.utils.data import Dataset
from sklearn.metrics import classification_report
from torch.utils.data import random_split
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from torchvision.datasets import KMNIST
from torch.optim import Adam
from torch import nn
import matplotlib.pyplot as plt
import time

In [51]:
class LeNet(Module):
	"""LeNet-style CNN: two CONV => RELU => POOL stages, one FC => RELU
	layer and a log-softmax classifier.

	Args:
		numChannels: number of channels of the input images.
		classes: number of output classes.
		fcInFeatures: size of the flattened feature map fed to ``fc1``.
			The default, 800 (= 50 * 4 * 4), is what the two conv/pool
			stages produce for 28x28 inputs. For any other input size pass
			``LeNet.flatFeatures(height, width)``.
	"""

	def __init__(self, numChannels, classes, fcInFeatures=800):
		# call the parent constructor
		super().__init__()

		# initialize first set of CONV => RELU => POOL layers
		self.conv1 = Conv2d(in_channels=numChannels, out_channels=20,
			kernel_size=(5, 5))
		self.relu1 = ReLU()
		self.maxpool1 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

		# initialize second set of CONV => RELU => POOL layers
		self.conv2 = Conv2d(in_channels=20, out_channels=50,
			kernel_size=(5, 5))
		self.relu2 = ReLU()
		self.maxpool2 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

		# initialize first (and only) set of FC => RELU layers; the input
		# width depends on the spatial size of the images, so it is a
		# parameter rather than a constant
		self.fc1 = Linear(in_features=fcInFeatures, out_features=500)
		self.relu3 = ReLU()

		# initialize our softmax classifier
		self.fc2 = Linear(in_features=500, out_features=classes)
		self.logSoftmax = LogSoftmax(dim=1)

	@staticmethod
	def flatFeatures(height, width):
		"""Return the flattened feature count produced by the two conv/pool
		stages for an input of ``height`` x ``width`` pixels.

		Raises:
			ValueError: if the input is too small to survive both stages.
		"""
		def _afterStage(size):
			# an unpadded 5x5 convolution removes 4 pixels, then the
			# 2x2 / stride-2 max-pool halves the result (rounding down)
			return (size - 4) // 2

		outHeight = _afterStage(_afterStage(height))
		outWidth = _afterStage(_afterStage(width))
		if outHeight < 1 or outWidth < 1:
			raise ValueError(
				f"input of size {height}x{width} is too small for LeNet")
		# 50 is the number of output channels of conv2
		return 50 * outHeight * outWidth

	def forward(self, x):
		"""Run a batch ``x`` (batch dimension first) through the network and
		return the per-class log-probabilities, one row per sample."""
		# pass the input through our first set of CONV => RELU =>
		# POOL layers
		x = self.maxpool1(self.relu1(self.conv1(x)))
		# pass the output from the previous layer through the second
		# set of CONV => RELU => POOL layers
		x = self.maxpool2(self.relu2(self.conv2(x)))
		# flatten everything except the batch dimension and pass it
		# through our only set of FC => RELU layers
		x = flatten(x, 1)
		x = self.relu3(self.fc1(x))
		# pass the output to our softmax classifier to get our output
		# predictions
		return self.logSoftmax(self.fc2(x))

In [69]:
class CustomDataset(Dataset):
	"""Dataset of ``.npy`` arrays listed in an annotations CSV.

	The first CSV column holds file paths relative to ``img_dir`` and the
	second column holds the class label of each file.

	Args:
		annotations_file: path of the CSV file to read.
		img_dir: directory the paths in the CSV are relative to.

	Attributes:
		classes: sorted list of the distinct labels found in the CSV.
		class_to_idx: mapping from each label to its index in ``classes``.
	"""

	def __init__(self, annotations_file, img_dir):
		self.img_labels = pd.read_csv(annotations_file)
		self.img_dir = img_dir
		# The training code feeds the labels to NLLLoss, which needs integer
		# class indices, so give every distinct label a stable index
		# (sorted, so the mapping does not depend on row order).
		self.classes = sorted(self.img_labels.iloc[:, 1].unique().tolist())
		self.class_to_idx = {name: idx for idx, name in enumerate(self.classes)}

	def __len__(self):
		"""Return the number of annotated files."""
		return len(self.img_labels)

	def __get_classes__(self):
		"""Return the number of distinct classes.

		The name is kept for existing callers. The count comes from the same
		mapping ``__getitem__`` uses, so label indices always lie in
		``range(self.__get_classes__())``.
		"""
		return len(self.classes)

	def __getitem__(self, idx):
		"""Return ``(image, label)`` for row ``idx``.

		``image`` is the stored array cast to float32 (the default dtype of
		torch layer weights); ``label`` is the integer class index.
		"""
		img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
		# copy=False avoids a copy when the file already holds float32 data
		image = np.load(img_path).astype(np.float32, copy=False)
		label = self.class_to_idx[self.img_labels.iloc[idx, 1]]
		return image, label


In [42]:
# dataset locations, relative to the notebook
AUDIO_PATH = '../birdclef-2022-data/train_audio'
IMAGE_PATH = '../birdclef-2022-data/train_images/'

# training hyperparameters: initial learning rate, batch size, epoch count
INIT_LR, BATCH_SIZE, EPOCHS = 1e-3, 128, 10

# fraction of the data used for training; validation gets the remainder
TRAIN_SPLIT = 0.75
VAL_SPLIT = 1 - TRAIN_SPLIT

# train on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [56]:
# collect data about data structure: one "<label>/<file>" entry per stored
# array, where <label> is the name of the sub-directory holding the file.
# os.listdir() returns entries in arbitrary order, so both listings are
# sorted; the CSV row order (and the seeded train/val split built on it)
# then does not depend on the filesystem.
all_data = []
for primary_label in sorted(os.listdir(IMAGE_PATH)):
    label_dir = os.path.join(IMAGE_PATH, primary_label)
    # skip stray files sitting next to the label directories, which would
    # make the inner os.listdir() raise
    if not os.path.isdir(label_dir):
        continue
    all_data += [primary_label + '/' + x for x in sorted(os.listdir(label_dir))]

# the label of an entry is the directory part of its relative path
base_data = {
    'file_name': list(all_data),
    'label': [item.split('/')[0] for item in all_data],
}

results = pd.DataFrame(base_data, columns = ['file_name', 'label'])
print(results.head())
results.to_csv("annotated_data.csv", index=False)

afrsil1/XC125458_0.npy
                file_name    label
0  afrsil1/XC125458_0.npy  afrsil1
1  afrsil1/XC125458_1.npy  afrsil1
2  afrsil1/XC125458_2.npy  afrsil1
3  afrsil1/XC175522_0.npy  afrsil1
4  afrsil1/XC175522_1.npy  afrsil1


In [59]:
# load data
data_set = CustomDataset("annotated_data.csv", IMAGE_PATH)

# Size the splits from the dataset that is actually being split, and give the
# validation set the remainder: random_split requires the lengths to sum to
# len(data_set), which two independent round() calls cannot guarantee.
numTrainSamples = round(len(data_set) * TRAIN_SPLIT)
numValSamples = len(data_set) - numTrainSamples

# fixed generator seed so the split is the same on every run
(trainData, valData) = random_split(data_set, [numTrainSamples, numValSamples],generator=torch.Generator().manual_seed(42))

print(f"len training: {len(trainData)}, len test: {len(valData)}")

len training: 108632, len test: 36211


In [60]:
# only the training data is shuffled; validation order does not matter
trainDataLoader = DataLoader(trainData, shuffle=True, batch_size=BATCH_SIZE)
valDataLoader = DataLoader(valData, batch_size=BATCH_SIZE)

# number of batches per epoch. len(loader) also counts the final, smaller
# batch, which floor-dividing the dataset size by BATCH_SIZE would drop
# (and which would give 0 steps for a dataset smaller than one batch).
trainSteps = len(trainDataLoader)
valSteps = len(valDataLoader)

In [70]:
# initialize the LeNet model
print("[INFO] initializing the LeNet model...")
# .to(device) must be applied to the model, not to the integer class count
# NOTE(review): LeNet's fc1 expects 800 flattened features, i.e. 28x28
# inputs - confirm this matches the size of the stored arrays
# NOTE(review): data_set must have been created after the latest
# CustomDataset definition was run - re-run the cells in order
model = LeNet(
	numChannels=3,
	classes=data_set.__get_classes__()).to(device)
# initialize our optimizer and loss function
# (NLLLoss pairs with the LogSoftmax output of the model)
opt = Adam(model.parameters(), lr=INIT_LR)
lossFn = nn.NLLLoss()
# initialize a dictionary to store training history
H = {
	"train_loss": [],
	"train_acc": [],
	"val_loss": [],
	"val_acc": []
}

[INFO] initializing the LeNet model...


AttributeError: 'CustomDataset' object has no attribute '__get_classes__'

In [None]:

# measure how long training is going to take
print("[INFO] training the network...")
startTime = time.time()

# loop over our epochs
for e in range(0, EPOCHS):
	# set the model in training mode
	model.train()
	# initialize the total training and validation loss
	totalTrainLoss = 0.0
	totalValLoss = 0.0
	# initialize the number of correct predictions in the training
	# and validation step
	trainCorrect = 0
	valCorrect = 0
	# loop over the training set
	for (x, y) in trainDataLoader:
		# send the input to the device
		(x, y) = (x.to(device), y.to(device))
		# perform a forward pass and calculate the training loss
		pred = model(x)
		loss = lossFn(pred, y)
		# zero out the gradients, perform the backpropagation step,
		# and update the weights
		opt.zero_grad()
		loss.backward()
		opt.step()
		# add the loss to the total training loss so far and
		# calculate the number of correct predictions; .item() stores a
		# plain float so the autograd graph of every batch is not kept
		# alive for the whole epoch
		totalTrainLoss += loss.item()
		trainCorrect += (pred.argmax(1) == y).type(torch.float).sum().item()

	# switch off autograd for evaluation
	with torch.no_grad():
		# set the model in evaluation mode
		model.eval()
		# loop over the validation set
		for (x, y) in valDataLoader:
			# send the input to the device
			(x, y) = (x.to(device), y.to(device))
			# make the predictions and calculate the validation loss
			pred = model(x)
			totalValLoss += lossFn(pred, y).item()
			# calculate the number of correct predictions
			valCorrect += (pred.argmax(1) == y).type(torch.float).sum().item()

	# average the losses over the batches and the correct counts over the
	# samples; max(..., 1) keeps an empty loader from dividing by zero
	avgTrainLoss = totalTrainLoss / max(len(trainDataLoader), 1)
	avgValLoss = totalValLoss / max(len(valDataLoader), 1)
	trainAcc = trainCorrect / max(len(trainDataLoader.dataset), 1)
	valAcc = valCorrect / max(len(valDataLoader.dataset), 1)
	# record the epoch in the training history
	H["train_loss"].append(avgTrainLoss)
	H["train_acc"].append(trainAcc)
	H["val_loss"].append(avgValLoss)
	H["val_acc"].append(valAcc)
	# report progress for this epoch
	print(f"[INFO] EPOCH: {e + 1}/{EPOCHS}")
	print(f"Train loss: {avgTrainLoss:.6f}, Train accuracy: {trainAcc:.4f}")
	print(f"Val loss: {avgValLoss:.6f}, Val accuracy: {valAcc:.4f}")

# report how long training took
endTime = time.time()
print(f"[INFO] total time taken to train the model: {endTime - startTime:.2f}s")

In [None]:
# Sanity-check one stored array: print its shape, whether its first two
# slices along axis 0 are identical, and its values.
# The path is built from IMAGE_PATH so the sample follows the configured
# data directory instead of repeating it.
path_name = os.path.join(IMAGE_PATH, 'afrsil1/XC125458_0.npy')
spec = np.load(path_name)

print(spec.shape)
print(np.array_equal(spec[0], spec[1]))
print(spec)