In [1]:
import sys
import os
sys.path.append('../')

import torch

from torch.utils.data import Dataset, DataLoader
import torchaudio
import librosa
import pandas as pd
import numpy as np
from tqdm import tqdm

from glob import glob

from ml_base.model import BaselineBirdClassifier

In [2]:
TRAIN_DATA_PATH = os.path.realpath('../data/train_data_s3/')
MODEL_SAVE_PATH = os.path.realpath('../data/models')
VAL_FRAC = 0.1
BATCH_SIZE = 16
SAMPLE_LEN_SEC = 10
SAMPLE_RATE = 32000
EPOCHS_COUNT = 2
EVAL_EVERY_EPOCHS = 10

In [3]:
all_files = glob(os.path.join(TRAIN_DATA_PATH, '**/*.ogg'))

In [4]:
len(all_files)

1021

In [5]:
len(glob(os.path.join(TRAIN_DATA_PATH, '*')))

149

In [6]:
all_df = pd.DataFrame({'file_path': all_files})
all_df['class'] = all_df['file_path'].apply(lambda filepath: os.path.basename(os.path.dirname(filepath)))

In [7]:
CLASS2ID = {classname: i for i, classname in enumerate(all_df['class'].unique())}
ID2CLASS = {i: classname for classname, i in CLASS2ID.items()}

In [8]:
len(CLASS2ID)

149

In [9]:
all_df['class_id'] = all_df['class'].apply(CLASS2ID.get)

In [10]:
all_df

Unnamed: 0,file_path,class,class_id
0,E:\_UNIVER\UCU\2 sem\MLOps\bird-project\data\t...,asbfly,0
1,E:\_UNIVER\UCU\2 sem\MLOps\bird-project\data\t...,ashdro1,1
2,E:\_UNIVER\UCU\2 sem\MLOps\bird-project\data\t...,ashdro1,1
3,E:\_UNIVER\UCU\2 sem\MLOps\bird-project\data\t...,ashpri1,2
4,E:\_UNIVER\UCU\2 sem\MLOps\bird-project\data\t...,ashpri1,2
...,...,...,...
1016,E:\_UNIVER\UCU\2 sem\MLOps\bird-project\data\t...,zitcis1,148
1017,E:\_UNIVER\UCU\2 sem\MLOps\bird-project\data\t...,zitcis1,148
1018,E:\_UNIVER\UCU\2 sem\MLOps\bird-project\data\t...,zitcis1,148
1019,E:\_UNIVER\UCU\2 sem\MLOps\bird-project\data\t...,zitcis1,148


In [11]:
val_df = all_df.sample(int(VAL_FRAC * len(all_df)))
train_df = all_df.loc[~all_df.index.isin(val_df.index)]
len(train_df), len(val_df)

(919, 102)

In [12]:
class AudioDataset(Dataset):
    def __init__(self, paths, labels=None, sample_len=SAMPLE_LEN_SEC, sr=SAMPLE_RATE):
        assert labels is None or len(paths) == len(labels), "Data and targets should be of the same samples count"
        self.paths = paths
        self.labels = labels
        self.sample_len = sample_len
        self.sr = sr

    def __getitem__(self, i):
        audio, sr = librosa.load(self.paths[i], sr=self.sr)

        if self.sample_len is not None:
            desired_len = self.sample_len * sr
            if len(audio) >desired_len:
                audio = audio[:desired_len]
            else:
                audio =  np.pad(audio, (0, desired_len - len(audio)))

        if self.labels is not None:
            return audio, self.labels[i]
        else:
            return audio

    def __len__(self):
        return len(self.paths)

In [13]:
train_ds = AudioDataset(train_df['file_path'].tolist(), train_df['class_id'].tolist())
val_ds = AudioDataset(val_df['file_path'].tolist(), val_df['class_id'].tolist(), sample_len=None)

In [14]:
train_ds[3]

(array([-3.1401648e-06,  2.0947300e-06, -5.2232599e-06, ...,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00], dtype=float32),
 2)

In [15]:
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=1)

In [16]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [17]:
model = BaselineBirdClassifier(len(CLASS2ID), sr=SAMPLE_RATE).to(device)

In [18]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.RAdam(model.parameters(), lr=1e-3)

In [19]:
batch_num = 0

min_eval_loss = np.inf
for epoch in tqdm(range(EPOCHS_COUNT), desc='Epoch'):
    running_loss = 0.
    last_loss = 0.

    for audios, labels in train_loader:
        audios = audios.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(audios)

        loss = loss_fn(outputs, labels)
        loss.backward()

        optimizer.step()

        running_loss += loss.item()
        if batch_num % EVAL_EVERY_EPOCHS == EVAL_EVERY_EPOCHS - 1:
            last_loss = running_loss / EVAL_EVERY_EPOCHS
            print(f'Batch {batch_num + 1}. Loss: {last_loss:.6f}.', end=' ')
            running_loss = 0.

            model.eval()
            eval_running_loss = 0.
            with torch.no_grad():
                for audios, labels in val_loader:
                    audios = audios.to(device)
                    labels = labels.to(device)

                    outputs = model(audios)
                    loss = loss_fn(outputs, labels)

                    eval_running_loss += loss.item()
            
            print(f'Val loss: {eval_running_loss/len(val_ds):.6f}.')

            if eval_running_loss < min_eval_loss:
                min_eval_loss = eval_running_loss
                print("Saving the model")

                torch.save(model.state_dict(), os.path.join(MODEL_SAVE_PATH, f'baseline-{len(CLASS2ID)}.pt'))

            model.train()
        batch_num += 1

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Batch 10. Loss: 5.009478. Val loss: 5.011031.
Saving the model
Batch 20. Loss: 5.013801. Val loss: 5.010960.
Saving the model
Batch 30. Loss: 5.007778. Val loss: 5.010893.
Saving the model
Batch 40. Loss: 5.003049. Val loss: 5.010844.
Saving the model
Batch 50. Loss: 5.001632. Val loss: 5.010756.
Saving the model


Epoch:  50%|█████     | 1/2 [01:39<01:39, 99.31s/it]

Batch 60. Loss: 1.002049. Val loss: 5.010612.
Saving the model
Batch 70. Loss: 5.003828. Val loss: 5.010432.
Saving the model
Batch 80. Loss: 5.004921. Val loss: 5.010246.
Saving the model
Batch 90. Loss: 5.006346. Val loss: 5.009971.
Saving the model
Batch 100. Loss: 5.004671. Val loss: 5.009740.
Saving the model
Batch 110. Loss: 5.005181. Val loss: 5.009430.
Saving the model


Epoch: 100%|██████████| 2/2 [03:28<00:00, 104.19s/it]
