In [1]:
import mlflow

In [2]:
mlflow.login("databricks")

2024/06/09 14:34:29 INFO mlflow.utils.credentials: No valid Databricks credentials found, please enter your credentials...
2024/06/09 14:34:44 INFO mlflow.utils.credentials: Successfully connected to MLflow hosted tracking server! Host: https://community.cloud.databricks.com.


In [4]:
# mlflow.set_experiment("/Users/shevtsov.pn@ucu.edu.ua/check-databricks-ce-connection")

# with mlflow.start_run():

#     mlflow.log_metric("foo", 1)

#     mlflow.log_metric("bar", 2)

In [5]:
import sys
import os
sys.path.append('../')

import torch
import onnx
import librosa
import pandas as pd
import numpy as np

from glob import glob
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from time import time
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

from ml_base.model import BaselineBirdClassifier

In [6]:
TRAIN_DATA_PATH = os.path.realpath('../data/train_data_s3/')
MODEL_SAVE_PATH = os.path.realpath('../data/models')
VAL_FRAC = 0.1
BATCH_SIZE = 16
SAMPLE_LEN_SEC = 10
SAMPLE_RATE = 32000
EPOCHS_COUNT = 2
EVAL_EVERY_STEPS = 20
LEARNING_RATE = 1e-3

In [7]:
all_files = glob(os.path.join(TRAIN_DATA_PATH, '**/*.ogg'))

all_df = pd.DataFrame({'file_path': all_files})
all_df['class'] = all_df['file_path'].apply(lambda filepath: os.path.basename(os.path.dirname(filepath)))

CLASS2ID = {classname: i for i, classname in enumerate(all_df['class'].unique())}
ID2CLASS = {i: classname for classname, i in CLASS2ID.items()}

all_df['class_id'] = all_df['class'].apply(CLASS2ID.get)

val_df = all_df.sample(int(VAL_FRAC * len(all_df)))
train_df = all_df.loc[~all_df.index.isin(val_df.index)]

In [8]:
class AudioDataset(Dataset):
    def __init__(self, paths, labels=None, sample_len=SAMPLE_LEN_SEC, sr=SAMPLE_RATE):
        assert labels is None or len(paths) == len(labels), "Data and targets should be of the same samples count"
        self.paths = paths
        self.labels = labels
        self.sample_len = sample_len
        self.sr = sr

    def __getitem__(self, i):
        audio, sr = librosa.load(self.paths[i], sr=self.sr)

        if self.sample_len is not None:
            desired_len = self.sample_len * sr
            if len(audio) >desired_len:
                audio = audio[:desired_len]
            else:
                audio =  np.pad(audio, (0, desired_len - len(audio)))

        if self.labels is not None:
            return audio, self.labels[i]
        else:
            return audio

    def __len__(self):
        return len(self.paths)

In [9]:
train_ds = AudioDataset(train_df['file_path'].tolist(), train_df['class_id'].tolist())
val_ds = AudioDataset(val_df['file_path'].tolist(), val_df['class_id'].tolist(), sample_len=None)

In [10]:
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=1)

In [10]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = BaselineBirdClassifier(len(CLASS2ID), sr=SAMPLE_RATE).to(device)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.RAdam(model.parameters(), lr=LEARNING_RATE)

STFT kernels created, time used = 0.0250 seconds


In [12]:
os.environ['AWS_ACCESS_KEY_ID'] = ''
os.environ['AWS_SECRET_ACCESS_KEY'] = ''

artifact_uri = f"s3://bird-project-artifacts/test2"

mlflow.create_experiment("/Users/shevtsov.pn@ucu.edu.ua/test3", artifact_location=artifact_uri)

RestException: RESOURCE_ALREADY_EXISTS: Node named 'test3' already exists

In [76]:
def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

In [77]:
mlflow.set_experiment("/Users/shevtsov.pn@ucu.edu.ua/test2")

with mlflow.start_run() as run:

    mlflow.log_params({
        "learning_rate": LEARNING_RATE,
        "batch_size": BATCH_SIZE,
        "epochs_count": EPOCHS_COUNT,
        "sample_rate": SAMPLE_RATE
    })

    batch_num = 0

    min_eval_loss = np.inf
    corresp_train_loss = np.inf
    best_loss_metrics = None

    training_start_time = time()

    for epoch in tqdm(range(EPOCHS_COUNT), desc='Epoch'):
        running_loss = 0.
        last_loss = 0.

        for audios, labels in train_loader:
            audios = audios.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(audios)

            loss = loss_fn(outputs, labels)
            loss.backward()

            optimizer.step()

            running_loss += loss.item()
            if batch_num % EVAL_EVERY_STEPS == EVAL_EVERY_STEPS - 1:
                last_loss = running_loss / EVAL_EVERY_STEPS
                print(f'Batch {batch_num + 1}. Loss: {last_loss:.6f}.', end=' ')
                running_loss = 0.

                model.eval()
                eval_running_loss = 0.
                outputs_list = []
                labels_list = []
                with torch.no_grad():
                    for audios, labels in val_loader:
                        audios = audios.to(device)
                        labels = labels.to(device)

                        outputs = model(audios)
                        loss = loss_fn(outputs, labels)

                        eval_running_loss += loss.item()
                        outputs_list.append(outputs.cpu().numpy())
                        labels_list.append(labels.cpu().numpy())
                
                eval_running_loss = eval_running_loss/len(val_ds)

                print(f'Val loss: {eval_running_loss:.6f}.')                

                if eval_running_loss < min_eval_loss:
                    min_eval_loss = eval_running_loss
                    corresp_train_loss = last_loss
                    print("Saving the model")

                    outputs = np.concatenate(outputs_list, axis=0)
                    labels = np.concatenate(labels_list, axis=0)

                    accuracy = accuracy_score(labels, outputs.argmax(axis=1))
                    f1 = f1_score(labels, outputs.argmax(axis=1), average='macro', zero_division=1)
                    prec = precision_score(labels, outputs.argmax(axis=1), average='macro', zero_division=1)
                    rec = recall_score(labels, outputs.argmax(axis=1), average='macro', zero_division=1)
                    
                    best_loss_metrics = {
                        "accuracy": accuracy,
                        "macro_f1": f1,
                        "macro_precision": prec,
                        "macro_recall": rec,
                    }

                    torch.save(model.state_dict(), os.path.join(MODEL_SAVE_PATH, f'baseline-{len(CLASS2ID)}.pt'))

                model.train()
            batch_num += 1

    mlflow.log_metric("train_time_sec", time() - training_start_time)
    mlflow.log_metric("min_val_loss", min_eval_loss)
    mlflow.log_metric("train_loss", last_loss)
    mlflow.log_metrics(best_loss_metrics)

    print("Exporting to ONNX")

    model.load_state_dict(torch.load(os.path.join(MODEL_SAVE_PATH, f'baseline-{len(CLASS2ID)}.pt'), map_location=torch.device('cpu')))
    model.eval()

    torch_input = torch.randn(8, SAMPLE_RATE*SAMPLE_LEN_SEC)
    torch.onnx.export(model.cpu(),
                    torch_input,
                    os.path.join(MODEL_SAVE_PATH, f'baseline-{len(CLASS2ID)}.onnx'),
                    export_params=True,
                    do_constant_folding=True,
                    input_names = ['input'],
                    output_names = ['output'],
                    dynamic_axes={'input' : {0: 'batch_size', 1: 'sample_length'},
                                'output' : {0: 'batch_size'}}
    )

    print("ONNX export finished")

    onnx_model = onnx.load(os.path.join(MODEL_SAVE_PATH, f'baseline-{len(CLASS2ID)}.onnx'))
    onnx.checker.check_model(onnx_model)

    print("ONNX model checked")

    mlflow.log_artifact(os.path.join(MODEL_SAVE_PATH, f'baseline-{len(CLASS2ID)}.onnx'))


Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Batch 20. Loss: 5.003870. Val loss: 5.005724.
Saving the model
labels=array([120,  38,  46,   4,  61,   2,  16,  54,  72,  36, 115,  61,  68,
         8,  46,  86,   6,  51,  90,  63,  13,  17, 148,  67,  68, 120,
       111,  17,  61,  67, 148,   8, 113,  21,  67,  63, 123, 108,  91,
        93,  39, 107,  91,  62,  78, 134,  34,  59,  70,  48, 118,  36,
        25,  72, 123,  90,  90,  48,  93,  34,  34,  44, 100,  13,  70,
        72,  12, 113,  33,  64,  67,  95, 113,  33,  64,  64,   5, 148,
        13,  51,  39,  68,  86,  35, 119, 121,  56,  68,  71,  85,  91,
       121, 119,  32,   4,  54,  72, 120,  12, 146,  46,  46], dtype=int64), softmax(outputs)[0]=array([0.00636793, 0.00658233, 0.00667149, 0.00631394, 0.00666545,
       0.00657114, 0.00694311, 0.00626425, 0.00706255, 0.0070194 ,
       0.00714848, 0.00671908, 0.00687607, 0.0069673 , 0.00689273,
       0.0069852 , 0.0066798 , 0.00702383, 0.0065198 , 0.00641801,
       0.00690161, 0.00700131, 0.00659009, 0.0069406 , 0.0068

Epoch:  50%|█████     | 1/2 [00:54<00:54, 54.03s/it]

Batch 60. Loss: 0.500447. Val loss: 5.004598.
Saving the model
labels=array([120,  38,  46,   4,  61,   2,  16,  54,  72,  36, 115,  61,  68,
         8,  46,  86,   6,  51,  90,  63,  13,  17, 148,  67,  68, 120,
       111,  17,  61,  67, 148,   8, 113,  21,  67,  63, 123, 108,  91,
        93,  39, 107,  91,  62,  78, 134,  34,  59,  70,  48, 118,  36,
        25,  72, 123,  90,  90,  48,  93,  34,  34,  44, 100,  13,  70,
        72,  12, 113,  33,  64,  67,  95, 113,  33,  64,  64,   5, 148,
        13,  51,  39,  68,  86,  35, 119, 121,  56,  68,  71,  85,  91,
       121, 119,  32,   4,  54,  72, 120,  12, 146,  46,  46], dtype=int64), softmax(outputs)[0]=array([0.00636045, 0.00657959, 0.00666862, 0.00630538, 0.00667165,
       0.00655599, 0.00693749, 0.00624224, 0.00705813, 0.00701659,
       0.00715256, 0.00672537, 0.00687988, 0.00696962, 0.00687528,
       0.00697037, 0.00669463, 0.0070204 , 0.0065125 , 0.00640585,
       0.0068904 , 0.00697492, 0.00660059, 0.00694855, 0.0068

Epoch: 100%|██████████| 2/2 [01:59<00:00, 59.88s/it]


Exporting to ONNX


  if self.num_samples < self.pad_amount:
  if return_spec:
  _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version)
  _C._jit_pass_onnx_graph_shape_type_inference(
  _C._jit_pass_onnx_graph_shape_type_inference(


ONNX export finished
ONNX model checked
