In [1]:
# run in kaggle to fetch repo

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

GITHUB_TOKEN = user_secrets.get_secret("GITHUB_MORSE_TOKEN")
USER = "SwedishSquid"
REPO_NAME = 'KC25_morse'
CLONE_URL = f"https://{USER}:{GITHUB_TOKEN}@github.com/{USER}/{REPO_NAME}.git"
get_ipython().system(f"git clone {CLONE_URL}")

import sys
sys.path.append("/kaggle/working/KC25_morse/src")

import morse

Cloning into 'KC25_morse'...
remote: Enumerating objects: 31, done.[K
remote: Counting objects: 100% (31/31), done.[K
remote: Compressing objects: 100% (26/26), done.[K
remote: Total 31 (delta 4), reused 28 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (31/31), 4.75 MiB | 8.55 MiB/s, done.
Resolving deltas: 100% (4/4), done.


In [2]:
print('here')

here


In [3]:
!pip install Levenshtein

Collecting Levenshtein
  Downloading levenshtein-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading levenshtein-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.7/161.7 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m42.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, Levenshtein
Successfully installed Levenshtein-0.27.1 rapidfuzz-3.13.0


In [4]:
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import Levenshtein

from morse.models import MySomething
from morse.my_datasets import ListDataset, load_tensors, filenames_to_torch
from morse.samplers import LongCTCSampler
from morse.augmentations import rotation_transform
from morse.text_helpers import Vectorizer

In [5]:
import wandb
import os
from kaggle_secrets import UserSecretsClient

secret_value_0 = UserSecretsClient().get_secret('WANDB_API_KEY')
os.environ["WANDB_API_KEY"] = secret_value_0

common_wandb_kvals = {
    'project': 'KC25',
    'entity': 'fishwere',
}

# let there be no noise
os.environ["WANDB_SILENT"] = "true"

In [6]:
dev_flag = False

all_data_dir = '/kaggle/input/kc25-preprocessed-data'
labels_dir = '/kaggle/input/kc25-dataset-copy'

In [7]:
full_train_df = pd.read_csv(Path(labels_dir, 'train.csv'))
test_df = pd.read_csv(Path(labels_dir, 'test.csv'))
full_train_df.head()

Unnamed: 0,id,message
0,1.opus,03ЩУЫЛПИГХ
1,2.opus,ЪЛТ0ДС6А3Г
2,3.opus,5ЭКЫБЗХЯН
3,4.opus,ЖЫЦОИ68КФ
4,5.opus,32Ю7МЫ ЗЛ


In [8]:
index_to_letter = sorted(set(''.join(full_train_df['message'])))
pad_value = 0
print(index_to_letter)
letter_to_index = dict([(letter, i) for i, letter in enumerate(index_to_letter)])
dictionary_size = len(index_to_letter)
print(dictionary_size)
print(letter_to_index)

vectorizer = Vectorizer(letter_to_index, index_to_letter)
print(vectorizer.text_transform('ПРИВЕТ #'))

[' ', '#', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ж', 'З', 'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П', 'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч', 'Ш', 'Щ', 'Ъ', 'Ы', 'Ь', 'Э', 'Ю', 'Я']
44
{' ': 0, '#': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, 'А': 12, 'Б': 13, 'В': 14, 'Г': 15, 'Д': 16, 'Е': 17, 'Ж': 18, 'З': 19, 'И': 20, 'Й': 21, 'К': 22, 'Л': 23, 'М': 24, 'Н': 25, 'О': 26, 'П': 27, 'Р': 28, 'С': 29, 'Т': 30, 'У': 31, 'Ф': 32, 'Х': 33, 'Ц': 34, 'Ч': 35, 'Ш': 36, 'Щ': 37, 'Ъ': 38, 'Ы': 39, 'Ь': 40, 'Э': 41, 'Ю': 42, 'Я': 43}
tensor([27, 28, 20, 14, 17, 30,  0,  1])


In [9]:
def load_data(data_dir):
    train_index, val_index = train_test_split(np.arange(full_train_df.shape[0]), test_size=1/6, shuffle=True, 
                                           random_state=42)
    if dev_flag:
        train_index = train_index[:1000]    # to save memory
        val_index = val_index[:1000]

    print(train_index.shape, val_index.shape)

    train_features = list(tqdm(load_tensors(data_dir, filenames_to_torch(list(full_train_df.iloc[train_index]['id'])))))
    val_features = list(tqdm(load_tensors(data_dir, filenames_to_torch(list(full_train_df.iloc[val_index]['id'])))))
    train_labels = list(full_train_df.iloc[train_index]['message'])
    val_labels = list(full_train_df.iloc[val_index]['message'])

    assert len(train_features) == len(train_labels)
    assert len(val_features) == len(val_labels)

    trainset = ListDataset(train_features, train_labels, transform=rotation_transform)
    valset = ListDataset(val_features, val_labels)

    return trainset, valset

    # print(len(trainset), len(valset))
    # plt.imshow(valset[0][0])
    # print(valset[0][1])

In [10]:
dataset_names = [
    'melspec_nfft1024_nc64',
    'melspec_nfft256_nc16',
    'melspec_nfft256_nc64',
    'melspec_nfft512_nc32',
    'melspec_nfft512_nc64',
]

dataset_input_sizes = [
    64,
    16,
    64,
    32,
    64,
]

datasets = [load_data(Path(all_data_dir, name)) for name in dataset_names]

(25000,) (5000,)


25000it [02:14, 185.87it/s]
5000it [00:26, 186.32it/s]


(25000,) (5000,)


25000it [02:10, 191.92it/s]
5000it [00:26, 188.67it/s]


(25000,) (5000,)


25000it [02:43, 152.77it/s]
5000it [00:33, 150.07it/s]


(25000,) (5000,)


25000it [02:12, 188.86it/s]
5000it [00:26, 190.39it/s]


(25000,) (5000,)


25000it [02:22, 174.87it/s]
5000it [00:27, 180.77it/s]


In [11]:
d_name_to_index = dict([(name, i) for i, name in enumerate(dataset_names)])
d_name_to_index

{'melspec_nfft1024_nc64': 0,
 'melspec_nfft256_nc16': 1,
 'melspec_nfft256_nc64': 2,
 'melspec_nfft512_nc32': 3,
 'melspec_nfft512_nc64': 4}

# train

In [12]:
device = 0 if torch.cuda.is_available() else 'cpu'
device

0

In [13]:
def batch_text_transform(texts):
    vecs, lengths = vectorizer.batch_text_transform(texts, pad_value=pad_value)
    return vecs + 1, lengths

In [14]:
def calculate_target_metric(valset, model):
    model.eval()
    with torch.no_grad():
        distance_buffer = []
        for features, labels in tqdm([valset[i] for i in range(250)]):
            features = features.to(device)
            outs = model(features[None]).squeeze().to('cpu')
            probs = F.softmax(outs, dim=0)
            seqs, likelihood = LongCTCSampler.sample(probs, beam_size=10)
            text = vectorizer.from_tensor(torch.tensor(seqs) - 1)
            decoded_message = text
            dist = Levenshtein.distance(decoded_message, labels)
            distance_buffer.append(dist)
        mean_dist = np.mean(distance_buffer)
    return mean_dist

In [15]:
import optuna
import time


def objective(trial: optuna.Trial):
    n_epochs = 30
    d_name = trial.suggest_categorical('dataset', dataset_names)
    n_pooled_blocks = trial.suggest_categorical('n_pooled_blocks', [3, 4, 5])
    n_head_blocks = trial.suggest_int('n_head_blocks', 0, 5)
    batch_size = 128
    lr = trial.suggest_categorical('lr', [1e-2, 5e-3, 1e-3, 5e-4])
    inner_size = trial.suggest_categorical('inner_size', [32, 64, 128])
    step_gamma = 0.33
    p_dropout = 0.15
    input_size = dataset_input_sizes[d_name_to_index[d_name]]

    group = 'FeatureSpaceSearch'
    run_name = None

    config = {
        'n_epochs': n_epochs,
        'batch_size': batch_size,
        'lr': lr,
        'n_pooled_blocks': n_pooled_blocks,
        'n_head_blocks': n_head_blocks,
        'inner_size': inner_size,
        'step_gamma': step_gamma,
        'p_dropout': p_dropout,
        'dataset': d_name,
    }

    model = MySomething(n_pooled_blocks = n_pooled_blocks, n_head_blocks = n_head_blocks,
                    input_size = input_size, inner_size = inner_size, output_size = dictionary_size + 1,
                    p_dropout = p_dropout).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20], gamma=step_gamma)
    ctc_loss = nn.CTCLoss()

    trainset, valset = datasets[d_name_to_index[d_name]]

    train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False)

    train_start_time = time.perf_counter()
    final_loss = 100
    with wandb.init(
            **common_wandb_kvals,
            group=group,
            config=config,
            name=run_name,
            ) as run:
        for epoch in range(n_epochs):
            model.train()
            train_loss_buffer = []
            for features, labels in tqdm(train_loader):
                features = features.to(device)
                targets, target_lengths = batch_text_transform(labels)
                targets, target_lengths = targets.to(device), target_lengths.to(torch.int32).to(device)
                outs = model(features).transpose(0, 2).transpose(1, 2)
                inputs = F.log_softmax(outs, dim=2)
                input_lengths = torch.full(size=(inputs.shape[1],), fill_value=inputs.shape[0], dtype=torch.int32).to(device)
                loss = ctc_loss(inputs, targets, input_lengths, target_lengths)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                train_loss_buffer.append(loss.detach())
            scheduler.step()
            model.eval()
            test_loss_buffer = []
            with torch.no_grad():
                for features, labels in tqdm(val_loader):
                    features = features.to(device)
                    targets, target_lengths = batch_text_transform(labels)
                    targets, target_lengths = targets.to(device), target_lengths.to(torch.int32).to(device)
                    outs = model(features).transpose(0, 2).transpose(1, 2)
                    inputs = F.log_softmax(outs, dim=2)
                    input_lengths = torch.full(size=(inputs.shape[1],), fill_value=inputs.shape[0], dtype=torch.int32).to(device)
                    loss = ctc_loss(inputs, targets, input_lengths, target_lengths)
                    test_loss_buffer.append(loss.detach())
            train_loss_value = torch.mean(torch.stack(train_loss_buffer)).item()
            test_loss_value = torch.mean(torch.stack(test_loss_buffer)).item()
            final_loss = test_loss_value
            wandb.log({
                'train_loss': train_loss_value,
                'test_loss': test_loss_value,
                'lr': scheduler.get_last_lr()[0],
            })
        print('calculating target metric')
        target_metric = calculate_target_metric(valset, model)
        time_spent_on_train = time.perf_counter() - train_start_time
        wandb.log({
            'Levenshtein_distance': target_metric,
            # 'final_loss': final_loss,
            'mean_epoch_duration': time_spent_on_train / n_epochs,
        })
    return final_loss

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)

[I 2025-04-17 17:08:24,450] A new study created in memory with name: no-name-92770782-49e4-40a2-aae2-ad07b0939c71
100%|██████████| 196/196 [00:06<00:00, 31.36it/s]
100%|██████████| 40/40 [00:00<00:00, 52.52it/s]
100%|██████████| 196/196 [00:05<00:00, 37.01it/s]
100%|██████████| 40/40 [00:00<00:00, 68.77it/s]
100%|██████████| 196/196 [00:04<00:00, 41.96it/s]
100%|██████████| 40/40 [00:00<00:00, 79.96it/s]
100%|██████████| 196/196 [00:05<00:00, 36.82it/s]
100%|██████████| 40/40 [00:00<00:00, 76.44it/s]
100%|██████████| 196/196 [00:05<00:00, 37.50it/s]
100%|██████████| 40/40 [00:00<00:00, 62.69it/s]
100%|██████████| 196/196 [00:04<00:00, 42.60it/s]
100%|██████████| 40/40 [00:00<00:00, 70.69it/s]
100%|██████████| 196/196 [00:04<00:00, 42.29it/s]
100%|██████████| 40/40 [00:00<00:00, 75.93it/s]
100%|██████████| 196/196 [00:04<00:00, 41.64it/s]
100%|██████████| 40/40 [00:00<00:00, 76.44it/s]
100%|██████████| 196/196 [00:04<00:00, 41.62it/s]
100%|██████████| 40/40 [00:00<00:00, 68.58it/s]
100%

calculating target metric


100%|██████████| 250/250 [00:11<00:00, 22.51it/s]
[W 2025-04-17 17:11:25,345] Trial 0 failed with parameters: {'dataset': 'melspec_nfft512_nc64', 'n_pooled_blocks': 5, 'n_head_blocks': 1, 'lr': 0.01, 'inner_size': 32} because of the following error: The value nan is not acceptable.
[W 2025-04-17 17:11:25,346] Trial 0 failed with value nan.
100%|██████████| 196/196 [00:06<00:00, 30.51it/s]
100%|██████████| 40/40 [00:00<00:00, 72.29it/s]
100%|██████████| 196/196 [00:06<00:00, 30.68it/s]
100%|██████████| 40/40 [00:00<00:00, 73.31it/s]
100%|██████████| 196/196 [00:06<00:00, 30.65it/s]
100%|██████████| 40/40 [00:00<00:00, 73.22it/s]
100%|██████████| 196/196 [00:06<00:00, 30.49it/s]
100%|██████████| 40/40 [00:00<00:00, 72.30it/s]
100%|██████████| 196/196 [00:06<00:00, 30.63it/s]
100%|██████████| 40/40 [00:00<00:00, 72.60it/s]
100%|██████████| 196/196 [00:06<00:00, 30.65it/s]
100%|██████████| 40/40 [00:00<00:00, 73.62it/s]
100%|██████████| 196/196 [00:06<00:00, 30.64it/s]
100%|██████████| 40/

calculating target metric


100%|██████████| 250/250 [00:17<00:00, 14.05it/s]
[I 2025-04-17 17:15:15,504] Trial 1 finished with value: 0.3779948651790619 and parameters: {'dataset': 'melspec_nfft256_nc16', 'n_pooled_blocks': 5, 'n_head_blocks': 0, 'lr': 0.001, 'inner_size': 64}. Best is trial 1 with value: 0.3779948651790619.
100%|██████████| 196/196 [00:04<00:00, 47.81it/s]
100%|██████████| 40/40 [00:00<00:00, 96.81it/s]
100%|██████████| 196/196 [00:03<00:00, 49.59it/s]
100%|██████████| 40/40 [00:00<00:00, 98.56it/s]
100%|██████████| 196/196 [00:03<00:00, 49.67it/s]
100%|██████████| 40/40 [00:00<00:00, 98.62it/s]
100%|██████████| 196/196 [00:03<00:00, 49.36it/s]
100%|██████████| 40/40 [00:00<00:00, 94.43it/s]
100%|██████████| 196/196 [00:03<00:00, 49.11it/s]
100%|██████████| 40/40 [00:00<00:00, 98.06it/s]
100%|██████████| 196/196 [00:03<00:00, 49.39it/s]
100%|██████████| 40/40 [00:00<00:00, 96.94it/s]
100%|██████████| 196/196 [00:03<00:00, 49.55it/s]
100%|██████████| 40/40 [00:00<00:00, 98.54it/s]
100%|█████████

calculating target metric


100%|██████████| 250/250 [00:17<00:00, 14.07it/s]
[I 2025-04-17 17:17:48,482] Trial 2 finished with value: 0.2274440973997116 and parameters: {'dataset': 'melspec_nfft1024_nc64', 'n_pooled_blocks': 3, 'n_head_blocks': 0, 'lr': 0.005, 'inner_size': 128}. Best is trial 2 with value: 0.2274440973997116.
100%|██████████| 196/196 [00:04<00:00, 39.34it/s]
100%|██████████| 40/40 [00:00<00:00, 78.25it/s]
100%|██████████| 196/196 [00:04<00:00, 40.09it/s]
100%|██████████| 40/40 [00:00<00:00, 80.77it/s]
100%|██████████| 196/196 [00:04<00:00, 39.90it/s]
100%|██████████| 40/40 [00:00<00:00, 80.34it/s]
100%|██████████| 196/196 [00:04<00:00, 39.86it/s]
100%|██████████| 40/40 [00:00<00:00, 79.49it/s]
100%|██████████| 196/196 [00:04<00:00, 39.55it/s]
100%|██████████| 40/40 [00:00<00:00, 78.54it/s]
100%|██████████| 196/196 [00:04<00:00, 39.68it/s]
100%|██████████| 40/40 [00:00<00:00, 81.67it/s]
100%|██████████| 196/196 [00:04<00:00, 39.94it/s]
100%|██████████| 40/40 [00:00<00:00, 78.91it/s]
100%|███████

calculating target metric


100%|██████████| 250/250 [00:10<00:00, 22.77it/s]
[W 2025-04-17 17:20:46,530] Trial 3 failed with parameters: {'dataset': 'melspec_nfft512_nc64', 'n_pooled_blocks': 5, 'n_head_blocks': 2, 'lr': 0.0005, 'inner_size': 32} because of the following error: The value nan is not acceptable.
[W 2025-04-17 17:20:46,531] Trial 3 failed with value nan.
100%|██████████| 196/196 [00:05<00:00, 34.98it/s]
100%|██████████| 40/40 [00:00<00:00, 99.65it/s]
100%|██████████| 196/196 [00:05<00:00, 35.64it/s]
100%|██████████| 40/40 [00:00<00:00, 101.14it/s]
100%|██████████| 196/196 [00:05<00:00, 35.60it/s]
100%|██████████| 40/40 [00:00<00:00, 103.64it/s]
100%|██████████| 196/196 [00:05<00:00, 35.53it/s]
100%|██████████| 40/40 [00:00<00:00, 103.27it/s]
100%|██████████| 196/196 [00:05<00:00, 35.48it/s]
100%|██████████| 40/40 [00:00<00:00, 100.60it/s]
100%|██████████| 196/196 [00:05<00:00, 35.37it/s]
100%|██████████| 40/40 [00:00<00:00, 89.23it/s]
100%|██████████| 196/196 [00:05<00:00, 35.30it/s]
100%|█████████

calculating target metric


100%|██████████| 250/250 [00:04<00:00, 56.03it/s]
[W 2025-04-17 17:23:53,899] Trial 4 failed with parameters: {'dataset': 'melspec_nfft1024_nc64', 'n_pooled_blocks': 5, 'n_head_blocks': 4, 'lr': 0.005, 'inner_size': 64} because of the following error: The value nan is not acceptable.
[W 2025-04-17 17:23:53,900] Trial 4 failed with value nan.
100%|██████████| 196/196 [00:13<00:00, 14.76it/s]
100%|██████████| 40/40 [00:01<00:00, 34.54it/s]
100%|██████████| 196/196 [00:13<00:00, 15.06it/s]
100%|██████████| 40/40 [00:01<00:00, 34.87it/s]
100%|██████████| 196/196 [00:12<00:00, 15.10it/s]
100%|██████████| 40/40 [00:01<00:00, 35.09it/s]
100%|██████████| 196/196 [00:12<00:00, 15.11it/s]
100%|██████████| 40/40 [00:01<00:00, 34.77it/s]
100%|██████████| 196/196 [00:12<00:00, 15.11it/s]
100%|██████████| 40/40 [00:01<00:00, 32.94it/s]
100%|██████████| 196/196 [00:12<00:00, 15.11it/s]
100%|██████████| 40/40 [00:01<00:00, 34.76it/s]
100%|██████████| 196/196 [00:12<00:00, 15.11it/s]
100%|██████████| 4

calculating target metric


100%|██████████| 250/250 [01:05<00:00,  3.80it/s]
[I 2025-04-17 17:32:08,365] Trial 5 finished with value: 0.5061451196670532 and parameters: {'dataset': 'melspec_nfft256_nc64', 'n_pooled_blocks': 3, 'n_head_blocks': 2, 'lr': 0.0005, 'inner_size': 128}. Best is trial 2 with value: 0.2274440973997116.
100%|██████████| 196/196 [00:06<00:00, 29.51it/s]
100%|██████████| 40/40 [00:00<00:00, 71.29it/s]
100%|██████████| 196/196 [00:06<00:00, 29.57it/s]
100%|██████████| 40/40 [00:00<00:00, 71.44it/s]
100%|██████████| 196/196 [00:06<00:00, 29.53it/s]
100%|██████████| 40/40 [00:00<00:00, 71.70it/s]
100%|██████████| 196/196 [00:06<00:00, 29.59it/s]
100%|██████████| 40/40 [00:00<00:00, 71.48it/s]
100%|██████████| 196/196 [00:06<00:00, 29.45it/s]
100%|██████████| 40/40 [00:00<00:00, 71.89it/s]
100%|██████████| 196/196 [00:06<00:00, 29.54it/s]
100%|██████████| 40/40 [00:00<00:00, 71.35it/s]
100%|██████████| 196/196 [00:06<00:00, 29.60it/s]
100%|██████████| 40/40 [00:00<00:00, 71.04it/s]
100%|███████

calculating target metric


100%|██████████| 250/250 [00:17<00:00, 13.91it/s]
[I 2025-04-17 17:36:06,756] Trial 6 finished with value: 0.3459568917751312 and parameters: {'dataset': 'melspec_nfft256_nc16', 'n_pooled_blocks': 5, 'n_head_blocks': 2, 'lr': 0.001, 'inner_size': 64}. Best is trial 2 with value: 0.2274440973997116.
100%|██████████| 196/196 [00:11<00:00, 16.83it/s]
100%|██████████| 40/40 [00:00<00:00, 46.83it/s]
100%|██████████| 196/196 [00:11<00:00, 16.85it/s]
100%|██████████| 40/40 [00:00<00:00, 47.40it/s]
100%|██████████| 196/196 [00:11<00:00, 16.85it/s]
100%|██████████| 40/40 [00:00<00:00, 47.35it/s]
100%|██████████| 196/196 [00:11<00:00, 16.84it/s]
100%|██████████| 40/40 [00:00<00:00, 47.47it/s]
100%|██████████| 196/196 [00:11<00:00, 16.84it/s]
100%|██████████| 40/40 [00:00<00:00, 46.82it/s]
100%|██████████| 196/196 [00:11<00:00, 16.86it/s]
100%|██████████| 40/40 [00:00<00:00, 47.40it/s]
100%|██████████| 196/196 [00:11<00:00, 16.85it/s]
100%|██████████| 40/40 [00:00<00:00, 47.41it/s]
100%|█████████

calculating target metric


100%|██████████| 250/250 [00:17<00:00, 13.93it/s]
[I 2025-04-17 17:42:43,538] Trial 7 finished with value: 0.3047594726085663 and parameters: {'dataset': 'melspec_nfft256_nc16', 'n_pooled_blocks': 5, 'n_head_blocks': 3, 'lr': 0.0005, 'inner_size': 128}. Best is trial 2 with value: 0.2274440973997116.
100%|██████████| 196/196 [00:04<00:00, 45.19it/s]
100%|██████████| 40/40 [00:00<00:00, 92.81it/s]
100%|██████████| 196/196 [00:04<00:00, 45.26it/s]
100%|██████████| 40/40 [00:00<00:00, 94.32it/s]
100%|██████████| 196/196 [00:04<00:00, 45.53it/s]
100%|██████████| 40/40 [00:00<00:00, 95.67it/s]
100%|██████████| 196/196 [00:04<00:00, 45.37it/s]
100%|██████████| 40/40 [00:00<00:00, 95.34it/s]
100%|██████████| 196/196 [00:04<00:00, 45.52it/s]
100%|██████████| 40/40 [00:00<00:00, 95.22it/s]
100%|██████████| 196/196 [00:04<00:00, 45.36it/s]
100%|██████████| 40/40 [00:00<00:00, 96.03it/s]
100%|██████████| 196/196 [00:04<00:00, 45.05it/s]
100%|██████████| 40/40 [00:00<00:00, 94.76it/s]
100%|███████

calculating target metric


100%|██████████| 250/250 [00:18<00:00, 13.84it/s]
[I 2025-04-17 17:45:28,507] Trial 8 finished with value: 0.25941202044487 and parameters: {'dataset': 'melspec_nfft512_nc32', 'n_pooled_blocks': 4, 'n_head_blocks': 1, 'lr': 0.005, 'inner_size': 64}. Best is trial 2 with value: 0.2274440973997116.
100%|██████████| 196/196 [00:03<00:00, 51.64it/s]
100%|██████████| 40/40 [00:00<00:00, 97.73it/s]
100%|██████████| 196/196 [00:03<00:00, 52.01it/s]
100%|██████████| 40/40 [00:00<00:00, 97.74it/s]
100%|██████████| 196/196 [00:03<00:00, 51.87it/s]
100%|██████████| 40/40 [00:00<00:00, 96.80it/s]
100%|██████████| 196/196 [00:03<00:00, 52.07it/s]
100%|██████████| 40/40 [00:00<00:00, 97.36it/s]
100%|██████████| 196/196 [00:03<00:00, 52.01it/s]
100%|██████████| 40/40 [00:00<00:00, 97.40it/s]
100%|██████████| 196/196 [00:03<00:00, 51.64it/s]
100%|██████████| 40/40 [00:00<00:00, 96.30it/s]
100%|██████████| 196/196 [00:03<00:00, 51.49it/s]
100%|██████████| 40/40 [00:00<00:00, 97.34it/s]
100%|██████████|

calculating target metric


100%|██████████| 250/250 [00:33<00:00,  7.44it/s]
[I 2025-04-17 17:48:11,603] Trial 9 finished with value: 0.8871387839317322 and parameters: {'dataset': 'melspec_nfft512_nc32', 'n_pooled_blocks': 3, 'n_head_blocks': 0, 'lr': 0.0005, 'inner_size': 64}. Best is trial 2 with value: 0.2274440973997116.
100%|██████████| 196/196 [00:07<00:00, 26.31it/s]
100%|██████████| 40/40 [00:00<00:00, 63.56it/s]
100%|██████████| 196/196 [00:07<00:00, 26.32it/s]
100%|██████████| 40/40 [00:00<00:00, 64.03it/s]
100%|██████████| 196/196 [00:07<00:00, 26.20it/s]
100%|██████████| 40/40 [00:00<00:00, 64.30it/s]
100%|██████████| 196/196 [00:07<00:00, 26.22it/s]
100%|██████████| 40/40 [00:00<00:00, 63.17it/s]
100%|██████████| 196/196 [00:07<00:00, 26.23it/s]
100%|██████████| 40/40 [00:00<00:00, 63.67it/s]
100%|██████████| 196/196 [00:07<00:00, 26.23it/s]
100%|██████████| 40/40 [00:00<00:00, 63.56it/s]
100%|██████████| 196/196 [00:07<00:00, 26.17it/s]
100%|██████████| 40/40 [00:00<00:00, 63.46it/s]
100%|████████

calculating target metric


100%|██████████| 250/250 [01:06<00:00,  3.76it/s]
[I 2025-04-17 17:53:25,195] Trial 10 finished with value: 0.5841630101203918 and parameters: {'dataset': 'melspec_nfft256_nc16', 'n_pooled_blocks': 3, 'n_head_blocks': 5, 'lr': 0.0005, 'inner_size': 64}. Best is trial 2 with value: 0.2274440973997116.
100%|██████████| 196/196 [00:04<00:00, 42.37it/s]
100%|██████████| 40/40 [00:00<00:00, 73.50it/s]
100%|██████████| 196/196 [00:04<00:00, 43.08it/s]
100%|██████████| 40/40 [00:00<00:00, 74.84it/s]
100%|██████████| 196/196 [00:04<00:00, 42.40it/s]
100%|██████████| 40/40 [00:00<00:00, 73.43it/s]
100%|██████████| 196/196 [00:04<00:00, 42.87it/s]
100%|██████████| 40/40 [00:00<00:00, 74.39it/s]
100%|██████████| 196/196 [00:04<00:00, 42.07it/s]
100%|██████████| 40/40 [00:00<00:00, 61.63it/s]
100%|██████████| 196/196 [00:04<00:00, 42.08it/s]
100%|██████████| 40/40 [00:00<00:00, 71.63it/s]
100%|██████████| 196/196 [00:04<00:00, 43.02it/s]
100%|██████████| 40/40 [00:00<00:00, 74.12it/s]
100%|███████

calculating target metric


100%|██████████| 250/250 [00:34<00:00,  7.29it/s]
[I 2025-04-17 17:56:37,028] Trial 11 finished with value: 0.5723289251327515 and parameters: {'dataset': 'melspec_nfft512_nc64', 'n_pooled_blocks': 3, 'n_head_blocks': 0, 'lr': 0.001, 'inner_size': 64}. Best is trial 2 with value: 0.2274440973997116.
100%|██████████| 196/196 [00:04<00:00, 47.79it/s]
100%|██████████| 40/40 [00:00<00:00, 104.91it/s]
100%|██████████| 196/196 [00:04<00:00, 48.43it/s]
100%|██████████| 40/40 [00:00<00:00, 103.49it/s]
100%|██████████| 196/196 [00:04<00:00, 47.73it/s]
100%|██████████| 40/40 [00:00<00:00, 105.92it/s]
100%|██████████| 196/196 [00:04<00:00, 48.11it/s]
100%|██████████| 40/40 [00:00<00:00, 106.55it/s]
100%|██████████| 196/196 [00:04<00:00, 48.49it/s]
100%|██████████| 40/40 [00:00<00:00, 104.95it/s]
100%|██████████| 196/196 [00:04<00:00, 43.58it/s]
100%|██████████| 40/40 [00:00<00:00, 104.59it/s]
100%|██████████| 196/196 [00:04<00:00, 47.52it/s]
100%|██████████| 40/40 [00:00<00:00, 104.45it/s]
100%|█

calculating target metric


100%|██████████| 250/250 [00:34<00:00,  7.18it/s]
[I 2025-04-17 17:59:31,305] Trial 12 finished with value: 0.3015981614589691 and parameters: {'dataset': 'melspec_nfft512_nc32', 'n_pooled_blocks': 3, 'n_head_blocks': 5, 'lr': 0.01, 'inner_size': 32}. Best is trial 2 with value: 0.2274440973997116.
100%|██████████| 196/196 [00:04<00:00, 42.13it/s]
100%|██████████| 40/40 [00:00<00:00, 87.81it/s]
100%|██████████| 196/196 [00:04<00:00, 41.87it/s]
100%|██████████| 40/40 [00:00<00:00, 88.57it/s]
100%|██████████| 196/196 [00:04<00:00, 40.70it/s]
100%|██████████| 40/40 [00:00<00:00, 89.21it/s]
100%|██████████| 196/196 [00:04<00:00, 42.03it/s]
100%|██████████| 40/40 [00:00<00:00, 88.34it/s]
100%|██████████| 196/196 [00:04<00:00, 42.20it/s]
100%|██████████| 40/40 [00:00<00:00, 89.44it/s]
100%|██████████| 196/196 [00:04<00:00, 42.22it/s]
100%|██████████| 40/40 [00:00<00:00, 87.82it/s]
100%|██████████| 196/196 [00:04<00:00, 42.24it/s]
100%|██████████| 40/40 [00:00<00:00, 90.19it/s]
100%|█████████

calculating target metric


100%|██████████| 250/250 [00:10<00:00, 23.04it/s]
[W 2025-04-17 18:02:19,572] Trial 13 failed with parameters: {'dataset': 'melspec_nfft1024_nc64', 'n_pooled_blocks': 4, 'n_head_blocks': 3, 'lr': 0.005, 'inner_size': 128} because of the following error: The value nan is not acceptable.
[W 2025-04-17 18:02:19,573] Trial 13 failed with value nan.
100%|██████████| 196/196 [00:04<00:00, 40.81it/s]
100%|██████████| 40/40 [00:00<00:00, 87.24it/s]
100%|██████████| 196/196 [00:04<00:00, 41.76it/s]
100%|██████████| 40/40 [00:00<00:00, 89.11it/s]
100%|██████████| 196/196 [00:04<00:00, 41.93it/s]
100%|██████████| 40/40 [00:00<00:00, 86.75it/s]
100%|██████████| 196/196 [00:04<00:00, 41.85it/s]
100%|██████████| 40/40 [00:00<00:00, 88.30it/s]
100%|██████████| 196/196 [00:04<00:00, 42.29it/s]
100%|██████████| 40/40 [00:00<00:00, 89.19it/s]
100%|██████████| 196/196 [00:04<00:00, 41.89it/s]
100%|██████████| 40/40 [00:00<00:00, 87.19it/s]
100%|██████████| 196/196 [00:04<00:00, 39.97it/s]
100%|██████████

calculating target metric


100%|██████████| 250/250 [00:11<00:00, 21.79it/s]
[W 2025-04-17 18:05:09,447] Trial 14 failed with parameters: {'dataset': 'melspec_nfft1024_nc64', 'n_pooled_blocks': 4, 'n_head_blocks': 3, 'lr': 0.005, 'inner_size': 128} because of the following error: The value nan is not acceptable.
[W 2025-04-17 18:05:09,448] Trial 14 failed with value nan.
100%|██████████| 196/196 [00:04<00:00, 42.15it/s]
100%|██████████| 40/40 [00:00<00:00, 88.37it/s]
100%|██████████| 196/196 [00:04<00:00, 42.32it/s]
100%|██████████| 40/40 [00:00<00:00, 85.77it/s]
100%|██████████| 196/196 [00:04<00:00, 42.28it/s]
100%|██████████| 40/40 [00:00<00:00, 89.03it/s]
100%|██████████| 196/196 [00:04<00:00, 42.37it/s]
100%|██████████| 40/40 [00:00<00:00, 86.87it/s]
100%|██████████| 196/196 [00:04<00:00, 41.58it/s]
100%|██████████| 40/40 [00:00<00:00, 78.99it/s]
100%|██████████| 196/196 [00:04<00:00, 41.88it/s]
100%|██████████| 40/40 [00:00<00:00, 89.22it/s]
100%|██████████| 196/196 [00:04<00:00, 42.29it/s]
100%|██████████

calculating target metric


100%|██████████| 250/250 [00:10<00:00, 22.88it/s]
[W 2025-04-17 18:07:57,515] Trial 15 failed with parameters: {'dataset': 'melspec_nfft1024_nc64', 'n_pooled_blocks': 4, 'n_head_blocks': 3, 'lr': 0.005, 'inner_size': 128} because of the following error: The value nan is not acceptable.
[W 2025-04-17 18:07:57,516] Trial 15 failed with value nan.
100%|██████████| 196/196 [00:04<00:00, 40.25it/s]
100%|██████████| 40/40 [00:00<00:00, 89.40it/s]
100%|██████████| 196/196 [00:04<00:00, 40.57it/s]
100%|██████████| 40/40 [00:00<00:00, 87.03it/s]
100%|██████████| 196/196 [00:04<00:00, 40.33it/s]
100%|██████████| 40/40 [00:00<00:00, 85.31it/s]
100%|██████████| 196/196 [00:04<00:00, 40.64it/s]
100%|██████████| 40/40 [00:00<00:00, 86.99it/s]
100%|██████████| 196/196 [00:04<00:00, 40.53it/s]
100%|██████████| 40/40 [00:00<00:00, 86.70it/s]
100%|██████████| 196/196 [00:04<00:00, 40.69it/s]
100%|██████████| 40/40 [00:00<00:00, 87.79it/s]
100%|██████████| 196/196 [00:04<00:00, 40.51it/s]
100%|██████████

calculating target metric


100%|██████████| 250/250 [00:11<00:00, 22.70it/s]
[W 2025-04-17 18:10:51,503] Trial 16 failed with parameters: {'dataset': 'melspec_nfft1024_nc64', 'n_pooled_blocks': 4, 'n_head_blocks': 4, 'lr': 0.005, 'inner_size': 128} because of the following error: The value nan is not acceptable.
[W 2025-04-17 18:10:51,505] Trial 16 failed with value nan.
100%|██████████| 196/196 [00:04<00:00, 42.18it/s]
100%|██████████| 40/40 [00:00<00:00, 89.42it/s]
100%|██████████| 196/196 [00:04<00:00, 42.18it/s]
100%|██████████| 40/40 [00:00<00:00, 87.69it/s]
100%|██████████| 196/196 [00:04<00:00, 42.36it/s]
100%|██████████| 40/40 [00:00<00:00, 89.89it/s]
100%|██████████| 196/196 [00:04<00:00, 42.29it/s]
100%|██████████| 40/40 [00:00<00:00, 89.16it/s]
100%|██████████| 196/196 [00:04<00:00, 42.41it/s]
100%|██████████| 40/40 [00:00<00:00, 89.66it/s]
100%|██████████| 196/196 [00:04<00:00, 42.35it/s]
100%|██████████| 40/40 [00:00<00:00, 88.31it/s]
100%|██████████| 196/196 [00:04<00:00, 41.52it/s]
100%|██████████

calculating target metric


100%|██████████| 250/250 [00:11<00:00, 22.45it/s]
[W 2025-04-17 18:13:39,338] Trial 17 failed with parameters: {'dataset': 'melspec_nfft1024_nc64', 'n_pooled_blocks': 4, 'n_head_blocks': 3, 'lr': 0.005, 'inner_size': 128} because of the following error: The value nan is not acceptable.
[W 2025-04-17 18:13:39,339] Trial 17 failed with value nan.
100%|██████████| 196/196 [00:04<00:00, 41.95it/s]
100%|██████████| 40/40 [00:00<00:00, 89.40it/s]
100%|██████████| 196/196 [00:04<00:00, 42.35it/s]
100%|██████████| 40/40 [00:00<00:00, 86.52it/s]
100%|██████████| 196/196 [00:04<00:00, 42.39it/s]
100%|██████████| 40/40 [00:00<00:00, 88.23it/s]
100%|██████████| 196/196 [00:04<00:00, 42.21it/s]
100%|██████████| 40/40 [00:00<00:00, 86.40it/s]
100%|██████████| 196/196 [00:04<00:00, 41.89it/s]
100%|██████████| 40/40 [00:00<00:00, 88.94it/s]
100%|██████████| 196/196 [00:04<00:00, 42.05it/s]
100%|██████████| 40/40 [00:00<00:00, 88.25it/s]
100%|██████████| 196/196 [00:04<00:00, 42.32it/s]
100%|██████████

calculating target metric


100%|██████████| 250/250 [00:10<00:00, 22.86it/s]
[W 2025-04-17 18:16:27,079] Trial 18 failed with parameters: {'dataset': 'melspec_nfft1024_nc64', 'n_pooled_blocks': 4, 'n_head_blocks': 3, 'lr': 0.005, 'inner_size': 128} because of the following error: The value nan is not acceptable.
[W 2025-04-17 18:16:27,080] Trial 18 failed with value nan.
100%|██████████| 196/196 [00:04<00:00, 42.08it/s]
100%|██████████| 40/40 [00:00<00:00, 89.33it/s]
100%|██████████| 196/196 [00:04<00:00, 42.32it/s]
100%|██████████| 40/40 [00:00<00:00, 89.39it/s]
100%|██████████| 196/196 [00:04<00:00, 41.74it/s]
100%|██████████| 40/40 [00:00<00:00, 89.39it/s]
100%|██████████| 196/196 [00:04<00:00, 42.17it/s]
100%|██████████| 40/40 [00:00<00:00, 88.69it/s]
100%|██████████| 196/196 [00:04<00:00, 42.14it/s]
100%|██████████| 40/40 [00:00<00:00, 90.29it/s]
100%|██████████| 196/196 [00:04<00:00, 42.27it/s]
100%|██████████| 40/40 [00:00<00:00, 87.97it/s]
100%|██████████| 196/196 [00:04<00:00, 42.28it/s]
100%|██████████

calculating target metric


100%|██████████| 250/250 [00:10<00:00, 22.96it/s]
[W 2025-04-17 18:19:15,091] Trial 19 failed with parameters: {'dataset': 'melspec_nfft1024_nc64', 'n_pooled_blocks': 4, 'n_head_blocks': 3, 'lr': 0.005, 'inner_size': 128} because of the following error: The value nan is not acceptable.
[W 2025-04-17 18:19:15,092] Trial 19 failed with value nan.


In [16]:
print('hurray')

hurray
