- Setup the environment

In [1]:
import sys
import os
cur_dir = os.getcwd()
# Add the current directory to system path
sys.path.append(cur_dir)

- Load input data

Create sample data

In [2]:
import pandas as pd
import numpy as np
# Prepare sample data
timestamp = pd.date_range(start='2020-01-01', periods=300, freq='D')
time_series = pd.DataFrame({'values': np.random.randn(300)}, index=timestamp)
labels = pd.DataFrame({'label': np.random.randint(0, 3, size=300)}, index=timestamp)
ext_features = pd.DataFrame({
    'feature1': np.random.randn(300),
    'feature2': np.random.randn(300)
}, index=timestamp)

combined_data = time_series.join(labels).join(ext_features)

Train / Validation / Test split

In [3]:
# Train / Validation / Test Split
train_data = combined_data[:100]
val_data = combined_data[100:200]
test_data = combined_data[200:]

# Read targets
train_target = train_data.label.values
val_target = val_data.label.values
test_target = test_data.label.values

# Read features
train_features = train_data[['feature1', 'feature2']].values
val_features = val_data[['feature1', 'feature2']].values
test_features = test_data[['feature1', 'feature2']].values

In [4]:
from pl_model_utils import TimeSeriesDataModule
# Instantiate data module and model
data_module = TimeSeriesDataModule(
    train_target, train_features,
    val_target, val_features,
    test_target, test_features,
    batch_size=16
)

# Setup the data for model
data_module.setup()

  from .autonotebook import tqdm as notebook_tqdm


## Classification with Basic Autoencoder

- Compile simple Autoencoder for Classification

In [5]:
from pl_autoencoder_classifiers import AutoencoderClassifier
ae_model = AutoencoderClassifier(context_length=1, num_classes=3, num_features=2)

In [6]:
from pytorch_lightning.callbacks import ModelCheckpoint
# Define a checkpoint callback to save the best model
checkpoint_callback = ModelCheckpoint(
    monitor='train_loss',
    mode='min',
    save_top_k=1,
    save_last=True,
)

- Train the Autoencoder

In [7]:
from pytorch_lightning import Trainer
# Train the model
trainer = Trainer(max_epochs=20, callbacks=[checkpoint_callback])
trainer.fit(ae_model, data_module.train_dataloader())

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type       | Params
------------------------------------------
0 | encoder    | Sequential | 8.8 K 
1 | decoder    | Sequential | 8.4 K 
2 | classifier | Sequential | 2.2 K 
3 | softmax    | Softmax    | 0     
------------------------------------------
19.4 K    Trainable params
0         Non-trainable params
19.4 K    Total params
0.078     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00, 80.14it/s, v_num=41] 

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00, 62.01it/s, v_num=41]


- Cross-Validation

In [8]:
from pl_model_utils import cross_validate_model
# Perform cross-validation
cv_results = cross_validate_model(data_module.val_features, data_module.val_target, AutoencoderClassifier, context_length=1, num_classes=3, num_features=2)

TypeError: __init__() got an unexpected keyword argument 'num_heads'

- Predictions with MC Dropout Uncertainty

In [17]:
from pl_model_utils import mc_dropout_predictions
# Perform MC Dropout predictions
mc_predictions = mc_dropout_predictions(ae_model, data_module.test_dataloader())
# Calculate mean and standard deviation for uncertainty estimates
mean_predictions = mc_predictions.mean(axis=0)
std_predictions = mc_predictions.std(axis=0)
# Convert mean predictions to class labels
predicted_labels = np.argmax(mean_predictions, axis=1)

## Classification with Attention-Enchanced Autoencoder

## Hyperparameter Tuning

In [9]:
from pytorch_lightning.callbacks import EarlyStopping
from pl_autoencoder_classifiers import AutoencoderAttentionClassifier
from pl_model_utils import TimeSeriesDataset
import pytorch_lightning as pl
import torch
from sklearn.model_selection import TimeSeriesSplit
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import log_loss
import optuna
import numpy as np

def ae_attention_objective(trial):
    context_length = 1
    num_classes = 3
    num_features = 2

    # Suggest hyperparameters
    lr = trial.suggest_categorical('lr', [1e-5, 1e-3, 1e-2])
    num_heads = trial.suggest_categorical('num_heads', [1, 2, 4])
    dropout_prob = trial.suggest_categorical('dropout_prob', [0.1, 0.3, 0.5])
    hidden_units = trial.suggest_categorical('hidden_units', [64, 128, 256])
    embed_dim = trial.suggest_categorical('embed_dim', [32, 64, 128])
    classifier_units = trial.suggest_categorical('classifier_units', [16, 32, 64])
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])

    # Initialize the model with suggested hyperparameters
    model = AutoencoderAttentionClassifier(
        context_length=context_length,
        num_classes=num_classes,
        num_features=num_features,
        num_heads=num_heads,
        dropout_prob=dropout_prob,
        hidden_units=hidden_units,
        embed_dim=embed_dim,
        classifier_units=classifier_units,
        lr=lr
    )

    # Assuming you have your dataset in `X` and `y`
    X, y = data_module.val_features, data_module.val_target
    # X = np.array(X_train)  # Ensure X_train is a NumPy array
    # y = np.array(y_train)  # Ensure y_train is a NumPy array

    # Time series split
    tscv = TimeSeriesSplit(n_splits=5)
    cv_scores = []

    for train_index, val_index in tscv.split(X):
        X_train_fold, X_val_fold = X[train_index], X[val_index]
        y_train_fold, y_val_fold = y[train_index], y[val_index]

        # Create DataLoader for the training and validation fold
        train_dataset = TimeSeriesDataset(
            torch.tensor(y_train_fold, dtype=torch.float32), 
            torch.tensor(X_train_fold, dtype=torch.float32)
            )
        
        val_dataset = TimeSeriesDataset(
            torch.tensor(y_val_fold, dtype=torch.float32), 
            torch.tensor(X_val_fold, dtype=torch.float32)
            )
        
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        # Initialize trainer
        trainer = pl.Trainer(
            max_epochs=10,
            callbacks=[EarlyStopping(monitor='train_loss', patience=3, mode='min')],
            logger=False,
            enable_checkpointing=False
        )

        # Train the model
        trainer.fit(model, train_loader)

        # Validate the model

        model.eval()
        all_preds = []
        all_targets = []
        with torch.no_grad():
            for batch in val_loader:
                targets, features = batch
                _, classification = model(targets, features)
                # preds = torch.argmax(classification, dim=1)
                all_preds.extend(classification.cpu().numpy())
                all_targets.extend(targets.cpu().numpy())

        # val_predictions = trainer.predict(model, val_loader)
        # val_predictions = torch.cat([x for x in val_predictions], dim=0).numpy()
        
        val_loss = log_loss(all_targets, all_preds)
        cv_scores.append(val_loss)

    return np.mean(cv_scores)

In [10]:
# Run the Optuna study
study = optuna.create_study(direction='minimize')
study.optimize(ae_attention_objective, n_trials=100)

[I 2024-07-17 17:46:29,096] A new study created in memory with name: no-name-df67b1ea-18ca-4025-a4de-62baeb978f75
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 65.25it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 54.47it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 49.36it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 48.06it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 43.40it/s]

GPU available: False, used: False





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 3/3 [00:00<00:00, 55.92it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 3/3 [00:00<00:00, 51.84it/s]

[I 2024-07-17 17:46:31,215] Trial 0 finished with value: 3.825281085309706 and parameters: {'lr': 0.01, 'num_heads': 4, 'dropout_prob': 0.3, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 16, 'batch_size': 32}. Best is trial 0 with value: 3.825281085309706.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 65.83it/s] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 74.01it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 63.32it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 57.33it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 59.84it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 6/6 [00:00<00:00, 77.61it/s]


[I 2024-07-17 17:46:33,873] Trial 1 finished with value: 1.1083560701727004 and parameters: {'lr': 1e-05, 'num_heads': 2, 'dropout_prob': 0.3, 'hidden_units': 128, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 16}. Best is trial 1 with value: 1.1083560701727004.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 64.64it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 52.65it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 64.71it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 56.73it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 79.05it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 69.21it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 3/3 [00:00<00:00, 62.99it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 73.89it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 68.50it/s]

[I 2024-07-17 17:46:36,150] Trial 2 finished with value: 0.2628563712940483 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 32, 'batch_size': 32}. Best is trial 2 with value: 0.2628563712940483.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.416     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 70.31it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 61.66it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.416     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 66.62it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 59.05it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.416     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 65.95it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.416     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 70.06it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 65.86it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.416     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 6/6 [00:00<00:00, 73.24it/s]


[I 2024-07-17 17:46:39,278] Trial 3 finished with value: 0.49741824650347377 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.5, 'hidden_units': 128, 'embed_dim': 128, 'classifier_units': 32, 'batch_size': 16}. Best is trial 2 with value: 0.2628563712940483.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
36.0 K    Trainable params
0         Non-trainable params
36.0 K    Total params
0.144     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 1/1 [00:00<00:00, 51.87it/s]

GPU available: False, used: False





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
36.0 K    Trainable params
0         Non-trainable params
36.0 K    Total params
0.144     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 68.19it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 58.25it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
36.0 K    Trainable params
0         Non-trainable params
36.0 K    Total params
0.144     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 70.11it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 64.39it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
36.0 K    Trainable params
0         Non-trainable params
36.0 K    Total params
0.144     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 3/3 [00:00<00:00, 71.45it/s] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
36.0 K    Trainable params
0         Non-trainable params
36.0 K    Total params
0.144     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 71.51it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 64.40it/s]

[I 2024-07-17 17:46:41,256] Trial 4 finished with value: 0.6473592468628939 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.5, 'hidden_units': 128, 'embed_dim': 64, 'classifier_units': 32, 'batch_size': 32}. Best is trial 2 with value: 0.2628563712940483.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 579   
4 | softmax    | Softmax            | 0     
--------------------------------------------------
13.8 K    Trainable params
0         Non-trainable params
13.8 K    Total params
0.055     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 80.99it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 69.10it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 579   
4 | softmax    | Softmax            | 0     
--------------------------------------------------
13.8 K    Trainable params
0         Non-trainable params
13.8 K    Total params
0.055     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 3/3 [00:00<00:00, 86.61it/s] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 579   
4 | softmax    | Softmax            | 0     
--------------------------------------------------
13.8 K    Trainable params
0         Non-trainable params
13.8 K    Total params
0.055     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 4/4 [00:00<00:00, 84.72it/s] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 579   
4 | softmax    | Softmax            | 0     
--------------------------------------------------
13.8 K    Trainable params
0         Non-trainable params
13.8 K    Total params
0.055     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 83.41it/s] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 579   
4 | softmax    | Softmax            | 0     
--------------------------------------------------
13.8 K    Trainable params
0         Non-trainable params
13.8 K    Total params
0.055     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 79.30it/s] 

[I 2024-07-17 17:46:43,072] Trial 5 finished with value: 4.846898171504426 and parameters: {'lr': 0.01, 'num_heads': 2, 'dropout_prob': 0.3, 'hidden_units': 128, 'embed_dim': 32, 'classifier_units': 16, 'batch_size': 16}. Best is trial 2 with value: 0.2628563712940483.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.550     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 66.82it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 55.69it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.550     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 67.10it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 55.48it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.550     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 56.14it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 45.67it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.550     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 70.21it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.550     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 59.89it/s]

[I 2024-07-17 17:46:44,581] Trial 6 finished with value: 0.33566679588833914 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.3, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 32, 'batch_size': 64}. Best is trial 2 with value: 0.2628563712940483.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 2/2 [00:00<00:00, 68.59it/s] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 81.45it/s] 






GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 67.30it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 5/5 [00:00<00:00, 64.02it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 69.75it/s]


[I 2024-07-17 17:46:46,786] Trial 7 finished with value: 7.541220984850528 and parameters: {'lr': 0.01, 'num_heads': 4, 'dropout_prob': 0.5, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 32, 'batch_size': 16}. Best is trial 2 with value: 0.2628563712940483.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.433     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 62.50it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 49.75it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.433     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 51.27it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 42.04it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.433     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 49.14it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 41.28it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.433     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 70.78it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 61.12it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.433     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 2/2 [00:00<00:00, 58.13it/s]


[I 2024-07-17 17:46:48,711] Trial 8 finished with value: 0.311609679585559 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 128, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 2 with value: 0.2628563712940483.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 68.90it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 59.74it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 70.50it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 64.46it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 66.63it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 62.64it/s]

GPU available: False, used: False





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 5/5 [00:00<00:00, 77.11it/s] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 6/6 [00:00<00:00, 47.15it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 6/6 [00:00<00:00, 44.69it/s]


[I 2024-07-17 17:46:52,149] Trial 9 finished with value: 0.2145850367803986 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 16}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 2.3 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 2.2 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
11.0 K    Trainable params
0         Non-trainable params
11.0 K    Total params
0.044     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 46.65it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 2.3 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 2.2 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
11.0 K    Trainable params
0         Non-trainable params
11.0 K    Total params
0.044     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 3/3 [00:00<00:00, 44.66it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 2.3 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 2.2 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
11.0 K    Trainable params
0         Non-trainable params
11.0 K    Total params
0.044     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 48.43it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 2.3 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 2.2 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
11.0 K    Trainable params
0         Non-trainable params
11.0 K    Total params
0.044     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 5/5 [00:00<00:00, 48.68it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 2.3 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 2.2 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
11.0 K    Trainable params
0         Non-trainable params
11.0 K    Total params
0.044     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 6/6 [00:00<00:00, 48.67it/s]

[I 2024-07-17 17:46:56,030] Trial 10 finished with value: 1.1252722474326784 and parameters: {'lr': 1e-05, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 64, 'embed_dim': 32, 'classifier_units': 64, 'batch_size': 16}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 56.45it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 42.19it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.47it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.04it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 48.84it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 42.61it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 47.64it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 42.88it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 3/3 [00:00<00:00, 45.22it/s]


[I 2024-07-17 17:46:59,315] Trial 11 finished with value: 0.24129068870706707 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 40.51it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.00it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 45.10it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.72it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 46.91it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.98it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 41.44it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 37.79it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 44.32it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.18it/s]


[I 2024-07-17 17:47:02,959] Trial 12 finished with value: 0.31196286627638725 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
29.6 K    Trainable params
0         Non-trainable params
29.6 K    Total params
0.119     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 41.31it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 30.66it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
29.6 K    Trainable params
0         Non-trainable params
29.6 K    Total params
0.119     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 49.21it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 42.90it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
29.6 K    Trainable params
0         Non-trainable params
29.6 K    Total params
0.119     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 46.41it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 39.89it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
29.6 K    Trainable params
0         Non-trainable params
29.6 K    Total params
0.119     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 3/3 [00:00<00:00, 42.85it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
29.6 K    Trainable params
0         Non-trainable params
29.6 K    Total params
0.119     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 40.50it/s]


[I 2024-07-17 17:47:05,917] Trial 13 finished with value: 0.3384241843310133 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 64, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 43.08it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.60it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 43.07it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 33.15it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.25it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 27.64it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 54.34it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 47.76it/s]

GPU available: False, used: False





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 42.80it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.22it/s]


[I 2024-07-17 17:47:08,693] Trial 14 finished with value: 0.3116994100143443 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 64}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 40.36it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 3/3 [00:00<00:00, 43.60it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 4/4 [00:00<00:00, 74.71it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 5/5 [00:00<00:00, 72.91it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 6/6 [00:00<00:00, 70.09it/s]


[I 2024-07-17 17:47:11,412] Trial 15 finished with value: 1.1054897126470344 and parameters: {'lr': 1e-05, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 32, 'classifier_units': 64, 'batch_size': 16}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 62.84it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 48.75it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 67.15it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 58.14it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 73.10it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 61.81it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 74.42it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 67.52it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 62.58it/s]


[I 2024-07-17 17:47:13,513] Trial 16 finished with value: 0.28472063048473817 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 16, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
29.6 K    Trainable params
0         Non-trainable params
29.6 K    Total params
0.119     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 74.21it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 56.23it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
29.6 K    Trainable params
0         Non-trainable params
29.6 K    Total params
0.119     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 77.57it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 68.31it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
29.6 K    Trainable params
0         Non-trainable params
29.6 K    Total params
0.119     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 77.01it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 62.59it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
29.6 K    Trainable params
0         Non-trainable params
29.6 K    Total params
0.119     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 69.66it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 62.43it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
29.6 K    Trainable params
0         Non-trainable params
29.6 K    Total params
0.119     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 3/3 [00:00<00:00, 66.48it/s]


[I 2024-07-17 17:47:15,610] Trial 17 finished with value: 0.4367601101291384 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 64, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 59.76it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 54.86it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 56.25it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 40.53it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 39.71it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 35.70it/s]


[I 2024-07-17 17:47:18,335] Trial 18 finished with value: 23.09204652528175 and parameters: {'lr': 0.01, 'num_heads': 2, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 32.51it/s]






GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 34.61it/s]






GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 1/1 [00:00<00:00, 38.88it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 36.02it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 2/2 [00:00<00:00, 39.26it/s]


[I 2024-07-17 17:47:20,339] Trial 19 finished with value: 1.1191742897936097 and parameters: {'lr': 1e-05, 'num_heads': 1, 'dropout_prob': 0.5, 'hidden_units': 256, 'embed_dim': 32, 'classifier_units': 64, 'batch_size': 64}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
26.4 K    Trainable params
0         Non-trainable params
26.4 K    Total params
0.105     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.49it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.35it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
26.4 K    Trainable params
0         Non-trainable params
26.4 K    Total params
0.105     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 44.30it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.70it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
26.4 K    Trainable params
0         Non-trainable params
26.4 K    Total params
0.105     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 47.67it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 45.00it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
26.4 K    Trainable params
0         Non-trainable params
26.4 K    Total params
0.105     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 62.35it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 58.70it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.4 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 4.2 K 
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
26.4 K    Trainable params
0         Non-trainable params
26.4 K    Total params
0.105     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 6/6 [00:00<00:00, 48.53it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 6/6 [00:00<00:00, 46.65it/s]

[I 2024-07-17 17:47:25,619] Trial 20 finished with value: 0.41871283865831305 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 64, 'embed_dim': 64, 'classifier_units': 16, 'batch_size': 16}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 37.42it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.24it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.83it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.72it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 48.81it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 43.04it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 3/3 [00:00<00:00, 48.47it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 3/3 [00:00<00:00, 39.00it/s]

[I 2024-07-17 17:47:28,981] Trial 21 finished with value: 0.2325607548094193 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 32, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 40.37it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.26it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 48.93it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 42.20it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.08it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 31.99it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 3/3 [00:00<00:00, 34.80it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 45.89it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.90it/s]


[I 2024-07-17 17:47:32,773] Trial 22 finished with value: 0.3339064796881317 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 32, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 37.61it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.17it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.17it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 35.92it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.10it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.19it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 41.75it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 38.48it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 45.14it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 42.63it/s]


[I 2024-07-17 17:47:36,437] Trial 23 finished with value: 0.3023341387485059 and parameters: {'lr': 0.001, 'num_heads': 2, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 32, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 36.87it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 29.74it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 44.04it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.96it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 55.42it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 50.27it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 3/3 [00:00<00:00, 41.43it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 3/3 [00:00<00:00, 41.72it/s]


[I 2024-07-17 17:47:39,548] Trial 24 finished with value: 0.26077531725798253 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 41.91it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 33.07it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 45.25it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 39.63it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 47.50it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 39.33it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 45.37it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.88it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 46.15it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 41.10it/s]


[I 2024-07-17 17:47:43,041] Trial 25 finished with value: 0.335423145906064 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 32, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 46.35it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 36.87it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 46.05it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 39.68it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 39.74it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 35.51it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 13.47it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 13.16it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(



Epoch 4: 100%|██████████| 3/3 [00:00<00:00, 41.53it/s]

[I 2024-07-17 17:47:46,270] Trial 26 finished with value: 0.266045326904529 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 41.35it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 32.08it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 67.09it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 56.82it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 63.50it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 56.59it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 78.23it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 70.86it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 3/3 [00:00<00:00, 69.28it/s]

[I 2024-07-17 17:47:48,818] Trial 27 finished with value: 0.2740490051183773 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 32, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 61.85it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 48.32it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 42.97it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(



Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 46.62it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 58.10it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 52.07it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 51.85it/s]

[I 2024-07-17 17:47:50,328] Trial 28 finished with value: 5.704488703891032 and parameters: {'lr': 0.01, 'num_heads': 2, 'dropout_prob': 0.5, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 16, 'batch_size': 64}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 2.3 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 2.2 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
11.0 K    Trainable params
0         Non-trainable params
11.0 K    Total params
0.044     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 76.64it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 64.71it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 2.3 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 2.2 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
11.0 K    Trainable params
0         Non-trainable params
11.0 K    Total params
0.044     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 3/3 [00:00<00:00, 70.14it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 2.3 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 2.2 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
11.0 K    Trainable params
0         Non-trainable params
11.0 K    Total params
0.044     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 4/4 [00:00<00:00, 72.25it/s] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 2.3 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 2.2 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
11.0 K    Trainable params
0         Non-trainable params
11.0 K    Total params
0.044     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 5/5 [00:00<00:00, 70.61it/s] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 2.3 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 2.2 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
11.0 K    Trainable params
0         Non-trainable params
11.0 K    Total params
0.044     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 6/6 [00:00<00:00, 74.86it/s]

[I 2024-07-17 17:47:52,943] Trial 29 finished with value: 1.1161627722061627 and parameters: {'lr': 1e-05, 'num_heads': 4, 'dropout_prob': 0.3, 'hidden_units': 64, 'embed_dim': 32, 'classifier_units': 64, 'batch_size': 16}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 57.12it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 47.55it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 71.72it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 62.08it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 40.62it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 3/3 [00:00<00:00, 42.91it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 1.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
52.1 K    Trainable params
0         Non-trainable params
52.1 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 3/3 [00:00<00:00, 41.91it/s]

[I 2024-07-17 17:47:55,484] Trial 30 finished with value: 1.2562753628038212 and parameters: {'lr': 0.01, 'num_heads': 4, 'dropout_prob': 0.3, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 16, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 47.54it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.68it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 44.61it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.17it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.31it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.22it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 3/3 [00:00<00:00, 43.62it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 3/3 [00:00<00:00, 40.91it/s]

[I 2024-07-17 17:47:58,723] Trial 31 finished with value: 0.26083722789739483 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00,  6.50it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00,  6.24it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.57it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 34.58it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 43.01it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.84it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 3/3 [00:00<00:00, 41.19it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 47.85it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 42.72it/s]


[I 2024-07-17 17:48:02,307] Trial 32 finished with value: 0.31975252200768955 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 40.69it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 33.83it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 49.28it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 42.72it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 42.32it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.57it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 3/3 [00:00<00:00, 43.27it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 3/3 [00:00<00:00, 44.70it/s]

[I 2024-07-17 17:48:05,411] Trial 33 finished with value: 0.30807236443790714 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 40.13it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 33.19it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 43.38it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.32it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 44.72it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 39.22it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 3/3 [00:00<00:00, 42.89it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 3/3 [00:00<00:00, 44.25it/s]

[I 2024-07-17 17:48:08,617] Trial 34 finished with value: 0.28271483965811195 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 35.83it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.47it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 50.59it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.88it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 46.79it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 39.28it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 43.96it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.39it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 43.13it/s]


[I 2024-07-17 17:48:11,779] Trial 35 finished with value: 0.3409548341372168 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 128, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.43it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.98it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 56.04it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 48.77it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 4/4 [00:00<00:00, 41.73it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 5/5 [00:00<00:00, 45.82it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 6/6 [00:00<00:00, 45.12it/s]


[I 2024-07-17 17:48:16,387] Trial 36 finished with value: 0.2801207295404994 and parameters: {'lr': 0.001, 'num_heads': 2, 'dropout_prob': 0.3, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 32, 'batch_size': 16}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 44.17it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 32.84it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 46.43it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.94it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.36it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 35.48it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 47.23it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.8 K 
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 8.4 K 
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
38.2 K    Trainable params
0         Non-trainable params
38.2 K    Total params
0.153     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 3/3 [00:00<00:00, 42.21it/s]


[I 2024-07-17 17:48:19,353] Trial 37 finished with value: 0.7250761804082936 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.5, 'hidden_units': 128, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.550     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 39.24it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.550     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 3/3 [00:00<00:00, 43.53it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.550     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 4/4 [00:00<00:00, 40.44it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.550     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 5/5 [00:00<00:00, 48.38it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.550     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 6/6 [00:00<00:00, 44.61it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 6/6 [00:00<00:00, 43.02it/s]


[I 2024-07-17 17:48:23,855] Trial 38 finished with value: 1.092792756686184 and parameters: {'lr': 1e-05, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 32, 'batch_size': 16}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 58.98it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 49.51it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 64.75it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 55.46it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 66.68it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 59.65it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 70.88it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 64.77it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 2.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
53.2 K    Trainable params
0         Non-trainable params
53.2 K    Total params
0.213     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 68.83it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 63.29it/s]

[I 2024-07-17 17:48:26,329] Trial 39 finished with value: 0.3383194780579354 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 32, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
15.5 K    Trainable params
0         Non-trainable params
15.5 K    Total params
0.062     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 72.24it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 63.99it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
15.5 K    Trainable params
0         Non-trainable params
15.5 K    Total params
0.062     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 72.46it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 66.09it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
15.5 K    Trainable params
0         Non-trainable params
15.5 K    Total params
0.062     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 4/4 [00:00<00:00, 69.02it/s] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
15.5 K    Trainable params
0         Non-trainable params
15.5 K    Total params
0.062     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 72.04it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
15.5 K    Trainable params
0         Non-trainable params
15.5 K    Total params
0.062     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 80.73it/s]

[I 2024-07-17 17:48:28,765] Trial 40 finished with value: 2.1423536861242303 and parameters: {'lr': 0.01, 'num_heads': 1, 'dropout_prob': 0.3, 'hidden_units': 128, 'embed_dim': 32, 'classifier_units': 64, 'batch_size': 16}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 58.29it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 44.65it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 67.19it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 61.05it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 58.29it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 52.22it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 3/3 [00:00<00:00, 60.38it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 3/3 [00:00<00:00, 41.28it/s]


[I 2024-07-17 17:48:31,183] Trial 41 finished with value: 0.2685410893419337 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 37.03it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.86it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 11.19it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 10.89it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 42.24it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.40it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 3/3 [00:00<00:00, 40.76it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 48.31it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 43.74it/s]

[I 2024-07-17 17:48:34,702] Trial 42 finished with value: 0.34106235931538353 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 37.13it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 30.34it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 43.46it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.46it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 46.16it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.41it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 41.31it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 37.72it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 52.59it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 47.43it/s]

[I 2024-07-17 17:48:38,464] Trial 43 finished with value: 0.2736863209705603 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.23it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.11it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 45.12it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.98it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 43.15it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.93it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 45.10it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.33it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 3/3 [00:00<00:00, 41.61it/s]


[I 2024-07-17 17:48:41,952] Trial 44 finished with value: 0.26981340312402124 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 32}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 37.34it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 29.27it/s]






GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 43.35it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 33.90it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.50it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.78it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.5 K
1 | attention  | MultiheadAttention | 16.6 K
2 | decoder    | Sequential         | 16.9 K
3 | classifier | Sequential         | 4.4 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
55.4 K    Trainable params
0         Non-trainable params
55.4 K    Total params
0.221     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 2/2 [00:00<00:00, 37.76it/s]


[I 2024-07-17 17:48:44,463] Trial 45 finished with value: 0.7377225082818052 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.5, 'hidden_units': 256, 'embed_dim': 64, 'classifier_units': 64, 'batch_size': 64}. Best is trial 9 with value: 0.2145850367803986.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 32.43it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 27.41it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.86it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.26it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 32.09it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 27.49it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 35.96it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 33.90it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 3/3 [00:00<00:00, 34.27it/s]


[I 2024-07-17 17:48:48,526] Trial 46 finished with value: 0.19394654738365386 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 32}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
85.1 K    Trainable params
0         Non-trainable params
85.1 K    Total params
0.340     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 40.55it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 29.33it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
85.1 K    Trainable params
0         Non-trainable params
85.1 K    Total params
0.340     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 25.77it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 23.54it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
85.1 K    Trainable params
0         Non-trainable params
85.1 K    Total params
0.340     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.12it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.18it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
85.1 K    Trainable params
0         Non-trainable params
85.1 K    Total params
0.340     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 3/3 [00:00<00:00, 41.56it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
85.1 K    Trainable params
0         Non-trainable params
85.1 K    Total params
0.340     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 42.64it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 39.30it/s]


[I 2024-07-17 17:48:52,017] Trial 47 finished with value: 0.3379355234081705 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 64, 'embed_dim': 128, 'classifier_units': 16, 'batch_size': 32}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.416     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 2/2 [00:00<00:00, 37.42it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.416     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 3/3 [00:00<00:00, 37.23it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.416     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 4/4 [00:00<00:00, 40.58it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.416     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 46.83it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 4.2 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.416     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 6/6 [00:00<00:00, 47.91it/s]


[I 2024-07-17 17:48:55,747] Trial 48 finished with value: 1.1452058147971338 and parameters: {'lr': 1e-05, 'num_heads': 2, 'dropout_prob': 0.1, 'hidden_units': 128, 'embed_dim': 128, 'classifier_units': 32, 'batch_size': 16}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 33.42it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.33it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 36.79it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.60it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 35.25it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 27.63it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 38.34it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 2/2 [00:00<00:00, 30.97it/s]


[I 2024-07-17 17:48:58,666] Trial 49 finished with value: 0.2181680900682398 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 38.09it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 30.89it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 50.44it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 37.88it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.84it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 27.45it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 31.14it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 28.92it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 53.54it/s]

[I 2024-07-17 17:49:01,766] Trial 50 finished with value: 0.24096010375063198 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 64.49it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 53.43it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 53.70it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 42.47it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 58.68it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 44.70it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 2/2 [00:00<00:00, 50.74it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 56.46it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 49.49it/s]


[I 2024-07-17 17:49:03,906] Trial 51 finished with value: 0.22942472770444336 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 47.73it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 39.28it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 65.22it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 50.53it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 58.98it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 47.75it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 48.29it/s]

GPU available: False, used: False





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 58.99it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 49.57it/s]


[I 2024-07-17 17:49:06,092] Trial 52 finished with value: 0.22378121843032087 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 55.84it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 44.92it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 41.87it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 36.51it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 49.25it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 41.54it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 42.14it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 2/2 [00:00<00:00, 49.49it/s]


[I 2024-07-17 17:49:07,935] Trial 53 finished with value: 0.23028905295446184 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 39.67it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 33.43it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.80it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 22.51it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.63it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 24.44it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 2/2 [00:00<00:00, 34.70it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.40it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 33.68it/s]


[I 2024-07-17 17:49:10,774] Trial 54 finished with value: 0.223393749355245 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 33.88it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.24it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 32.05it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.56it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 33.56it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.75it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 2/2 [00:00<00:00, 36.03it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 2/2 [00:00<00:00, 34.33it/s]


[I 2024-07-17 17:49:13,660] Trial 55 finished with value: 0.27267406674265005 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 40.35it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.49it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 37.12it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 1/1 [00:00<00:00, 43.31it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 36.30it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 32.30it/s]

[I 2024-07-17 17:49:15,914] Trial 56 finished with value: 8.074552665933124 and parameters: {'lr': 0.01, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 44.54it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 36.97it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 39.51it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.48it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 1/1 [00:00<00:00, 30.78it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 28.82it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 25.13it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 30.49it/s]

[I 2024-07-17 17:49:19,538] Trial 57 finished with value: 0.7033978015678368 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.5, 'hidden_units': 64, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 83.49it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 61.58it/s]

GPU available: False, used: False





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 35.81it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 27.84it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 57.93it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 43.01it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.45it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.20it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.60it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 33.99it/s]


[I 2024-07-17 17:49:22,378] Trial 58 finished with value: 0.2669066875263125 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 42.98it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.19it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 51.57it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 39.14it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 23.82it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 22.23it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 38.36it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 42.05it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.27it/s]


[I 2024-07-17 17:49:25,138] Trial 59 finished with value: 0.3000648618333367 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 35.35it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.83it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 35.84it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.40it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.38it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 27.54it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 42.75it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 34.51it/s]


[I 2024-07-17 17:49:27,699] Trial 60 finished with value: 0.30452507674750984 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.3, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 46 with value: 0.19394654738365386.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 39.48it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 33.62it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.28it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.55it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 35.37it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 29.32it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 36.59it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 2/2 [00:00<00:00, 33.74it/s]


[I 2024-07-17 17:49:30,595] Trial 61 finished with value: 0.18858144789630626 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.39it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 21.70it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 37.05it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 29.86it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 32.30it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 25.42it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.54it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 32.86it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 34.79it/s]


[I 2024-07-17 17:49:33,548] Trial 62 finished with value: 0.2708727500943576 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.48it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 29.16it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.62it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.16it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 36.80it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 30.13it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 39.22it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.13it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 34.94it/s]


[I 2024-07-17 17:49:36,267] Trial 63 finished with value: 0.19911526233070517 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 40.12it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 30.31it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.15it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.96it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 30.51it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.48it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 64.10it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 56.52it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 61.24it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 54.39it/s]

[I 2024-07-17 17:49:39,160] Trial 64 finished with value: 0.23383833110374477 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 1/1 [00:00<00:00, 47.31it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 45.58it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 45.58it/s]






GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 53.46it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 37.68it/s]

[I 2024-07-17 17:49:40,705] Trial 65 finished with value: 1.099523532316726 and parameters: {'lr': 1e-05, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 53.03it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 44.84it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 55.57it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 45.48it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 46.61it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 36.44it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 56.47it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 66.81it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 57.76it/s]


[I 2024-07-17 17:49:42,682] Trial 66 finished with value: 0.26244306453820376 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 16, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 56.58it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 46.16it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 53.05it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 42.83it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 52.58it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 42.26it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 61.38it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 55.41it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 45.32it/s]


[I 2024-07-17 17:49:44,756] Trial 67 finished with value: 0.2533099196490101 and parameters: {'lr': 0.001, 'num_heads': 2, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 19.94it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 17.41it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 1/1 [00:00<00:00,  5.84it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 1/1 [00:00<00:00, 37.86it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 38.37it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 25.63it/s]

[I 2024-07-17 17:49:46,416] Trial 68 finished with value: 15.226121909830852 and parameters: {'lr': 0.01, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 64, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.36it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 22.73it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 40.45it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.12it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 35.43it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 27.83it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 2/2 [00:00<00:00, 33.31it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 33.69it/s]


[I 2024-07-17 17:49:49,239] Trial 69 finished with value: 0.26160692204404373 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.433     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 1/1 [00:00<00:00, 31.54it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.433     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.05it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 29.52it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.433     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 30.67it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 25.13it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.433     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 36.65it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 17.0 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 16.6 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
108 K     Trainable params
0         Non-trainable params
108 K     Total params
0.433     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.17it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 33.72it/s]


[I 2024-07-17 17:49:51,940] Trial 70 finished with value: 0.6430470494806036 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.5, 'hidden_units': 128, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 40.17it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 32.49it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.99it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 25.27it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 32.31it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 27.03it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 2/2 [00:00<00:00, 37.66it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 39.15it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 34.95it/s]


[I 2024-07-17 17:49:55,049] Trial 71 finished with value: 0.18949483868029332 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 36.71it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 28.61it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.90it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.78it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.11it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.81it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 35.88it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 36.46it/s]


[I 2024-07-17 17:49:57,749] Trial 72 finished with value: 0.20700840945363547 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 45.08it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 37.89it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 31.96it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 26.76it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 32.61it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 25.87it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 2/2 [00:00<00:00, 32.47it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 34.27it/s]


[I 2024-07-17 17:50:00,555] Trial 73 finished with value: 0.2257405125854696 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 43.98it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 34.73it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 45.92it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 36.54it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 44.03it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 37.14it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 49.90it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 43.08it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 9.2 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 8.7 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
24.5 K    Trainable params
0         Non-trainable params
24.5 K    Total params
0.098     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 38.75it/s]

[I 2024-07-17 17:50:03,410] Trial 74 finished with value: 0.5326950982771096 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 32, 'classifier_units': 64, 'batch_size': 64}. Best is trial 61 with value: 0.18858144789630626.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.93it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.08it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.91it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 37.75it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 41.11it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 5/5 [00:00<00:00, 41.18it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 6/6 [00:00<00:00, 46.55it/s]

[I 2024-07-17 17:50:07,741] Trial 75 finished with value: 0.1359277220447974 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 75 with value: 0.1359277220447974.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 53.01it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 46.34it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 58.46it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 54.00it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 36.22it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 44.92it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 42.50it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 6/6 [00:00<00:00, 43.70it/s]


[I 2024-07-17 17:50:12,255] Trial 76 finished with value: 0.13451480735448582 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 76 with value: 0.13451480735448582.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 59.01it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 51.03it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.52it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 37.69it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 4/4 [00:00<00:00, 47.28it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 42.62it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 6/6 [00:00<00:00, 56.72it/s]


[I 2024-07-17 17:50:16,491] Trial 77 finished with value: 0.2934783062880756 and parameters: {'lr': 0.001, 'num_heads': 4, 'dropout_prob': 0.3, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 16, 'batch_size': 16}. Best is trial 76 with value: 0.13451480735448582.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 2/2 [00:00<00:00, 60.73it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 61.81it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 56.05it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 4/4 [00:00<00:00, 60.56it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 5/5 [00:00<00:00, 64.81it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 6/6 [00:00<00:00, 60.23it/s]


[I 2024-07-17 17:50:19,281] Trial 78 finished with value: 1.099978349431081 and parameters: {'lr': 1e-05, 'num_heads': 2, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 76 with value: 0.13451480735448582.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 61.31it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 53.15it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 63.38it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 56.96it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 4/4 [00:00<00:00, 62.38it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 65.35it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 62.39it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 6/6 [00:00<00:00, 63.81it/s]


[I 2024-07-17 17:50:23,262] Trial 79 finished with value: 0.1637085773790498 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 76 with value: 0.13451480735448582.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.27it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 34.69it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 37.87it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 35.10it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 4/4 [00:00<00:00, 37.46it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 5/5 [00:00<00:00, 43.58it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 6/6 [00:00<00:00, 41.31it/s]


[I 2024-07-17 17:50:27,845] Trial 80 finished with value: 0.13831852558336405 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 76 with value: 0.13451480735448582.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.91it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.20it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 38.86it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 35.59it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 4/4 [00:00<00:00, 47.81it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 50.51it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 46.60it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 6/6 [00:00<00:00, 43.29it/s]


[I 2024-07-17 17:50:32,915] Trial 81 finished with value: 0.0960632309753097 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 42.18it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.33it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 47.85it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 41.53it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 39.31it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 40.94it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 6/6 [00:00<00:00, 41.45it/s]


[I 2024-07-17 17:50:36,930] Trial 82 finished with value: 0.21326530240749447 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.49it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 31.35it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 43.20it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.31it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 42.86it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 5/5 [00:00<00:00, 42.96it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 6/6 [00:00<00:00, 42.17it/s]


[I 2024-07-17 17:50:41,135] Trial 83 finished with value: 0.12821233522876796 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 43.92it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.99it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 45.55it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 42.35it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 37.44it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 35.73it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 5/5 [00:00<00:00, 40.35it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 6/6 [00:00<00:00, 49.09it/s]

[I 2024-07-17 17:50:46,589] Trial 84 finished with value: 0.11666364889947922 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 39.95it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 35.60it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 41.03it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 37.44it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 4/4 [00:00<00:00, 35.21it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 5/5 [00:00<00:00, 40.15it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 33.86it/s]


[I 2024-07-17 17:50:51,894] Trial 85 finished with value: 0.14362008244313157 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 39.00it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 35.19it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 56.03it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 50.65it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 33.60it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 48.48it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 46.86it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 6/6 [00:00<00:00, 65.38it/s]


[I 2024-07-17 17:50:56,959] Trial 86 finished with value: 0.09677550371826343 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
15.5 K    Trainable params
0         Non-trainable params
15.5 K    Total params
0.062     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 46.87it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.90it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
15.5 K    Trainable params
0         Non-trainable params
15.5 K    Total params
0.062     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 65.60it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 61.43it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
15.5 K    Trainable params
0         Non-trainable params
15.5 K    Total params
0.062     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 73.87it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
15.5 K    Trainable params
0         Non-trainable params
15.5 K    Total params
0.062     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 83.61it/s] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 4.6 K 
1 | attention  | MultiheadAttention | 4.2 K 
2 | decoder    | Sequential         | 4.4 K 
3 | classifier | Sequential         | 2.3 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
15.5 K    Trainable params
0         Non-trainable params
15.5 K    Total params
0.062     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 72.61it/s] 

[I 2024-07-17 17:50:59,532] Trial 87 finished with value: 1.1612743207325493 and parameters: {'lr': 0.01, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 128, 'embed_dim': 32, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 60.71it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 54.31it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 76.44it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 68.77it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 4/4 [00:00<00:00, 59.65it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 5/5 [00:00<00:00, 62.14it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 6/6 [00:00<00:00, 40.49it/s]


[I 2024-07-17 17:51:03,278] Trial 88 finished with value: 0.11508932005620169 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 34.38it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 3/3 [00:00<00:00, 34.83it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 43.49it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 40.25it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 42.93it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 40.83it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 8.6 K 
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 8.3 K 
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
91.4 K    Trainable params
0         Non-trainable params
91.4 K    Total params
0.366     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 6/6 [00:00<00:00, 46.54it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 6/6 [00:00<00:00, 44.81it/s]


[I 2024-07-17 17:51:08,755] Trial 89 finished with value: 0.6759958870532523 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.5, 'hidden_units': 64, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 39.83it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 34.80it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.45it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 36.83it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 42.27it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 39.30it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 5/5 [00:00<00:00, 35.50it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 49.90it/s]


[I 2024-07-17 17:51:13,836] Trial 90 finished with value: 0.141199752062485 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 38.86it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 32.71it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 44.92it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.74it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 4/4 [00:00<00:00, 43.75it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 5/5 [00:00<00:00, 43.95it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 6/6 [00:00<00:00, 41.67it/s]

[I 2024-07-17 17:51:18,090] Trial 91 finished with value: 0.1259503197714127 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 42.20it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.95it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 43.30it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 39.96it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 41.64it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 53.11it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 49.62it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 43.01it/s]

[I 2024-07-17 17:51:22,496] Trial 92 finished with value: 0.13143346722874721 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.78it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 34.97it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 44.30it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 39.98it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 4/4 [00:00<00:00, 41.38it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 5/5 [00:00<00:00, 41.89it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 5: 100%|██████████| 6/6 [00:00<00:00, 43.33it/s]

[I 2024-07-17 17:51:27,372] Trial 93 finished with value: 0.13737095097967894 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.28it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.43it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 43.79it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 39.88it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 4/4 [00:00<00:00, 42.77it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 44.95it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 42.46it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 66.08it/s]

[I 2024-07-17 17:51:32,391] Trial 94 finished with value: 0.2892074725463432 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 59.88it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 53.48it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 61.68it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 56.72it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 4/4 [00:00<00:00, 62.76it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 5/5 [00:00<00:00, 63.44it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 66.16it/s]

[I 2024-07-17 17:51:35,441] Trial 95 finished with value: 0.16634088678213693 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 41.15it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 37.32it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 69.19it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 63.36it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 50.22it/s] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 4/4 [00:00<00:00, 47.82it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 5/5 [00:00<00:00, 61.90it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 2.1 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
135 K     Trainable params
0         Non-trainable params
135 K     Total params
0.541     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 4: 100%|██████████| 6/6 [00:00<00:00, 46.09it/s]


[I 2024-07-17 17:51:38,598] Trial 96 finished with value: 0.23832726138494068 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.3, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 16, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 63.81it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 55.04it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 40.89it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 38.23it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 4/4 [00:00<00:00, 40.86it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 42.99it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 5/5 [00:00<00:00, 40.88it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 6/6 [00:00<00:00, 49.79it/s]

[I 2024-07-17 17:51:43,620] Trial 97 finished with value: 0.14384025931720487 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.30it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 36.62it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 7: 100%|██████████| 3/3 [00:00<00:00, 41.85it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 43.59it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 5/5 [00:00<00:00, 38.32it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 6/6 [00:00<00:00, 46.74it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 6/6 [00:00<00:00, 44.86it/s]

[I 2024-07-17 17:51:48,513] Trial 98 finished with value: 1.0926545498486617 and parameters: {'lr': 1e-05, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.





GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 46.25it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 40.14it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 45.64it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 3/3 [00:00<00:00, 41.57it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 4/4 [00:00<00:00, 39.36it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 6: 100%|██████████| 5/5 [00:00<00:00, 40.24it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 8: 100%|██████████| 6/6 [00:00<00:00, 40.77it/s]


[I 2024-07-17 17:51:53,931] Trial 99 finished with value: 0.13487873163124273 and parameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}. Best is trial 81 with value: 0.0960632309753097.


In [11]:
# Print best hyperparameters
print("Best hyperparameters:", study.best_params)

Best hyperparameters: {'lr': 0.001, 'num_heads': 1, 'dropout_prob': 0.1, 'hidden_units': 256, 'embed_dim': 128, 'classifier_units': 64, 'batch_size': 16}


In [12]:
ae_attention_model = AutoencoderAttentionClassifier(
    context_length=1, 
    num_classes=3, 
    num_features=2,
    lr=study.best_params['lr'],
    num_heads=study.best_params['num_heads'],
    dropout_prob=study.best_params['dropout_prob'],
    hidden_units=study.best_params['hidden_units'],
    embed_dim=study.best_params['embed_dim'],
    classifier_units=study.best_params['classifier_units']
    )

In [13]:
# Train the model
trainer = Trainer(max_epochs=20, callbacks=[checkpoint_callback])
trainer.fit(ae_attention_model, data_module.train_dataloader())

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00, 64.85it/s, v_num=42]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00, 54.91it/s, v_num=42]


In [14]:
# Perform cross-validation
cv_results = cross_validate_model(
    data_module.val_features, 
    data_module.val_target, 
    AutoencoderAttentionClassifier, 
    context_length=1, 
    num_classes=3, 
    num_features=2,
    lr=study.best_params['lr'],
    num_heads=study.best_params['num_heads'],
    dropout_prob=study.best_params['dropout_prob'],
    hidden_units=study.best_params['hidden_units'],
    embed_dim=study.best_params['embed_dim'],
    classifier_units=study.best_params['classifier_units'])

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 49: 100%|██████████| 2/2 [00:00<00:00, 47.17it/s, v_num=43]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 2/2 [00:00<00:00, 30.31it/s, v_num=43]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 3/3 [00:00<00:00, 60.11it/s, v_num=44]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 3/3 [00:00<00:00, 38.64it/s, v_num=44]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 4/4 [00:00<00:00, 70.61it/s, v_num=45]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 4/4 [00:00<00:00, 53.95it/s, v_num=45]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 5/5 [00:00<00:00, 47.16it/s, v_num=46]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 5/5 [00:00<00:00, 39.73it/s, v_num=46]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 6/6 [00:00<00:00, 62.39it/s, v_num=47]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 6/6 [00:00<00:00, 52.69it/s, v_num=47]
Cross-Validation Classification Report:
           0.0  1.0  2.0  accuracy  macro avg  weighted avg
precision  1.0  1.0  1.0       1.0        1.0           1.0
recall     1.0  1.0  1.0       1.0        1.0           1.0
f1-score   1.0  1.0  1.0       1.0        1.0           1.0
support    7.2  5.2  3.6       1.0       16.0          16.0


In [15]:
print(cv_results)

           0.0  1.0  2.0  accuracy  macro avg  weighted avg
precision  1.0  1.0  1.0       1.0        1.0           1.0
recall     1.0  1.0  1.0       1.0        1.0           1.0
f1-score   1.0  1.0  1.0       1.0        1.0           1.0
support    7.2  5.2  3.6       1.0       16.0          16.0


In [18]:
# Perform MC Dropout predictions
mc_predictions = mc_dropout_predictions(ae_attention_model, data_module.test_dataloader())
# Calculate mean and standard deviation for uncertainty estimates
mean_predictions = mc_predictions.mean(axis=0)
std_predictions = mc_predictions.std(axis=0)
# Convert mean predictions to class labels
predicted_labels = np.argmax(mean_predictions, axis=1)

In [19]:
# Example output with probabilities and uncertainty
for i, (mean, std) in enumerate(zip(mean_predictions, std_predictions)):
    # softmax_probs = np.exp(mean) / np.sum(np.exp(mean)) # Softmax to get probabilities
    print(f'Sample {i}: Predicted Label = {predicted_labels[i]}, Probabilities = {mean}, Uncertainty (std) = {std}')

Sample 0: Predicted Label = 2, Probabilities = [8.794023e-13 2.155976e-05 9.999785e-01], Uncertainty (std) = [4.2819121e-12 4.6762838e-05 4.6762609e-05]
Sample 1: Predicted Label = 2, Probabilities = [1.6389518e-13 1.7555063e-05 9.9998277e-01], Uncertainty (std) = [1.0847810e-12 3.2813448e-05 3.2815027e-05]
Sample 2: Predicted Label = 0, Probabilities = [9.9996918e-01 3.1090702e-05 1.9146974e-13], Uncertainty (std) = [2.4332161e-04 2.4331795e-04 1.7549427e-12]
Sample 3: Predicted Label = 0, Probabilities = [9.9999928e-01 8.5255687e-07 3.7035114e-15], Uncertainty (std) = [4.3830778e-06 4.3823616e-06 3.5958063e-14]
Sample 4: Predicted Label = 2, Probabilities = [2.5558643e-13 3.4495219e-04 9.9965525e-01], Uncertainty (std) = [1.9162801e-12 2.9945208e-03 2.9945192e-03]
Sample 5: Predicted Label = 1, Probabilities = [3.3359454e-04 9.9914080e-01 5.2574102e-04], Uncertainty (std) = [0.00134689 0.00194804 0.00125005]
Sample 6: Predicted Label = 0, Probabilities = [9.9999785e-01 2.2749416e-06 

- Export the reults

In [20]:
import pandas as pd
# Save test predictions to a CSV
test_df = pd.DataFrame({
    'Prediction': predicted_labels,
    'Probability_0': [p[0] for p in mean_predictions],
    'Probability_1': [p[1] for p in mean_predictions],
    'Probability_2': [p[2] for p in mean_predictions],  # Adjust based on num_classes
    'Uncertainty_0': [u[0] for u in std_predictions],
    'Uncertainty_1': [u[1] for u in std_predictions],
    'Uncertainty_2': [u[2] for u in std_predictions] 
})

test_df.to_csv('test_predictions.csv', index=False)

- Export the model

In [21]:
from torch import save
save(ae_attention_model, 'autoencoder_attention_classifier.pth')