In [1]:
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload 
%autoreload 2

import numpy as np
from os import PathLike
from typing import Union, List

from ray import tune
from ray.tune.integration.pytorch_lightning import TuneReportCallback
from ray.tune.schedulers import ASHAScheduler

from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

import eugene as eu

Global seed set to 13


In [2]:
def hyperopt_with_tune(
    config: dict,
    sdata = None,
    target_keys: Union[str, List[str]] = None,
    train_key: str = "train_val",
    epochs: int = 10,
    gpus: int = None,
    num_workers: int = None,
    log_dir: PathLike = None,
    name: str = None,
    version: str = None,
    train_dataset: eu.dl.SeqDataset = None,
    val_dataset: eu.dl.SeqDataset = None,
    train_dataloader: DataLoader = None,
    val_dataloader: DataLoader = None,
    seq_transforms: List[str] = None,
    transform_kwargs: dict = {},
    seed: int = None,
    verbosity = None,
    **kwargs
):
    model = eu.models.get_model(config["arch"], config)
    gpus = gpus if gpus is not None else eu.settings.gpus
    num_workers = num_workers if num_workers is not None else eu.settings.dl_num_workers
    log_dir = log_dir if log_dir is not None else eu.settings.logging_dir
    name = name if name is not None else config["arch"]
    seed_everything(seed, workers=True) if seed is not None else seed_everything(eu.settings.seed)
    batch_size = config["batch_size"]
    if train_dataloader is not None:
        assert val_dataloader is not None
    elif train_dataset is not None:
        assert val_dataset is not None
        train_dataloader = DataLoader(
            train_dataset, batch_size=batch_size, num_workers=num_workers
        )
        val_dataloader = DataLoader(
            val_dataset, batch_size=batch_size, num_workers=num_workers
        )
    elif sdata is not None:
        assert target_keys is not None
        targs = sdata.seqs_annot[target_keys].values  
        if len(targs.shape) == 1:
            nan_mask = np.isnan(targs)
        else:
            nan_mask = np.any(np.isnan(targs), axis=1)
        print(f"Dropping {nan_mask.sum()} sequences with NaN targets.")
        sdata = sdata[~nan_mask]
        train_idx = np.where(sdata.seqs_annot[train_key] == True)[0]
        train_dataset = sdata[train_idx].to_dataset(
            target_keys=target_keys,
            seq_transforms=seq_transforms,
            transform_kwargs=transform_kwargs,
        )
        train_dataloader = train_dataset.to_dataloader(
            batch_size=batch_size, num_workers=num_workers, shuffle=True
        )
        val_idx = np.where(sdata.seqs_annot[train_key] == False)[0]
        val_dataset = sdata[val_idx].to_dataset(
            target_keys=target_keys,
            seq_transforms=seq_transforms,
            transform_kwargs=transform_kwargs,
        )
        val_dataloader = val_dataset.to_dataloader(
            batch_size=batch_size,
            num_workers=num_workers,
            shuffle=False,
        )
    else:
        raise ValueError("No data provided to train on.")
    logger = TensorBoardLogger(log_dir, name=name, version=version)
    callbacks = []
    metrics = {"loss": "val_loss"}
    callbacks.append(TuneReportCallback(metrics, on="validation_end"))
    print(callbacks)
    trainer = Trainer(
        max_epochs=epochs,
        gpus=gpus,
        logger=logger,
        progress_bar_refresh_rate=0,
        callbacks=callbacks,
    )
    trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)


In [3]:
sdata = eu.datasets.random1000()
eu.pp.ohe_seqs_sdata(sdata)
eu.pp.reverse_complement_seqs_sdata(sdata)
eu.pp.train_test_split_sdata(sdata)

One-hot encoding sequences:   0%|          | 0/1000 [00:00<?, ?it/s]

SeqData object modified:
	ohe_seqs: None -> 1000 ohe_seqs added
SeqData object modified:
	ohe_rev_seqs: None -> 1000 ohe_rev_seqs added
SeqData object modified:
    seqs_annot:
        + train_val


In [4]:
config = {
  "arch": "CNN",
  "input_len": 100,
  "output_dim": 1,
  "strand": "ss",
  "aggr": None,
  "lr": 1e-3,
  "batch_size": 64,
  "conv_kwargs": {
    "channels": [4, 16],
    "conv_kernels": [3],
    "pool_kernels": [2],
    "dropout_rates": [0.1]
  },
  "fc_kwargs": {
    "hidden_dims": [32]
  }
}

In [5]:
model = eu.models.get_model(config["arch"], config)
model

CNN(
  (hp_metric): R2Score()
  (convnet): BasicConv1D(
    (module): Sequential(
      (0): Conv1d(4, 16, kernel_size=(3,), stride=(1,), padding=valid)
      (1): ReLU()
      (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Dropout(p=0.1, inplace=False)
    )
  )
  (fcn): BasicFullyConnectedModule(
    (module): Sequential(
      (0): Linear(in_features=784, out_features=32, bias=True)
      (1): ReLU()
      (2): Linear(in_features=32, out_features=1, bias=True)
    )
  )
)

In [16]:
eu.train.fit(
    model = model,
    sdata = sdata,
    target_keys = "activity_0", 
    train_key = "train_val",
    epochs = 10,
    gpus = 0,
    num_workers = 0
)

Global seed set to 13
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name      | Type                      | Params
--------------------------------------------------------
0 | hp_metric | R2Score                   | 0     
1 | convnet   | BasicConv1D               | 208   
2 | fcn       | BasicFullyConnectedModule | 25.2 K
--------------------------------------------------------
25.4 K    Trainable params
0         Non-trainable params
25.4 K    Total params
0.101     Total estimated model params size (MB)


Dropping 0 sequences with NaN targets.
No transforms given, assuming just need to tensorize.
No transforms given, assuming just need to tensorize.


Validation sanity check: 0it [00:00, ?it/s]

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
Global seed set to 13
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [4]:
trainable = tune.with_parameters(
    hyperopt_with_tune,
    sdata=sdata,
    target_keys="activity_0",
    train_key="train_val",
    epochs=10,
    gpus=0,
    num_workers=0
)

In [5]:
tune_config = {
  "arch": "CNN",
  "input_len": 100,
  "output_dim": 1,
  "strand": tune.choice(["ss", "ds", "ts"]),
  "aggr": tune.choice(["max", "avg"]),
  "lr": tune.loguniform(1e-4, 1e-1),
  "batch_size": tune.choice([32, 64, 128]),
  "conv_kwargs": {
    "channels": [4, 16],
    "conv_kernels": [tune.choice([3, 5])],
    "pool_kernels": [tune.choice([2, 4])],
    "dropout_rates": [tune.choice([0.1, 0.2])]
  },
  "fc_kwargs": {
    "hidden_dims": [32]
  }
}

In [7]:
scheduler = ASHAScheduler(
    metric="loss",
    mode="min",
    max_t=10,
    grace_period=1,
    reduction_factor=2
)

In [8]:
analysis = tune.run(
    trainable,
    config=tune_config,
    metric="loss",
    mode="min",
    num_samples=10,
    local_dir=eu.settings.logging_dir,
    keep_checkpoints_num=1,
    checkpoint_score_attr="min-val_loss",
    name="test"
)

2022-10-19 22:58:26,560	INFO worker.py:1518 -- Started a local Ray instance.

from ray.air import session

def train(config):
    # ...
    session.report({"metric": metric}, checkpoint=checkpoint)

For more information please see https://docs.ray.io/en/master/ray-air/key-concepts.html#session



Trial name,status,loc,aggr,batch_size,conv_kwargs/conv_...,conv_kwargs/dropo...,conv_kwargs/pool_...,lr,strand,iter,total time (s),loss
hyperopt_with_tune_8b3ae_00000,TERMINATED,172.16.5.4:7710,max,128,3,0.2,2,0.000515969,ts,10,31.0604,0.0812072
hyperopt_with_tune_8b3ae_00001,TERMINATED,172.16.5.4:7808,max,32,3,0.1,2,0.0022927,ss,10,23.2306,0.0842177
hyperopt_with_tune_8b3ae_00002,TERMINATED,172.16.5.4:7919,avg,64,5,0.1,2,0.0146573,ts,10,13.4055,0.0802305
hyperopt_with_tune_8b3ae_00003,TERMINATED,172.16.5.4:8021,avg,128,5,0.2,4,0.0372549,ss,10,1.82449,0.0891374
hyperopt_with_tune_8b3ae_00004,TERMINATED,172.16.5.4:8021,max,32,3,0.1,2,0.0954421,ts,10,6.16415,0.0802142
hyperopt_with_tune_8b3ae_00005,TERMINATED,172.16.5.4:7710,max,64,3,0.1,4,0.0656628,ts,10,4.15178,0.0802464
hyperopt_with_tune_8b3ae_00006,TERMINATED,172.16.5.4:7919,avg,64,5,0.2,4,0.00775331,ss,10,3.22029,0.085105
hyperopt_with_tune_8b3ae_00007,TERMINATED,172.16.5.4:7808,max,128,5,0.2,4,0.000389489,ds,10,2.68956,0.082369
hyperopt_with_tune_8b3ae_00008,TERMINATED,172.16.5.4:7710,max,128,3,0.2,2,0.0735737,ts,10,2.39946,0.759937
hyperopt_with_tune_8b3ae_00009,TERMINATED,172.16.5.4:7808,avg,128,5,0.1,4,0.000654435,ts,10,2.29624,0.0821289


[2m[36m(pid=7710)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7710)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7710)[0m   f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
[2m[36m(hyperopt_with_tune pid=7710)[0m GPU available: False, used: False
[2m[36m(hyperopt_with_tune pid=7710)[0m TPU available: False, using: 0 TPU cores
[2m[36m(hyperopt_with_tune pid=7710)[0m IPU available: False, using: 0 IPUs
[2m[36m(hyperopt_with_tune pid=7710)[0m   "The `on_keyboard_interrupt` callback hook was deprecated in v1.5 and will be removed in v1.7."
[2m[36m(hyperopt_with_tune pid=7710)[0m 
[2m[36m(hyperopt_with_tune pid=7710)[0m   | Name            | Type                      | Params
[2m[36m(hyperopt_with_tune pid=7710)[0m --------------------------------------------------------------
[2m[36m(hyperopt_with_tune pid=7710)[0m 0 | hp_metric       | R2Score                   | 0     
[2m[36m(h

[2m[36m(hyperopt_with_tune pid=7710)[0m Dropping 0 sequences with NaN targets.
[2m[36m(hyperopt_with_tune pid=7710)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7710)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7710)[0m [<ray.tune.integration.pytorch_lightning.TuneReportCallback object at 0x7fa851044790>]


[2m[36m(hyperopt_with_tune pid=7710)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7710)[0m   f"The dataloader, {name}, does not have many workers which may be a bottleneck."
[2m[36m(hyperopt_with_tune pid=7710)[0m   f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
[2m[36m(pid=7808)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7808)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7808)[0m   f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
[2m[36m(hyperopt_with_tune pid=7808)[0m GPU available: False, used: False
[2m[36m(hyperopt_with_tune pid=7808)[0m TPU available: False, using: 0 TPU cores
[2m[36m(hyperopt_with_tune pid=7808)[0m IPU available: False, using: 0 IPUs
[2m[36m(hyperopt_with_tune pid=7808)[0m   "The `on_keyboard_interrupt` callback hook was deprecated in v1.5 and will be removed in v1.7."
[2m[36m(hyperopt_with_

[2m[36m(hyperopt_with_tune pid=7808)[0m Dropping 0 sequences with NaN targets.
[2m[36m(hyperopt_with_tune pid=7808)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7808)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7808)[0m [<ray.tune.integration.pytorch_lightning.TuneReportCallback object at 0x7f9f28e60f90>]


[2m[36m(pid=7919)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7919)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7919)[0m   f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
[2m[36m(hyperopt_with_tune pid=7919)[0m GPU available: False, used: False
[2m[36m(hyperopt_with_tune pid=7919)[0m TPU available: False, using: 0 TPU cores
[2m[36m(hyperopt_with_tune pid=7919)[0m IPU available: False, using: 0 IPUs
[2m[36m(hyperopt_with_tune pid=7919)[0m   "The `on_keyboard_interrupt` callback hook was deprecated in v1.5 and will be removed in v1.7."
[2m[36m(hyperopt_with_tune pid=7919)[0m 
[2m[36m(hyperopt_with_tune pid=7919)[0m   | Name            | Type                      | Params
[2m[36m(hyperopt_with_tune pid=7919)[0m --------------------------------------------------------------
[2m[36m(hyperopt_with_tune pid=7919)[0m 0 | hp_metric       | R2Score                   | 0     
[2m[36m(h

[2m[36m(hyperopt_with_tune pid=7919)[0m Dropping 0 sequences with NaN targets.
[2m[36m(hyperopt_with_tune pid=7919)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7919)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7919)[0m [<ray.tune.integration.pytorch_lightning.TuneReportCallback object at 0x7fe8dc1b96d0>]


[2m[36m(pid=8021)[0m Global seed set to 13


Result for hyperopt_with_tune_8b3ae_00000:
  date: 2022-10-19_22-58-39
  done: false
  experiment_id: bee7ef4669814bf7a354bbd36ad9bd4f
  hostname: codespaces-97ce9f
  iterations_since_restore: 1
  loss: 0.1006212830543518
  node_ip: 172.16.5.4
  pid: 7710
  time_since_restore: 0.865459680557251
  time_this_iter_s: 0.865459680557251
  time_total_s: 0.865459680557251
  timestamp: 1666220319
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 8b3ae_00000
  warmup_time: 0.0035009384155273438
  
Result for hyperopt_with_tune_8b3ae_00001:
  date: 2022-10-19_22-58-48
  done: false
  experiment_id: 34146fb72cc94908923422e52e6f5993
  hostname: codespaces-97ce9f
  iterations_since_restore: 1
  loss: 0.08753872662782669
  node_ip: 172.16.5.4
  pid: 7808
  time_since_restore: 0.27142834663391113
  time_this_iter_s: 0.27142834663391113
  time_total_s: 0.27142834663391113
  timestamp: 1666220328
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 8b3ae_00001
  warmup_time: 

[2m[36m(hyperopt_with_tune pid=8021)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=8021)[0m   f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
[2m[36m(hyperopt_with_tune pid=8021)[0m GPU available: False, used: False
[2m[36m(hyperopt_with_tune pid=8021)[0m TPU available: False, using: 0 TPU cores
[2m[36m(hyperopt_with_tune pid=8021)[0m IPU available: False, using: 0 IPUs
[2m[36m(hyperopt_with_tune pid=8021)[0m   "The `on_keyboard_interrupt` callback hook was deprecated in v1.5 and will be removed in v1.7."
[2m[36m(hyperopt_with_tune pid=8021)[0m 
[2m[36m(hyperopt_with_tune pid=8021)[0m   | Name      | Type                      | Params
[2m[36m(hyperopt_with_tune pid=8021)[0m --------------------------------------------------------
[2m[36m(hyperopt_with_tune pid=8021)[0m 0 | hp_metric | R2Score                   | 0     
[2m[36m(hyperopt_with_tune pid=8021)[0m 1 | convnet   | BasicConv1D     

Result for hyperopt_with_tune_8b3ae_00003:
  date: 2022-10-19_22-59-07
  done: false
  experiment_id: 18a4210a9ca8418f990c3772efc7e4f9
  hostname: codespaces-97ce9f
  iterations_since_restore: 1
  loss: 0.49401620030403137
  node_ip: 172.16.5.4
  pid: 8021
  time_since_restore: 0.24023842811584473
  time_this_iter_s: 0.24023842811584473
  time_total_s: 0.24023842811584473
  timestamp: 1666220347
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 8b3ae_00003
  warmup_time: 0.003967761993408203
  
Result for hyperopt_with_tune_8b3ae_00003:
  date: 2022-10-19_22-59-08
  done: true
  experiment_id: 18a4210a9ca8418f990c3772efc7e4f9
  experiment_tag: 3_aggr=avg,batch_size=128,0=5,0=0.2000,0=4,lr=0.0373,strand=ss
  hostname: codespaces-97ce9f
  iterations_since_restore: 10
  loss: 0.08913742005825043
  node_ip: 172.16.5.4
  pid: 8021
  time_since_restore: 1.824488878250122
  time_this_iter_s: 0.1521308422088623
  time_total_s: 1.824488878250122
  timestamp: 1666220348
  timestep

[2m[36m(hyperopt_with_tune pid=8021)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=8021)[0m GPU available: False, used: False
[2m[36m(hyperopt_with_tune pid=8021)[0m TPU available: False, using: 0 TPU cores
[2m[36m(hyperopt_with_tune pid=8021)[0m IPU available: False, using: 0 IPUs
[2m[36m(hyperopt_with_tune pid=8021)[0m 
[2m[36m(hyperopt_with_tune pid=8021)[0m   | Name            | Type                      | Params
[2m[36m(hyperopt_with_tune pid=8021)[0m --------------------------------------------------------------
[2m[36m(hyperopt_with_tune pid=8021)[0m 0 | hp_metric       | R2Score                   | 0     
[2m[36m(hyperopt_with_tune pid=8021)[0m 1 | convnet         | BasicConv1D               | 208   
[2m[36m(hyperopt_with_tune pid=8021)[0m 2 | reverse_convnet | BasicConv1D               | 208   
[2m[36m(hyperopt_with_tune pid=8021)[0m 3 | fcn             | BasicFullyConnectedModule | 25.2 K
[2m[36m(hyperopt_with_tune pid=8021)[0m 4 

Result for hyperopt_with_tune_8b3ae_00000:
  date: 2022-10-19_22-59-09
  done: true
  experiment_id: bee7ef4669814bf7a354bbd36ad9bd4f
  experiment_tag: 0_aggr=max,batch_size=128,0=3,0=0.2000,0=2,lr=0.0005,strand=ts
  hostname: codespaces-97ce9f
  iterations_since_restore: 10
  loss: 0.08120723813772202
  node_ip: 172.16.5.4
  pid: 7710
  time_since_restore: 31.060357809066772
  time_this_iter_s: 0.2776455879211426
  time_total_s: 31.060357809066772
  timestamp: 1666220349
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: 8b3ae_00000
  warmup_time: 0.0035009384155273438
  
[2m[36m(hyperopt_with_tune pid=7710)[0m Dropping 0 sequences with NaN targets.
[2m[36m(hyperopt_with_tune pid=7710)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7710)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7710)[0m [<ray.tune.integration.pytorch_lightning.TuneReportCallback object at 0x7fa8502c6bd0>]

[2m[36m(hyperopt_with_tune pid=7710)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7710)[0m GPU available: False, used: False
[2m[36m(hyperopt_with_tune pid=7710)[0m TPU available: False, using: 0 TPU cores
[2m[36m(hyperopt_with_tune pid=7710)[0m IPU available: False, using: 0 IPUs
[2m[36m(hyperopt_with_tune pid=7710)[0m 
[2m[36m(hyperopt_with_tune pid=7710)[0m   | Name            | Type                      | Params
[2m[36m(hyperopt_with_tune pid=7710)[0m --------------------------------------------------------------
[2m[36m(hyperopt_with_tune pid=7710)[0m 0 | hp_metric       | R2Score                   | 0     
[2m[36m(hyperopt_with_tune pid=7710)[0m 1 | convnet         | BasicConv1D               | 208   
[2m[36m(hyperopt_with_tune pid=7710)[0m 2 | reverse_convnet | BasicConv1D               | 208   
[2m[36m(hyperopt_with_tune pid=7710)[0m 3 | fcn             | BasicFullyConnectedModule | 12.4 K
[2m[36m(hyperopt_with_tune pid=7710)[0m 4 

Result for hyperopt_with_tune_8b3ae_00005:
  date: 2022-10-19_22-59-10
  done: false
  experiment_id: bee7ef4669814bf7a354bbd36ad9bd4f
  hostname: codespaces-97ce9f
  iterations_since_restore: 1
  loss: 0.49822548031806946
  node_ip: 172.16.5.4
  pid: 7710
  time_since_restore: 0.7911510467529297
  time_this_iter_s: 0.7911510467529297
  time_total_s: 0.7911510467529297
  timestamp: 1666220350
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 8b3ae_00005
  warmup_time: 0.0035009384155273438
  
Result for hyperopt_with_tune_8b3ae_00002:
  date: 2022-10-19_22-59-10
  done: true
  experiment_id: b17153f5c6e14c5c976cbcadd31cc891
  experiment_tag: 2_aggr=avg,batch_size=64,0=5,0=0.1000,0=2,lr=0.0147,strand=ts
  hostname: codespaces-97ce9f
  iterations_since_restore: 10
  loss: 0.08023054152727127
  node_ip: 172.16.5.4
  pid: 7919
  time_since_restore: 13.405469179153442
  time_this_iter_s: 0.4621865749359131
  time_total_s: 13.405469179153442
  timestamp: 1666220350
  timesteps

[2m[36m(hyperopt_with_tune pid=7919)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7919)[0m GPU available: False, used: False
[2m[36m(hyperopt_with_tune pid=7919)[0m TPU available: False, using: 0 TPU cores
[2m[36m(hyperopt_with_tune pid=7919)[0m IPU available: False, using: 0 IPUs
[2m[36m(hyperopt_with_tune pid=7919)[0m 
[2m[36m(hyperopt_with_tune pid=7919)[0m   | Name      | Type                      | Params
[2m[36m(hyperopt_with_tune pid=7919)[0m --------------------------------------------------------
[2m[36m(hyperopt_with_tune pid=7919)[0m 0 | hp_metric | R2Score                   | 0     
[2m[36m(hyperopt_with_tune pid=7919)[0m 1 | convnet   | BasicConv1D               | 336   
[2m[36m(hyperopt_with_tune pid=7919)[0m 2 | fcn       | BasicFullyConnectedModule | 12.4 K
[2m[36m(hyperopt_with_tune pid=7919)[0m --------------------------------------------------------
[2m[36m(hyperopt_with_tune pid=7919)[0m 12.7 K    Trainable params
[2m

[2m[36m(hyperopt_with_tune pid=7808)[0m Dropping 0 sequences with NaN targets.
[2m[36m(hyperopt_with_tune pid=7808)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7808)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7808)[0m [<ray.tune.integration.pytorch_lightning.TuneReportCallback object at 0x7f9f28f0cad0>]
Result for hyperopt_with_tune_8b3ae_00006:
  date: 2022-10-19_22-59-11
  done: false
  experiment_id: b17153f5c6e14c5c976cbcadd31cc891
  hostname: codespaces-97ce9f
  iterations_since_restore: 1
  loss: 0.08371169865131378
  node_ip: 172.16.5.4
  pid: 7919
  time_since_restore: 0.3613247871398926
  time_this_iter_s: 0.3613247871398926
  time_total_s: 0.3613247871398926
  timestamp: 1666220351
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 8b3ae_00006
  warmup_time: 0.017160892486572266
  


[2m[36m(hyperopt_with_tune pid=7808)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7808)[0m   f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Result for hyperopt_with_tune_8b3ae_00007:
  date: 2022-10-19_22-59-11
  done: false
  experiment_id: 34146fb72cc94908923422e52e6f5993
  hostname: codespaces-97ce9f
  iterations_since_restore: 1
  loss: 0.12046148627996445
  node_ip: 172.16.5.4
  pid: 7808
  time_since_restore: 0.3523218631744385
  time_this_iter_s: 0.3523218631744385
  time_total_s: 0.3523218631744385
  timestamp: 1666220351
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 8b3ae_00007
  warmup_time: 0.003590106964111328
  
Result for hyperopt_with_tune_8b3ae_00005:
  date: 2022-10-19_22-59-13
  done: true
  experiment_id: bee7ef4669814bf7a354bbd36ad9bd4f
  experiment_tag: 5_aggr=max,batch_size=64,0=3,0=0.1000,0=4,lr=0.0657,strand=ts
  hostname: codespaces-97ce9f
  iterations_since_restore: 10
  loss: 0.0802464485168457
  node_ip: 172.16.5.4
  pid: 7710
  time_since_restore: 4.151783466339111
  time_this_iter_s: 0.3170301914215088
  time_total_s: 4.151783466339111
  timestamp: 1666220353
  timesteps_sin

[2m[36m(hyperopt_with_tune pid=7710)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7710)[0m GPU available: False, used: False
[2m[36m(hyperopt_with_tune pid=7710)[0m TPU available: False, using: 0 TPU cores
[2m[36m(hyperopt_with_tune pid=7710)[0m IPU available: False, using: 0 IPUs
[2m[36m(hyperopt_with_tune pid=7710)[0m 
[2m[36m(hyperopt_with_tune pid=7710)[0m   | Name            | Type                      | Params
[2m[36m(hyperopt_with_tune pid=7710)[0m --------------------------------------------------------------
[2m[36m(hyperopt_with_tune pid=7710)[0m 0 | hp_metric       | R2Score                   | 0     
[2m[36m(hyperopt_with_tune pid=7710)[0m 1 | convnet         | BasicConv1D               | 208   
[2m[36m(hyperopt_with_tune pid=7710)[0m 2 | reverse_convnet | BasicConv1D               | 208   
[2m[36m(hyperopt_with_tune pid=7710)[0m 3 | fcn             | BasicFullyConnectedModule | 25.2 K
[2m[36m(hyperopt_with_tune pid=7710)[0m 4 

Result for hyperopt_with_tune_8b3ae_00007:
  date: 2022-10-19_22-59-13
  done: true
  experiment_id: 34146fb72cc94908923422e52e6f5993
  experiment_tag: 7_aggr=max,batch_size=128,0=5,0=0.2000,0=4,lr=0.0004,strand=ds
  hostname: codespaces-97ce9f
  iterations_since_restore: 10
  loss: 0.08236898481845856
  node_ip: 172.16.5.4
  pid: 7808
  time_since_restore: 2.689558744430542
  time_this_iter_s: 0.2525146007537842
  time_total_s: 2.689558744430542
  timestamp: 1666220353
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: 8b3ae_00007
  warmup_time: 0.003590106964111328
  
[2m[36m(hyperopt_with_tune pid=7808)[0m Dropping 0 sequences with NaN targets.
[2m[36m(hyperopt_with_tune pid=7808)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7808)[0m No transforms given, assuming just need to tensorize.
[2m[36m(hyperopt_with_tune pid=7808)[0m [<ray.tune.integration.pytorch_lightning.TuneReportCallback object at 0x7f9f28f24b50>]


[2m[36m(hyperopt_with_tune pid=7808)[0m Global seed set to 13
[2m[36m(hyperopt_with_tune pid=7808)[0m GPU available: False, used: False
[2m[36m(hyperopt_with_tune pid=7808)[0m TPU available: False, using: 0 TPU cores
[2m[36m(hyperopt_with_tune pid=7808)[0m IPU available: False, using: 0 IPUs
[2m[36m(hyperopt_with_tune pid=7808)[0m 
[2m[36m(hyperopt_with_tune pid=7808)[0m   | Name            | Type                      | Params
[2m[36m(hyperopt_with_tune pid=7808)[0m --------------------------------------------------------------
[2m[36m(hyperopt_with_tune pid=7808)[0m 0 | hp_metric       | R2Score                   | 0     
[2m[36m(hyperopt_with_tune pid=7808)[0m 1 | convnet         | BasicConv1D               | 336   
[2m[36m(hyperopt_with_tune pid=7808)[0m 2 | reverse_convnet | BasicConv1D               | 336   
[2m[36m(hyperopt_with_tune pid=7808)[0m 3 | fcn             | BasicFullyConnectedModule | 12.4 K
[2m[36m(hyperopt_with_tune pid=7808)[0m 4 

Result for hyperopt_with_tune_8b3ae_00008:
  date: 2022-10-19_22-59-14
  done: false
  experiment_id: bee7ef4669814bf7a354bbd36ad9bd4f
  hostname: codespaces-97ce9f
  iterations_since_restore: 1
  loss: 0.6516900658607483
  node_ip: 172.16.5.4
  pid: 7710
  time_since_restore: 0.4845616817474365
  time_this_iter_s: 0.4845616817474365
  time_total_s: 0.4845616817474365
  timestamp: 1666220354
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 8b3ae_00008
  warmup_time: 0.0035009384155273438
  
Result for hyperopt_with_tune_8b3ae_00006:
  date: 2022-10-19_22-59-14
  done: true
  experiment_id: b17153f5c6e14c5c976cbcadd31cc891
  experiment_tag: 6_aggr=avg,batch_size=64,0=5,0=0.2000,0=4,lr=0.0078,strand=ss
  hostname: codespaces-97ce9f
  iterations_since_restore: 10
  loss: 0.08510497957468033
  node_ip: 172.16.5.4
  pid: 7919
  time_since_restore: 3.2202892303466797
  time_this_iter_s: 0.44707512855529785
  time_total_s: 3.2202892303466797
  timestamp: 1666220354
  timesteps

2022-10-19 22:59:16,563	INFO tune.py:759 -- Total run time: 48.71 seconds (48.29 seconds for the tuning loop).


In [9]:
analysis.results_df

Unnamed: 0_level_0,loss,time_this_iter_s,done,timesteps_total,episodes_total,training_iteration,experiment_id,date,timestamp,time_total_s,...,config/output_dim,config/strand,config/aggr,config/lr,config/batch_size,config/conv_kwargs/channels,config/conv_kwargs/conv_kernels,config/conv_kwargs/pool_kernels,config/conv_kwargs/dropout_rates,config/fc_kwargs/hidden_dims
trial_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8b3ae_00000,0.081207,0.277646,True,,,10,bee7ef4669814bf7a354bbd36ad9bd4f,2022-10-19_22-59-09,1666220349,31.060358,...,1,ts,max,0.000516,128,"[4, 16]",[3],[2],[0.2],[32]
8b3ae_00001,0.084218,0.564788,True,,,10,34146fb72cc94908923422e52e6f5993,2022-10-19_22-59-11,1666220351,23.230573,...,1,ss,max,0.002293,32,"[4, 16]",[3],[2],[0.1],[32]
8b3ae_00002,0.080231,0.462187,True,,,10,b17153f5c6e14c5c976cbcadd31cc891,2022-10-19_22-59-10,1666220350,13.405469,...,1,ts,avg,0.014657,64,"[4, 16]",[5],[2],[0.1],[32]
8b3ae_00003,0.089137,0.152131,True,,,10,18a4210a9ca8418f990c3772efc7e4f9,2022-10-19_22-59-08,1666220348,1.824489,...,1,ss,avg,0.037255,128,"[4, 16]",[5],[4],[0.2],[32]
8b3ae_00004,0.080214,0.392224,True,,,10,18a4210a9ca8418f990c3772efc7e4f9,2022-10-19_22-59-15,1666220355,6.164149,...,1,ts,max,0.095442,32,"[4, 16]",[3],[2],[0.1],[32]
8b3ae_00005,0.080246,0.31703,True,,,10,bee7ef4669814bf7a354bbd36ad9bd4f,2022-10-19_22-59-13,1666220353,4.151783,...,1,ts,max,0.065663,64,"[4, 16]",[3],[4],[0.1],[32]
8b3ae_00006,0.085105,0.447075,True,,,10,b17153f5c6e14c5c976cbcadd31cc891,2022-10-19_22-59-14,1666220354,3.220289,...,1,ss,avg,0.007753,64,"[4, 16]",[5],[4],[0.2],[32]
8b3ae_00007,0.082369,0.252515,True,,,10,34146fb72cc94908923422e52e6f5993,2022-10-19_22-59-13,1666220353,2.689559,...,1,ds,max,0.000389,128,"[4, 16]",[5],[4],[0.2],[32]
8b3ae_00008,0.759937,0.186556,True,,,10,bee7ef4669814bf7a354bbd36ad9bd4f,2022-10-19_22-59-16,1666220356,2.399456,...,1,ts,max,0.073574,128,"[4, 16]",[3],[2],[0.2],[32]
8b3ae_00009,0.082129,0.159029,True,,,10,34146fb72cc94908923422e52e6f5993,2022-10-19_22-59-16,1666220356,2.296237,...,1,ts,avg,0.000654,128,"[4, 16]",[5],[4],[0.1],[32]


In [11]:
analysis.results_df.columns

Index(['loss', 'time_this_iter_s', 'done', 'timesteps_total', 'episodes_total',
       'training_iteration', 'experiment_id', 'date', 'timestamp',
       'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore',
       'timesteps_since_restore', 'iterations_since_restore', 'warmup_time',
       'experiment_tag', 'config/arch', 'config/input_len',
       'config/output_dim', 'config/strand', 'config/aggr', 'config/lr',
       'config/batch_size', 'config/conv_kwargs/channels',
       'config/conv_kwargs/conv_kernels', 'config/conv_kwargs/pool_kernels',
       'config/conv_kwargs/dropout_rates', 'config/fc_kwargs/hidden_dims'],
      dtype='object')

---

In [9]:
def train_tune(config, train_dataset, val_dataset, epochs=10, gpus=0):
  model =  eu.models.FCN(
    input_len=100, 
    output_dim=1, 
    lr=config["lr"]
    )
  train_dl = train_dataset.to_dataloader(batch_size=config["batch_size"])
  val_dl = val_dataset.to_dataloader(batch_size=config["batch_size"])
  trainer = Trainer(
    max_epochs=epochs,
    gpus=gpus,
    progress_bar_refresh_rate=0,
    callbacks=[callback])
  trainer.fit(model, train_dataloaders=train_dl, val_dataloaders=val_dl)

In [10]:
trainable = tune.with_parameters(
    train_tune,
    train_dataset=sdataset_train,
    val_dataset=sdataset_val,
    epochs=10,
    gpus=0
)

In [48]:
best_trial = analysis.best_trial  # Get best trial
best_config = analysis.best_config  # Get best trial's hyperparameters
best_logdir = analysis.best_logdir  # Get best trial's logdir
best_checkpoint = analysis.best_checkpoint  # Get best trial's best checkpoint
best_result = analysis.best_result  # Get best trial's last results

2022-10-19 03:34:19,999	ERROR experiment_analysis.py:486 -- No checkpoints have been found for trial train_tune_86a07_00000.


In [49]:
best_trial

train_tune_86a07_00000

In [50]:
best_config

{'lr': 0.00010000831284081109, 'batch_size': 32}

In [51]:
best_logdir

'/home/vscode/ray_results/test/train_tune_86a07_00000_0_batch_size=32,lr=0.0001_2022-10-19_03-31-32'

In [52]:
best_checkpoint

In [53]:
best_result

{'loss': 0.11025925725698471,
 'time_this_iter_s': 0.11937999725341797,
 'done': True,
 'timesteps_total': None,
 'episodes_total': None,
 'training_iteration': 10,
 'trial_id': '86a07_00000',
 'experiment_id': '299e91b7003d4457b77be9e3b61a2294',
 'date': '2022-10-19_03-31-51',
 'timestamp': 1666150311,
 'time_total_s': 10.0547194480896,
 'pid': 23527,
 'hostname': 'codespaces-97ce9f',
 'node_ip': '172.16.5.4',
 'config': {'lr': 0.00010000831284081109, 'batch_size': 32},
 'time_since_restore': 10.0547194480896,
 'timesteps_since_restore': 0,
 'iterations_since_restore': 10,
 'warmup_time': 0.0036079883575439453,
 'experiment_tag': '0_batch_size=32,lr=0.0001'}

Bad pipe message: %s [b'Z\xe4~\xb8\x0c\\\x08\x1b`', b'\x93\xa5x\xa8\xf9\x18\xaf \xc7\x85\x1f\xd3\x93x\x86k9\xcd\xfe C\xf5\x80\xd7\xd2\x85\xaeE\x0e_\xbeHF\xe6\xc2p\xe0`W\xab\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00', b'\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x00\x1e\x00\x1c\x04\x03\x05\x03\x06\x03\x08\x07\x08\x08\x08\t\x08\n\x08\x0b\x08\x04\x08\x05\x08\x06\x04\x01\x05\x01\x06\x01\x00+\x00\x03\x02\x03\x04\x00-\x00\x02\x01\x01\x003\x00&\x00$\x00\x1d\x00 /\x934\x9f\xd5\x8eo7\x8d\xe5rv\xbc1z^B\x17%\x86\x00\x9cn\x82\xebF\xf6*Qy\t\x1e']
Bad pipe message: %s [b"q8\xf8\xce\xd9O\xe02\xaeh\x96\xe3\ru'\xa7\xff\xba O\xe0\xf9\x88\xe3\x04{\x1em\xf6\xe3Zt\xb5\x8e\xa3-\xd1\xba=\xd1`-\xfdC\xca\xde$\x1a\x85V\xc6\x00\x08\x13\x02\x13"]
Bad pipe message: %s [b'\x12\xae\xc8\xf1gV\r\x002\xe63\x8d\x9e\x1b\x14>O\xcc\x00\x00|\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\x