In [1]:
import data.audio_dataset_v3 as data_utils
import models.inversion_v1 as model_utils
from abstract_model import AbstractModel

import os

import torch
import torch.nn as nn
from torch import optim

from argparse import Namespace
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint

from ax.plot.contour import plot_contour
from ax.plot.trace import optimization_trace_single_method
from ax.service.managed_loop import optimize
from ax.utils.notebook.plotting import render, init_notebook_plotting

from ax.service.ax_client import AxClient

from pathlib import Path

from ipywidgets import IntProgress

In [2]:
from tensorboard.backend.event_processing import event_accumulator
import numpy as np

def _load_run(path):
    """Read every scalar series recorded in a TensorBoard run.

    Args:
        path: Location handed straight to ``EventAccumulator``.

    Returns:
        dict: Maps each scalar tag to a ``(steps, values)`` pair of NumPy
        arrays. Tags are inserted in sorted order.
    """
    accumulator = event_accumulator.EventAccumulator(path)
    accumulator.Reload()

    series_by_tag = {}
    for tag in sorted(accumulator.Tags()["scalars"]):
        events = list(accumulator.Scalars(tag))
        steps = np.asarray([event.step for event in events])
        values = np.asarray([event.value for event in events])
        series_by_tag[tag] = (steps, values)

    return series_by_tag

In [3]:
# Experiment configuration.
# The two filesystem locations default to the original cluster paths but can be
# redirected with environment variables, so the notebook can run on another
# machine without editing this cell.
algo = "inversion_v1"
data_path = os.environ.get(
    "LIBRISPEECH_DATA_PATH",
    "/scratch/prs392/incubator/data/LibriSpeech/",
)
checkpoint_path = os.environ.get(
    "OPENL3_CHECKPOINT_PATH",
    f"/scratch/prs392/incubator/checkpoints/openl3_librispeech/{algo}/",
)
experiment_name = "overfitting_10_audio_files_with_l1_loss"

In [4]:
# Make sure the experiment directory exists, then collect the names of its
# immediate sub-directories (one per logged run) in sorted order.
d = os.path.join(checkpoint_path, experiment_name)
Path(d).mkdir(parents=True, exist_ok=True)
versions = sorted(
    name
    for name in os.listdir(d)
    if os.path.isdir(os.path.join(d, name))
)

In [9]:
from pytorch_lightning.core.saving import load_hparams_from_yaml

# Collect the hyperparameters and best validation loss of every run already
# stored under the experiment directory, so finished runs can be reused.
list_existing_hparams = []
list_of_val_loss = []

for version in versions:
    version_dir = os.path.join(checkpoint_path, experiment_name, version)
    hparams_new = load_hparams_from_yaml(os.path.join(version_dir, 'hparams.yaml'))
    # Parse the event files once per run (previously they were parsed twice).
    scalars = _load_run(version_dir)

    # A run without stored hyperparameters or without any logged validation
    # loss cannot be reused: an empty hparams dict would match every
    # parameterization in train_evaluate, and a missing 'val_loss' tag would
    # raise KeyError and abort the whole cell.
    if not hparams_new or 'val_loss' not in scalars:
        print(f"Skipping {version}: no hparams.yaml content or no logged val_loss")
        continue

    best_val_loss = min(scalars['val_loss'][1])

    print(hparams_new)
    print(best_val_loss)

    # The dataset-size entries are fixed by train_evaluate and are not part of
    # the search space, so drop them before comparing against Ax parameters.
    for fixed_key in ("train_num_audios", "val_num_audios", "test_num_audios"):
        hparams_new.pop(fixed_key, None)

    list_existing_hparams.append(hparams_new)
    list_of_val_loss.append(best_val_loss)
    

In [10]:
# Display the hyperparameters and best validation losses recovered from earlier runs.
list_existing_hparams, list_of_val_loss

([], [])

In [11]:
def _find_cached_val_loss(parameterization, known_hparams, known_val_losses):
    """Return the stored loss of the first finished run matching ``parameterization``.

    A stored run matches when every one of its hyperparameters is present in
    ``parameterization`` with an equal value. Empty records are ignored because
    they would otherwise match any parameterization. Returns ``None`` when no
    stored run matches.
    """
    for stored, val_loss in zip(known_hparams, known_val_losses):
        if not stored:
            continue
        if all(
            key in parameterization and parameterization[key] == value
            for key, value in stored.items()
        ):
            return val_loss
    return None


def train_evaluate(parameterization):
    """Train one model for the given hyperparameters and report its validation loss.

    If a previously stored run (``list_existing_hparams`` / ``list_of_val_loss``)
    already covers ``parameterization``, its loss is returned without training.

    Args:
        parameterization: dict mapping hyperparameter names to values. It is
            not modified.

    Returns:
        dict: ``{'val_loss': (loss, 0.0)}``, the ``(mean, SEM)`` pair format
        accepted as ``raw_data`` by Ax's ``complete_trial``.
    """
    seed_everything(123)

    print(parameterization)

    cached_val_loss = _find_cached_val_loss(
        parameterization, list_existing_hparams, list_of_val_loss
    )
    if cached_val_loss is not None:
        print("Val loss: " + str(cached_val_loss))
        return {'val_loss': (cached_val_loss, 0.0)}

    AudioDataset = data_utils.AudioDataset
    InversionV1 = model_utils.InversionV1

    data_paths = {
        'train': os.path.join(data_path, 'train-clean-360'),
        'val': os.path.join(data_path, 'dev-clean'),
        'test': os.path.join(data_path, 'test-clean'),
    }

    # Build the hparams from a copy so the caller's dict (the Ax trial
    # parameters) does not silently gain the dataset-size keys.
    hparams = Namespace(**{
        **parameterization,
        'train_num_audios': 10,
        'val_num_audios': 10,
        'test_num_audios': 10,
    })

    model = AbstractModel(
        hparams=hparams,
        data_paths=data_paths,
        dataset_model=AudioDataset,
        model=InversionV1(),
        criterion=nn.L1Loss(),
    )
    logger = TensorBoardLogger(checkpoint_path, name=experiment_name)

    checkpoint_callback = ModelCheckpoint(
        filepath=None,
        save_top_k=1,  # was `True`, which only worked because True == 1
        save_last=True,
        verbose=True,
        monitor='val_loss',
        mode='min',
        prefix='',
    )

    trainer = Trainer(
        logger=logger,
        default_root_dir=checkpoint_path,
        checkpoint_callback=checkpoint_callback,
        gpus=-1,
        distributed_backend='dp',
        max_epochs=100,
        check_val_every_n_epoch=1,
        fast_dev_run=False,
    )
    trainer.fit(model)
    trainer.test(model)
    print("Val loss: " + str(model.best_validation_loss))
    return {'val_loss': (model.best_validation_loss, 0.0)}

In [12]:
# Ax experiment that searches optimiser and scheduler settings, minimising the
# "val_loss" metric.
# An earlier fixed-argument draft marked batch_size, lr, scheduler_epoch and
# scheduler_step_size as compulsory hyperparameters; the *_num_audios values
# (10 each) are not searched and are filled in by train_evaluate.
ax_client = AxClient()
ax_client.create_experiment(
    name="choose_optimizer_scheduler",
    parameters=[
        dict(name="batch_size", type="choice", values=[8, 16, 32]),
        dict(name="lr", type="range", bounds=[1e-6, 0.1], log_scale=True),
        dict(name="lr_type", type="choice", values=["adam", "sgd"]),
        dict(name="scheduler_epoch", type="choice", values=[3, 5, 7, 9]),
        dict(name="scheduler_step_size", type="range", bounds=[0.1, 1.0]),
    ],
    objective_name="val_loss",
    minimize=True,
)

[INFO 07-08 23:23:36] ax.service.ax_client: Starting optimization with verbose logging. To disable logging, set the `verbose_logging` argument to `False`. Note that float values in the logs are rounded to 2 decimal points.
[INFO 07-08 23:23:36] ax.modelbridge.dispatch_utils: Using Sobol generation strategy.


In WithDBSettings, db settings: None


In [13]:
# Overall trial budget, counting runs that were already finished on disk.
total_number_of_trials = 100

# Replay every previously finished run into Ax so the search starts from the
# results that already exist.
for params in list_existing_hparams:
    parameters, trial_index = ax_client.attach_trial(params)
    ax_client.complete_trial(trial_index=trial_index, raw_data=train_evaluate(parameters))

# Spend only what is left of the budget on new trials. The previous version
# subtracted the replayed runs twice (once per loop iteration and once more in
# the range bound), which cut the number of new trials to 100 - 2N.
remaining_trials = max(total_number_of_trials - len(list_existing_hparams), 0)

for _ in range(remaining_trials):
    parameters, trial_index = ax_client.get_next_trial()
    ax_client.complete_trial(trial_index=trial_index, raw_data=train_evaluate(parameters))
    

[INFO 07-08 23:23:43] ax.service.ax_client: Generated new trial 0 with parameters {'lr': 0.0, 'scheduler_step_size': 0.63, 'batch_size': 16, 'lr_type': 'sgd', 'scheduler_epoch': 7}.


{'lr': 0.001214606426666185, 'scheduler_step_size': 0.6331423196010292, 'batch_size': 16, 'lr_type': 'sgd', 'scheduler_epoch': 7}


NameError: name 'ModelCheckpoint' is not defined