In [9]:
import silence_tensorflow.auto
import tensorflow as tf
import ray
import os
import pandas as pd
from epigenomic_dataset import load_all_tasks
from epigenomic_dataset.utils import normalize_epigenomic_data
from crr_prediction.baseline_models import deep_enhancers
from crr_prediction.sequences import build_mlp_sequences
from crr_prediction.meta_models import build_mlp_meta_model
from crr_prediction.train_meta_models import train
from ucsc_genomes_downloader import Genome
from meta_models.tuner import RayHyperOptTuner
from meta_models.utils import stratified_holdouts, get_minimum_gpu_rate_per_trial,enable_subgpu_training
from plot_keras_history import plot_history

In [2]:
# NOTE(review): this import sits outside the notebook's main import cell, and
# the recorded execution counts show this cell ran before the current version
# of the import cell. Keep it runnable on a fresh kernel.
from meta_models.utils import patch_global_checkpoints_interval
# Invoked once, before any tuning is started further down the notebook.
# NOTE(review): judging by the name this presumably changes a global
# checkpoint interval used during tuning — confirm in meta_models.utils.
patch_global_checkpoints_interval()

In [3]:
# --- Dataset selection (forwarded to load_all_tasks) ---
window_size = 256
cell_line = "GM12878"

# --- Holdout generation (forwarded to stratified_holdouts) ---
# random_state is also handed to the sequence builder further down.
random_state = 42
test_size = 0.2    # outer split uses train_size = 1 - test_size
valid_size = 0.2   # inner split uses train_size = 1 - valid_size
n_splits = 2

# --- Hyper-parameter search budget (forwarded to tuner.tune) ---
num_samples = 10
random_search_steps = 5

# --- Sequence construction (forwarded to the sequence builder) ---
batch_size = 256

In [4]:
# Start Ray with its default settings. The call is the last expression of the
# cell, so the connection-info dictionary it returns is shown as the output.
# NOTE(review): per the Ray documentation, calling ray.init() a second time in
# the same kernel raises unless ray.shutdown() was called first or
# ignore_reinit_error=True is passed — keep in mind when re-running this cell.
ray.init()

2021-01-18 08:16:32,071	INFO services.py:1174 -- View the Ray dashboard at http://127.0.0.1:8265


{'node_ip_address': '192.168.127.128',
 'raylet_ip_address': '192.168.127.128',
 'redis_address': '192.168.127.128:6379',
 'object_store_address': '/tmp/ray/session_2021-01-18_08-16-31_454960_4372/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2021-01-18_08-16-31_454960_4372/sockets/raylet',
 'webui_url': '127.0.0.1:8265',
 'session_dir': '/tmp/ray/session_2021-01-18_08-16-31_454960_4372',
 'metrics_export_port': 41854,
 'node_id': 'e331ff4b3fac87f4a27d2541a40d002fb5211d19de6b9fa4c04d5b10'}

In [10]:
# Imported from meta_models.utils in the import cell; called once before the
# tuning loop below.
# NOTE(review): presumably configures the framework so that several trials can
# share a single GPU — confirm against meta_models.utils.
enable_subgpu_training()

In [12]:
# Tune, retrain and evaluate one meta-model per (task, holdout) pair.
#
# For every task returned by load_all_tasks and every outer holdout:
#   1. split the training portion again into subtrain / validation,
#   2. wrap all four partitions into sequences,
#   3. run the hyper-parameter search on subtrain vs. validation,
#   4. refit on the full training partition,
#   5. evaluate on all four partitions,
#   6. write tuning analysis, training history and performance to CSV.

# Factories are aliased so that another model family can be plugged in by
# changing only these two lines.
build_sequences = build_mlp_sequences
build_meta_model = build_mlp_meta_model
# Forwarded to tuner.tune(total_threads=...) and RayHyperOptTuner(resolution=...).
# NOTE(review): 80 threads presumably matches the original machine — confirm
# before running elsewhere.
total_threads = 80
resolution = 10

for (X, y), task in load_all_tasks(
    cell_line=cell_line,
    window_size=window_size,
):
    for holdout_number, train_x, test_x, train_y, test_y in stratified_holdouts(
        n_splits=n_splits,
        random_state=random_state,
        train_size=1-test_size,
        X=X,
        y=y,
        task_name=task
    ):
        # NOTE(review): unlike the outer call, this one is not iterated and is
        # given neither n_splits nor task_name; it is unpacked directly into
        # four values. Confirm that this call form yields exactly one split.
        subtrain_x, valid_x, subtrain_y, valid_y = stratified_holdouts(
            random_state=random_state,
            train_size=1-valid_size,
            X=train_x,
            y=train_y
        )
        # The sequences get a `_sequence` suffix so they do not rebind `train`,
        # which is the function imported from crr_prediction.train_meta_models.
        (
            train_sequence,
            test_sequence,
            subtrain_sequence,
            valid_sequence,
        ) = build_sequences(
            train_x=train_x,
            test_x=test_x,
            subtrain_x=subtrain_x,
            valid_x=valid_x,
            train_y=train_y,
            test_y=test_y,
            subtrain_y=subtrain_y,
            valid_y=valid_y,
            batch_size=batch_size,
            random_state=random_state
        )

        # Hyper-parameter search: subtrain is fitted, validation is monitored.
        tuner = RayHyperOptTuner(
            meta_model=build_meta_model(subtrain_sequence.features),
            resolution=resolution
        )
        tuning_analysis = tuner.tune(
            train=subtrain_sequence.rasterize(verbose=False),
            validation_data=valid_sequence.rasterize(verbose=False),
            name=f"{task}-{holdout_number}",
            num_samples=num_samples,
            random_search_steps=random_search_steps,
            total_threads=total_threads,
            verbose=1
        )

        os.makedirs(f"results/tuning_analyses/{task}/{cell_line}", exist_ok=True)
        tuning_analysis.to_csv(
            f"results/tuning_analyses/{task}/{cell_line}/{holdout_number}.csv",
            index=False
        )

        # Refit on the whole training partition (subtrain + validation).
        history = tuner.fit(
            train=train_sequence.rasterize(verbose=False),
            verbose=True
        )

        os.makedirs(f"results/training_histories/{task}/{cell_line}", exist_ok=True)
        history.to_csv(
            f"results/training_histories/{task}/{cell_line}/{holdout_number}.csv",
            index=False
        )

        # Evaluate every partition, in the same order as before:
        # train, test, subtrain, valid. A tuple of pairs keeps the order
        # explicit regardless of the interpreter's dict implementation.
        evaluations = [
            (run_type, tuner.evaluate(sequence, verbose=False))
            for run_type, sequence in (
                ("train", train_sequence),
                ("test", test_sequence),
                ("subtrain", subtrain_sequence),
                ("valid", valid_sequence),
            )
        ]

        # Columns shared by every row of this holdout's performance report.
        metadata = {
            "task": task,
            "cell_line": cell_line,
            "holdout_number": holdout_number,
            **tuner.optimal_configuration
        }

        performance = pd.DataFrame([
            {
                "run_type": run_type,
                **metadata,
                **scores,
            }
            for run_type, scores in evaluations
        ])

        # Persist the report: previously it was rebuilt on every iteration and
        # then thrown away, so only the last holdout survived in memory.
        os.makedirs(f"results/performances/{task}/{cell_line}", exist_ok=True)
        performance.to_csv(
            f"results/performances/{task}/{cell_line}/{holdout_number}.csv",
            index=False
        )

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 3319, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-6a858be0aec2>", line 46, in <module>
    verbose=1
  File "/usr/local/lib/python3.6/dist-packages/meta_models/tuner/ray_tuner.py", line 142, in tune
    verbose=verbose
  File "/usr/local/lib/python3.6/dist-packages/ray/tune/tune.py", line 420, in run
    runner.step()
  File "/usr/local/lib/python3.6/dist-packages/ray/tune/trial_runner.py", line 361, in step
    self._process_events()  # blocking
  File "/usr/local/lib/python3.6/dist-packages/ray/tune/trial_runner.py", line 478, in _process_events
    trial = self.trial_executor.get_next_available_trial()  # blocking
  File "/usr/local/lib/python3.6/dist-packages/ray/tune/ray_trial_executor.py", line 489, in get_next_available_trial
    [result_id], _ = ray.wait(shuffled_results)
  File "/usr/local/lib/python3.6/dist-p

KeyboardInterrupt: 