# GRCh38 DNase 12 kb CAE Final Test

**Task:** Evaluate whether the final network always learns or randomly fails to train.

In [7]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import json
import numpy as np
import os
import sys

# The base directory is one level up. Every path below is derived from it so
# that the location is defined in exactly one place.
base = '..'

# Put the base directory on the import path (once) so that later cells can
# import the modules that live there.
module_path = os.path.abspath(base)
if module_path not in sys.path:
    sys.path.append(module_path)

name = 'cnn-final-test'
dataset = 'cnn-search'
# NOTE(review): the notebook title says "DNase" but this loads the ChIP
# settings file -- confirm which one is intended.
settings_filepath = os.path.join(base, 'settings-grch38-chip-12kb.json')
search_filepath = os.path.join(base, '{}.json'.format(name))

# Parsed settings, passed to the test training run further down
with open(settings_filepath, "r") as f:
    settings = json.load(f)

# Parsed search definition for this experiment
with open(search_filepath, "r") as f:
    search = json.load(f)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Create training jobs

In [13]:
from jobs import jobs

# Both config files are handed over relative to the base directory
search_relpath = os.path.relpath(search_filepath, base)
settings_relpath = os.path.relpath(settings_filepath, base)

# Options for the training jobs, collected in one place
job_options = dict(
    dataset=dataset,
    name=name,
    cluster='seasdgx1',
    epochs=5,
    batch_size=256,
    repeat=5,
    base=base,
    clear=True,
    verbose=False,
)

jobs(search_relpath, settings_relpath, **job_options)

HBox(children=(IntProgress(value=0, description='Jobs', max=4, style=ProgressStyle(description_width='initial'…

Created slurm file for training 4 neural networks


## Test training run of the most complex CAE

In [4]:
from train import train_on_single_dataset

# Load the model definitions that belong to this experiment
definitions_filepath = os.path.join(base, 'definitions-{}.json'.format(name))
with open(definitions_filepath, 'r') as definitions_file:
    definitions = json.load(definitions_file)

# Short local run (2 epochs) of the definition at index 1
training_options = dict(
    definitions=definitions,
    definition_idx=1,
    epochs=2,
    batch_size=256,
    base=base,
    clear=True,
)

train_on_single_dataset(settings, 'merged', **training_options)

HBox(children=(IntProgress(value=0, description='Training', max=2, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='Epoch 0', max=20668, style=ProgressStyle(description_width='i…

KeyboardInterrupt: 

In [None]:
from ae.utils import check_status

# Check the training results of this experiment. The dataset is taken from the
# `dataset` config variable instead of repeating the literal, so this check
# cannot drift from the dataset the jobs were created for.
okay, not_found, outdated = check_status(
    name, 'training', dataset, base=base
)

if okay:
    print('Trainings completed')
else:
    print('Trainings did not finish. Missing {}'.format(len(not_found)))

## Create evaluation jobs

In [None]:
from evaluate import create_jobs

# NOTE(review): `name` is passed both as the first positional argument and as
# the `name=` keyword. This only works if the first parameter of
# `create_jobs` is not itself called `name` -- confirm against its signature.
evaluation_options = dict(
    name=name,
    dataset=dataset,
    cluster='holyseas',
    base=base,
    clear=True,
    incl_dtw=False,
)

create_jobs(name, **evaluation_options)

In a terminal, run: `sbatch evaluate-cnn-final-test.slurm`

In [None]:
from ae.utils import check_status

# Check the evaluation results of this experiment. The dataset is taken from
# the `dataset` config variable instead of repeating the literal, so this check
# cannot drift from the dataset the jobs were created for.
okay, not_found, outdated = check_status(
    name, 'evaluation', dataset, base=base
)

if okay:
    print('Evaluation completed')
else:
    print('Evaluation did not finish. Missing {}'.format(len(not_found)))

## Compare

In [None]:
from compare import compare

# File holding the model definitions of this experiment
definitions_filename = 'definitions-{}.json'.format(name)

compare_options = dict(
    dataset_name=dataset,
    base=base,
    clear=False,
    verbose=False,
    silent=False,
    remove_common_prefix_from_df=True,
)

# The result is kept in `performance` for the grid in the following cell
performance = compare(definitions_filename, **compare_options)

In [None]:
import qgrid

# Wrap `performance` in a qgrid widget. The handle is kept in `qgw` because the
# following cell reads the selected rows back from it via `get_selected_df()`.
qgw = qgrid.show_grid(performance)
# Bare trailing expression so the notebook displays the widget as cell output
qgw

In [None]:
# `IPython.display` is the public import location; importing `display` from
# `IPython.core.display` is deprecated.
from IPython.display import Image, display

# For every row currently selected in the grid, print its index label and show
# the matching prediction plot. The index label is what the file name under
# `models/` is built from.
for model_name in qgw.get_selected_df().index:
    print(model_name)
    display(Image(os.path.join(
        base, 'models', 'cf-{}---predictions-{}.png'.format(model_name, dataset)
    )))