### Run immuneML quickstart with Dask using the YAML specs

In [3]:
import logging
import os
import shutil
import sys
import warnings
from pathlib import Path

import yaml

from immuneML.app.ImmuneMLApp import ImmuneMLApp
from immuneML.environment.EnvironmentSettings import EnvironmentSettings
from immuneML.util.PathBuilder import PathBuilder


#### Build path

In [6]:
def build_path(path: str = None):
    """Prepare the working directory used by this notebook and return it.

    If ``path`` is given, it is handed to ``PathBuilder.build`` and that call's
    result is returned. If ``path`` is omitted, the directory
    ``EnvironmentSettings.root_path / "quickstart/"`` is used instead: an
    existing directory at that location is deleted first, then the location is
    passed to ``PathBuilder.build`` and the location itself is returned.
    """
    if path is not None:
        return PathBuilder.build(path)

    quickstart_dir = EnvironmentSettings.root_path / "quickstart/"
    # Start from a clean slate: drop results left over from a previous run.
    if os.path.isdir(quickstart_dir):
        shutil.rmtree(quickstart_dir)
    PathBuilder.build(quickstart_dir)
    return quickstart_dir


In [7]:
path = build_path()

In [8]:
path

WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart')

In [213]:
specs = {
    "definitions": {
        "datasets": {
            "my_synthetic_dataset": {"format": "RandomRepertoireDataset", "params": {"labels": {}}}
        },
        "motifs": {"my_motif": {"seed": "AA", "instantiation": "GappedKmer"}},
        "signals": {"my_signal": {"motifs": ["my_motif"], "implanting": "HealthySequence"}},
        "simulations": {"my_simulation": {"my_implantng": {"signals": ["my_signal"], "dataset_implanting_rate": 0.5,
                                                           "repertoire_implanting_rate": 0.1}}}
    },
    "instructions": {"simulation_instruction": {"type": "Simulation", "dataset": "my_synthetic_dataset", "simulation": "my_simulation",
                                                "export_formats": ["AIRR"]}}
}


In [11]:
specs_file = path / "simulation_specs.yaml"
with specs_file.open("w") as file:
    yaml.dump(specs, file)


In [12]:
app = ImmuneMLApp(specs_file, path / "result")

app.run()

2022-04-19 11:22:20.523200: Setting temporary cache path to ..\..\quickstart\result\cache
2022-04-19 11:22:20.524198: ImmuneML: parsing the specification...

2022-04-19 11:22:22.721248: Full specification is available at ..\..\quickstart\result\full_simulation_specs.yaml.

2022-04-19 11:22:22.722249: ImmuneML: starting the analysis...

2022-04-19 11:22:22.723249: Instruction 1/1 has started.




2022-04-19 11:22:37.650383: Instruction 1/1 has finished.
2022-04-19 11:22:37.685362: Generating HTML reports...
2022-04-19 11:22:38.472894: HTML reports are generated.
2022-04-19 11:22:38.476886: ImmuneML: finished analysis.



[SimulationState(signals=[<immuneML.simulation.implants.Signal.Signal object at 0x000001A56F5729D0>], simulation=<immuneML.simulation.Simulation.Simulation object at 0x000001A54B9E7400>, dataset=<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x000001A54B8D1DF0>, formats=['AIRR'], paths={'my_synthetic_dataset': {'AIRR': WindowsPath('../../quickstart/result/simulation_instruction/exported_dataset/airr')}}, resulting_dataset=<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x000001A56F7E6340>, result_path=WindowsPath('../../quickstart/result/simulation_instruction'), name='simulation_instruction')]

In [177]:
result1path = Path(path / "result1")


In [17]:
spec1path = Path(path / "simulation_specs.yaml")


In [22]:
import datetime
import re
from pathlib import Path

import yaml
from yaml import MarkedYAMLError


In [18]:
with spec1path.open("r") as file:
    workflow_specification = yaml.safe_load(file)


In [19]:
workflow_specification

{'definitions': {'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
    'params': {'labels': {}}}},
  'motifs': {'my_motif': {'instantiation': 'GappedKmer', 'seed': 'AA'}},
  'signals': {'my_signal': {'implanting': 'HealthySequence',
    'motifs': ['my_motif']}},
  'simulations': {'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
     'repertoire_implanting_rate': 0.1,
     'signals': ['my_signal']}}}},
 'instructions': {'simulation_instruction': {'dataset': 'my_synthetic_dataset',
   'export_formats': ['AIRR'],
   'simulation': 'my_simulation',
   'type': 'Simulation'}}}

In [28]:
def check_keys(specs: dict):
    """Recursively verify that every key in ``specs`` uses only letters, digits and underscores.

    Each key is printed together with its position inside its own dictionary.
    Nested dictionaries are checked as well, except those stored under
    ``column_mapping`` or ``metadata_column_mapping``, whose inner keys are
    not validated.

    Raises:
        AssertionError: if the string form of a key contains any other character.
    """
    allowed_key_pattern = r'^[A-Za-z0-9_]+$'
    unchecked_sections = ["column_mapping", "metadata_column_mapping"]

    for position, (key, value) in enumerate(specs.items()):
        print(position, key)
        key_as_text = str(key)
        assert re.match(allowed_key_pattern, key_as_text), \
            f"ImmuneMLParser: the keys in the specification can contain only letters, numbers and underscore. Error with key: {key}"
        if isinstance(value, dict) and key not in unchecked_sections:
            check_keys(value)


In [30]:
check_keys(workflow_specification)

0 definitions
0 datasets
0 my_synthetic_dataset
0 format
1 params
0 labels
1 motifs
0 my_motif
0 instantiation
1 seed
2 signals
0 my_signal
0 implanting
1 motifs
3 simulations
0 my_simulation
0 my_implantng
0 dataset_implanting_rate
1 repertoire_implanting_rate
2 signals
1 instructions
0 simulation_instruction
0 dataset
1 export_formats
2 simulation
3 type


### Definition parser

In [127]:
from immuneML.dsl.symbol_table.SymbolTable import SymbolTable
from immuneML.dsl.definition_parsers.MotifParser import MotifParser
from immuneML.dsl.definition_parsers.SimulationParser import SimulationParser
from immuneML.dsl.definition_parsers.SignalParser import SignalParser
from immuneML.dsl.import_parsers.ImportParser import ImportParser
from immuneML.dsl.definition_parsers.DefinitionParserOutput import DefinitionParserOutput
from immuneML.dsl.definition_parsers.EncodingParser import EncodingParser
from immuneML.dsl.definition_parsers.MLParser import MLParser
from immuneML.dsl.definition_parsers.MotifParser import MotifParser
from immuneML.dsl.definition_parsers.PreprocessingParser import PreprocessingParser
from immuneML.dsl.definition_parsers.ReportParser import ReportParser
from immuneML.dsl.definition_parsers.SignalParser import SignalParser
from immuneML.dsl.definition_parsers.SimulationParser import SimulationParser
from immuneML.util.ParameterValidator import ParameterValidator
import copy
from immuneML.util.ReflectionHandler import ReflectionHandler
from immuneML.dsl.ObjectParser import ObjectParser
from immuneML.dsl.DefaultParamsLoader import DefaultParamsLoader


In [41]:
symbol_table = SymbolTable()


In [46]:
def call_if_exists(key: str, method, specs: dict, symbol_table: SymbolTable):
    """Run ``method`` on the ``key`` section of ``specs`` when that section is present.

    Returns:
        The result of ``method(specs[key], symbol_table)`` if ``key`` is in
        ``specs``; otherwise the tuple ``(symbol_table, {})``, i.e. the symbol
        table as passed in plus an empty specification.
    """
    if key not in specs:
        return symbol_table, {}
    return method(specs[key], symbol_table)

In [49]:
specs = workflow_specification["definitions"]
specs


{'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
   'params': {'labels': {}}}},
 'motifs': {'my_motif': {'instantiation': 'GappedKmer', 'seed': 'AA'}},
 'signals': {'my_signal': {'implanting': 'HealthySequence',
   'motifs': ['my_motif']}},
 'simulations': {'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
    'repertoire_implanting_rate': 0.1,
    'signals': ['my_signal']}}}}

#### Motif parser

In [98]:
from immuneML.simulation.motif_instantiation_strategy.MotifInstantiationStrategy import MotifInstantiationStrategy
from immuneML.data_model.receptor.receptor_sequence.Chain import Chain
from immuneML.simulation.implants.Motif import Motif


In [63]:
motifs = specs["motifs"]

In [65]:
for key in motifs.keys():

    print(key)





my_motif


In [91]:
motifs.keys()

AttributeError: 'dict_keys' object has no attribute 'next'

In [66]:
motifs["my_motif"]


{'instantiation': 'GappedKmer', 'seed': 'AA'}

In [108]:
motif_dict = copy.deepcopy(motifs["my_motif"])
motif_dict

{'instantiation': 'GappedKmer', 'seed': 'AA'}

In [109]:
valid_values = ReflectionHandler.all_nonabstract_subclass_basic_names(
    MotifInstantiationStrategy, "Instantiation", "motif_instantiation_strategy/")

valid_values

['GappedKmer']

In [110]:
instantiation_object = ObjectParser.parse_object(motifs["my_motif"]["instantiation"], valid_values, "Instantiation",
                                                 "motif_instantiation_strategy", "MotifParser", "my_motif")

instantiation_object

<immuneML.simulation.motif_instantiation_strategy.GappedKmerInstantiation.GappedKmerInstantiation at 0x1a56f796190>

In [111]:
motif_dict["instantiation"] = instantiation_object
motif_dict["identifier"] = "my_motif"  # motifs.keys()


In [112]:
if "name_chain1" in motifs["my_motif"]:
    motif_dict["name_chain1"] = Chain[motifs["my_motif"]["name_chain1"].upper()]


In [113]:
motif_dict

{'instantiation': <immuneML.simulation.motif_instantiation_strategy.GappedKmerInstantiation.GappedKmerInstantiation at 0x1a56f796190>,
 'seed': 'AA',
 'identifier': 'my_motif'}

### Motif

In [114]:
motif = Motif(**motif_dict)

In [115]:
motif

Motif(identifier='my_motif', instantiation=<immuneML.simulation.motif_instantiation_strategy.GappedKmerInstantiation.GappedKmerInstantiation object at 0x000001A56F796190>, seed='AA', seed_chain1=None, name_chain1=None, seed_chain2=None, name_chain2=None)

In [120]:
symbol_table.add("my_motif", SymbolType.MOTIF , motif )




In [126]:
symbol_table.get("my_motif")


Motif(identifier='my_motif', instantiation=<immuneML.simulation.motif_instantiation_strategy.GappedKmerInstantiation.GappedKmerInstantiation object at 0x000001A56F796190>, seed='AA', seed_chain1=None, name_chain1=None, seed_chain2=None, name_chain2=None)

In [54]:
from immuneML.dsl.symbol_table.SymbolType import SymbolType


def parse_motifs(motifs: dict, symbol_table: SymbolTable):
    """Turn every motif specification into a motif object stored in ``symbol_table``.

    For each ``(motif id, spec)`` pair the spec's keys are checked with
    ``ParameterValidator.assert_keys`` against the list of known motif keys
    (with ``exclusive=False``), the motif is built by
    ``MotifParser._parse_motif`` and then added to the symbol table under its
    id with type ``SymbolType.MOTIF``.

    Returns:
        The tuple ``(symbol_table, motifs)``; ``motifs`` is the input dict.
    """
    allowed_motif_keys = ["seed", "instantiation", "seed_chain1",
                          "seed_chain2", "name_chain1", "name_chain2"]

    for motif_id, motif_spec in motifs.items():
        ParameterValidator.assert_keys(motif_spec.keys(), allowed_motif_keys, "MotifParser", motif_id, exclusive=False)
        parsed_motif = MotifParser._parse_motif(motif_id, motif_spec)
        symbol_table.add(motif_id, SymbolType.MOTIF, parsed_motif)

    return symbol_table, motifs


def _parse_motif(key: str, motif_item: dict):
    """Build a ``Motif`` from one motif specification.

    Works on a deep copy of ``motif_item``, so the caller's dict is not modified
    by the replacements below:

    * ``instantiation`` is replaced by the object returned from
      ``ObjectParser.parse_object`` for the requested instantiation name;
    * ``identifier`` is set to ``key``;
    * ``name_chain1`` / ``name_chain2``, when present, are replaced by the
      ``Chain`` member looked up by their upper-cased value.

    Raises:
        AssertionError: if neither ``seed`` nor all four paired-chain
            parameters are present.
    """
    motif_kwargs = copy.deepcopy(motif_item)

    instantiation_names = ReflectionHandler.all_nonabstract_subclass_basic_names(
        MotifInstantiationStrategy, "Instantiation", "motif_instantiation_strategy/")
    motif_kwargs["instantiation"] = ObjectParser.parse_object(
        motif_item["instantiation"], instantiation_names, "Instantiation",
        "motif_instantiation_strategy", "MotifParser", key)
    motif_kwargs["identifier"] = key

    for chain_key in ("name_chain1", "name_chain2"):
        if chain_key in motif_item:
            motif_kwargs[chain_key] = Chain[motif_item[chain_key].upper()]

    paired_chain_keys = ["name_chain1", "name_chain2", "seed_chain1", "seed_chain2"]
    has_single_chain_seed = "seed" in motif_kwargs
    assert has_single_chain_seed or all(el in motif_kwargs for el in paired_chain_keys), \
        "MotifParser: please check the documentation for motif definition. Either parameter `seed` has to be set (for simulation in single " \
        "chain data) or all of the parameters `name_chain1`, `name_chain2`, `seed_chain1`, `seed_chain2` (for simulation for paired chain data)."

    return Motif(**motif_kwargs)


### Signal Parser

In [139]:
from immuneML.simulation.implants.Signal import Signal


In [150]:
VALID_KEYS = ["motifs", "implanting"]
signals = specs["signals"]
signals

{'my_signal': {'implanting': 'HealthySequence', 'motifs': ['my_motif']}}

In [156]:
defaults = DefaultParamsLoader.load(
    "signal_implanting_strategy/", 'HealthySequenceImplanting')

defaults

{'implanting_computation': 'round', 'sequence_position_weights': None}

In [146]:
# Step-by-step walkthrough of signal parsing: every intermediate value is
# printed so the individual stages can be inspected in the cell output.
# The loop variables (`key`, `signal_spec`, `signal`, ...) stay defined at
# notebook level after the cell has run.
for key, signal_spec in signals.items():
    # Key check uses the library's SignalParser.VALID_KEYS rather than the
    # notebook-level VALID_KEYS list.
    ParameterValidator.assert_keys_present(
        signal_spec.keys(), SignalParser.VALID_KEYS, "SignalParser", key)

    print(key, signal_spec)
    
    # The implanting strategy object is built by the library parser.
    implanting_strategy = SignalParser._get_implanting_strategy(key, signal_spec)

    print(implanting_strategy)

    # The motif ids named by the signal are checked against the motif keys
    # currently registered in the symbol table.
    ParameterValidator.assert_keys(signal_spec["motifs"], symbol_table.get_keys_by_type(SymbolType.MOTIF), "SignalParser",
                                   f"motifs in signal {key}", False)
               
    print( symbol_table.get_keys_by_type(SymbolType.MOTIF))

    # Swap the motif ids for the objects stored in the symbol table.
    signal_motifs = [symbol_table.get(motif_id)
                     for motif_id in signal_spec["motifs"]]

    print(signal_motifs)


    signal = Signal (key, signal_motifs, implanting_strategy)

    print(signal)

    symbol_table.add(key, SymbolType.SIGNAL, signal)

    # Hard-coded lookup: shows the entry just added only while the signal id is "my_signal".
    print(symbol_table.get("my_signal"))



my_signal {'implanting': 'HealthySequence', 'motifs': ['my_motif']}
<immuneML.simulation.signal_implanting_strategy.HealthySequenceImplanting.HealthySequenceImplanting object at 0x000001A57017F4C0>
['my_motif']
[Motif(identifier='my_motif', instantiation=<immuneML.simulation.motif_instantiation_strategy.GappedKmerInstantiation.GappedKmerInstantiation object at 0x000001A56F796190>, seed='AA', seed_chain1=None, name_chain1=None, seed_chain2=None, name_chain2=None)]
Signal id: my_signal; motifs: my_motif - AA
Signal id: my_signal; motifs: my_motif - AA




In [None]:
VALID_KEYS = ["motifs", "implanting"]
def parse_signals(signals: dict, symbol_table: SymbolTable):
    """Create a ``Signal`` for every entry of ``signals`` and register it in ``symbol_table``.

    For each ``(signal id, spec)`` pair:

    1. the spec's keys are checked with ``ParameterValidator.assert_keys_present``
       against ``SignalParser.VALID_KEYS``;
    2. the implanting strategy is built by ``SignalParser._get_implanting_strategy``;
    3. the motif ids listed under ``"motifs"`` are checked against the motif
       keys already present in the symbol table and then replaced by the
       stored motif objects;
    4. the resulting ``Signal`` is added to the symbol table under the signal id
       with type ``SymbolType.SIGNAL``.

    Returns:
        The tuple ``(symbol_table, signals)``; ``signals`` is the input dict.
        The tuple is returned for an empty ``signals`` dict as well.
    """
    for key, signal_spec in signals.items():
        ParameterValidator.assert_keys_present(
            signal_spec.keys(), SignalParser.VALID_KEYS, "SignalParser", key)
        implanting_strategy = SignalParser._get_implanting_strategy(
            key, signal_spec)

        ParameterValidator.assert_keys(signal_spec["motifs"], symbol_table.get_keys_by_type(SymbolType.MOTIF), "SignalParser",
                                       f"motifs in signal {key}", False)

        signal_motifs = [symbol_table.get(
            motif_id) for motif_id in signal_spec["motifs"]]
        signal = Signal(key, signal_motifs, implanting_strategy)
        symbol_table.add(key, SymbolType.SIGNAL, signal)

    # The return must sit outside the loop: returning from inside it would stop
    # after the first signal and yield None when there are no signals at all.
    return symbol_table, signals

def _get_implanting_strategy(key: str, signal: dict) -> SignalImplantingStrategy:
    """Instantiate the implanting strategy requested by one signal specification.

    The value of ``signal["implanting"]`` must be one of the class names found
    by ``ReflectionHandler.discover_classes_by_partial_name`` with their
    trailing ``Implanting`` removed. Default parameters loaded for
    ``<implanting>Implanting`` are merged with the specification (values from
    ``signal`` take precedence), and the merged dict must contain ``motifs``,
    ``implanting`` and ``sequence_position_weights``.

    If ``implanting_computation`` is present after merging, it is lower-cased,
    validated against the member names of ``ImplantingComputation`` and
    converted to the matching member; otherwise ``None`` is passed on.

    Returns:
        An instance of the ``<implanting>Implanting`` class constructed with a
        new ``GappedMotifImplanting``, the sequence position weights and the
        implanting computation.
    """
    class_suffix = "Implanting"
    discovered_classes = ReflectionHandler.discover_classes_by_partial_name(
        class_suffix, "simulation/signal_implanting_strategy/")
    strategy_names = [class_name[:-len(class_suffix)] for class_name in discovered_classes]
    ParameterValidator.assert_in_valid_list(signal["implanting"], strategy_names, "SignalParser", key)

    strategy_class_name = f"{signal['implanting']}Implanting"
    default_params = DefaultParamsLoader.load("signal_implanting_strategy/", strategy_class_name)
    # Explicit settings from the specification override the defaults.
    merged_spec = {**default_params, **signal}

    required_keys = ["motifs", "implanting", "sequence_position_weights"]
    ParameterValidator.assert_keys_present(list(merged_spec.keys()), required_keys, SignalParser.__name__, key)

    if 'implanting_computation' in merged_spec:
        computation_name = merged_spec['implanting_computation'].lower()
        allowed_computations = [member.name.lower() for member in ImplantingComputation]
        ParameterValidator.assert_in_valid_list(computation_name, allowed_computations, SignalParser.__name__,
                                                'implanting_computation')
        implanting_comp = ImplantingComputation[computation_name.upper()]
    else:
        implanting_comp = None

    strategy_cls = ReflectionHandler.get_class_by_name(strategy_class_name)
    return strategy_cls(GappedMotifImplanting(), merged_spec["sequence_position_weights"], implanting_comp)


    


In [144]:
from immuneML.simulation.sequence_implanting.GappedMotifImplanting import GappedMotifImplanting
from immuneML.simulation.signal_implanting_strategy.ImplantingComputation import ImplantingComputation
from immuneML.simulation.signal_implanting_strategy.SignalImplantingStrategy import SignalImplantingStrategy


In [145]:
def _get_implanting_strategy(key: str, signal: dict) -> SignalImplantingStrategy:
    """Build the signal implanting strategy described by ``signal``.

    Steps, in order:

    1. ``signal["implanting"]`` is validated against the discovered
       ``*Implanting`` class names with that suffix stripped.
    2. Defaults for ``<implanting>Implanting`` are loaded and overlaid with the
       entries of ``signal`` (the specification wins on conflicts).
    3. The merged parameters must include ``motifs``, ``implanting`` and
       ``sequence_position_weights``.
    4. An ``implanting_computation`` entry, if present, is validated
       case-insensitively and mapped to an ``ImplantingComputation`` member;
       if absent, ``None`` is used.

    Returns:
        The strategy instance created from the ``<implanting>Implanting`` class
        with a fresh ``GappedMotifImplanting``, the sequence position weights
        and the implanting computation as constructor arguments.
    """
    suffix = "Implanting"
    known_strategies = [
        name[:-len(suffix)]
        for name in ReflectionHandler.discover_classes_by_partial_name(suffix, "simulation/signal_implanting_strategy/")
    ]
    ParameterValidator.assert_in_valid_list(signal["implanting"], known_strategies, "SignalParser", key)

    target_class_name = f"{signal['implanting']}Implanting"
    params = {**DefaultParamsLoader.load("signal_implanting_strategy/", target_class_name), **signal}

    ParameterValidator.assert_keys_present(
        list(params.keys()), ["motifs", "implanting", "sequence_position_weights"], SignalParser.__name__, key)

    computation = None
    if 'implanting_computation' in params:
        requested = params['implanting_computation'].lower()
        valid_names = [option.name.lower() for option in ImplantingComputation]
        ParameterValidator.assert_in_valid_list(requested, valid_names, SignalParser.__name__,
                                                'implanting_computation')
        computation = ImplantingComputation[requested.upper()]

    strategy_class = ReflectionHandler.get_class_by_name(target_class_name)
    return strategy_class(GappedMotifImplanting(), params["sequence_position_weights"], computation)


### Simulation parsers

In [161]:
simulations = specs["simulations"]
simulations


{'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
   'repertoire_implanting_rate': 0.1,
   'signals': ['my_signal']}}}

In [168]:
from immuneML.simulation.Implanting import Implanting
from immuneML.simulation.Simulation import Simulation


In [171]:
# Step-by-step walkthrough of simulation parsing: each simulation is a dict of
# named implantings; every implanting becomes an Implanting object and the
# collected list is wrapped in a Simulation stored in the symbol table.
for key,simulation in simulations.items():

    print(key,simulation)

    # Reassigned on every outer iteration; both names remain defined at
    # notebook level after the cell has run.
    location = "SimulationParser"
    valid_implanting_keys = ["dataset_implanting_rate",
                             "repertoire_implanting_rate", "signals", "is_noise"]
    implantings = []
    for impl_key, implanting in simulation.items():


        # First check the implanting's own keys, then check the referenced
        # signal ids against the signal keys registered in the symbol table.
        ParameterValidator.assert_keys(
            implanting.keys(), valid_implanting_keys, location, impl_key, exclusive=False)

        ParameterValidator.assert_keys(implanting["signals"], symbol_table.get_keys_by_type(
            SymbolType.SIGNAL), location, impl_key, False)

        # Deep copy so the raw spec keeps its signal ids while the copy gets
        # the signal objects.
        implanting_params = copy.deepcopy(implanting)

        print(implanting_params)

        implanting_params["signals"] = [symbol_table.get(
            signal) for signal in implanting["signals"]]
        implanting_params["name"] = impl_key

        implantings.append(Implanting(**implanting_params))

        # Prints the original spec (`implanting`), not the resolved `implanting_params`.
        print("appended" , implanting)

    symbol_table.add(key, SymbolType.SIMULATION, Simulation(implantings))

    # Hard-coded lookup: shows the entry just added only while the simulation id is "my_simulation".
    print("test", symbol_table.get("my_simulation"))









my_simulation {'my_implantng': {'dataset_implanting_rate': 0.5, 'repertoire_implanting_rate': 0.1, 'signals': ['my_signal']}}
{'dataset_implanting_rate': 0.5, 'repertoire_implanting_rate': 0.1, 'signals': ['my_signal']}
appended {'dataset_implanting_rate': 0.5, 'repertoire_implanting_rate': 0.1, 'signals': ['my_signal']}
test my_implantng:
 dataset_implanting_rate: 0.5, 
repertoire_implanting_rate: 0.1, 
signals: 'Signal id: my_signal; motifs: my_motif - AA'


In [None]:

ParameterValidator.assert_keys(
    implanting.keys(), valid_implanting_keys, location, impl_key, exclusive=False)
ParameterValidator.assert_keys(implanting["signals"], symbol_table.get_keys_by_type(
    SymbolType.SIGNAL), location, impl_key, False)


#### Import parser

In [173]:
specs

{'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
   'params': {'labels': {}}}},
 'motifs': {'my_motif': {'instantiation': 'GappedKmer', 'seed': 'AA'}},
 'signals': {'my_signal': {'implanting': 'HealthySequence',
   'motifs': ['my_motif']}},
 'simulations': {'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
    'repertoire_implanting_rate': 0.1,
    'signals': ['my_signal']}}}}

In [179]:
from immuneML.IO.dataset_import.DataImport import DataImport


In [189]:

# Step-by-step walkthrough of dataset import parsing for the "datasets" section.
keyword = "datasets"
# NOTE(review): `valid_keys` is assigned but not used anywhere in this cell.
valid_keys = ["format", "params"]
# NOTE(review): this rebinds the notebook-level name `workflow_specification`
# to whatever `specs` currently holds (expected here: the "definitions"
# sub-dict), so later cells that display `workflow_specification` see that
# dict rather than the originally loaded YAML.
workflow_specification = specs
location = "ImportParser"
for key in workflow_specification[keyword].keys():
    print(key, workflow_specification[keyword]
          [key],  symbol_table, result1path)

    # Names of the available import formats, as reported by ReflectionHandler.
    valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(
        DataImport, "Import", "IO/dataset_import/")

    print(valid_formats)

    ParameterValidator.assert_in_valid_list(
        workflow_specification[keyword]
        [key]["format"], valid_formats, location, "format")
    
    # The importer class is resolved by name: "<format>Import".
    import_cls = ReflectionHandler.get_class_by_name(
        "{}Import".format(workflow_specification[keyword]
                          [key]["format"]))
    # Import parameters are prepared by the library's ImportParser, with
    # `result1path` and the dataset key passed along.
    params = ImportParser._prepare_params(workflow_specification[keyword]
                                          [key], result1path, key)

    print(import_cls, params)

    print("is_repertoire" in params)

    dataset = import_cls.import_dataset(params, key)

    dataset.name = key

    print(dataset)


    symbol_table.add(key, SymbolType.DATASET, dataset)

    # Hard-coded lookup: shows the entry just added only while the dataset id is "my_synthetic_dataset".
    print(symbol_table.get("my_synthetic_dataset"))

















my_synthetic_dataset {'format': 'RandomRepertoireDataset', 'params': {'repertoire_count': 100, 'sequence_count_probabilities': {10: 0.5, 20: 0.5}, 'sequence_length_probabilities': {10: 0.5, 12: 0.5}, 'labels': {}, 'result_path': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/datasets/my_synthetic_dataset')}} SymbolTable() C:\Users\karth\Desktop\PhD projects\immuneML\immuneML\quickstart\result1
['ImmuneML', 'TenxGenomics', 'OLGA', 'RandomRepertoireDataset', 'ImmunoSEQSample', 'RandomSequenceDataset', 'SingleLineReceptor', 'AIRR', 'ImmunoSEQRearrangement', 'VDJdb', 'MiXCR', 'Generic', 'IReceptor', 'RandomReceptorDataset', 'IGoR']
<class 'immuneML.IO.dataset_import.RandomRepertoireDatasetImport.RandomRepertoireDatasetImport'> {'repertoire_count': 100, 'sequence_count_probabilities': {10: 0.5, 20: 0.5}, 'sequence_length_probabilities': {10: 0.5, 12: 0.5}, 'labels': {}, 'result_path': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/q

In [197]:
symbol_table.__dict__

{'_items': {'my_motif': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a5702c9ac0>,
  'my_signal': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a57040ef10>,
  'my_simulation': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a57040e3a0>,
  'my_synthetic_dataset': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a570187fa0>}}

In [195]:
from immuneML.dsl.definition_parsers.DefinitionParser import DefinitionParser


In [198]:
specs

{'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
   'params': {'repertoire_count': 100,
    'sequence_count_probabilities': {10: 0.5, 20: 0.5},
    'sequence_length_probabilities': {10: 0.5, 12: 0.5},
    'labels': {},
    'result_path': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/datasets/my_synthetic_dataset')}}},
 'motifs': {'my_motif': {'instantiation': 'GappedKmer', 'seed': 'AA'}},
 'signals': {'my_signal': {'implanting': 'HealthySequence',
   'motifs': ['my_motif']}},
 'simulations': {'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
    'repertoire_implanting_rate': 0.1,
    'signals': ['my_signal']}}}}

In [196]:

symbol_table, specs_preprocessing = DefinitionParser._call_if_exists(
    PreprocessingParser.keyword, PreprocessingParser.parse, specs, symbol_table)
symbol_table, specs_encoding = DefinitionParser._call_if_exists(
    "encodings", EncodingParser.parse, specs, symbol_table)
symbol_table, specs_ml = DefinitionParser._call_if_exists(
    "ml_methods", MLParser.parse, specs, symbol_table)
symbol_table, specs_report = DefinitionParser._call_if_exists(
    "reports", ReportParser.parse_reports, specs, symbol_table)


### Definition Parser create spec defs 

In [199]:
specs_import = specs["datasets"]
specs_simulation = specs["simulations"]
specs_motifs = specs["motifs"]
specs_signals = specs["signals"]

In [201]:

def create_specs_defs(specs_datasets: dict, simulation: dict, preprocessings: dict, motifs: dict, signals: dict,
                      encodings: dict, ml_methods: dict, reports: dict):
    """Assemble the per-section specifications into one definitions dictionary.

    The result maps each section name to the corresponding argument, in this
    order: ``datasets``, ``simulations``, the value of
    ``PreprocessingParser.keyword``, ``motifs``, ``signals``, ``encodings``,
    ``ml_methods``, ``reports``.
    """
    section_names = ("datasets", "simulations", PreprocessingParser.keyword, "motifs", "signals",
                     "encodings", "ml_methods", "reports")
    section_specs = (specs_datasets, simulation, preprocessings, motifs, signals,
                     encodings, ml_methods, reports)
    return dict(zip(section_names, section_specs))



specs_defs =create_specs_defs(specs_import, specs_simulation, specs_preprocessing, specs_motifs, specs_signals,
                                                specs_encoding, specs_ml, specs_report)


In [202]:
specs_defs

{'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
   'params': {'repertoire_count': 100,
    'sequence_count_probabilities': {10: 0.5, 20: 0.5},
    'sequence_length_probabilities': {10: 0.5, 12: 0.5},
    'labels': {},
    'result_path': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/datasets/my_synthetic_dataset')}}},
 'simulations': {'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
    'repertoire_implanting_rate': 0.1,
    'signals': ['my_signal']}}},
 'preprocessing_sequences': {},
 'motifs': {'my_motif': {'instantiation': 'GappedKmer', 'seed': 'AA'}},
 'signals': {'my_signal': {'implanting': 'HealthySequence',
   'motifs': ['my_motif']}},
 'encodings': {},
 'ml_methods': {},
 'reports': {}}

### Definition parser output

In [203]:
from immuneML.dsl.definition_parsers.DefinitionParserOutput import DefinitionParserOutput


class DefinitionParserOutput:
    """Pairs a populated symbol table with the specification it was built from.

    This is a notebook-local definition; it shadows the class of the same name
    imported at the top of this cell.
    """

    def __init__(self, symbol_table: SymbolTable, specification: dict):
        """Store both arguments after checking that the symbol table is not empty.

        Raises:
            AssertionError: if ``symbol_table.get_by_type`` returns an empty
                collection for every member of ``SymbolType``.
        """
        has_any_symbol = False
        for symbol_type in SymbolType:
            if len(symbol_table.get_by_type(symbol_type)) > 0:
                has_any_symbol = True
                break
        assert has_any_symbol, \
            "DefinitionParserOutput: symbol table has not been populated by objects of any type."

        self.symbol_table = symbol_table
        self.specification = specification


In [212]:
workflow_specification

{'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
   'params': {'repertoire_count': 100,
    'sequence_count_probabilities': {10: 0.5, 20: 0.5},
    'sequence_length_probabilities': {10: 0.5, 12: 0.5},
    'labels': {},
    'result_path': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/datasets/my_synthetic_dataset')}}},
 'motifs': {'my_motif': {'instantiation': 'GappedKmer', 'seed': 'AA'}},
 'signals': {'my_signal': {'implanting': 'HealthySequence',
   'motifs': ['my_motif']}},
 'simulations': {'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
    'repertoire_implanting_rate': 0.1,
    'signals': ['my_signal']}}}}

In [211]:
specs

{'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
   'params': {'repertoire_count': 100,
    'sequence_count_probabilities': {10: 0.5, 20: 0.5},
    'sequence_length_probabilities': {10: 0.5, 12: 0.5},
    'labels': {},
    'result_path': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/datasets/my_synthetic_dataset')}}},
 'motifs': {'my_motif': {'instantiation': 'GappedKmer', 'seed': 'AA'}},
 'signals': {'my_signal': {'implanting': 'HealthySequence',
   'motifs': ['my_motif']}},
 'simulations': {'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
    'repertoire_implanting_rate': 0.1,
    'signals': ['my_signal']}}}}

In [214]:
# Wrap the populated symbol table and the current `specs` dict in the parser
# output object. `specs_defs` is already defined by an earlier cell and does
# not need to be reassigned here.
def_parser_output = DefinitionParserOutput(symbol_table=symbol_table, specification=specs)


In [207]:
specs_defs

{'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
   'params': {'repertoire_count': 100,
    'sequence_count_probabilities': {10: 0.5, 20: 0.5},
    'sequence_length_probabilities': {10: 0.5, 12: 0.5},
    'labels': {},
    'result_path': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/datasets/my_synthetic_dataset')}}},
 'simulations': {'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
    'repertoire_implanting_rate': 0.1,
    'signals': ['my_signal']}}},
 'preprocessing_sequences': {},
 'motifs': {'my_motif': {'instantiation': 'GappedKmer', 'seed': 'AA'}},
 'signals': {'my_signal': {'implanting': 'HealthySequence',
   'motifs': ['my_motif']}},
 'encodings': {},
 'ml_methods': {},
 'reports': {}}

In [208]:
symbol_table.__dict__

{'_items': {'my_motif': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a5702c9ac0>,
  'my_signal': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a57040ef10>,
  'my_simulation': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a57040e3a0>,
  'my_synthetic_dataset': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a570187fa0>}}

In [209]:
specs_defs

{'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
   'params': {'repertoire_count': 100,
    'sequence_count_probabilities': {10: 0.5, 20: 0.5},
    'sequence_length_probabilities': {10: 0.5, 12: 0.5},
    'labels': {},
    'result_path': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/datasets/my_synthetic_dataset')}}},
 'simulations': {'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
    'repertoire_implanting_rate': 0.1,
    'signals': ['my_signal']}}},
 'preprocessing_sequences': {},
 'motifs': {'my_motif': {'instantiation': 'GappedKmer', 'seed': 'AA'}},
 'signals': {'my_signal': {'implanting': 'HealthySequence',
   'motifs': ['my_motif']}},
 'encodings': {},
 'ml_methods': {},
 'reports': {}}

In [215]:
def_parser_output.specification

{'definitions': {'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
    'params': {'labels': {}}}},
  'motifs': {'my_motif': {'seed': 'AA', 'instantiation': 'GappedKmer'}},
  'signals': {'my_signal': {'motifs': ['my_motif'],
    'implanting': 'HealthySequence'}},
  'simulations': {'my_simulation': {'my_implantng': {'signals': ['my_signal'],
     'dataset_implanting_rate': 0.5,
     'repertoire_implanting_rate': 0.1}}}},
 'instructions': {'simulation_instruction': {'type': 'Simulation',
   'dataset': 'my_synthetic_dataset',
   'simulation': 'my_simulation',
   'export_formats': ['AIRR']}}}

In [206]:
def_parser_output.__dict__

{'symbol_table': SymbolTable(),
 'specification': {'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
    'params': {'repertoire_count': 100,
     'sequence_count_probabilities': {10: 0.5, 20: 0.5},
     'sequence_length_probabilities': {10: 0.5, 12: 0.5},
     'labels': {},
     'result_path': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/datasets/my_synthetic_dataset')}}},
  'motifs': {'my_motif': {'instantiation': 'GappedKmer', 'seed': 'AA'}},
  'signals': {'my_signal': {'implanting': 'HealthySequence',
    'motifs': ['my_motif']}},
  'simulations': {'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
     'repertoire_implanting_rate': 0.1,
     'signals': ['my_signal']}}}}}

### Instruction parsers

In [217]:
keyword = "instructions"

In [216]:
specification = def_parser_output.specification
symbol_table = def_parser_output.symbol_table

In [218]:
keyword in specification

True

In [219]:
for key in specification[keyword]:

    print(key, specification[keyword][key], symbol_table, result1path)

    # specification[keyword][key] , symbol_table = parse_instruction(key, specification[keyword][key], symbol_table,result1path )


simulation_instruction {'type': 'Simulation', 'dataset': 'my_synthetic_dataset', 'simulation': 'my_simulation', 'export_formats': ['AIRR']} SymbolTable() C:\Users\karth\Desktop\PhD projects\immuneML\immuneML\quickstart\result1


In [220]:
valid_instructions = [
    cls[:-6] for cls in ReflectionHandler.discover_classes_by_partial_name("Parser", "dsl/instruction_parsers/")]


In [221]:
valid_instructions

['DatasetExport',
 'ExploratoryAnalysis',
 'MLApplication',
 'Simulation',
 'Subsampling',
 'TrainMLModel']

In [227]:
specification[keyword][key]["type"]


'Simulation'

In [224]:
key

'simulation_instruction'

In [233]:
path = "instructions/"
class_name = 'Simulation'


In [234]:
filepath = EnvironmentSettings.default_params_path / path / \
    f"{DefaultParamsLoader.convert_to_snake_case(class_name)}_params.yaml"


In [235]:
filepath

WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/immuneML/config/default_params/instructions/simulation_params.yaml')

In [236]:
with filepath.open("r") as file:
    params = yaml.load(file, Loader=yaml.FullLoader)


In [237]:
params

{'export_formats': ['AIRR']}

In [225]:
default_params = DefaultParamsLoader.load(
    "instructions/", specification[keyword][key]["type"])


In [226]:
default_params

{'export_formats': ['AIRR']}

In [238]:
instruction = {**default_params, **specification[keyword][key]}


In [264]:
instruction

{'export_formats': ['AIRR'],
 'type': 'Simulation',
 'dataset': 'my_synthetic_dataset',
 'simulation': 'my_simulation'}

In [240]:
parser = ReflectionHandler.get_class_by_name("{}Parser".format(instruction["type"]), "instruction_parsers/")()


In [243]:
parser.__class__

immuneML.dsl.instruction_parsers.SimulationParser.SimulationParser

### Simulation parser

In [244]:
from pathlib import Path

from immuneML.IO.dataset_export.DataExporter import DataExporter
from immuneML.dsl.symbol_table.SymbolTable import SymbolTable
from immuneML.dsl.symbol_table.SymbolType import SymbolType
from immuneML.util.ParameterValidator import ParameterValidator
from immuneML.util.ReflectionHandler import ReflectionHandler
from immuneML.workflows.instructions.SimulationInstruction import SimulationInstruction


In [245]:
# Check the merged instruction's keys against the four keys listed here.
# NOTE(review): presumably raises when the keys do not match - confirm against
# ParameterValidator.assert_keys.
ParameterValidator.assert_keys(instruction.keys(), [
                               "dataset", "simulation", "type", "export_formats"], "SimulationParser", 'simulation_instruction')


In [249]:
def parse_exporters(instruction):
    """
    Resolve the exporter classes named in an instruction.

    Args:
        instruction: mapping that must contain the key "export_formats",
            holding either None or an iterable of format names.

    Returns:
        None when "export_formats" is None; otherwise a list with one class
        per format, looked up as "<format>Exporter" under "dataset_export/".

    Raises:
        KeyError: if "export_formats" is missing from `instruction`.
    """
    requested_formats = instruction["export_formats"]
    if requested_formats is None:
        return None

    return [
        ReflectionHandler.get_class_by_name(f"{format_name}Exporter", "dataset_export/")
        for format_name in requested_formats
    ]




In [250]:
# Collect the inputs a simulation instruction needs from the symbol table.
# Entries returned by get_by_type are unwrapped through their `.item` attribute.
signals = [signal.item for signal in symbol_table.get_by_type(
    SymbolType.SIGNAL)]

# The instruction refers to its simulation and dataset by symbol name.
simulation = symbol_table.get(instruction["simulation"])

dataset = symbol_table.get(instruction["dataset"])

# Exporter classes for the requested export formats (None if none were requested).
exporters = parse_exporters(instruction)






In [251]:
exporters

[immuneML.IO.dataset_export.AIRRExporter.AIRRExporter]

In [252]:
dataset , simulation, signals

(<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset at 0x1a55bacbb80>,
 <immuneML.simulation.Simulation.Simulation at 0x1a57040ed90>,
 [<immuneML.simulation.implants.Signal.Signal at 0x1a56f8dd6a0>])

In [None]:
# Assemble the SimulationInstruction from the pieces gathered above.
# NOTE(review): this cell has no execution count and `process` is not used by
# the visible cells that follow - confirm whether it is still needed.
process = SimulationInstruction(
    signals=signals, simulation=simulation, dataset=dataset, name='simulation_instruction', exporters=exporters)


### Simulation instruction

In [257]:
from immuneML.simulation.SimulationState import SimulationState
from immuneML.workflows.steps.SignalImplanter import SignalImplanter


In [254]:
# Build the state object directly from the signals, simulation and dataset;
# its remaining fields are filled in by later cells.
state = SimulationState(signals, simulation, dataset,
                        name='simulation_instruction')


In [255]:
state.__dict__

{'signals': [<immuneML.simulation.implants.Signal.Signal at 0x1a56f8dd6a0>],
 'simulation': <immuneML.simulation.Simulation.Simulation at 0x1a57040ed90>,
 'dataset': <immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset at 0x1a55bacbb80>,
 'formats': None,
 'paths': None,
 'resulting_dataset': None,
 'result_path': None,
 'name': 'simulation_instruction'}

In [258]:
# Results for this instruction go into a sub-directory named after the state.
state.result_path = result1path / state.name

# Run the implanting step on the state and keep the dataset it returns.
state.resulting_dataset = SignalImplanter.run(state)

def export_dataset():
    """
    Export the simulated dataset once per exporter class and record the result.

    Reads the notebook-level names `state` and `exporters`. For each class in
    `exporters`, calls its `export` with `state.resulting_dataset` and a target
    directory under `state.result_path / "exported_dataset/"`.

    Afterwards `state.paths` maps the dataset's name (or its identifier when
    the name is None) to a {format: directory} dict, and `state.formats` lists
    the formats in export order. With no exporters both are left empty.
    Returns None.
    """
    simulated = state.resulting_dataset
    dataset_label = simulated.name if simulated.name is not None else simulated.identifier

    paths_by_format = {}
    exported_formats = []

    active_exporters = exporters if exporters is not None and len(exporters) > 0 else []
    for exporter_cls in active_exporters:
        cls_name = exporter_cls.__name__
        # The format key drops the last eight characters of the class name.
        format_key = cls_name[:-8]
        target_dir = state.result_path / \
            f"exported_dataset/{cls_name.replace('Exporter', '').lower()}/"

        exporter_cls.export(state.resulting_dataset, target_dir)

        paths_by_format[format_key] = target_dir
        exported_formats.append(format_key)

    state.paths = {dataset_label: paths_by_format}
    state.formats = exported_formats

export_dataset()



state






            

            














SimulationState(signals=[<immuneML.simulation.implants.Signal.Signal object at 0x000001A56F8DD6A0>], simulation=<immuneML.simulation.Simulation.Simulation object at 0x000001A57040ED90>, dataset=<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x000001A55BACBB80>, formats=['AIRR'], paths={'my_synthetic_dataset': {'AIRR': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/simulation_instruction/exported_dataset/airr')}}, resulting_dataset=<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x000001A5704DA9A0>, result_path=WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/simulation_instruction'), name='simulation_instruction')

In [261]:
state

SimulationState(signals=[<immuneML.simulation.implants.Signal.Signal object at 0x000001A56F8DD6A0>], simulation=<immuneML.simulation.Simulation.Simulation object at 0x000001A57040ED90>, dataset=<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x000001A55BACBB80>, formats=['AIRR'], paths={'my_synthetic_dataset': {'AIRR': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/simulation_instruction/exported_dataset/airr')}}, resulting_dataset=<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x000001A5704DA9A0>, result_path=WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/simulation_instruction'), name='simulation_instruction')

In [259]:
instruction_object = state

In [262]:
instruction_object

SimulationState(signals=[<immuneML.simulation.implants.Signal.Signal object at 0x000001A56F8DD6A0>], simulation=<immuneML.simulation.Simulation.Simulation object at 0x000001A57040ED90>, dataset=<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x000001A55BACBB80>, formats=['AIRR'], paths={'my_synthetic_dataset': {'AIRR': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/simulation_instruction/exported_dataset/airr')}}, resulting_dataset=<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x000001A5704DA9A0>, result_path=WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/simulation_instruction'), name='simulation_instruction')

In [263]:
# Register the hand-built object under the instruction's name.
# NOTE(review): `instruction_object` is bound to the SimulationState `state`
# in the preceding cell, while the SemanticModel class defined further down
# asserts that its items are Instruction instances - confirm this is intended.
symbol_table.add('simulation_instruction' , SymbolType.INSTRUCTION, instruction_object)

In [265]:
instruction

{'export_formats': ['AIRR'],
 'type': 'Simulation',
 'dataset': 'my_synthetic_dataset',
 'simulation': 'my_simulation'}

In [None]:
# Let the parser build the instruction in one call instead of the manual steps above.
# NOTE(review): this cell has no execution count and rebinds `instruction_object`
# after it was already added to the symbol table - confirm the intended order.
instruction_object = parser.parse('simulation_instruction' , instruction , symbol_table , result1path )


### Output Parser

In [266]:
specs

{'definitions': {'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
    'params': {'labels': {}}}},
  'motifs': {'my_motif': {'seed': 'AA', 'instantiation': 'GappedKmer'}},
  'signals': {'my_signal': {'motifs': ['my_motif'],
    'implanting': 'HealthySequence'}},
  'simulations': {'my_simulation': {'my_implantng': {'signals': ['my_signal'],
     'dataset_implanting_rate': 0.5,
     'repertoire_implanting_rate': 0.1}}}},
 'instructions': {'simulation_instruction': {'type': 'Simulation',
   'dataset': 'my_synthetic_dataset',
   'simulation': 'my_simulation',
   'export_formats': ['AIRR']}}}

In [267]:
symbol_table.__dict__

{'_items': {'my_motif': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a5702c9ac0>,
  'my_signal': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a57040ef10>,
  'my_simulation': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a57040e3a0>,
  'my_synthetic_dataset': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a570187fa0>,
  'simulation_instruction': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a570582160>}}

In [270]:
def parse(specs, symbol_table):
    """
    Resolve the output settings of a specification and register them.

    When `specs` already has an "output" entry, it is checked with
    ParameterValidator against the key list ["format"] and the value list
    ["HTML"]. Otherwise the default {"format": "HTML"} is written into `specs`.
    In both cases the resulting settings are added to `symbol_table` under the
    symbol "output" with type SymbolType.OUTPUT.

    Args:
        specs: the full specification dict; gains an "output" key when it has none.
        symbol_table: object exposing add(symbol, symbol_type, item).

    Returns:
        The dict stored at specs["output"].
    """
    if "output" in specs:
        ParameterValidator.assert_keys(
            specs["output"], ["format"], "OutputParser", "output")
        ParameterValidator.assert_in_valid_list(
            specs["output"]["format"], ["HTML"], "OutputParser", "format")
    else:
        specs["output"] = {"format": "HTML"}

    # Register unconditionally: a user-supplied output section must be
    # retrievable from the symbol table just like the default one. Previously
    # this ran only in the default branch.
    symbol_table.add("output", SymbolType.OUTPUT, specs["output"])

    return specs["output"]



                
parse(specs, symbol_table)

{'format': 'HTML'}

In [271]:
specs

{'definitions': {'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
    'params': {'labels': {}}}},
  'motifs': {'my_motif': {'seed': 'AA', 'instantiation': 'GappedKmer'}},
  'signals': {'my_signal': {'motifs': ['my_motif'],
    'implanting': 'HealthySequence'}},
  'simulations': {'my_simulation': {'my_implantng': {'signals': ['my_signal'],
     'dataset_implanting_rate': 0.5,
     'repertoire_implanting_rate': 0.1}}}},
 'instructions': {'simulation_instruction': {'type': 'Simulation',
   'dataset': 'my_synthetic_dataset',
   'simulation': 'my_simulation',
   'export_formats': ['AIRR']}},
 'output': {'format': 'HTML'}}

### ImmuneML parser output specs

In [272]:
from immuneML.dsl.ImmuneMLParser import ImmuneMLParser


In [273]:
# Write the full specification file; the printed log line and the returned
# value both show the location of the written file.
# NOTE(review): this calls an underscore-prefixed method, rebinds `path` yet
# again, and passes the single merged `instruction` dict - confirm whether a
# name -> spec mapping of all instructions is expected here instead.
path = ImmuneMLParser._output_specs(
    spec1path, result1path, specs_defs, instruction, {'format': 'HTML'} )


2022-04-20 11:31:31.095886: Full specification is available at C:\Users\karth\Desktop\PhD projects\immuneML\immuneML\quickstart\result1\full_simulation_specs.yaml.



In [274]:
symbol_table.__dict__

{'_items': {'my_motif': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a5702c9ac0>,
  'my_signal': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a57040ef10>,
  'my_simulation': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a57040e3a0>,
  'my_synthetic_dataset': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a570187fa0>,
  'simulation_instruction': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a570582160>,
  'output': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x1a570596d00>}}

In [275]:
path

WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/quickstart/result1/full_simulation_specs.yaml')

#### Semantic Model

In [None]:
from immuneML.dsl.semantic_model.SemanticModel import SemanticModel


In [None]:

# Gather every registered instruction plus the output settings, then run them
# all through SemanticModel.
# NOTE(review): `result_path` and `clear_cache` are not defined in the visible
# cells (other cells use `result1path`), and this cell has no execution count -
# confirm both names before running.
instructions = symbol_table.get_by_type(SymbolType.INSTRUCTION)
output = symbol_table.get("output")
model = SemanticModel(
    [instruction.item for instruction in instructions],result_path, output)
result = model.run()

clear_cache()


In [None]:
import datetime
from pathlib import Path

from immuneML.util.ReflectionHandler import ReflectionHandler
from immuneML.workflows.instructions.Instruction import Instruction


class SemanticModel:
    """
    Runs a list of instructions in order and, when an output specification is
    present, builds reports from the collected results.
    """

    def __init__(self, instructions: list, result_path: Path, output=None):
        """
        Args:
            instructions: objects to run; every item must be an Instruction instance.
            result_path: location passed to each instruction and to the report builder.
            output: mapping with a 'format' key naming the report builder, or
                None to skip report generation.
        """
        assert all(isinstance(item, Instruction) for item in instructions), \
            "SemanticModel: error occurred in parsing: check instruction definitions in the configuration file."
        self.instructions = instructions
        self.result_path = result_path
        self.output = output

    def run(self):
        """Run all instructions, build reports if `output` is set, and return the list of results."""
        states = self.run_instructions()
        if self.output is None:
            return states
        self.build_reports(states)
        return states

    def build_reports(self, instruction_states):
        """Pass the results to the report builder, printing a timestamped line before and after; return what the builder returns."""
        builder = self.make_report_builder()
        print(f"{datetime.datetime.now()}: Generating {self.output['format']} reports...", flush=True)
        report_location = builder.build(instruction_states, self.result_path)
        print(f"{datetime.datetime.now()}: {self.output['format']} reports are generated.", flush=True)
        return report_location

    def run_instructions(self) -> list:
        """Call run(result_path=...) on each instruction in order, printing progress, and return the results as a list."""
        collected = []
        for position, current in enumerate(self.instructions, start=1):
            print("{}: Instruction {}/{} has started.".format(
                datetime.datetime.now(), position, len(self.instructions)), flush=True)
            collected.append(current.run(result_path=self.result_path))
            print("{}: Instruction {}/{} has finished.".format(
                datetime.datetime.now(), position, len(self.instructions)), flush=True)
        return collected

    def make_report_builder(self):
        """Look up the class named '<format>Builder' under 'presentation/' and return it (not an instance)."""
        builder_name = f"{self.output['format']}Builder"
        return ReflectionHandler.get_class_by_name(builder_name, "presentation/")


In [47]:
def parse(workflow_specification, symbol_table , result1path):
    """
    Parse the "definitions" section of a workflow specification, one component at a time.

    Each component goes through call_if_exists with its keyword, its parser
    callable, the definitions dict and the symbol table. The symbol table
    returned by one step is passed to the next. ImportParser.parse runs last
    and additionally receives `result1path`.

    NOTE(review): the visible body has no return statement, so a call evaluates
    to None, and the specs_* results are never used - confirm whether a final
    assembly/return step is missing. This definition also reuses the name
    `parse`, which an earlier cell defines with a different signature.
    """
    specs = workflow_specification["definitions"]

    # Order matters: each step receives the symbol table produced by the previous one.
    symbol_table, specs_motifs = call_if_exists(
        "motifs", MotifParser.parse_motifs, specs, symbol_table)

    symbol_table, specs_signals = call_if_exists(
        "signals", SignalParser.parse_signals, specs, symbol_table)

    symbol_table, specs_simulation = call_if_exists(
        "simulations", SimulationParser.parse_simulations, specs, symbol_table)

    symbol_table, specs_preprocessing = call_if_exists(
        PreprocessingParser.keyword, PreprocessingParser.parse, specs, symbol_table)
        
    symbol_table, specs_encoding = call_if_exists(
        "encodings", EncodingParser.parse, specs, symbol_table)

    symbol_table, specs_ml = call_if_exists(
        "ml_methods", MLParser.parse, specs, symbol_table)

    symbol_table, specs_report = call_if_exists(
        "reports", ReportParser.parse_reports, specs, symbol_table)

    # Called directly rather than through call_if_exists, and given the result path.
    symbol_table, specs_import = ImportParser.parse(
        specs, symbol_table, result1path)


{'datasets': {'my_synthetic_dataset': {'format': 'RandomRepertoireDataset',
   'params': {'labels': {}}}},
 'motifs': {'my_motif': {'instantiation': 'GappedKmer', 'seed': 'AA'}},
 'signals': {'my_signal': {'implanting': 'HealthySequence',
   'motifs': ['my_motif']}},
 'simulations': {'my_simulation': {'my_implantng': {'dataset_implanting_rate': 0.5,
    'repertoire_implanting_rate': 0.1,
    'signals': ['my_signal']}}}}