In [24]:
from immuneML.data_model.dataset.ReceptorDataset import ReceptorDataset
from immuneML.data_model.dataset.RepertoireDataset import RepertoireDataset
from immuneML.data_model.dataset.SequenceDataset import SequenceDataset
from immuneML.environment.EnvironmentSettings import EnvironmentSettings
from immuneML.simulation.dataset_generation.RandomDatasetGenerator import RandomDatasetGenerator


In [25]:
path = EnvironmentSettings.tmp_test_path / \
    "random_repertoire1_dataset_generation/"


In [None]:
definitions:
  datasets:
    my_synthetic_dataset:
      format: RandomRepertoireDataset
      params:
        labels: {}
  motifs:
    my_motif:
      instantiation: GappedKmer
      seed: AA
  signals:
    my_signal:
      implanting: HealthySequence
      motifs:
      - my_motif
  simulations:
    my_simulation:
      my_implantng:
        dataset_implanting_rate: 0.5
        repertoire_implanting_rate: 0.1
        signals:
        - my_signal
instructions:
  simulation_instruction:
    dataset: my_synthetic_dataset
    export_formats:
    - AIRR
    simulation: my_simulation
    type: Simulation


In [26]:
path

WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/test/tmp/random_repertoire1_dataset_generation')

In [27]:
dataset = RandomDatasetGenerator.generate_repertoire_dataset(repertoire_count=100,
                                                             sequence_count_probabilities={
                                                                 100: 0.5, 120: 0.5},
                                                             sequence_length_probabilities={12: 0.33, 14: 0.33, 15: 0.33}, labels={}, path=path)


In [28]:
path

WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/test/tmp/random_repertoire1_dataset_generation')

In [29]:
from immuneML.workflows.steps.SignalImplanter import SignalImplanter
from immuneML.simulation.SimulationState import SimulationState
from immuneML.simulation.Implanting import Implanting
from immuneML.simulation.Simulation import Simulation

from immuneML.simulation.implants.Motif import Motif
from immuneML.simulation.implants.Signal import Signal
from immuneML.simulation.motif_instantiation_strategy.GappedKmerInstantiation import GappedKmerInstantiation
from immuneML.simulation.sequence_implanting.GappedMotifImplanting import GappedMotifImplanting
from immuneML.simulation.signal_implanting_strategy.HealthySequenceImplanting import HealthySequenceImplanting
from immuneML.simulation.signal_implanting_strategy.ImplantingComputation import ImplantingComputation


In [30]:
result_path = path / "simulation_data"


In [31]:
signal = Signal("my_signal", [Motif(
    "my_motif", GappedKmerInstantiation(), "AA")],   implanting_strategy=HealthySequenceImplanting(GappedMotifImplanting(), implanting_computation=ImplantingComputation.ROUND))


In [32]:
simulation = Simulation([Implanting(dataset_implanting_rate=0.5,
                        repertoire_implanting_rate=0.1, signals=[signal], name="my_simulation")])


In [33]:
input_params = SimulationState(dataset=dataset, result_path=result_path,
                               simulation=simulation, signals=[signal], formats=["AIRR"])


In [34]:
new_dataset = SignalImplanter.run(input_params)




In [35]:
from immuneML.IO.dataset_export.AIRRExporter import AIRRExporter


In [36]:
path_exported = path / "exported"


In [37]:
exported_dataset = AIRRExporter.export(new_dataset, path_exported)


In [38]:
path_exported

WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/test/tmp/random_repertoire1_dataset_generation/exported')

In [39]:
type(exported_dataset)

NoneType

<h2> ML processs </h2>

{'state': TrainMLModelState(dataset=<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x0000023FF7526FA0>, hp_strategy=<immuneML.hyperparameter_optimization.strategy.GridSearch.GridSearch object at 0x0000023FF75009D0>, hp_settings=[<immuneML.hyperparameter_optimization.HPSetting.HPSetting object at 0x0000023FF55A9700>, <immuneML.hyperparameter_optimization.HPSetting.HPSetting object at 0x0000023FF3DF26D0>], assessment=<immuneML.hyperparameter_optimization.config.SplitConfig.SplitConfig object at 0x0000023FF7500DF0>, selection=<immuneML.hyperparameter_optimization.config.SplitConfig.SplitConfig object at 0x0000023FF75004F0>, metrics={<Metric.ACCURACY: 'accuracy_score'>}, optimization_metric=<Metric.BALANCED_ACCURACY: 'balanced_accuracy_score'>, label_configuration=<immuneML.environment.LabelConfiguration.LabelConfiguration object at 0x0000023FF75000D0>, path=WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/analysis_runs/7445dd22466e58dfe647730a635b0547'), context={'dataset': <immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x0000023FF7526FA0>}, number_of_processes=3, reports={'hprep': <immuneML.reports.train_ml_model_reports.MLSettingsPerformance.MLSettingsPerformance object at 0x0000023FF74A3A00>}, name='machine_learning_instruction', refit_optimal_model=False, optimal_hp_items={}, optimal_hp_item_paths={}, assessment_states=[], report_results=[])}

In [22]:
specs = {
    "definitions": {
        "datasets": {
            "d1": {
                "format": "AIRR",
                "params": {
                    "path": str(path / "result1/simulation_instruction/exported_dataset/airr/"),
                    "metadata_file": str(path / "result1/simulation_instruction/exported_dataset/airr/metadata.csv")
                }
            }
        },
        "encodings": {
            "e1": {
                "KmerFrequency": {
                    "k": 3
                }
            },
            "e2": {
                "KmerFrequency": {
                    "k": 2
                }
            }
        },
        "ml_methods": {
            "simpleLR": {
                "LogisticRegression": {
                    "C": 0.1,
                    "penalty": "l1",
                    "max_iter": 200
                }}
        },
        "reports": {
            "rep1": {
                "SequenceLengthDistribution": {
                    "batch_size": 3
                }
            },
            "hprep": "MLSettingsPerformance",
            "coef": "Coefficients"
        }
    },
    "instructions": {
        "machine_learning_instruction": {
            "type": "TrainMLModel",
            "settings": [
                {
                    "encoding": "e1",
                    "ml_method": "simpleLR"
                },
                {
                    "encoding": "e2",
                    "ml_method": "simpleLR"
                }
            ],
            "assessment": {
                "split_strategy": "random",
                "split_count": 1,
                "training_percentage": 0.7,
                "reports": {
                    "data_splits": ["rep1"],
                    'models': ["coef"]
                }
            },
            "selection": {
                "split_strategy": "random",
                "split_count": 1,
                "training_percentage": 0.7,
                "reports": {
                    "data_splits": ["rep1"],
                    "models": [],
                }
            },
            "labels": ["my_signal"],
            "dataset": "d1",
            "strategy": "GridSearch",
            "metrics": ["accuracy"],
            "reports": ["hprep"],
            "number_of_processes": 3,
            "optimization_metric": "balanced_accuracy",
            "refit_optimal_model": False
        }
    }
}


<h2> Run HPAssessment </h2>

In [40]:
Ml_result_path = path / "ML_result"


<h3>DatasetImport </h3>

  from .autonotebook import tqdm as notebook_tqdm


In [41]:
from immuneML.IO.dataset_import.AIRRImport import AIRRImport


In [42]:
column_mapping = {
    "junction": "sequences",
    "junction_aa": "sequence_aas",
    "v_call": "v_alleles",
    "j_call": "j_alleles",
    "locus": "chains",
    "duplicate_count": "counts",
    "sequence_id": "sequence_identifiers"
}


In [43]:
params = {"is_repertoire": True, "result_path": Ml_result_path / "results", "path": path_exported, "metadata_file": path_exported / "metadata.csv",
          "import_out_of_frame": False, "import_with_stop_codon": False, "import_illegal_characters": False,
          "import_productive": True, "region_type": "IMGT_CDR3", "import_empty_nt_sequences": True, "import_empty_aa_sequences": False,
          "column_mapping": column_mapping,
          "separator": "\t"}


In [44]:
params

{'is_repertoire': True,
 'result_path': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/test/tmp/random_repertoire1_dataset_generation/ML_result/results'),
 'path': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/test/tmp/random_repertoire1_dataset_generation/exported'),
 'metadata_file': WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/test/tmp/random_repertoire1_dataset_generation/exported/metadata.csv'),
 'import_out_of_frame': False,
 'import_with_stop_codon': False,
 'import_illegal_characters': False,
 'import_productive': True,
 'region_type': 'IMGT_CDR3',
 'import_empty_nt_sequences': True,
 'import_empty_aa_sequences': False,
 'column_mapping': {'junction': 'sequences',
  'junction_aa': 'sequence_aas',
  'v_call': 'v_alleles',
  'j_call': 'j_alleles',
  'locus': 'chains',
  'duplicate_count': 'counts',
  'sequence_id': 'sequence_identifiers'},
 'separator': '\t'}

In [45]:
dataset = AIRRImport.import_dataset(params, "airr_repertoire_dataset_1")


In [46]:
type(dataset)

immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset

<h3>Assessment</h3>

In [47]:
from immuneML.hyperparameter_optimization.config.ReportConfig import ReportConfig


In [48]:
datasplits = {
    "SequenceLengthDistribution": {
        "batch_size": 3
    }
}
models1 = {"coef": "Coefficients"}


In [49]:
reportconfig1 = ReportConfig(
    data_splits=datasplits, models=models1)


In [50]:
from immuneML.hyperparameter_optimization.config.SplitConfig import SplitConfig
from immuneML.hyperparameter_optimization.config.SplitConfig import SplitType

assessment = SplitConfig(
    split_strategy=SplitType.RANDOM, split_count=1, training_percentage=0.7, reports=reportconfig1)


<h3> Label configurator </h3>

In [51]:
from immuneML.environment.LabelConfiguration import LabelConfiguration
from immuneML.environment.Label import Label
label_configuration = LabelConfiguration(labels=[Label("my_signal")])


<h3>Split Data </h3>

In [52]:
from immuneML.hyperparameter_optimization.core.HPUtil import HPUtil


  from .autonotebook import tqdm as notebook_tqdm


In [53]:
airr_data = Ml_result_path / "results"

In [54]:
train_val_datasets, test_datasets = HPUtil.split_data(dataset, assessment, airr_data, label_configuration )

In [55]:
n_splits = len(train_val_datasets)


In [56]:
n_splits

1

In [57]:
for index in range(n_splits):
    print(train_val_datasets[index], test_datasets[index], index, n_splits)


<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x000002B39C9B0760> <immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x000002B3AC9B16D0> 0 1


In [58]:
assesment_data = Ml_result_path / "assesment"

In [59]:
assesment_data

WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/test/tmp/random_repertoire1_dataset_generation/ML_result/assesment')

In [60]:
from immuneML.util.PathBuilder import PathBuilder


In [61]:
current_path = assesment_data / f"split_{index + 1}"
PathBuilder.build(current_path)




WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/test/tmp/random_repertoire1_dataset_generation/ML_result/assesment/split_1')

In [62]:
from immuneML.hyperparameter_optimization.states.HPAssessmentState import HPAssessmentState


In [63]:
assessment_state = HPAssessmentState(
    index, train_val_datasets[index], test_datasets[index], current_path,label_configuration)


<h3> Evaluate HP settings </h3>

In [64]:
assessment_state

<immuneML.hyperparameter_optimization.states.HPAssessmentState.HPAssessmentState at 0x2b3b4fc1190>

In [65]:
current_path

WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/test/tmp/random_repertoire1_dataset_generation/ML_result/assesment/split_1')

In [None]:
from immuneML.hyperparameter_optimization.config.SplitConfig import SplitConfig


In [69]:
datasplits

{'SequenceLengthDistribution': {'batch_size': 3}}

In [68]:
reportconfig2 = ReportConfig(
    data_splits=datasplits)


In [70]:
split_config_selection = SplitConfig(
    split_strategy=SplitType.RANDOM, split_count=1, training_percentage=.07, reports=reportconfig2)


In [67]:
selection_path = current_path / f"selection_random"
PathBuilder.build(selection_path)

WindowsPath('C:/Users/karth/Desktop/PhD projects/immuneML/immuneML/test/tmp/random_repertoire1_dataset_generation/ML_result/assesment/split_1/selection_random')

In [71]:
split_config_selection.split_strategy

<SplitType.RANDOM: 2>

In [72]:
index

0

In [74]:
train_val_datasets[index]

<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset at 0x2b39c9b0760>

In [None]:
split_config_selection

In [73]:
label_configuration

<immuneML.environment.LabelConfiguration.LabelConfiguration at 0x2b3ac9b5b50>

In [75]:
train_datasets, val_datasets = HPUtil.split_data(
    train_val_datasets[index],split_config_selection , selection_path, label_configuration)


In [76]:
n_labels = label_configuration.get_label_count()

In [77]:
n_labels

1

In [114]:
train_val_datasets[index]

<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset at 0x2b39c9b0760>

In [78]:
label_configuration.get_label_objects()


[<immuneML.environment.Label.Label at 0x2b3ac9b50a0>]

In [80]:
assessment_states = []

In [81]:
assessment_states.append(assessment_state)

In [82]:
assessment_states

[<immuneML.hyperparameter_optimization.states.HPAssessmentState.HPAssessmentState at 0x2b3b4fc1190>]

<h3> HP setting and Strategy </h3>

In [207]:
from immuneML.encodings.kmer_frequency.KmerFrequencyEncoder import KmerFrequencyEncoder
from immuneML.hyperparameter_optimization.HPSetting import HPSetting
from immuneML.hyperparameter_optimization.strategy.GridSearch import GridSearch
from immuneML.ml_methods.LogisticRegression import LogisticRegression

from immuneML.hyperparameter_optimization.HPSetting import HPSetting
from immuneML.encodings.kmer_frequency.KmerFreqRepertoireEncoder import KmerFreqRepertoireEncoder
from immuneML.encodings.kmer_frequency.KmerFrequencyEncoder import KmerFrequencyEncoder

from immuneML.encodings.kmer_frequency.sequence_encoding.SequenceEncodingType import SequenceEncodingType
from immuneML.analysis.data_manipulation.NormalizationType import NormalizationType

from immuneML.hyperparameter_optimization.HPSetting import HPSetting
from immuneML.encodings.kmer_frequency.KmerFreqRepertoireEncoder import KmerFreqRepertoireEncoder
from immuneML.encodings.kmer_frequency.KmerFrequencyEncoder import KmerFrequencyEncoder
from immuneML.util.ReadsType import ReadsType

from immuneML.encodings.kmer_frequency.sequence_encoding.SequenceEncodingType import SequenceEncodingType
from immuneML.analysis.data_manipulation.NormalizationType import NormalizationType


In [208]:
encoder1 = KmerFreqRepertoireEncoder(NormalizationType.RELATIVE_FREQUENCY, ReadsType.UNIQUE, SequenceEncodingType.CONTINUOUS_KMER, 3,
                                    scale_to_zero_mean=True, scale_to_unit_variance=True)

encoder2 = KmerFreqRepertoireEncoder(NormalizationType.RELATIVE_FREQUENCY, ReadsType.UNIQUE, SequenceEncodingType.CONTINUOUS_KMER, 2,
                                            scale_to_zero_mean=True, scale_to_unit_variance=True)


In [248]:
hp_setting1 = HPSetting(encoder1, encoder_params= {"normalization_type": "relative_frequency", "reads": "unique", "sequence_encoding": "continuous_kmer",
                                                   "k": 3, "scale_to_zero_mean": True, "scale_to_unit_variance": True},  encoder_name="e1", ml_method=LogisticRegression(
    {"C": 0.1, "penalty": "l1", "max_iter": 200}), ml_params={"model_selection_cv": False, "model_selection_n_folds": -1}, ml_method_name="simpleLR", preproc_sequence=[])
hp_setting2 = HPSetting(encoder2, encoder_params={"normalization_type": "relative_frequency", "reads": "unique", "sequence_encoding": "continuous_kmer",
                                                  "k": 2, "scale_to_zero_mean": True, "scale_to_unit_variance": True},  encoder_name="e2", ml_method=LogisticRegression(
    {"C": 0.1, "penalty": "l1", "max_iter": 200}), ml_params={"model_selection_cv": False, "model_selection_n_folds": -1}, ml_method_name="simpleLR", preproc_sequence=[])


In [249]:
# hp_settings = [HPSetting(encoder=KmerFreqRepertoireEncoder(reads=ReadsType.UNIQUE, sequence_encoding=SequenceEncodingType.GAPPED_KMER, normalization_type=NormalizationType.RELATIVE_FREQUENCY), encoder_params={"k": 3, "reads": ReadsType.UNIQUE, "sequence_encoding": SequenceEncodingType.GAPPED_KMER, "normalization_type": NormalizationType.RELATIVE_FREQUENCY}, encoder_name="e1", ml_method=LogisticRegression(
#     {"C": 0.1, "penalty": "l1", "max_iter": 200}),
#                          ml_params={ }, ml_method_name="simpleLR",
#                          preproc_sequence=[]),
#                HPSetting(encoder=KmerFrequencyEncoder(), encoder_params={"normalization_type": "relative_frequency", "reads": "unique", "sequence_encoding": "continuous_kmer",
#                                                                        "k": 2, "scale_to_zero_mean": True, "scale_to_unit_variance": True}, encoder_name="e2", ml_method=LogisticRegression(),
#                          ml_params={}, ml_method_name="simpleLR",
#                          preproc_sequence=[])]


hp_settings = [hp_setting1,hp_setting2]
HPStrategy = GridSearch(hp_settings)


{'normalization_type': 'relative_frequency',
 'reads': 'unique',
 'sequence_encoding': 'continuous_kmer',
 'k': 3,
 'scale_to_zero_mean': True,
 'scale_to_unit_variance': True}

In [250]:
from immuneML.hyperparameter_optimization.states.HPSelectionState import HPSelectionState


In [251]:
number_of_processes = 3

context = {'dataset': dataset}


In [252]:
from immuneML.ml_metrics.Metric import Metric
metrics = {Metric.ACCURACY}
optimisatization_metric = Metric.BALANCED_ACCURACY

In [253]:
metrics

{<Metric.ACCURACY: 'accuracy_score'>}

In [254]:
label

<immuneML.environment.Label.Label at 0x2b3ac9b50a0>

In [255]:
index1 = 0

In [241]:

def _set_paths(path):
        if path is None:
            raise RuntimeError(
                "MLProcess: path is not set, stopping execution...")
        ml_details_path = path / "ml_details.yaml"
        ml_score_path = path / "ml_score.csv"
        train_predictions_path = path / "train_predictions.csv"
        test_predictions_path = path / "test_predictions.csv"
        report_path = PathBuilder.build(path / "reports")


In [256]:
ml_details_path = path / "ml_details.yaml"
ml_score_path = path / "ml_score.csv"
train_predictions_path = path / "train_predictions.csv"
test_predictions_path = path / "test_predictions.csv"
report_path = PathBuilder.build(path / "reports")


In [257]:
label

<immuneML.environment.Label.Label at 0x2b3ac9b50a0>

In [258]:
def MLProcess(train_dataset=train_datasets[index1], test_dataset=val_datasets[index1], encoding_reports=split_config_selection.reports.encoding_reports.values(), label_config=LabelConfiguration([label]),report_context=context, number_of_processes=number_of_processes, metrics=metrics,
            optimization_metric=optimisatization_metric, ml_reports=split_config_selection.reports.model_reports.values(), label=label, path=selection_path / f"split_{index1 + 1}" / f"{label.name}_{hp_setting.get_key()}", hp_setting=hp_setting):

    print(f"{datetime.datetime.now()}: Evaluating hyperparameter setting: {hp_setting}...", flush=True)

    PathBuilder.build(path)
    _set_paths(path)
    processed_dataset = HPUtil.preprocess_dataset(train_dataset, hp_setting.preproc_sequence, path / "preprocessed_train_dataset" , report_context) 

    type(processed_dataset)

    encode_train_dataset = HPUtil.encode_dataset(processed_dataset, hp_setting, path /"encoded_datasets", learn_model= True, context = context, number_of_processes=number_of_processes, label_configuration=LabelConfiguration([label]) )

    

In [259]:
path=selection_path / f"split_{index1 + 1}" / f"{label.name}_{hp_setting.get_key()}"

In [260]:
processed_dataset = HPUtil.preprocess_dataset(
    train_datasets[index1], hp_setting.preproc_sequence, path / "preprocessed_train_dataset", context)


In [261]:
hp_setting.preproc_sequence

[]

In [262]:
Data

NameError: name 'Data' is not defined

In [None]:
encode_train_dataset = HPUtil.encode_dataset(processed_dataset, hp_setting, path /"encoded_datasets", learn_model= True, context = context, number_of_processes=number_of_processes, label_configuration=LabelConfiguration([label]) )

2022-05-14 15:52:15.302251: Encoding started...
2022-05-14 15:52:19.202251: Encoding finished.




In [279]:
hp_setting.ml_params = {"model_selection_cv": False,
                        "model_selection_n_folds": 0}


In [280]:
hp_setting.ml_params

{'model_selection_cv': False, 'model_selection_n_folds': 0}

In [281]:
hp_setting.ml_method

<immuneML.ml_methods.LogisticRegression.LogisticRegression at 0x2b3b56339d0>

In [289]:
from immuneML.workflows.steps.MLMethodTrainer import MLMethodTrainer


In [290]:
hp_setting.ml_method

<immuneML.ml_methods.LogisticRegression.LogisticRegression at 0x2b3b7f77340>

In [292]:
from immuneML.workflows.steps.MLMethodTrainerParams import MLMethodTrainerParams


In [294]:
import copy


In [296]:
input_params = MLMethodTrainerParams(
    method=copy.deepcopy(hp_setting.ml_method),
    result_path=path / "ml_method",

    dataset=encode_train_dataset,

    label=label,

    train_predictions_path=train_predictions_path,

    ml_details_path=ml_details_path,

    model_selection_cv=hp_setting.ml_params["model_selection_cv"],

    model_selection_n_folds=hp_setting.ml_params["model_selection_n_folds"],

    cores_for_training=number_of_processes,

    optimization_metric=optimisatization_metric.name.lower()
)


In [305]:
method = copy.deepcopy(input_params.method)

In [308]:
method.result_path = input_params.result_path


In [2]:
method.result_path

NameError: name 'method' is not defined

In [310]:
method.fit(input_params.dataset.encoded_data ,input_params.label,input_params.cores_for_training  )

In [312]:
method.store(input_params.result_path, input_params.dataset.encoded_data.feature_names, input_params.ml_details_path)

In [1]:
method

NameError: name 'method' is not defined

In [315]:
classes = method.get_classes()

In [317]:
classes.__dir__

<function NoneType.__dir__(self, /)>

In [318]:
for cls_index, cls  in enumerate(classes):
    print(cls_index, cls)

TypeError: 'NoneType' object is not iterable

In [298]:
method.result_path = input_params.result_path



<immuneML.workflows.steps.MLMethodTrainerParams.MLMethodTrainerParams at 0x2b3ad4862b0>

In [299]:
input_params.dataset.encoded_data

<immuneML.data_model.encoded_data.EncodedData.EncodedData at 0x2b3b55d41f0>

In [300]:
input_params.label

<immuneML.environment.Label.Label at 0x2b3ac9b50a0>

In [301]:
input_params.cores_for_training

3

In [295]:
method = MLMethodTrainer.run(MLMethodTrainerParams(
    method = copy.deepcopy(hp_setting.ml_method),
    result_path= path / "ml_method" ,

    dataset = encode_train_dataset,

    label = label ,

    train_predictions_path = train_predictions_path,

    ml_details_path = ml_details_path,

    model_selection_cv = hp_setting.ml_params["model_selection_cv"],

    model_selection_n_folds=hp_setting.ml_params["model_selection_n_folds"],

    cores_for_training = number_of_processes,

    optimization_metric= optimisatization_metric.name.lower()
))


2022-05-14 16:20:36.279693: ML model training started...


TypeError: 'NoneType' object is not iterable

In [282]:
method = HPUtil.train_method(label , encode_train_dataset, hp_setting, path ,train_predictions_path, ml_details_path , number_of_processes, optimisatization_metric )

2022-05-14 16:09:50.682230: ML model training started...


TypeError: 'NoneType' object is not iterable

In [263]:
MLProcess()

2022-05-14 16:02:50.752743: Evaluating hyperparameter setting: e1_simpleLR...
2022-05-14 16:02:50.754773: Encoding started...
2022-05-14 16:02:50.790741: Encoding finished.


In [283]:
def evaluate_hp_setting(hp_setting,train_datasets, val_datasets, selection_path, label, index):
    performances = []

    for index1 in range(split_config_selection.split_count):

        hp_item = MLProcess(
            train_dataset=train_datasets[index1], test_dataset=val_datasets[index1], encoding_reports=split_config_selection.reports.encoding_reports.values(), label_config=LabelConfiguration([label]),
            report_context=context, number_of_processes=number_of_processes, metrics=metrics,
            optimization_metric=optimisatization_metric, ml_reports=split_config_selection.reports.model_reports.values(), label=label, path=selection_path / f"split_{index1 + 1}" / f"{label.name}_{hp_setting.get_key()}", hp_setting=hp_setting).run(index1 + 1)

        assessment_states[index].label_states[label.name].selection_state.hp_items[hp_setting.get_key(
        )].append(hp_item)

        performance = hp_item.performance[optimisatization_metric.name.lower(
        )] if hp_item.performance is not None else None

        performance.append(performance)

    return HPUtil.get_average_performance(performances)


In [284]:
from immuneML.reports.ReportUtil import ReportUtil


In [285]:
def run_selection_reports(dataset, train_datasets: list, val_datasets: list, selection_state: HPSelectionState):
    path = selection_state.path
    data_split_reports = split_config_selection.reports.data_split_reports.values()
    for index in range(len(train_datasets)):
        split_reports_path = path / f"split_{index + 1}"

        selection_state.train_data_reports += ReportUtil.run_data_reports(train_datasets[index], data_split_reports,
                                                                             split_reports_path / "data_reports_train",
                                                                             number_of_processes, context)
        selection_state.val_data_reports += ReportUtil.run_data_reports(val_datasets[index], data_split_reports,
                                                                            split_reports_path / "data_reports_test",
                                                                            number_of_processes, context)

    data_reports = split_config_selection.reports.data_reports.values()
    selection_state.data_reports = ReportUtil.run_data_reports(dataset, data_reports, path / "reports",
                                                                number_of_processes, context)


In [286]:
import datetime


In [287]:
index

0

In [288]:
for idx, label in enumerate(label_configuration.get_label_objects()):

       print(f"{datetime.datetime.now()}: Hyperparameter optimization: running the inner loop of nested CV: selection for label {label.name} "
              f"(label {idx + 1} / {n_labels}).\n", flush=True)

       selection_state = HPSelectionState(train_datasets, val_datasets, selection_path, HPStrategy)
       assessment_states[index].label_states[label.name].selection_state = selection_state

       hp_setting = selection_state.hp_strategy.generate_next_setting()
       while hp_setting is not None:
            performance = evaluate_hp_setting( hp_setting, train_datasets, val_datasets, selection_path, label, index)
            hp_setting = selection_state.hp_strategy.generate_next_setting(
                    hp_setting, performance)

       run_selection_reports(
           train_val_datasets[index], train_datasets, val_datasets, selection_state)

       print(f"{datetime.datetime.now()}: Hyperparameter optimization: running the inner loop of nested CV: completed selection for "
                  f"label {label.name} (label {idx + 1} / {n_labels}).\n", flush=True)


2022-05-14 16:10:45.793016: Hyperparameter optimization: running the inner loop of nested CV: selection for label my_signal (label 1 / 1).

2022-05-14 16:10:45.796015: Evaluating hyperparameter setting: e1_simpleLR...
2022-05-14 16:10:45.798011: Encoding started...
2022-05-14 16:10:45.841013: Encoding finished.


AttributeError: 'NoneType' object has no attribute 'run'