In [46]:
from typing import List
import yaml
from MDRMF import Dataset, MoleculeLoader, Featurizer

class Experimenter:

    def __init__(self, config_file: str):
        self.config_file = config_file
        self.experiments = self._load_config()
    
    def _load_config(self) -> List[dict]:
        with open(self.config_file, 'r') as stream:
            try:
                config = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                print(exc)
                return []

        # If there is only one experiment, make it into a list
        if isinstance(config, dict):
            config = [config]

        return [config]
    
    def conduct_all_experiments(self):
        for config in self.experiments:
            if 'Experiment' in config:
                exp_config = config['Experiment']
                self.conduct_experiment(exp_config)
    
    def conduct_experiment(self, exp_config: dict):
        # If there is a dataset use this
        if 'dataset' in exp_config:
            dataset_file = exp_config['dataset']
            dataset_model = Dataset.load(dataset_file)
            dataset_eval = Dataset.load(dataset_file)
        elif 'data' in exp_config:
            data_conf = exp_config['data']

            datafile = data_conf['datafile']
            SMILES = data_conf['SMILES_col']
            scores = data_conf['scores_col']
            ids = data_conf['ids_col']

            data = MoleculeLoader(datafile, SMILES, scores)
            feat = Featurizer(data)
            

In [47]:
exp = Experimenter("test.yaml")

In [49]:
exp.experiments

[[{'Experiment': {'name': 'Exp01',
    'data': {'datafile': 'data10k.csv',
     'SMILES': 'SMILES',
     'scores': 'r_i_docking_score',
     'ids': 'SMILES'},
    'featurizer': {'name': 'morgan', 'nBits': 512, 'radius': 2},
    'model': {'name': 'RFModeller',
     'iterations': 60,
     'initial_sample_size': 30,
     'acquisition_size': 30,
     'acquisition_method': 'greedy'},
    'metrics': {'names': ['top-k', 'R2_k'], 'k': [100, 50]},
    'repeat': 5}},
  {'Experiment': {'name': 'Exp02',
    'dataset': 'dataset.pkl',
    'model': {'name': 'RFModel',
     'iterations': 90,
     'initial_sample_size': 20,
     'acquisition_size': 20,
     'acquisition_method': 'greedy'},
    'metrics': {'names': ['top-k', 'R2_k'], 'k': [100, 50]},
    'repeat': 5}},
  {'Dataset': {'name': 'dataset01',
    'featurizer': {'name': 'morgan', 'nBits': 256, 'radius': 2}}}]]